1 //
    2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
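// As a worked reading of the format above (illustrative only): the EBX entry
// below says the register allocator treats EBX as save-on-call, the C calling
// convention treats it as save-on-entry, it spills as an Op_RegI, and its
// hardware encoding is 3:
//   reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());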
   61 
   62 // General Registers
   63 // EBX, ESI, and EDI were previously save-on-entry for Java code, but
   64 // SOE was turned off in Java code due to frequent use of uncommon traps.
   65 // Now that the allocator is better, ESI and EDI are SOE registers again.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   81 // Now here's the trick: FPR1 is really st(0), except in the midst of
   82 // emitting assembly for a machnode. During emission the FPU stack is
   83 // pushed, making FPR1 == st(1) temporarily. However, at any safepoint
   84 // the stack will not have this extra element, so FPR1 == st(0) from the
   85 // oopMap viewpoint. This numbering quirk forces the instruction encoding
   86 // to play games with the register encode to correct for the 0/1 issue.
   87 // See MachSpillCopyNode::implementation, where it does flt->flt moves,
   88 // for an example.
   89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
  143 // Dynamic register class that selects at runtime between register classes
  144 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
  145 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
  208 // Class of integer register pairs (excluding EBP and EDI)
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Class of integer register pairs that aligns with calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  220 // Not AX or DX (nor EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  222 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
  225 // Floating point registers.  Notice FPR0 is not a choice.
  226 // FPR0 is not ever allocated; we use clever encodings to fake
  227 // 2-address instructions out of Intel's FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
  247 // Must be visible to the DFA in dfa_x86_32.cpp
  248 extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
  252 #define   RELOC_IMM32    Assembler::imm_operand
  253 #define   RELOC_DISP32   Assembler::disp32_operand
  254 
  255 #define __ _masm.
  256 
  257 // How to find the high register of a Long pair, given the low register
  258 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
  259 #define   HIGH_FROM_LOW_ENC(x) ((x)+2)
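// Illustrative check against the Long pairing noted in the register block
// above (EDX:EAX, EBX:ECX, EDI:EBP): each high half is encoded two above its
// low half, so HIGH_FROM_LOW_ENC(0 /*EAX*/) == 2 /*EDX*/, 1 /*ECX*/ -> 3
// /*EBX*/, and 5 /*EBP*/ -> 7 /*EDI*/.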
  260 
  261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  262 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  263 // fast versions of NegF/NegD and AbsF/AbsD.
  264 
  265 void reg_mask_init() {}
  266 
  267 // Note: 'double' and 'long long' have 32-bit alignment on x86.
  268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  269   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  270   // for the 128-bit operands of SSE instructions.
  271   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  272   // Store the value into a 128-bit operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
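// A small arithmetic sketch (hypothetical address): if adr were 0x1008, then
// (adr & ~0xF) == 0x1000, so the lo/hi pair is written at a 16-byte boundary
// somewhere inside the over-allocated pool below rather than exactly at adr.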
  277 
  278 // Buffer for 128-bit masks used by SSE instructions.
  279 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  280 
  281 // Static initialization during VM startup.
  282 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  284 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
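// Sketch of how the pools are used (the real selection happens in the
// instruct definitions later in this file): AbsF/AbsD AND the value with a
// signmask pool entry to clear the sign bit, while NegF/NegD XOR with a
// signflip pool entry to toggle it, as the comment above reg_mask_init notes.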
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
  299 
  300 // !!!!! Special hack to get all types of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
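// These constants line up with the encodings assumed elsewhere in this file:
// a direct call is 5 bytes (opcode plus 32-bit displacement), and the dynamic
// variant is preceded by a 5-byte MOV (the same MOV that
// CallDynamicJavaDirectNode::compute_padding skips below).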
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned to
  323 // ensure that it does not span a cache line so that it can be patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned to
  331 // ensure that it does not span a cache line so that it can be patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
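// Worked example, assuming alignment_required() == 4 as the comment implies:
// with no FP/vzeroupper resets pending, an incoming offset of 10 gives
// 10 + 5 + 1 == 16, already aligned, so zero padding; an offset of 12 gives
// align_up(18, 4) - 18 == 2 bytes of padding.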
  338 
  339 // EMIT_RM()
  340 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   cbuf.insts()->emit_int8(c);
  343 }
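// The three fields pack into an x86 ModRM (or SIB) byte: f1 is the 2-bit mode
// (or scale), f2 the reg/opcode-extension (or index), and f3 the r/m (or
// base).  For example, emit_rm(cbuf, 0x3, dst, src) produces the
// register-direct form used by encode_Copy further down.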
  344 
  345 // EMIT_CC()
  346 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   cbuf.insts()->emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(CodeBuffer &cbuf, int code) {
  353   cbuf.insts()->emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  358   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  359   emit_opcode(cbuf, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(CodeBuffer &cbuf, int d8) {
  364   cbuf.insts()->emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(CodeBuffer &cbuf, int d16) {
  369   cbuf.insts()->emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(CodeBuffer &cbuf, int d32) {
  374   cbuf.insts()->emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  381   cbuf.insts()->emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  393   cbuf.insts()->emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  398   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  402     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  403   } else {
  404     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  406     emit_d32(cbuf, disp);     // Displacement  // R/M byte
  407   }
  408 }
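// Passing ESP_enc (0x4) as the r/m field in the first emit_rm selects a SIB
// byte, which is mandatory whenever ESP is the base; the second emit_rm then
// supplies that SIB with ESP as base, no index, and scale 0, before the 8- or
// 32-bit displacement.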
  409 
  410    // rRegI ereg, memory mem) %{    // emit_reg_mem
  411 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  412   // There is no index & no scale, use form without SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(cbuf, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(cbuf, 0x1, reg_encoding, base);
  423         emit_d8(cbuf, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (cbuf, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(cbuf, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (cbuf, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  450       emit_rm(cbuf, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  456         emit_rm(cbuf, scale, index, base);
  457         emit_d8(cbuf, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  462           emit_rm(cbuf, scale, index, 0x04);
  463         } else {
  464           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  465           emit_rm(cbuf, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (cbuf, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
  476 
  477 
  478 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( cbuf, 0x8B );
  483     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
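// For instance, encode_Copy(cbuf, EAX_enc, ECX_enc) emits 0x8B 0xC1, i.e.
// MOV EAX, ECX, while a self-copy intentionally emits nothing at all.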
  486 
  487 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss instructions set ZF,PF,CF flags and
  493   // zero OF,AF,SF for NaN values.
  494   // Fixup flags by zeroing ZF,PF so that compare of NaN
  495   // values returns 'less than' result (CF is set).
  496   // Leave the rest of flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
  506 
  507 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
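// Net effect (informal summary): dst ends up -1 for 'less than' or unordered
// (parity set), 0 for equal, and 1 for greater, giving the three-way compare
// result its callers expect.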
  516 
  517 
  518 //=============================================================================
  519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612   C2_MacroAssembler _masm(&cbuf);
  613 
  614   int framesize = C->output()->frame_size_in_bytes();
  615   int bangsize = C->output()->bang_size_in_bytes();
  616 
  617   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
  618 
  619   C->output()->set_frame_complete(cbuf.insts_size());
  620 
  621   if (C->has_mach_constant_base_node()) {
  622     // NOTE: We set the table base offset here because users might be
  623     // emitted before MachConstantBaseNode.
  624     ConstantTable& constant_table = C->output()->constant_table();
  625     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  626   }
  627 }
  628 
  629 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  630   return MachNode::size(ra_); // too many variables; just compute it the hard way
  631 }
  632 
  633 int MachPrologNode::reloc() const {
  634   return 0; // a large enough number
  635 }
  636 
  637 //=============================================================================
  638 #ifndef PRODUCT
  639 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  640   Compile *C = ra_->C;
  641   int framesize = C->output()->frame_size_in_bytes();
  642   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  643   // Remove two words for return addr and rbp,
  644   framesize -= 2*wordSize;
  645 
  646   if (C->max_vector_size() > 16) {
  647     st->print("VZEROUPPER");
  648     st->cr(); st->print("\t");
  649   }
  650   if (C->in_24_bit_fp_mode()) {
  651     st->print("FLDCW  standard control word");
  652     st->cr(); st->print("\t");
  653   }
  654   if (framesize) {
  655     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  656     st->cr(); st->print("\t");
  657   }
  658   st->print_cr("POPL   EBP"); st->print("\t");
  659   if (do_polling() && C->is_method_compilation()) {
  660     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  661               "JA      #safepoint_stub\t"
  662               "# Safepoint: poll for GC");
  663   }
  664 }
  665 #endif
  666 
  667 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  668   Compile *C = ra_->C;
  669   MacroAssembler _masm(&cbuf);
  670 
  671   if (C->max_vector_size() > 16) {
  672     // Clear upper bits of YMM registers when current compiled code uses
  673     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  674     _masm.vzeroupper();
  675   }
  676   // If method set FPU control word, restore to standard control word
  677   if (C->in_24_bit_fp_mode()) {
  678     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  679   }
  680 
  681   int framesize = C->output()->frame_size_in_bytes();
  682   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  683   // Remove two words for return addr and rbp,
  684   framesize -= 2*wordSize;
  685 
  686   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  687 
  688   if (framesize >= 128) {
  689     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  690     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  691     emit_d32(cbuf, framesize);
  692   } else if (framesize) {
  693     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  695     emit_d8(cbuf, framesize);
  696   }
  697 
  698   emit_opcode(cbuf, 0x58 | EBP_enc);
  699 
  700   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  701     __ reserved_stack_check();
  702   }
  703 
  704   if (do_polling() && C->is_method_compilation()) {
  705     Register thread = as_Register(EBX_enc);
  706     MacroAssembler masm(&cbuf);
  707     __ get_thread(thread);
  708     Label dummy_label;
  709     Label* code_stub = &dummy_label;
  710     if (!C->output()->in_scratch_emit_size()) {
  711       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  712       C->output()->add_stub(stub);
  713       code_stub = &stub->entry();
  714     }
  715     __ relocate(relocInfo::poll_return_type);
  716     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  717   }
  718 }
  719 
  720 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  721   return MachNode::size(ra_); // too many variables; just compute it
  722                               // the hard way
  723 }
  724 
  725 int MachEpilogNode::reloc() const {
  726   return 0; // a large enough number
  727 }
  728 
  729 const Pipeline * MachEpilogNode::pipeline() const {
  730   return MachNode::pipeline_class();
  731 }
  732 
  733 //=============================================================================
  734 
  735 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  736 static enum RC rc_class( OptoReg::Name reg ) {
  737 
  738   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  739   if (OptoReg::is_stack(reg)) return rc_stack;
  740 
  741   VMReg r = OptoReg::as_VMReg(reg);
  742   if (r->is_Register()) return rc_int;
  743   if (r->is_FloatRegister()) {
  744     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  745     return rc_float;
  746   }
  747   if (r->is_KRegister()) return rc_kreg;
  748   assert(r->is_XMMRegister(), "must be");
  749   return rc_xmm;
  750 }
  751 
  752 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  753                         int opcode, const char *op_str, int size, outputStream* st ) {
  754   if( cbuf ) {
  755     emit_opcode  (*cbuf, opcode );
  756     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  757 #ifndef PRODUCT
  758   } else if( !do_size ) {
  759     if( size != 0 ) st->print("\n\t");
  760     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  761       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  762       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  763     } else { // FLD, FST, PUSH, POP
  764       st->print("%s [ESP + #%d]",op_str,offset);
  765     }
  766 #endif
  767   }
  768   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  769   return size+3+offset_size;
  770 }
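// Size accounting: the constant 3 covers opcode + ModRM + SIB (ESP-based
// addressing always needs a SIB byte), and offset_size adds 0, 1, or 4
// displacement bytes, mirroring what encode_RegMem actually emits above.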
  771 
  772 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  773 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  774                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  775   int in_size_in_bits = Assembler::EVEX_32bit;
  776   int evex_encoding = 0;
  777   if (reg_lo+1 == reg_hi) {
  778     in_size_in_bits = Assembler::EVEX_64bit;
  779     evex_encoding = Assembler::VEX_W;
  780   }
  781   if (cbuf) {
  782     MacroAssembler _masm(cbuf);
  783     // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
  784     //                          since it maps more cases to a single-byte displacement
  785     _masm.set_managed();
  786     if (reg_lo+1 == reg_hi) { // double move?
  787       if (is_load) {
  788         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  789       } else {
  790         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  791       }
  792     } else {
  793       if (is_load) {
  794         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  795       } else {
  796         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  797       }
  798     }
  799 #ifndef PRODUCT
  800   } else if (!do_size) {
  801     if (size != 0) st->print("\n\t");
  802     if (reg_lo+1 == reg_hi) { // double move?
  803       if (is_load) st->print("%s %s,[ESP + #%d]",
  804                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  805                               Matcher::regName[reg_lo], offset);
  806       else         st->print("MOVSD  [ESP + #%d],%s",
  807                               offset, Matcher::regName[reg_lo]);
  808     } else {
  809       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  810                               Matcher::regName[reg_lo], offset);
  811       else         st->print("MOVSS  [ESP + #%d],%s",
  812                               offset, Matcher::regName[reg_lo]);
  813     }
  814 #endif
  815   }
  816   bool is_single_byte = false;
  817   if ((UseAVX > 2) && (offset != 0)) {
  818     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  819   }
  820   int offset_size = 0;
  821   if (UseAVX > 2 ) {
  822     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  823   } else {
  824     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  825   }
  826   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  827   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  828   return size+5+offset_size;
  829 }
  830 
  831 
  832 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  833                             int src_hi, int dst_hi, int size, outputStream* st ) {
  834   if (cbuf) {
  835     MacroAssembler _masm(cbuf);
  836     // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX, and AVX, so manage EVEX spill code one way.
  837     _masm.set_managed();
  838     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  839       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  840                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  841     } else {
  842       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  843                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  844     }
  845 #ifndef PRODUCT
  846   } else if (!do_size) {
  847     if (size != 0) st->print("\n\t");
  848     if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
  849       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  850         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  851       } else {
  852         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  853       }
  854     } else {
  855       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  856         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  857       } else {
  858         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  859       }
  860     }
  861 #endif
  862   }
  863   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  864   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  865   int sz = (UseAVX > 2) ? 6 : 4;
  866   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  867       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  868   return size + sz;
  869 }
  870 
  871 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  872                             int src_hi, int dst_hi, int size, outputStream* st ) {
  873   // 32-bit
  874   if (cbuf) {
  875     MacroAssembler _masm(cbuf);
  876     // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX, and AVX, so manage EVEX spill code one way.
  877     _masm.set_managed();
  878     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  879              as_Register(Matcher::_regEncode[src_lo]));
  880 #ifndef PRODUCT
  881   } else if (!do_size) {
  882     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  883 #endif
  884   }
  885   return (UseAVX> 2) ? 6 : 4;
  886 }
  887 
  888 
  889 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  890                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  891   // 32-bit
  892   if (cbuf) {
  893     MacroAssembler _masm(cbuf);
  894     // EVEX spills remain EVEX: the logic is complex across full EVEX, partial EVEX, and AVX, so manage EVEX spill code one way.
  895     _masm.set_managed();
  896     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  897              as_XMMRegister(Matcher::_regEncode[src_lo]));
  898 #ifndef PRODUCT
  899   } else if (!do_size) {
  900     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  901 #endif
  902   }
  903   return (UseAVX> 2) ? 6 : 4;
  904 }
  905 
  906 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  907   if( cbuf ) {
  908     emit_opcode(*cbuf, 0x8B );
  909     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  910 #ifndef PRODUCT
  911   } else if( !do_size ) {
  912     if( size != 0 ) st->print("\n\t");
  913     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  914 #endif
  915   }
  916   return size+2;
  917 }
  918 
  919 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  920                                  int offset, int size, outputStream* st ) {
  921   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  922     if( cbuf ) {
  923       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  924       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  925 #ifndef PRODUCT
  926     } else if( !do_size ) {
  927       if( size != 0 ) st->print("\n\t");
  928       st->print("FLD    %s",Matcher::regName[src_lo]);
  929 #endif
  930     }
  931     size += 2;
  932   }
  933 
  934   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  935   const char *op_str;
  936   int op;
  937   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  938     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  939     op = 0xDD;
  940   } else {                   // 32-bit store
  941     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  942     op = 0xD9;
  943     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  944   }
  945 
  946   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  947 }
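// Note the trick in st_op above: EBX encodes as 3 and EDX as 2, which are
// exactly the /3 (store-and-pop) and /2 (store) opcode-extension digits that
// the 0xD9/0xDD escape bytes expect, so impl_helper can treat the digit as a
// "register" encoding.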
  948 
  949 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  950 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  951                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  952 
  953 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  954                             int stack_offset, int reg, uint ireg, outputStream* st);
  955 
  956 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  957                                      int dst_offset, uint ireg, outputStream* st) {
  958   if (cbuf) {
  959     MacroAssembler _masm(cbuf);
  960     switch (ireg) {
  961     case Op_VecS:
  962       __ pushl(Address(rsp, src_offset));
  963       __ popl (Address(rsp, dst_offset));
  964       break;
  965     case Op_VecD:
  966       __ pushl(Address(rsp, src_offset));
  967       __ popl (Address(rsp, dst_offset));
  968       __ pushl(Address(rsp, src_offset+4));
  969       __ popl (Address(rsp, dst_offset+4));
  970       break;
  971     case Op_VecX:
  972       __ movdqu(Address(rsp, -16), xmm0);
  973       __ movdqu(xmm0, Address(rsp, src_offset));
  974       __ movdqu(Address(rsp, dst_offset), xmm0);
  975       __ movdqu(xmm0, Address(rsp, -16));
  976       break;
  977     case Op_VecY:
  978       __ vmovdqu(Address(rsp, -32), xmm0);
  979       __ vmovdqu(xmm0, Address(rsp, src_offset));
  980       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  981       __ vmovdqu(xmm0, Address(rsp, -32));
  982       break;
  983     case Op_VecZ:
  984       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  985       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  986       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  987       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  988       break;
  989     default:
  990       ShouldNotReachHere();
  991     }
  992 #ifndef PRODUCT
  993   } else {
  994     switch (ireg) {
  995     case Op_VecS:
  996       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  997                 "popl    [rsp + #%d]",
  998                 src_offset, dst_offset);
  999       break;
 1000     case Op_VecD:
 1001       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 1002                 "popl    [rsp + #%d]\n\t"
 1003                 "pushl   [rsp + #%d]\n\t"
 1004                 "popl    [rsp + #%d]",
 1005                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1006       break;
 1007      case Op_VecX:
 1008       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1009                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1010                 "movdqu  [rsp + #%d], xmm0\n\t"
 1011                 "movdqu  xmm0, [rsp - #16]",
 1012                 src_offset, dst_offset);
 1013       break;
 1014     case Op_VecY:
 1015       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1016                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1017                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1018                 "vmovdqu xmm0, [rsp - #32]",
 1019                 src_offset, dst_offset);
 1020       break;
 1021     case Op_VecZ:
 1022       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1023                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1024                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1025                 "vmovdqu xmm0, [rsp - #64]",
 1026                 src_offset, dst_offset);
 1027       break;
 1028     default:
 1029       ShouldNotReachHere();
 1030     }
 1031 #endif
 1032   }
 1033 }
 1034 
 1035 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1036   // Get registers to move
 1037   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1038   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1039   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1040   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1041 
 1042   enum RC src_second_rc = rc_class(src_second);
 1043   enum RC src_first_rc = rc_class(src_first);
 1044   enum RC dst_second_rc = rc_class(dst_second);
 1045   enum RC dst_first_rc = rc_class(dst_first);
 1046 
 1047   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1048 
 1049   // Generate spill code!
 1050   int size = 0;
 1051 
 1052   if( src_first == dst_first && src_second == dst_second )
 1053     return size;            // Self copy, no move
 1054 
 1055   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1056     uint ireg = ideal_reg();
 1057     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1058     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1059     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1060     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1061       // mem -> mem
 1062       int src_offset = ra_->reg2offset(src_first);
 1063       int dst_offset = ra_->reg2offset(dst_first);
 1064       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1065     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1066       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1067     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1068       int stack_offset = ra_->reg2offset(dst_first);
 1069       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1070     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1071       int stack_offset = ra_->reg2offset(src_first);
 1072       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1073     } else {
 1074       ShouldNotReachHere();
 1075     }
 1076     return 0;
 1077   }
 1078 
 1079   // --------------------------------------
 1080   // Check for mem-mem move.  push/pop to move.
 1081   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1082     if( src_second == dst_first ) { // overlapping stack copy ranges
 1083       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1084       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1085       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1086       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1087     }
 1088     // move low bits
 1089     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1090     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1091     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1092       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1093       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1094     }
 1095     return size;
 1096   }
 1097 
 1098   // --------------------------------------
 1099   // Check for integer reg-reg copy
 1100   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1101     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1102 
 1103   // Check for integer store
 1104   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1105     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1106 
 1107   // Check for integer load
 1108   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1109     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1110 
 1111   // Check for integer reg-xmm reg copy
 1112   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1113     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1114             "no 64 bit integer-float reg moves" );
 1115     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1116   }
 1117   // --------------------------------------
 1118   // Check for float reg-reg copy
 1119   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1120     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1121             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1122     if( cbuf ) {
 1123 
 1124       // Note the mucking with the register encode to compensate for the 0/1
 1125       // indexing issue mentioned in a comment in the reg_def sections
 1126       // for FPR registers many lines above here.
 1127 
 1128       if( src_first != FPR1L_num ) {
 1129         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1130         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1131         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1132         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1133      } else {
 1134         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1135         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1136      }
 1137 #ifndef PRODUCT
 1138     } else if( !do_size ) {
 1139       if( size != 0 ) st->print("\n\t");
 1140       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1141       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1142 #endif
 1143     }
 1144     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1145   }
 1146 
 1147   // Check for float store
 1148   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1149     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1150   }
 1151 
 1152   // Check for float load
 1153   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1154     int offset = ra_->reg2offset(src_first);
 1155     const char *op_str;
 1156     int op;
 1157     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1158       op_str = "FLD_D";
 1159       op = 0xDD;
 1160     } else {                   // 32-bit load
 1161       op_str = "FLD_S";
 1162       op = 0xD9;
 1163       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1164     }
 1165     if( cbuf ) {
 1166       emit_opcode  (*cbuf, op );
 1167       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1168       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1169       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1170 #ifndef PRODUCT
 1171     } else if( !do_size ) {
 1172       if( size != 0 ) st->print("\n\t");
 1173       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1174 #endif
 1175     }
 1176     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1177     return size + 3+offset_size+2;
 1178   }
 1179 
 1180   // Check for xmm reg-reg copy
 1181   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1182     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1183             (src_first+1 == src_second && dst_first+1 == dst_second),
 1184             "no non-adjacent float-moves" );
 1185     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1186   }
 1187 
 1188   // Check for xmm reg-integer reg copy
 1189   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1190     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1191             "no 64 bit float-integer reg moves" );
 1192     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1193   }
 1194 
 1195   // Check for xmm store
 1196   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1197     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1198   }
 1199 
 1200   // Check for float xmm load
 1201   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1202     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1203   }
 1204 
 1205   // Copy from float reg to xmm reg
 1206   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1207     // copy to the top of stack from floating point reg
 1208     // and use LEA to preserve flags
 1209     if( cbuf ) {
 1210       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1211       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1212       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1213       emit_d8(*cbuf,0xF8);
 1214 #ifndef PRODUCT
 1215     } else if( !do_size ) {
 1216       if( size != 0 ) st->print("\n\t");
 1217       st->print("LEA    ESP,[ESP-8]");
 1218 #endif
 1219     }
 1220     size += 4;
 1221 
 1222     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1223 
 1224     // Copy from the temp memory to the xmm reg.
 1225     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1226 
 1227     if( cbuf ) {
 1228       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1229       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1230       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1231       emit_d8(*cbuf,0x08);
 1232 #ifndef PRODUCT
 1233     } else if( !do_size ) {
 1234       if( size != 0 ) st->print("\n\t");
 1235       st->print("LEA    ESP,[ESP+8]");
 1236 #endif
 1237     }
 1238     size += 4;
 1239     return size;
 1240   }
 1241 
 1242   // AVX-512 opmask specific spilling.
 1243   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1244     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1245     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1246     int offset = ra_->reg2offset(src_first);
 1247     if (cbuf != nullptr) {
 1248       MacroAssembler _masm(cbuf);
 1249       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1250 #ifndef PRODUCT
 1251     } else {
 1252       st->print("KMOV    %s, [ESP + %d]", Matcher::regName[dst_first], offset);
 1253 #endif
 1254     }
 1255     return 0;
 1256   }
 1257 
 1258   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1259     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1260     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1261     int offset = ra_->reg2offset(dst_first);
 1262     if (cbuf != nullptr) {
 1263       MacroAssembler _masm(cbuf);
 1264       __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1265 #ifndef PRODUCT
 1266     } else {
 1267       st->print("KMOV    [ESP + %d], %s", offset, Matcher::regName[src_first]);
 1268 #endif
 1269     }
 1270     return 0;
 1271   }
 1272 
 1273   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1274     Unimplemented();
 1275     return 0;
 1276   }
 1277 
 1278   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1279     Unimplemented();
 1280     return 0;
 1281   }
 1282 
 1283   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1284     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1285     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1286     if (cbuf != nullptr) {
 1287       MacroAssembler _masm(cbuf);
 1288       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1289 #ifndef PRODUCT
 1290     } else {
 1291       st->print("KMOV    %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
 1292 #endif
 1293     }
 1294     return 0;
 1295   }
 1296 
 1297   assert( size > 0, "missed a case" );
 1298 
 1299   // --------------------------------------------------------------------
 1300   // Check for second bits still needing moving.
 1301   if( src_second == dst_second )
 1302     return size;               // Self copy; no move
 1303   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1304 
 1305   // Check for second word int-int move
 1306   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1307     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1308 
 1309   // Check for second word integer store
 1310   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1311     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1312 
 1313   // Check for second word integer load
 1314   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1315     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1316 
 1317   Unimplemented();
 1318   return 0; // Mute compiler
 1319 }
 1320 
 1321 #ifndef PRODUCT
 1322 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1323   implementation( NULL, ra_, false, st );
 1324 }
 1325 #endif
 1326 
 1327 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1328   implementation( &cbuf, ra_, false, NULL );
 1329 }
 1330 
 1331 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1332   return MachNode::size(ra_);
 1333 }
 1334 
 1335 
 1336 //=============================================================================
 1337 #ifndef PRODUCT
 1338 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1339   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1340   int reg = ra_->get_reg_first(this);
 1341   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1342 }
 1343 #endif
 1344 
 1345 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1346   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1347   int reg = ra_->get_encode(this);
 1348   if( offset >= 128 ) {
 1349     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1350     emit_rm(cbuf, 0x2, reg, 0x04);
 1351     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1352     emit_d32(cbuf, offset);
 1353   }
 1354   else {
 1355     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1356     emit_rm(cbuf, 0x1, reg, 0x04);
 1357     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1358     emit_d8(cbuf, offset);
 1359   }
 1360 }
 1361 
 1362 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1363   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1364   if( offset >= 128 ) {
 1365     return 7;
 1366   }
 1367   else {
 1368     return 4;
 1369   }
 1370 }
 1371 
 1372 //=============================================================================
 1373 #ifndef PRODUCT
 1374 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1375   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1376   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1377   st->print_cr("\tNOP");
 1378   st->print_cr("\tNOP");
 1379   if( !OptoBreakpoint )
 1380     st->print_cr("\tNOP");
 1381 }
 1382 #endif
 1383 
 1384 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1385   MacroAssembler masm(&cbuf);
 1386 #ifdef ASSERT
 1387   uint insts_size = cbuf.insts_size();
 1388 #endif
 1389   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1390   masm.jump_cc(Assembler::notEqual,
 1391                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1392   /* WARNING these NOPs are critical so that verified entry point is properly
 1393      aligned for patching by NativeJump::patch_verified_entry() */
 1394   int nops_cnt = 2;
 1395   if( !OptoBreakpoint ) // Leave space for int3
 1396      nops_cnt += 1;
 1397   masm.nop(nops_cnt);
 1398 
 1399   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1400 }
 1401 
 1402 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1403   return OptoBreakpoint ? 11 : 12;
 1404 }
 1405 
 1406 
 1407 //=============================================================================
 1408 
 1409 // Vector calling convention not supported.
 1410 const bool Matcher::supports_vector_calling_convention() {
 1411   return false;
 1412 }
 1413 
 1414 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1415   Unimplemented();
 1416   return OptoRegPair(0, 0);
 1417 }
 1418 
 1419 // Is this branch offset short enough that a short branch can be used?
 1420 //
 1421 // NOTE: If the platform does not provide any short branch variants, then
 1422 //       this method should return false for offset 0.
 1423 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 1427   offset -= br_size;
 1428 
  // The short version of jmpConUCF2 contains multiple branches,
  // which slightly reduces its reach.
 1431   if (rule == jmpConUCF2_rule)
 1432     return (-126 <= offset && offset <= 125);
 1433   return (-128 <= offset && offset <= 127);
 1434 }
 1435 
 1436 // Return whether or not this register is ever used as an argument.  This
 1437 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1438 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1440 bool Matcher::can_be_java_arg( int reg ) {
 1441   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1442   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1443   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1444   return false;
 1445 }
 1446 
 1447 bool Matcher::is_spillable_arg( int reg ) {
 1448   return can_be_java_arg(reg);
 1449 }
 1450 
 1451 uint Matcher::int_pressure_limit()
 1452 {
 1453   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1454 }
 1455 
 1456 uint Matcher::float_pressure_limit()
 1457 {
 1458   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1459 }
 1460 
 1461 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when it is faster than
  // the multiply-based code sequence, and only when the constant divisor
  // fits into 32 bits (min_jint is excluded because negating it does not
  // yield a valid positive 32-bit value).
 1467   return VM_Version::has_fast_idiv() &&
 1468          (divisor == (int)divisor && divisor != min_jint);
 1469 }
 1470 
 1471 // Register for DIVI projection of divmodI
 1472 RegMask Matcher::divI_proj_mask() {
 1473   return EAX_REG_mask();
 1474 }
 1475 
 1476 // Register for MODI projection of divmodI
 1477 RegMask Matcher::modI_proj_mask() {
 1478   return EDX_REG_mask();
 1479 }
 1480 
 1481 // Register for DIVL projection of divmodL
 1482 RegMask Matcher::divL_proj_mask() {
 1483   ShouldNotReachHere();
 1484   return RegMask();
 1485 }
 1486 
 1487 // Register for MODL projection of divmodL
 1488 RegMask Matcher::modL_proj_mask() {
 1489   ShouldNotReachHere();
 1490   return RegMask();
 1491 }
 1492 
 1493 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1494   return NO_REG_mask();
 1495 }
 1496 
// Returns true if the high 32 bits of the value are known to be zero.
 1498 bool is_operand_hi32_zero(Node* n) {
 1499   int opc = n->Opcode();
 1500   if (opc == Op_AndL) {
 1501     Node* o2 = n->in(2);
 1502     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1503       return true;
 1504     }
 1505   }
 1506   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1507     return true;
 1508   }
 1509   return false;
 1510 }
 1511 
 1512 %}
 1513 
 1514 //----------ENCODING BLOCK-----------------------------------------------------
 1515 // This block specifies the encoding classes used by the compiler to output
 1516 // byte streams.  Encoding classes generate functions which are called by
 1517 // Machine Instruction Nodes in order to generate the bit encoding of the
 1518 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1521 // operand to generate a function which returns its register number when
 1522 // queried.   CONST_INTER causes an operand to generate a function which
 1523 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1524 // operand to generate four functions which return the Base Register, the
 1525 // Index Register, the Scale Value, and the Offset Value of the operand when
 1526 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
 1528 // associated with each basic boolean condition for a conditional instruction.
 1529 // Instructions specify two basic values for encoding.  They use the
 1530 // ins_encode keyword to specify their encoding class (which must be one of
 1531 // the class names specified in the encoding block), and they use the
 1532 // opcode keyword to specify, in order, their primary, secondary, and
 1533 // tertiary opcode.  Only the opcode sections which a particular instruction
 1534 // needs for encoding need to be specified.
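//
// As an illustrative sketch only (this hypothetical rule is not part of the
// instruction definitions below), an add-immediate instruction ties the two
// keywords together roughly like this:
//
//   instruct addI_eReg_imm_sketch(rRegI dst, immI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     format %{ "ADD    $dst,$src" %}
//     opcode(0x81, 0x00);        // primary opcode; /0 in the reg field
//     ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
//     ins_pipe( ialu_reg );
//   %}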
 1535 encode %{
 1536   // Build emit functions for each basic byte or larger field in the intel
 1537   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1538   // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In the future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in a given order,
 1541   // so that the adlc can build the emit functions automagically
 1542 
 1543   // Emit primary opcode
 1544   enc_class OpcP %{
 1545     emit_opcode(cbuf, $primary);
 1546   %}
 1547 
 1548   // Emit secondary opcode
 1549   enc_class OpcS %{
 1550     emit_opcode(cbuf, $secondary);
 1551   %}
 1552 
 1553   // Emit opcode directly
 1554   enc_class Opcode(immI d8) %{
 1555     emit_opcode(cbuf, $d8$$constant);
 1556   %}
 1557 
 1558   enc_class SizePrefix %{
 1559     emit_opcode(cbuf,0x66);
 1560   %}
 1561 
 1562   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1563     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1564   %}
 1565 
 1566   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1567     emit_opcode(cbuf,$opcode$$constant);
 1568     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1569   %}
 1570 
 1571   enc_class mov_r32_imm0( rRegI dst ) %{
 1572     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1573     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1574   %}
 1575 
 1576   enc_class cdq_enc %{
 1577     // Full implementation of Java idiv and irem; checks for
 1578     // special case as described in JVM spec., p.243 & p.271.
 1579     //
 1580     //         normal case                           special case
 1581     //
    // input : eax: dividend                          min_int
    //         reg: divisor                           -1
    //
    // output: eax: quotient  (= eax idiv reg)        min_int
    //         edx: remainder (= eax irem reg)        0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,0FFFFFFFFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        ecx
 1598     //                  done:
 1599     //
 1600     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1601     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1602     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1603     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1604     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1605     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1606     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
 1607     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
 1608     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1609     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1610     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1611     // normal_case:
 1612     emit_opcode(cbuf,0x99);                                         // cdq
 1613     // idiv (note: must be emitted by the user of this rule)
 1614     // normal:
 1615   %}
 1616 
 1617   // Dense encoding for older common ops
 1618   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1619     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1620   %}
 1621 
 1622 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
 1624   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1625     // Check for 8-bit immediate, and set sign extend bit in opcode
 1626     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1627       emit_opcode(cbuf, $primary | 0x02);
 1628     }
 1629     else {                          // If 32-bit immediate
 1630       emit_opcode(cbuf, $primary);
 1631     }
 1632   %}
 1633 
 1634   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1635     // Emit primary opcode and set sign-extend bit
 1636     // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
 1642     // Emit r/m byte with secondary opcode, after primary opcode.
 1643     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1644   %}
 1645 
 1646   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1647     // Check for 8-bit immediate, and set sign extend bit in opcode
 1648     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1649       $$$emit8$imm$$constant;
 1650     }
 1651     else {                          // If 32-bit immediate
 1652       // Output immediate
 1653       $$$emit32$imm$$constant;
 1654     }
 1655   %}
 1656 
 1657   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1658     // Emit primary opcode and set sign-extend bit
 1659     // Check for 8-bit immediate, and set sign extend bit in opcode
 1660     int con = (int)$imm$$constant; // Throw away top bits
 1661     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1662     // Emit r/m byte with secondary opcode, after primary opcode.
 1663     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1664     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1665     else                               emit_d32(cbuf,con);
 1666   %}
 1667 
 1668   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1669     // Emit primary opcode and set sign-extend bit
 1670     // Check for 8-bit immediate, and set sign extend bit in opcode
 1671     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1672     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1673     // Emit r/m byte with tertiary opcode, after primary opcode.
 1674     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1675     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1676     else                               emit_d32(cbuf,con);
 1677   %}
 1678 
 1679   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1680     emit_cc(cbuf, $secondary, $dst$$reg );
 1681   %}
 1682 
 1683   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1684     int destlo = $dst$$reg;
 1685     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1686     // bswap lo
 1687     emit_opcode(cbuf, 0x0F);
 1688     emit_cc(cbuf, 0xC8, destlo);
 1689     // bswap hi
 1690     emit_opcode(cbuf, 0x0F);
 1691     emit_cc(cbuf, 0xC8, desthi);
 1692     // xchg lo and hi
 1693     emit_opcode(cbuf, 0x87);
 1694     emit_rm(cbuf, 0x3, destlo, desthi);
 1695   %}
 1696 
 1697   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1698     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1699   %}
 1700 
 1701   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1702     $$$emit8$primary;
 1703     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1704   %}
 1705 
 1706   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1707     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1708     emit_d8(cbuf, op >> 8 );
 1709     emit_d8(cbuf, op & 255);
 1710   %}
 1711 
 1712   // emulate a CMOV with a conditional branch around a MOV
 1713   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1714     // Invert sense of branch from sense of CMOV
 1715     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1716     emit_d8( cbuf, $brOffs$$constant );
 1717   %}
 1718 
 1719   enc_class enc_PartialSubtypeCheck( ) %{
 1720     Register Redi = as_Register(EDI_enc); // result register
 1721     Register Reax = as_Register(EAX_enc); // super class
 1722     Register Recx = as_Register(ECX_enc); // killed
 1723     Register Resi = as_Register(ESI_enc); // sub class
 1724     Label miss;
 1725 
 1726     MacroAssembler _masm(&cbuf);
 1727     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1728                                      NULL, &miss,
 1729                                      /*set_cond_codes:*/ true);
 1730     if ($primary) {
 1731       __ xorptr(Redi, Redi);
 1732     }
 1733     __ bind(miss);
 1734   %}
 1735 
 1736   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1737     MacroAssembler masm(&cbuf);
 1738     int start = masm.offset();
 1739     if (UseSSE >= 2) {
 1740       if (VerifyFPU) {
 1741         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1742       }
 1743     } else {
 1744       // External c_calling_convention expects the FPU stack to be 'clean'.
 1745       // Compiled code leaves it dirty.  Do cleanup now.
 1746       masm.empty_FPU_stack();
 1747     }
 1748     if (sizeof_FFree_Float_Stack_All == -1) {
 1749       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1750     } else {
 1751       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1752     }
 1753   %}
 1754 
 1755   enc_class Verify_FPU_For_Leaf %{
 1756     if( VerifyFPU ) {
 1757       MacroAssembler masm(&cbuf);
 1758       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1759     }
 1760   %}
 1761 
 1762   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1763     // This is the instruction starting address for relocation info.
 1764     MacroAssembler _masm(&cbuf);
 1765     cbuf.set_insts_mark();
 1766     $$$emit8$primary;
 1767     // CALL directly to the runtime
 1768     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1769                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1770     __ post_call_nop();
 1771 
 1772     if (UseSSE >= 2) {
 1773       MacroAssembler _masm(&cbuf);
 1774       BasicType rt = tf()->return_type();
 1775 
 1776       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1777         // A C runtime call where the return value is unused.  In SSE2+
 1778         // mode the result needs to be removed from the FPU stack.  It's
 1779         // likely that this function call could be removed by the
 1780         // optimizer if the C function is a pure function.
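        // (The 32-bit C calling convention returns float/double results in
        //  x87 ST(0), so an unused result must still be popped, and a used
        //  one is spilled through memory to reach XMM0 below.)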
 1781         __ ffree(0);
 1782       } else if (rt == T_FLOAT) {
 1783         __ lea(rsp, Address(rsp, -4));
 1784         __ fstp_s(Address(rsp, 0));
 1785         __ movflt(xmm0, Address(rsp, 0));
 1786         __ lea(rsp, Address(rsp,  4));
 1787       } else if (rt == T_DOUBLE) {
 1788         __ lea(rsp, Address(rsp, -8));
 1789         __ fstp_d(Address(rsp, 0));
 1790         __ movdbl(xmm0, Address(rsp, 0));
 1791         __ lea(rsp, Address(rsp,  8));
 1792       }
 1793     }
 1794   %}
 1795 
 1796   enc_class pre_call_resets %{
 1797     // If method sets FPU control word restore it here
 1798     debug_only(int off0 = cbuf.insts_size());
 1799     if (ra_->C->in_24_bit_fp_mode()) {
 1800       MacroAssembler _masm(&cbuf);
 1801       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1802     }
 1803     // Clear upper bits of YMM registers when current compiled code uses
 1804     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1805     MacroAssembler _masm(&cbuf);
 1806     __ vzeroupper();
 1807     debug_only(int off1 = cbuf.insts_size());
 1808     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1809   %}
 1810 
 1811   enc_class post_call_FPU %{
 1812     // If method sets FPU control word do it here also
 1813     if (Compile::current()->in_24_bit_fp_mode()) {
 1814       MacroAssembler masm(&cbuf);
 1815       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1816     }
 1817   %}
 1818 
 1819   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1820     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1821     // who we intended to call.
 1822     MacroAssembler _masm(&cbuf);
 1823     cbuf.set_insts_mark();
 1824     $$$emit8$primary;
 1825 
 1826     if (!_method) {
 1827       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1828                      runtime_call_Relocation::spec(),
 1829                      RELOC_IMM32);
 1830       __ post_call_nop();
 1831     } else {
 1832       int method_index = resolved_method_index(cbuf);
 1833       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1834                                                   : static_call_Relocation::spec(method_index);
 1835       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1836                      rspec, RELOC_DISP32);
 1837       __ post_call_nop();
 1838       address mark = cbuf.insts_mark();
 1839       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1840         // Calls of the same statically bound method can share
 1841         // a stub to the interpreter.
 1842         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 1843       } else {
 1844         // Emit stubs for static call.
 1845         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 1846         if (stub == NULL) {
 1847           ciEnv::current()->record_failure("CodeCache is full");
 1848           return;
 1849         }
 1850       }
 1851     }
 1852   %}
 1853 
 1854   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1855     MacroAssembler _masm(&cbuf);
 1856     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1857     __ post_call_nop();
 1858   %}
 1859 
 1860   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1861     int disp = in_bytes(Method::from_compiled_offset());
 1862     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1863 
 1864     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
 1865     MacroAssembler _masm(&cbuf);
 1866     cbuf.set_insts_mark();
 1867     $$$emit8$primary;
 1868     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1869     emit_d8(cbuf, disp);             // Displacement
 1870     __ post_call_nop();
 1871   %}
 1872 
 1873 //   Following encoding is no longer used, but may be restored if calling
 1874 //   convention changes significantly.
 1875 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1876 //
 1877 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1878 //     // int ic_reg     = Matcher::inline_cache_reg();
 1879 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1880 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1881 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1882 //
 1883 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1884 //     // // so we load it immediately before the call
 1885 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1886 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1887 //
 1888 //     // xor rbp,ebp
 1889 //     emit_opcode(cbuf, 0x33);
 1890 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1891 //
 1892 //     // CALL to interpreter.
 1893 //     cbuf.set_insts_mark();
 1894 //     $$$emit8$primary;
 1895 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1896 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1897 //   %}
 1898 
 1899   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1900     $$$emit8$primary;
 1901     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1902     $$$emit8$shift$$constant;
 1903   %}
 1904 
 1905   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1906     // Load immediate does not have a zero or sign extended version
 1907     // for 8-bit immediates
 1908     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1909     $$$emit32$src$$constant;
 1910   %}
 1911 
 1912   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1913     // Load immediate does not have a zero or sign extended version
 1914     // for 8-bit immediates
 1915     emit_opcode(cbuf, $primary + $dst$$reg);
 1916     $$$emit32$src$$constant;
 1917   %}
 1918 
 1919   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1920     // Load immediate does not have a zero or sign extended version
 1921     // for 8-bit immediates
 1922     int dst_enc = $dst$$reg;
 1923     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1924     if (src_con == 0) {
 1925       // xor dst, dst
 1926       emit_opcode(cbuf, 0x33);
 1927       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1928     } else {
 1929       emit_opcode(cbuf, $primary + dst_enc);
 1930       emit_d32(cbuf, src_con);
 1931     }
 1932   %}
 1933 
 1934   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1935     // Load immediate does not have a zero or sign extended version
 1936     // for 8-bit immediates
 1937     int dst_enc = $dst$$reg + 2;
 1938     int src_con = ((julong)($src$$constant)) >> 32;
 1939     if (src_con == 0) {
 1940       // xor dst, dst
 1941       emit_opcode(cbuf, 0x33);
 1942       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1943     } else {
 1944       emit_opcode(cbuf, $primary + dst_enc);
 1945       emit_d32(cbuf, src_con);
 1946     }
 1947   %}
 1948 
 1949 
 1950   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1951   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1952     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1953   %}
 1954 
 1955   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1956     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1957   %}
 1958 
 1959   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1960     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1961   %}
 1962 
 1963   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1964     $$$emit8$primary;
 1965     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1966   %}
 1967 
 1968   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1969     $$$emit8$secondary;
 1970     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1971   %}
 1972 
 1973   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1974     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1975   %}
 1976 
 1977   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1978     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1979   %}
 1980 
 1981   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1982     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1983   %}
 1984 
 1985   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1986     // Output immediate
 1987     $$$emit32$src$$constant;
 1988   %}
 1989 
 1990   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1991     // Output Float immediate bits
 1992     jfloat jf = $src$$constant;
 1993     int    jf_as_bits = jint_cast( jf );
 1994     emit_d32(cbuf, jf_as_bits);
 1995   %}
 1996 
 1997   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1998     // Output Float immediate bits
 1999     jfloat jf = $src$$constant;
 2000     int    jf_as_bits = jint_cast( jf );
 2001     emit_d32(cbuf, jf_as_bits);
 2002   %}
 2003 
 2004   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 2005     // Output immediate
 2006     $$$emit16$src$$constant;
 2007   %}
 2008 
 2009   enc_class Con_d32(immI src) %{
 2010     emit_d32(cbuf,$src$$constant);
 2011   %}
 2012 
 2013   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 2014     // Output immediate memory reference
 2015     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 2016     emit_d32(cbuf, 0x00);
 2017   %}
 2018 
 2019   enc_class lock_prefix( ) %{
 2020     emit_opcode(cbuf,0xF0);         // [Lock]
 2021   %}
 2022 
 2023   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
 2028   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2029 
 2030     // XCHG  rbx,ecx
 2031     emit_opcode(cbuf,0x87);
 2032     emit_opcode(cbuf,0xD9);
 2033     // [Lock]
 2034     emit_opcode(cbuf,0xF0);
 2035     // CMPXCHG8 [Eptr]
 2036     emit_opcode(cbuf,0x0F);
 2037     emit_opcode(cbuf,0xC7);
 2038     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2039     // XCHG  rbx,ecx
 2040     emit_opcode(cbuf,0x87);
 2041     emit_opcode(cbuf,0xD9);
 2042   %}
 2043 
 2044   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2045     // [Lock]
 2046     emit_opcode(cbuf,0xF0);
 2047 
 2048     // CMPXCHG [Eptr]
 2049     emit_opcode(cbuf,0x0F);
 2050     emit_opcode(cbuf,0xB1);
 2051     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2052   %}
 2053 
 2054   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2055     // [Lock]
 2056     emit_opcode(cbuf,0xF0);
 2057 
 2058     // CMPXCHGB [Eptr]
 2059     emit_opcode(cbuf,0x0F);
 2060     emit_opcode(cbuf,0xB0);
 2061     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2062   %}
 2063 
 2064   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2065     // [Lock]
 2066     emit_opcode(cbuf,0xF0);
 2067 
 2068     // 16-bit mode
 2069     emit_opcode(cbuf, 0x66);
 2070 
 2071     // CMPXCHGW [Eptr]
 2072     emit_opcode(cbuf,0x0F);
 2073     emit_opcode(cbuf,0xB1);
 2074     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2075   %}
 2076 
 2077   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2078     int res_encoding = $res$$reg;
 2079 
 2080     // MOV  res,0
 2081     emit_opcode( cbuf, 0xB8 + res_encoding);
 2082     emit_d32( cbuf, 0 );
 2083     // JNE,s  fail
 2084     emit_opcode(cbuf,0x75);
 2085     emit_d8(cbuf, 5 );
 2086     // MOV  res,1
 2087     emit_opcode( cbuf, 0xB8 + res_encoding);
 2088     emit_d32( cbuf, 1 );
 2089     // fail:
 2090   %}
 2091 
 2092   enc_class set_instruction_start( ) %{
 2093     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2094   %}
 2095 
 2096   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2097     int reg_encoding = $ereg$$reg;
 2098     int base  = $mem$$base;
 2099     int index = $mem$$index;
 2100     int scale = $mem$$scale;
 2101     int displace = $mem$$disp;
 2102     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2103     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2104   %}
 2105 
 2106   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2107     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2108     int base  = $mem$$base;
 2109     int index = $mem$$index;
 2110     int scale = $mem$$scale;
 2111     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2112     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2113     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2114   %}
 2115 
 2116   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2117     int r1, r2;
 2118     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2119     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2120     emit_opcode(cbuf,0x0F);
 2121     emit_opcode(cbuf,$tertiary);
 2122     emit_rm(cbuf, 0x3, r1, r2);
 2123     emit_d8(cbuf,$cnt$$constant);
 2124     emit_d8(cbuf,$primary);
 2125     emit_rm(cbuf, 0x3, $secondary, r1);
 2126     emit_d8(cbuf,$cnt$$constant);
 2127   %}
 2128 
 2129   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2130     emit_opcode( cbuf, 0x8B ); // Move
 2131     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2132     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2133       emit_d8(cbuf,$primary);
 2134       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2135       emit_d8(cbuf,$cnt$$constant-32);
 2136     }
 2137     emit_d8(cbuf,$primary);
 2138     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2139     emit_d8(cbuf,31);
 2140   %}
 2141 
 2142   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2143     int r1, r2;
 2144     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2145     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2146 
 2147     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2148     emit_rm(cbuf, 0x3, r1, r2);
 2149     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2150       emit_opcode(cbuf,$primary);
 2151       emit_rm(cbuf, 0x3, $secondary, r1);
 2152       emit_d8(cbuf,$cnt$$constant-32);
 2153     }
 2154     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2155     emit_rm(cbuf, 0x3, r2, r2);
 2156   %}
 2157 
 2158   // Clone of RegMem but accepts an extra parameter to access each
 2159   // half of a double in memory; it never needs relocation info.
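  // (For example, a disp_for_half constant of 0 addresses the low word of
  //  the double and 4 addresses the high word.)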
 2160   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2161     emit_opcode(cbuf,$opcode$$constant);
 2162     int reg_encoding = $rm_reg$$reg;
 2163     int base     = $mem$$base;
 2164     int index    = $mem$$index;
 2165     int scale    = $mem$$scale;
 2166     int displace = $mem$$disp + $disp_for_half$$constant;
 2167     relocInfo::relocType disp_reloc = relocInfo::none;
 2168     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2169   %}
 2170 
 2171   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2172   //
 2173   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2174   // and it never needs relocation information.
 2175   // Frequently used to move data between FPU's Stack Top and memory.
 2176   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2177     int rm_byte_opcode = $rm_opcode$$constant;
 2178     int base     = $mem$$base;
 2179     int index    = $mem$$index;
 2180     int scale    = $mem$$scale;
 2181     int displace = $mem$$disp;
 2182     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2183     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2184   %}
 2185 
 2186   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2187     int rm_byte_opcode = $rm_opcode$$constant;
 2188     int base     = $mem$$base;
 2189     int index    = $mem$$index;
 2190     int scale    = $mem$$scale;
 2191     int displace = $mem$$disp;
 2192     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2193     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2194   %}
 2195 
 2196   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2197     int reg_encoding = $dst$$reg;
 2198     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2199     int index        = 0x04;            // 0x04 indicates no index
 2200     int scale        = 0x00;            // 0x00 indicates no scale
 2201     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2202     relocInfo::relocType disp_reloc = relocInfo::none;
 2203     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2204   %}
 2205 
 2206   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2207     // Compare dst,src
 2208     emit_opcode(cbuf,0x3B);
 2209     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2210     // jmp dst < src around move
 2211     emit_opcode(cbuf,0x7C);
 2212     emit_d8(cbuf,2);
 2213     // move dst,src
 2214     emit_opcode(cbuf,0x8B);
 2215     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2216   %}
 2217 
 2218   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2219     // Compare dst,src
 2220     emit_opcode(cbuf,0x3B);
 2221     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2222     // jmp dst > src around move
 2223     emit_opcode(cbuf,0x7F);
 2224     emit_d8(cbuf,2);
 2225     // move dst,src
 2226     emit_opcode(cbuf,0x8B);
 2227     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2228   %}
 2229 
 2230   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2231     // If src is FPR1, we can just FST to store it.
 2232     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
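    // (Illustrative: for a double store $primary would be 0xDD, whose /2
    //  form is FST m64real and whose /3 form is FSTP m64real, so switching
    //  reg_encoding from 0x2 to 0x3 turns "store" into "store & pop".)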
 2233     int reg_encoding = 0x2; // Just store
 2234     int base  = $mem$$base;
 2235     int index = $mem$$index;
 2236     int scale = $mem$$scale;
 2237     int displace = $mem$$disp;
 2238     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2239     if( $src$$reg != FPR1L_enc ) {
 2240       reg_encoding = 0x3;  // Store & pop
 2241       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2242       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2243     }
 2244     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2245     emit_opcode(cbuf,$primary);
 2246     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2247   %}
 2248 
 2249   enc_class neg_reg(rRegI dst) %{
 2250     // NEG $dst
 2251     emit_opcode(cbuf,0xF7);
 2252     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2253   %}
 2254 
 2255   enc_class setLT_reg(eCXRegI dst) %{
 2256     // SETLT $dst
 2257     emit_opcode(cbuf,0x0F);
 2258     emit_opcode(cbuf,0x9C);
 2259     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2260   %}
 2261 
 2262   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2263     int tmpReg = $tmp$$reg;
 2264 
 2265     // SUB $p,$q
 2266     emit_opcode(cbuf,0x2B);
 2267     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2268     // SBB $tmp,$tmp
 2269     emit_opcode(cbuf,0x1B);
 2270     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2271     // AND $tmp,$y
 2272     emit_opcode(cbuf,0x23);
 2273     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2274     // ADD $p,$tmp
 2275     emit_opcode(cbuf,0x03);
 2276     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2277   %}
 2278 
 2279   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2280     // TEST shift,32
 2281     emit_opcode(cbuf,0xF7);
 2282     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2283     emit_d32(cbuf,0x20);
 2284     // JEQ,s small
 2285     emit_opcode(cbuf, 0x74);
 2286     emit_d8(cbuf, 0x04);
 2287     // MOV    $dst.hi,$dst.lo
 2288     emit_opcode( cbuf, 0x8B );
 2289     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2290     // CLR    $dst.lo
 2291     emit_opcode(cbuf, 0x33);
 2292     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2293 // small:
 2294     // SHLD   $dst.hi,$dst.lo,$shift
 2295     emit_opcode(cbuf,0x0F);
 2296     emit_opcode(cbuf,0xA5);
 2297     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL    $dst.lo,$shift
 2299     emit_opcode(cbuf,0xD3);
 2300     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2301   %}
 2302 
 2303   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2304     // TEST shift,32
 2305     emit_opcode(cbuf,0xF7);
 2306     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2307     emit_d32(cbuf,0x20);
 2308     // JEQ,s small
 2309     emit_opcode(cbuf, 0x74);
 2310     emit_d8(cbuf, 0x04);
 2311     // MOV    $dst.lo,$dst.hi
 2312     emit_opcode( cbuf, 0x8B );
 2313     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2314     // CLR    $dst.hi
 2315     emit_opcode(cbuf, 0x33);
 2316     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2317 // small:
 2318     // SHRD   $dst.lo,$dst.hi,$shift
 2319     emit_opcode(cbuf,0x0F);
 2320     emit_opcode(cbuf,0xAD);
 2321     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
 2323     emit_opcode(cbuf,0xD3);
 2324     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2325   %}
 2326 
 2327   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2328     // TEST shift,32
 2329     emit_opcode(cbuf,0xF7);
 2330     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2331     emit_d32(cbuf,0x20);
 2332     // JEQ,s small
 2333     emit_opcode(cbuf, 0x74);
 2334     emit_d8(cbuf, 0x05);
 2335     // MOV    $dst.lo,$dst.hi
 2336     emit_opcode( cbuf, 0x8B );
 2337     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2338     // SAR    $dst.hi,31
 2339     emit_opcode(cbuf, 0xC1);
 2340     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2341     emit_d8(cbuf, 0x1F );
 2342 // small:
 2343     // SHRD   $dst.lo,$dst.hi,$shift
 2344     emit_opcode(cbuf,0x0F);
 2345     emit_opcode(cbuf,0xAD);
 2346     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
 2348     emit_opcode(cbuf,0xD3);
 2349     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2350   %}
 2351 
 2352 
 2353   // ----------------- Encodings for floating point unit -----------------
 2354   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2355   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2356     $$$emit8$primary;
 2357     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2358   %}
 2359 
 2360   // Pop argument in FPR0 with FSTP ST(0)
 2361   enc_class PopFPU() %{
 2362     emit_opcode( cbuf, 0xDD );
 2363     emit_d8( cbuf, 0xD8 );
 2364   %}
 2365 
 2366   // !!!!! equivalent to Pop_Reg_F
 2367   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2368     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2369     emit_d8( cbuf, 0xD8+$dst$$reg );
 2370   %}
 2371 
 2372   enc_class Push_Reg_DPR( regDPR dst ) %{
 2373     emit_opcode( cbuf, 0xD9 );
 2374     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2375   %}
 2376 
 2377   enc_class strictfp_bias1( regDPR dst ) %{
 2378     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2379     emit_opcode( cbuf, 0x2D );
 2380     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2381     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2382     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2383   %}
 2384 
 2385   enc_class strictfp_bias2( regDPR dst ) %{
 2386     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2387     emit_opcode( cbuf, 0x2D );
 2388     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2389     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2390     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2391   %}
 2392 
 2393   // Special case for moving an integer register to a stack slot.
 2394   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2395     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2396   %}
 2397 
 2398   // Special case for moving a register to a stack slot.
 2399   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2400     // Opcode already emitted
 2401     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2402     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2403     emit_d32(cbuf, $dst$$disp);   // Displacement
 2404   %}
 2405 
 2406   // Push the integer in stackSlot 'src' onto FP-stack
 2407   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2408     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2409   %}
 2410 
 2411   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2412   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2413     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2414   %}
 2415 
 2416   // Same as Pop_Mem_F except for opcode
 2417   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2418   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2419     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2420   %}
 2421 
 2422   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2423     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2424     emit_d8( cbuf, 0xD8+$dst$$reg );
 2425   %}
 2426 
 2427   enc_class Push_Reg_FPR( regFPR dst ) %{
 2428     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2429     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2430   %}
 2431 
 2432   // Push FPU's float to a stack-slot, and pop FPU-stack
 2433   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2434     int pop = 0x02;
 2435     if ($src$$reg != FPR1L_enc) {
 2436       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2437       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2438       pop = 0x03;
 2439     }
 2440     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2441   %}
 2442 
 2443   // Push FPU's double to a stack-slot, and pop FPU-stack
 2444   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2445     int pop = 0x02;
 2446     if ($src$$reg != FPR1L_enc) {
 2447       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2448       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2449       pop = 0x03;
 2450     }
 2451     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2452   %}
 2453 
 2454   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2455   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2456     int pop = 0xD0 - 1; // -1 since we skip FLD
 2457     if ($src$$reg != FPR1L_enc) {
 2458       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2459       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2460       pop = 0xD8;
 2461     }
 2462     emit_opcode( cbuf, 0xDD );
 2463     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2464   %}
 2465 
 2466 
 2467   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2468     // load dst in FPR0
 2469     emit_opcode( cbuf, 0xD9 );
 2470     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2471     if ($src$$reg != FPR1L_enc) {
 2472       // fincstp
 2473       emit_opcode (cbuf, 0xD9);
 2474       emit_opcode (cbuf, 0xF7);
 2475       // swap src with FPR1:
 2476       // FXCH FPR1 with src
 2477       emit_opcode(cbuf, 0xD9);
 2478       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2479       // fdecstp
 2480       emit_opcode (cbuf, 0xD9);
 2481       emit_opcode (cbuf, 0xF6);
 2482     }
 2483   %}
 2484 
 2485   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2486     MacroAssembler _masm(&cbuf);
 2487     __ subptr(rsp, 8);
 2488     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2489     __ fld_d(Address(rsp, 0));
 2490     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2491     __ fld_d(Address(rsp, 0));
 2492   %}
 2493 
 2494   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2495     MacroAssembler _masm(&cbuf);
 2496     __ subptr(rsp, 4);
 2497     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2498     __ fld_s(Address(rsp, 0));
 2499     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2500     __ fld_s(Address(rsp, 0));
 2501   %}
 2502 
 2503   enc_class Push_ResultD(regD dst) %{
 2504     MacroAssembler _masm(&cbuf);
 2505     __ fstp_d(Address(rsp, 0));
 2506     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2507     __ addptr(rsp, 8);
 2508   %}
 2509 
 2510   enc_class Push_ResultF(regF dst, immI d8) %{
 2511     MacroAssembler _masm(&cbuf);
 2512     __ fstp_s(Address(rsp, 0));
 2513     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2514     __ addptr(rsp, $d8$$constant);
 2515   %}
 2516 
 2517   enc_class Push_SrcD(regD src) %{
 2518     MacroAssembler _masm(&cbuf);
 2519     __ subptr(rsp, 8);
 2520     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2521     __ fld_d(Address(rsp, 0));
 2522   %}
 2523 
 2524   enc_class push_stack_temp_qword() %{
 2525     MacroAssembler _masm(&cbuf);
 2526     __ subptr(rsp, 8);
 2527   %}
 2528 
 2529   enc_class pop_stack_temp_qword() %{
 2530     MacroAssembler _masm(&cbuf);
 2531     __ addptr(rsp, 8);
 2532   %}
 2533 
 2534   enc_class push_xmm_to_fpr1(regD src) %{
 2535     MacroAssembler _masm(&cbuf);
 2536     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2537     __ fld_d(Address(rsp, 0));
 2538   %}
 2539 
 2540   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2541     if ($src$$reg != FPR1L_enc) {
 2542       // fincstp
 2543       emit_opcode (cbuf, 0xD9);
 2544       emit_opcode (cbuf, 0xF7);
 2545       // FXCH FPR1 with src
 2546       emit_opcode(cbuf, 0xD9);
 2547       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2548       // fdecstp
 2549       emit_opcode (cbuf, 0xD9);
 2550       emit_opcode (cbuf, 0xF6);
 2551     }
 2552     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2553     // // FSTP   FPR$dst$$reg
 2554     // emit_opcode( cbuf, 0xDD );
 2555     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2556   %}
 2557 
 2558   enc_class fnstsw_sahf_skip_parity() %{
 2559     // fnstsw ax
 2560     emit_opcode( cbuf, 0xDF );
 2561     emit_opcode( cbuf, 0xE0 );
 2562     // sahf
 2563     emit_opcode( cbuf, 0x9E );
 2564     // jnp  ::skip
 2565     emit_opcode( cbuf, 0x7B );
 2566     emit_opcode( cbuf, 0x05 );
 2567   %}
 2568 
 2569   enc_class emitModDPR() %{
 2570     // fprem must be iterative
 2571     // :: loop
 2572     // fprem
 2573     emit_opcode( cbuf, 0xD9 );
 2574     emit_opcode( cbuf, 0xF8 );
 2575     // wait
 2576     emit_opcode( cbuf, 0x9b );
 2577     // fnstsw ax
 2578     emit_opcode( cbuf, 0xDF );
 2579     emit_opcode( cbuf, 0xE0 );
 2580     // sahf
 2581     emit_opcode( cbuf, 0x9E );
 2582     // jp  ::loop
 2583     emit_opcode( cbuf, 0x0F );
 2584     emit_opcode( cbuf, 0x8A );
 2585     emit_opcode( cbuf, 0xF4 );
 2586     emit_opcode( cbuf, 0xFF );
 2587     emit_opcode( cbuf, 0xFF );
 2588     emit_opcode( cbuf, 0xFF );
 2589   %}
 2590 
 2591   enc_class fpu_flags() %{
 2592     // fnstsw_ax
 2593     emit_opcode( cbuf, 0xDF);
 2594     emit_opcode( cbuf, 0xE0);
 2595     // test ax,0x0400
 2596     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2597     emit_opcode( cbuf, 0xA9 );
 2598     emit_d16   ( cbuf, 0x0400 );
 2599     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2600     // // test rax,0x0400
 2601     // emit_opcode( cbuf, 0xA9 );
 2602     // emit_d32   ( cbuf, 0x00000400 );
 2603     //
 2604     // jz exit (no unordered comparison)
 2605     emit_opcode( cbuf, 0x74 );
 2606     emit_d8    ( cbuf, 0x02 );
 2607     // mov ah,1 - treat as LT case (set carry flag)
 2608     emit_opcode( cbuf, 0xB4 );
 2609     emit_d8    ( cbuf, 0x01 );
 2610     // sahf
 2611     emit_opcode( cbuf, 0x9E);
 2612   %}
 2613 
 2614   enc_class cmpF_P6_fixup() %{
 2615     // Fixup the integer flags in case comparison involved a NaN
 2616     //
 2617     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2618     emit_opcode( cbuf, 0x7B );
 2619     emit_d8    ( cbuf, 0x03 );
 2620     // MOV AH,1 - treat as LT case (set carry flag)
 2621     emit_opcode( cbuf, 0xB4 );
 2622     emit_d8    ( cbuf, 0x01 );
 2623     // SAHF
 2624     emit_opcode( cbuf, 0x9E);
 2625     // NOP     // target for branch to avoid branch to branch
 2626     emit_opcode( cbuf, 0x90);
 2627   %}
 2628 
 2629 //     fnstsw_ax();
 2630 //     sahf();
 2631 //     movl(dst, nan_result);
 2632 //     jcc(Assembler::parity, exit);
 2633 //     movl(dst, less_result);
 2634 //     jcc(Assembler::below, exit);
 2635 //     movl(dst, equal_result);
 2636 //     jcc(Assembler::equal, exit);
 2637 //     movl(dst, greater_result);
 2638 
 2639 // less_result     =  1;
 2640 // greater_result  = -1;
 2641 // equal_result    = 0;
 2642 // nan_result      = -1;
 2643 
 2644   enc_class CmpF_Result(rRegI dst) %{
 2645     // fnstsw_ax();
 2646     emit_opcode( cbuf, 0xDF);
 2647     emit_opcode( cbuf, 0xE0);
 2648     // sahf
 2649     emit_opcode( cbuf, 0x9E);
 2650     // movl(dst, nan_result);
 2651     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2652     emit_d32( cbuf, -1 );
 2653     // jcc(Assembler::parity, exit);
 2654     emit_opcode( cbuf, 0x7A );
 2655     emit_d8    ( cbuf, 0x13 );
 2656     // movl(dst, less_result);
 2657     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2658     emit_d32( cbuf, -1 );
 2659     // jcc(Assembler::below, exit);
 2660     emit_opcode( cbuf, 0x72 );
 2661     emit_d8    ( cbuf, 0x0C );
 2662     // movl(dst, equal_result);
 2663     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2664     emit_d32( cbuf, 0 );
 2665     // jcc(Assembler::equal, exit);
 2666     emit_opcode( cbuf, 0x74 );
 2667     emit_d8    ( cbuf, 0x05 );
 2668     // movl(dst, greater_result);
 2669     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2670     emit_d32( cbuf, 1 );
 2671   %}
 2672 
 2673 
 2674   // Compare the longs and set flags
 2675   // BROKEN!  Do Not use as-is
 2676   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2677     // CMP    $src1.hi,$src2.hi
 2678     emit_opcode( cbuf, 0x3B );
 2679     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2680     // JNE,s  done
 2681     emit_opcode(cbuf,0x75);
 2682     emit_d8(cbuf, 2 );
 2683     // CMP    $src1.lo,$src2.lo
 2684     emit_opcode( cbuf, 0x3B );
 2685     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2686 // done:
 2687   %}
 2688 
 2689   enc_class convert_int_long( regL dst, rRegI src ) %{
 2690     // mov $dst.lo,$src
 2691     int dst_encoding = $dst$$reg;
 2692     int src_encoding = $src$$reg;
 2693     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2694     // mov $dst.hi,$src
 2695     encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2696     // sar $dst.hi,31
 2697     emit_opcode( cbuf, 0xC1 );
 2698     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2699     emit_d8(cbuf, 0x1F );
 2700   %}
 2701 
 2702   enc_class convert_long_double( eRegL src ) %{
 2703     // push $src.hi
 2704     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2705     // push $src.lo
 2706     emit_opcode(cbuf, 0x50+$src$$reg  );
 2707     // fild 64-bits at [SP]
 2708     emit_opcode(cbuf,0xdf);
 2709     emit_d8(cbuf, 0x6C);
 2710     emit_d8(cbuf, 0x24);
 2711     emit_d8(cbuf, 0x00);
 2712     // pop stack
 2713     emit_opcode(cbuf, 0x83); // add  SP, #8
 2714     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2715     emit_d8(cbuf, 0x8);
 2716   %}
 2717 
 2718   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2719     // IMUL   EDX:EAX,$src1
 2720     emit_opcode( cbuf, 0xF7 );
 2721     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2722     // SAR    EDX,$cnt-32
 2723     int shift_count = ((int)$cnt$$constant) - 32;
 2724     if (shift_count > 0) {
 2725       emit_opcode(cbuf, 0xC1);
 2726       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2727       emit_d8(cbuf, shift_count);
 2728     }
 2729   %}
 2730 
 2731   // this version doesn't have add sp, 8
 2732   enc_class convert_long_double2( eRegL src ) %{
 2733     // push $src.hi
 2734     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2735     // push $src.lo
 2736     emit_opcode(cbuf, 0x50+$src$$reg  );
 2737     // fild 64-bits at [SP]
 2738     emit_opcode(cbuf,0xdf);
 2739     emit_d8(cbuf, 0x6C);
 2740     emit_d8(cbuf, 0x24);
 2741     emit_d8(cbuf, 0x00);
 2742   %}
 2743 
 2744   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2745     // Basic idea: long = (long)int * (long)int
 2746     // IMUL EDX:EAX, src
 2747     emit_opcode( cbuf, 0xF7 );
 2748     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2749   %}
 2750 
 2751   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2752     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2753     // MUL EDX:EAX, src
 2754     emit_opcode( cbuf, 0xF7 );
 2755     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2756   %}
 2757 
 2758   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2759     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2760     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2761     // MOV    $tmp,$src.lo
 2762     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2763     // IMUL   $tmp,EDX
 2764     emit_opcode( cbuf, 0x0F );
 2765     emit_opcode( cbuf, 0xAF );
 2766     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2767     // MOV    EDX,$src.hi
 2768     encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2769     // IMUL   EDX,EAX
 2770     emit_opcode( cbuf, 0x0F );
 2771     emit_opcode( cbuf, 0xAF );
 2772     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2773     // ADD    $tmp,EDX
 2774     emit_opcode( cbuf, 0x03 );
 2775     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2776     // MUL   EDX:EAX,$src.lo
 2777     emit_opcode( cbuf, 0xF7 );
 2778     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
 2780     emit_opcode( cbuf, 0x03 );
 2781     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2782   %}
 2783 
 2784   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2785     // Basic idea: lo(result) = lo(src * y_lo)
 2786     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2787     // IMUL   $tmp,EDX,$src
 2788     emit_opcode( cbuf, 0x6B );
 2789     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2790     emit_d8( cbuf, (int)$src$$constant );
 2791     // MOV    EDX,$src
 2792     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2793     emit_d32( cbuf, (int)$src$$constant );
 2794     // MUL   EDX:EAX,EDX
 2795     emit_opcode( cbuf, 0xF7 );
 2796     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
 2798     emit_opcode( cbuf, 0x03 );
 2799     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2800   %}
 2801 
 2802   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2803     // PUSH src1.hi
 2804     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2805     // PUSH src1.lo
 2806     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2807     // PUSH src2.hi
 2808     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2809     // PUSH src2.lo
 2810     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2811     // CALL directly to the runtime
 2812     MacroAssembler _masm(&cbuf);
 2813     cbuf.set_insts_mark();
 2814     emit_opcode(cbuf,0xE8);       // Call into runtime
 2815     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2816     __ post_call_nop();
 2817     // Restore stack
 2818     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2819     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2820     emit_d8(cbuf, 4*4);
 2821   %}
 2822 
 2823   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2824     // PUSH src1.hi
 2825     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2826     // PUSH src1.lo
 2827     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2828     // PUSH src2.hi
 2829     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2830     // PUSH src2.lo
 2831     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2832     // CALL directly to the runtime
 2833     MacroAssembler _masm(&cbuf);
 2834     cbuf.set_insts_mark();
 2835     emit_opcode(cbuf,0xE8);       // Call into runtime
 2836     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2837     __ post_call_nop();
 2838     // Restore stack
 2839     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2840     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2841     emit_d8(cbuf, 4*4);
 2842   %}
 2843 
 2844   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2845     // MOV   $tmp,$src.lo
 2846     emit_opcode(cbuf, 0x8B);
 2847     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2848     // OR    $tmp,$src.hi
 2849     emit_opcode(cbuf, 0x0B);
 2850     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2851   %}
 2852 
 2853   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2854     // CMP    $src1.lo,$src2.lo
 2855     emit_opcode( cbuf, 0x3B );
 2856     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2857     // JNE,s  skip
 2858     emit_cc(cbuf, 0x70, 0x5);
 2859     emit_d8(cbuf,2);
 2860     // CMP    $src1.hi,$src2.hi
 2861     emit_opcode( cbuf, 0x3B );
 2862     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2863   %}
 2864 
 2865   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2866     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2867     emit_opcode( cbuf, 0x3B );
 2868     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2869     // MOV    $tmp,$src1.hi
 2870     emit_opcode( cbuf, 0x8B );
 2871     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
 2872     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2873     emit_opcode( cbuf, 0x1B );
 2874     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2875   %}
 2876 
 2877   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2878     // XOR    $tmp,$tmp
 2879     emit_opcode(cbuf,0x33);  // XOR
 2880     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2881     // CMP    $tmp,$src.lo
 2882     emit_opcode( cbuf, 0x3B );
 2883     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2884     // SBB    $tmp,$src.hi
 2885     emit_opcode( cbuf, 0x1B );
 2886     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2887   %}
 2888 
 2889  // Sniff, sniff... smells like Gnu Superoptimizer
 2890   enc_class neg_long( eRegL dst ) %{
 2891     emit_opcode(cbuf,0xF7);    // NEG hi
 2892     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2893     emit_opcode(cbuf,0xF7);    // NEG lo
 2894     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2895     emit_opcode(cbuf,0x83);    // SBB hi,0
 2896     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2897     emit_d8    (cbuf,0 );
 2898   %}
 2899 
 2900   enc_class enc_pop_rdx() %{
 2901     emit_opcode(cbuf,0x5A);
 2902   %}
 2903 
 2904   enc_class enc_rethrow() %{
 2905     MacroAssembler _masm(&cbuf);
 2906     cbuf.set_insts_mark();
 2907     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2908     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2909                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2910     __ post_call_nop();
 2911   %}
 2912 
 2913 
 2914   // Convert a double to an int.  Java semantics require we do complex
 2915   // manglelations in the corner cases.  So we set the rounding mode to
 2916   // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  If the hardware stores the integer-indefinite
  // value 0x80000000 (overflow or NaN), we take a slow path that calls the
  // d2i wrapper to compute the correct Java result.
 2919   enc_class DPR2I_encoding( regDPR src ) %{
 2920     // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
 2923     // However, I2C adapters and other float-stack manglers leave pending
 2924     // invalid-op exceptions hanging.  We would have to clear them before
 2925     // enabling them and that is more expensive than just testing for the
 2926     // invalid value Intel stores down in the corner cases.
 2927     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2928     emit_opcode(cbuf,0x2D);
 2929     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2930     // Allocate a word
 2931     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2932     emit_opcode(cbuf,0xEC);
 2933     emit_d8(cbuf,0x04);
 2934     // Encoding assumes a double has been pushed into FPR0.
 2935     // Store down the double as an int, popping the FPU stack
 2936     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2937     emit_opcode(cbuf,0x1C);
 2938     emit_d8(cbuf,0x24);
 2939     // Restore the rounding mode; mask the exception
 2940     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2941     emit_opcode(cbuf,0x2D);
 2942     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2943         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2944         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2945 
 2946     // Load the converted int; adjust CPU stack
 2947     emit_opcode(cbuf,0x58);       // POP EAX
 2948     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2949     emit_d32   (cbuf,0x80000000); //         0x80000000
 2950     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2951     emit_d8    (cbuf,0x07);       // Size of slow_call
 2952     // Push src onto stack slow-path
 2953     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2954     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2955     // CALL directly to the runtime
 2956     MacroAssembler _masm(&cbuf);
 2957     cbuf.set_insts_mark();
 2958     emit_opcode(cbuf,0xE8);       // Call into runtime
 2959     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2960     __ post_call_nop();
 2961     // Carry on here...
 2962   %}
 2963 
 2964   enc_class DPR2L_encoding( regDPR src ) %{
 2965     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2966     emit_opcode(cbuf,0x2D);
 2967     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes)
 2969     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2970     emit_opcode(cbuf,0xEC);
 2971     emit_d8(cbuf,0x08);
 2972     // Encoding assumes a double has been pushed into FPR0.
 2973     // Store down the double as a long, popping the FPU stack
 2974     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2975     emit_opcode(cbuf,0x3C);
 2976     emit_d8(cbuf,0x24);
 2977     // Restore the rounding mode; mask the exception
 2978     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2979     emit_opcode(cbuf,0x2D);
 2980     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2981         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2982         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2983 
    // Load the converted long; adjust CPU stack
 2985     emit_opcode(cbuf,0x58);       // POP EAX
 2986     emit_opcode(cbuf,0x5A);       // POP EDX
 2987     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2988     emit_d8    (cbuf,0xFA);       // rdx
 2989     emit_d32   (cbuf,0x80000000); //         0x80000000
 2990     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2991     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2992     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2993     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2994     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2995     emit_d8    (cbuf,0x07);       // Size of slow_call
 2996     // Push src onto stack slow-path
 2997     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2998     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2999     // CALL directly to the runtime
 3000     MacroAssembler _masm(&cbuf);
 3001     cbuf.set_insts_mark();
 3002     emit_opcode(cbuf,0xE8);       // Call into runtime
 3003     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 3004     __ post_call_nop();
 3005     // Carry on here...
 3006   %}
 3007 
 3008   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 3009     // Operand was loaded from memory into fp ST (stack top)
 3010     // FMUL   ST,$src  /* D8 C8+i */
 3011     emit_opcode(cbuf, 0xD8);
 3012     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 3013   %}
 3014 
 3015   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
 3017     emit_opcode(cbuf, 0xD8);
 3018     emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // could use FADDP  src2,fpST  /* DE C0+i */
 3020   %}
 3021 
 3022   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3023     // FADDP  src2,ST  /* DE C0+i */
 3024     emit_opcode(cbuf, 0xDE);
 3025     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3026   %}
 3027 
 3028   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 3029     // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
 3037   %}
 3038 
 3039   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3040     // Operand was loaded from memory into fp ST (stack top)
 3041     // FADD   ST,$src  /* D8 C0+i */
 3042     emit_opcode(cbuf, 0xD8);
 3043     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3044 
    // FMUL  ST,src2  /* D8 C8+i */
 3046     emit_opcode(cbuf, 0xD8);
 3047     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3048   %}
 3049 
 3050 
 3051   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3052     // Operand was loaded from memory into fp ST (stack top)
 3053     // FADD   ST,$src  /* D8 C0+i */
 3054     emit_opcode(cbuf, 0xD8);
 3055     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3056 
 3057     // FMULP  src2,ST  /* DE C8+i */
 3058     emit_opcode(cbuf, 0xDE);
 3059     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3060   %}
 3061 
 3062   // Atomically load the volatile long
 3063   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3064     emit_opcode(cbuf,0xDF);
 3065     int rm_byte_opcode = 0x05;
 3066     int base     = $mem$$base;
 3067     int index    = $mem$$index;
 3068     int scale    = $mem$$scale;
 3069     int displace = $mem$$disp;
 3070     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3071     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3072     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3073   %}
 3074 
 3075   // Volatile Store Long.  Must be atomic, so move it into
 3076   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3077   // target address before the store (for null-ptr checks)
 3078   // so the memory operand is used twice in the encoding.
 3079   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3080     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3081     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3082     emit_opcode(cbuf,0xDF);
 3083     int rm_byte_opcode = 0x07;
 3084     int base     = $mem$$base;
 3085     int index    = $mem$$index;
 3086     int scale    = $mem$$scale;
 3087     int displace = $mem$$disp;
 3088     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3089     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3090   %}
 3091 
 3092 %}
 3093 
 3094 
 3095 //----------FRAME--------------------------------------------------------------
 3096 // Definition of frame structure and management information.
 3097 //
 3098 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3099 //                             |   (to get allocators register number
 3100 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3101 //  r   CALLER     |        |
 3102 //  o     |        +--------+      pad to even-align allocators stack-slot
 3103 //  w     V        |  pad0  |        numbers; owned by CALLER
 3104 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3105 //  h     ^        |   in   |  5
 3106 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3107 //  |     |        |        |  3
 3108 //  |     |        +--------+
 3109 //  V     |        | old out|      Empty on Intel, window on Sparc
 3110 //        |    old |preserve|      Must be even aligned.
 3111 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3112 //        |        |   in   |  3   area for Intel ret address
 3113 //     Owned by    |preserve|      Empty on Sparc.
 3114 //       SELF      +--------+
 3115 //        |        |  pad2  |  2   pad to align old SP
 3116 //        |        +--------+  1
 3117 //        |        | locks  |  0
 3118 //        |        +--------+----> OptoReg::stack0(), even aligned
 3119 //        |        |  pad1  | 11   pad to align new SP
 3120 //        |        +--------+
 3121 //        |        |        | 10
 3122 //        |        | spills |  9   spills
 3123 //        V        |        |  8   (pad0 slot for callee)
 3124 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3125 //        ^        |  out   |  7
 3126 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3127 //     Owned by    +--------+
 3128 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3129 //        |    new |preserve|      Must be even-aligned.
 3130 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3131 //        |        |        |
 3132 //
 3133 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3134 //         known from SELF's arguments and the Java calling convention.
 3135 //         Region 6-7 is determined per call site.
 3136 // Note 2: If the calling convention leaves holes in the incoming argument
 3137 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3138 //         are owned by the CALLEE.  Holes should not be necessary in the
 3139 //         incoming area, as the Java calling convention is completely under
 3140 //         the control of the AD file.  Doubles can be sorted and packed to
 3141 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3142 //         varargs C calling conventions.
 3143 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3144 //         even aligned with pad0 as needed.
 3145 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3146 //         region 6-11 is even aligned; it may be padded out more so that
 3147 //         the region from SP to FP meets the minimum stack alignment.
 3148 
 3149 frame %{
  // This register defines part of the calling convention
  // between compiled code and the interpreter.
 3152   inline_cache_reg(EAX);                // Inline Cache Register
 3153 
 3154   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3155   cisc_spilling_operand_name(indOffset32);
 3156 
 3157   // Number of stack slots consumed by locking an object
 3158   sync_stack_slots(1);
 3159 
 3160   // Compiled code's Frame Pointer
 3161   frame_pointer(ESP);
 3162   // Interpreter stores its frame pointer in a register which is
 3163   // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted Java to compiled Java.
 3165   interpreter_frame_pointer(EBP);
 3166 
 3167   // Stack alignment requirement
 3168   // Alignment size in bytes (128-bit -> 16 bytes)
 3169   stack_alignment(StackAlignmentInBytes);
 3170 
 3171   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3172   // for calls to C.  Supports the var-args backing area for register parms.
 3173   varargs_C_out_slots_killed(0);
 3174 
 3175   // The after-PROLOG location of the return address.  Location of
 3176   // return address specifies a type (REG or STACK) and a number
 3177   // representing the register number (i.e. - use a register name) or
 3178   // stack slot.
 3179   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word.
 3181   return_addr(STACK - 1 +
 3182               align_up((Compile::current()->in_preserve_stack_slots() +
 3183                         Compile::current()->fixed_slots()),
 3184                        stack_alignment_in_slots()));
 3185 
 3186   // Location of C & interpreter return values
 3187   c_return_value %{
 3188     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3189     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3190     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3191 
 3192     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3193     // that C functions return float and double results in XMM0.
 3194     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3195       return OptoRegPair(XMM0b_num,XMM0_num);
 3196     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3197       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3198 
 3199     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3200   %}
 3201 
 3202   // Location of return values
 3203   return_value %{
 3204     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3205     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3206     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3207     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3208       return OptoRegPair(XMM0b_num,XMM0_num);
 3209     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3210       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3211     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3212   %}
 3213 
 3214 %}
 3215 
 3216 //----------ATTRIBUTES---------------------------------------------------------
 3217 //----------Operand Attributes-------------------------------------------------
 3218 op_attrib op_cost(0);        // Required cost attribute
 3219 
 3220 //----------Instruction Attributes---------------------------------------------
 3221 ins_attrib ins_cost(100);       // Required cost attribute
 3222 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3223 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3224                                 // non-matching short branch variant of some
                                // long branch?
 3226 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3227                                 // specifies the alignment that some part of the instruction (not
 3228                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3229                                 // function must be provided for the instruction
 3230 
 3231 //----------OPERANDS-----------------------------------------------------------
 3232 // Operand definitions must precede instruction definitions for correct parsing
 3233 // in the ADLC because operands constitute user defined types which are used in
 3234 // instruction definitions.
 3235 
 3236 //----------Simple Operands----------------------------------------------------
 3237 // Immediate Operands
 3238 // Integer Immediate
 3239 operand immI() %{
 3240   match(ConI);
 3241 
 3242   op_cost(10);
 3243   format %{ %}
 3244   interface(CONST_INTER);
 3245 %}
 3246 
 3247 // Constant for test vs zero
 3248 operand immI_0() %{
 3249   predicate(n->get_int() == 0);
 3250   match(ConI);
 3251 
 3252   op_cost(0);
 3253   format %{ %}
 3254   interface(CONST_INTER);
 3255 %}
 3256 
 3257 // Constant for increment
 3258 operand immI_1() %{
 3259   predicate(n->get_int() == 1);
 3260   match(ConI);
 3261 
 3262   op_cost(0);
 3263   format %{ %}
 3264   interface(CONST_INTER);
 3265 %}
 3266 
 3267 // Constant for decrement
 3268 operand immI_M1() %{
 3269   predicate(n->get_int() == -1);
 3270   match(ConI);
 3271 
 3272   op_cost(0);
 3273   format %{ %}
 3274   interface(CONST_INTER);
 3275 %}
 3276 
 3277 // Valid scale values for addressing modes
 3278 operand immI2() %{
 3279   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3280   match(ConI);
 3281 
 3282   format %{ %}
 3283   interface(CONST_INTER);
 3284 %}
 3285 
 3286 operand immI8() %{
 3287   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3288   match(ConI);
 3289 
 3290   op_cost(5);
 3291   format %{ %}
 3292   interface(CONST_INTER);
 3293 %}
 3294 
 3295 operand immU8() %{
 3296   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3297   match(ConI);
 3298 
 3299   op_cost(5);
 3300   format %{ %}
 3301   interface(CONST_INTER);
 3302 %}
 3303 
 3304 operand immI16() %{
 3305   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3306   match(ConI);
 3307 
 3308   op_cost(10);
 3309   format %{ %}
 3310   interface(CONST_INTER);
 3311 %}
 3312 
 3313 // Int Immediate non-negative
 3314 operand immU31()
 3315 %{
 3316   predicate(n->get_int() >= 0);
 3317   match(ConI);
 3318 
 3319   op_cost(0);
 3320   format %{ %}
 3321   interface(CONST_INTER);
 3322 %}
 3323 
 3324 // Constant for long shifts
 3325 operand immI_32() %{
 3326   predicate( n->get_int() == 32 );
 3327   match(ConI);
 3328 
 3329   op_cost(0);
 3330   format %{ %}
 3331   interface(CONST_INTER);
 3332 %}
 3333 
 3334 operand immI_1_31() %{
 3335   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3336   match(ConI);
 3337 
 3338   op_cost(0);
 3339   format %{ %}
 3340   interface(CONST_INTER);
 3341 %}
 3342 
 3343 operand immI_32_63() %{
 3344   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3345   match(ConI);
 3346   op_cost(0);
 3347 
 3348   format %{ %}
 3349   interface(CONST_INTER);
 3350 %}
 3351 
 3352 operand immI_2() %{
 3353   predicate( n->get_int() == 2 );
 3354   match(ConI);
 3355 
 3356   op_cost(0);
 3357   format %{ %}
 3358   interface(CONST_INTER);
 3359 %}
 3360 
 3361 operand immI_3() %{
 3362   predicate( n->get_int() == 3 );
 3363   match(ConI);
 3364 
 3365   op_cost(0);
 3366   format %{ %}
 3367   interface(CONST_INTER);
 3368 %}
 3369 
 3370 operand immI_4()
 3371 %{
 3372   predicate(n->get_int() == 4);
 3373   match(ConI);
 3374 
 3375   op_cost(0);
 3376   format %{ %}
 3377   interface(CONST_INTER);
 3378 %}
 3379 
 3380 operand immI_8()
 3381 %{
 3382   predicate(n->get_int() == 8);
 3383   match(ConI);
 3384 
 3385   op_cost(0);
 3386   format %{ %}
 3387   interface(CONST_INTER);
 3388 %}
 3389 
 3390 // Pointer Immediate
 3391 operand immP() %{
 3392   match(ConP);
 3393 
 3394   op_cost(10);
 3395   format %{ %}
 3396   interface(CONST_INTER);
 3397 %}
 3398 
 3399 // NULL Pointer Immediate
 3400 operand immP0() %{
 3401   predicate( n->get_ptr() == 0 );
 3402   match(ConP);
 3403   op_cost(0);
 3404 
 3405   format %{ %}
 3406   interface(CONST_INTER);
 3407 %}
 3408 
 3409 // Long Immediate
 3410 operand immL() %{
 3411   match(ConL);
 3412 
 3413   op_cost(20);
 3414   format %{ %}
 3415   interface(CONST_INTER);
 3416 %}
 3417 
 3418 // Long Immediate zero
 3419 operand immL0() %{
 3420   predicate( n->get_long() == 0L );
 3421   match(ConL);
 3422   op_cost(0);
 3423 
 3424   format %{ %}
 3425   interface(CONST_INTER);
 3426 %}
 3427 
// Long Immediate minus one
 3429 operand immL_M1() %{
 3430   predicate( n->get_long() == -1L );
 3431   match(ConL);
 3432   op_cost(0);
 3433 
 3434   format %{ %}
 3435   interface(CONST_INTER);
 3436 %}
 3437 
 3438 // Long immediate from 0 to 127.
 3439 // Used for a shorter form of long mul by 10.
 3440 operand immL_127() %{
 3441   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3442   match(ConL);
 3443   op_cost(0);
 3444 
 3445   format %{ %}
 3446   interface(CONST_INTER);
 3447 %}
 3448 
 3449 // Long Immediate: low 32-bit mask
 3450 operand immL_32bits() %{
 3451   predicate(n->get_long() == 0xFFFFFFFFL);
 3452   match(ConL);
 3453   op_cost(0);
 3454 
 3455   format %{ %}
 3456   interface(CONST_INTER);
 3457 %}
 3458 
// Long Immediate: 32-bit signed value
 3460 operand immL32() %{
 3461   predicate(n->get_long() == (int)(n->get_long()));
 3462   match(ConL);
 3463   op_cost(20);
 3464 
 3465   format %{ %}
 3466   interface(CONST_INTER);
 3467 %}
 3468 
// Double Immediate zero
 3470 operand immDPR0() %{
 3471   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3472   // bug that generates code such that NaNs compare equal to 0.0
 3473   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3474   match(ConD);
 3475 
 3476   op_cost(5);
 3477   format %{ %}
 3478   interface(CONST_INTER);
 3479 %}
 3480 
 3481 // Double Immediate one
 3482 operand immDPR1() %{
 3483   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3484   match(ConD);
 3485 
 3486   op_cost(5);
 3487   format %{ %}
 3488   interface(CONST_INTER);
 3489 %}
 3490 
 3491 // Double Immediate
 3492 operand immDPR() %{
 3493   predicate(UseSSE<=1);
 3494   match(ConD);
 3495 
 3496   op_cost(5);
 3497   format %{ %}
 3498   interface(CONST_INTER);
 3499 %}
 3500 
 3501 operand immD() %{
 3502   predicate(UseSSE>=2);
 3503   match(ConD);
 3504 
 3505   op_cost(5);
 3506   format %{ %}
 3507   interface(CONST_INTER);
 3508 %}
 3509 
 3510 // Double Immediate zero
 3511 operand immD0() %{
 3512   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3513   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3514   // compare equal to -0.0.
 3515   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3516   match(ConD);
 3517 
 3518   format %{ %}
 3519   interface(CONST_INTER);
 3520 %}
 3521 
 3522 // Float Immediate zero
 3523 operand immFPR0() %{
 3524   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3525   match(ConF);
 3526 
 3527   op_cost(5);
 3528   format %{ %}
 3529   interface(CONST_INTER);
 3530 %}
 3531 
 3532 // Float Immediate one
 3533 operand immFPR1() %{
 3534   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3535   match(ConF);
 3536 
 3537   op_cost(5);
 3538   format %{ %}
 3539   interface(CONST_INTER);
 3540 %}
 3541 
 3542 // Float Immediate
 3543 operand immFPR() %{
 3544   predicate( UseSSE == 0 );
 3545   match(ConF);
 3546 
 3547   op_cost(5);
 3548   format %{ %}
 3549   interface(CONST_INTER);
 3550 %}
 3551 
 3552 // Float Immediate
 3553 operand immF() %{
 3554   predicate(UseSSE >= 1);
 3555   match(ConF);
 3556 
 3557   op_cost(5);
 3558   format %{ %}
 3559   interface(CONST_INTER);
 3560 %}
 3561 
 3562 // Float Immediate zero.  Zero and not -0.0
 3563 operand immF0() %{
 3564   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3565   match(ConF);
 3566 
 3567   op_cost(5);
 3568   format %{ %}
 3569   interface(CONST_INTER);
 3570 %}
 3571 
 3572 // Immediates for special shifts (sign extend)
 3573 
// Shift counts used for sign-extending conversions
 3575 operand immI_16() %{
 3576   predicate( n->get_int() == 16 );
 3577   match(ConI);
 3578 
 3579   format %{ %}
 3580   interface(CONST_INTER);
 3581 %}
 3582 
 3583 operand immI_24() %{
 3584   predicate( n->get_int() == 24 );
 3585   match(ConI);
 3586 
 3587   format %{ %}
 3588   interface(CONST_INTER);
 3589 %}
 3590 
 3591 // Constant for byte-wide masking
 3592 operand immI_255() %{
 3593   predicate( n->get_int() == 255 );
 3594   match(ConI);
 3595 
 3596   format %{ %}
 3597   interface(CONST_INTER);
 3598 %}
 3599 
 3600 // Constant for short-wide masking
 3601 operand immI_65535() %{
 3602   predicate(n->get_int() == 65535);
 3603   match(ConI);
 3604 
 3605   format %{ %}
 3606   interface(CONST_INTER);
 3607 %}
 3608 
 3609 operand kReg()
 3610 %{
 3611   constraint(ALLOC_IN_RC(vectmask_reg));
 3612   match(RegVectMask);
 3613   format %{%}
 3614   interface(REG_INTER);
 3615 %}
 3616 
 3617 operand kReg_K1()
 3618 %{
 3619   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3620   match(RegVectMask);
 3621   format %{%}
 3622   interface(REG_INTER);
 3623 %}
 3624 
 3625 operand kReg_K2()
 3626 %{
 3627   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3628   match(RegVectMask);
 3629   format %{%}
 3630   interface(REG_INTER);
 3631 %}
 3632 
 3633 // Special Registers
 3634 operand kReg_K3()
 3635 %{
 3636   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3637   match(RegVectMask);
 3638   format %{%}
 3639   interface(REG_INTER);
 3640 %}
 3641 
 3642 operand kReg_K4()
 3643 %{
 3644   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3645   match(RegVectMask);
 3646   format %{%}
 3647   interface(REG_INTER);
 3648 %}
 3649 
 3650 operand kReg_K5()
 3651 %{
 3652   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3653   match(RegVectMask);
 3654   format %{%}
 3655   interface(REG_INTER);
 3656 %}
 3657 
 3658 operand kReg_K6()
 3659 %{
 3660   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3661   match(RegVectMask);
 3662   format %{%}
 3663   interface(REG_INTER);
 3664 %}
 3665 
 3666 // Special Registers
 3667 operand kReg_K7()
 3668 %{
 3669   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3670   match(RegVectMask);
 3671   format %{%}
 3672   interface(REG_INTER);
 3673 %}
 3674 
 3675 // Register Operands
 3676 // Integer Register
 3677 operand rRegI() %{
 3678   constraint(ALLOC_IN_RC(int_reg));
 3679   match(RegI);
 3680   match(xRegI);
 3681   match(eAXRegI);
 3682   match(eBXRegI);
 3683   match(eCXRegI);
 3684   match(eDXRegI);
 3685   match(eDIRegI);
 3686   match(eSIRegI);
 3687 
 3688   format %{ %}
 3689   interface(REG_INTER);
 3690 %}
 3691 
 3692 // Subset of Integer Register
 3693 operand xRegI(rRegI reg) %{
 3694   constraint(ALLOC_IN_RC(int_x_reg));
 3695   match(reg);
 3696   match(eAXRegI);
 3697   match(eBXRegI);
 3698   match(eCXRegI);
 3699   match(eDXRegI);
 3700 
 3701   format %{ %}
 3702   interface(REG_INTER);
 3703 %}
 3704 
 3705 // Special Registers
 3706 operand eAXRegI(xRegI reg) %{
 3707   constraint(ALLOC_IN_RC(eax_reg));
 3708   match(reg);
 3709   match(rRegI);
 3710 
 3711   format %{ "EAX" %}
 3712   interface(REG_INTER);
 3713 %}
 3714 
 3715 // Special Registers
 3716 operand eBXRegI(xRegI reg) %{
 3717   constraint(ALLOC_IN_RC(ebx_reg));
 3718   match(reg);
 3719   match(rRegI);
 3720 
 3721   format %{ "EBX" %}
 3722   interface(REG_INTER);
 3723 %}
 3724 
 3725 operand eCXRegI(xRegI reg) %{
 3726   constraint(ALLOC_IN_RC(ecx_reg));
 3727   match(reg);
 3728   match(rRegI);
 3729 
 3730   format %{ "ECX" %}
 3731   interface(REG_INTER);
 3732 %}
 3733 
 3734 operand eDXRegI(xRegI reg) %{
 3735   constraint(ALLOC_IN_RC(edx_reg));
 3736   match(reg);
 3737   match(rRegI);
 3738 
 3739   format %{ "EDX" %}
 3740   interface(REG_INTER);
 3741 %}
 3742 
 3743 operand eDIRegI(xRegI reg) %{
 3744   constraint(ALLOC_IN_RC(edi_reg));
 3745   match(reg);
 3746   match(rRegI);
 3747 
 3748   format %{ "EDI" %}
 3749   interface(REG_INTER);
 3750 %}
 3751 
 3752 operand naxRegI() %{
 3753   constraint(ALLOC_IN_RC(nax_reg));
 3754   match(RegI);
 3755   match(eCXRegI);
 3756   match(eDXRegI);
 3757   match(eSIRegI);
 3758   match(eDIRegI);
 3759 
 3760   format %{ %}
 3761   interface(REG_INTER);
 3762 %}
 3763 
 3764 operand nadxRegI() %{
 3765   constraint(ALLOC_IN_RC(nadx_reg));
 3766   match(RegI);
 3767   match(eBXRegI);
 3768   match(eCXRegI);
 3769   match(eSIRegI);
 3770   match(eDIRegI);
 3771 
 3772   format %{ %}
 3773   interface(REG_INTER);
 3774 %}
 3775 
 3776 operand ncxRegI() %{
 3777   constraint(ALLOC_IN_RC(ncx_reg));
 3778   match(RegI);
 3779   match(eAXRegI);
 3780   match(eDXRegI);
 3781   match(eSIRegI);
 3782   match(eDIRegI);
 3783 
 3784   format %{ %}
 3785   interface(REG_INTER);
 3786 %}
 3787 
// This operand was used by cmpFastUnlock, but conflicted with the 'object' reg.
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
 3797 %}
 3798 
 3799 // Pointer Register
 3800 operand anyRegP() %{
 3801   constraint(ALLOC_IN_RC(any_reg));
 3802   match(RegP);
 3803   match(eAXRegP);
 3804   match(eBXRegP);
 3805   match(eCXRegP);
 3806   match(eDIRegP);
 3807   match(eRegP);
 3808 
 3809   format %{ %}
 3810   interface(REG_INTER);
 3811 %}
 3812 
 3813 operand eRegP() %{
 3814   constraint(ALLOC_IN_RC(int_reg));
 3815   match(RegP);
 3816   match(eAXRegP);
 3817   match(eBXRegP);
 3818   match(eCXRegP);
 3819   match(eDIRegP);
 3820 
 3821   format %{ %}
 3822   interface(REG_INTER);
 3823 %}
 3824 
 3825 operand rRegP() %{
 3826   constraint(ALLOC_IN_RC(int_reg));
 3827   match(RegP);
 3828   match(eAXRegP);
 3829   match(eBXRegP);
 3830   match(eCXRegP);
 3831   match(eDIRegP);
 3832 
 3833   format %{ %}
 3834   interface(REG_INTER);
 3835 %}
 3836 
 3837 // On windows95, EBP is not safe to use for implicit null tests.
 3838 operand eRegP_no_EBP() %{
 3839   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3840   match(RegP);
 3841   match(eAXRegP);
 3842   match(eBXRegP);
 3843   match(eCXRegP);
 3844   match(eDIRegP);
 3845 
 3846   op_cost(100);
 3847   format %{ %}
 3848   interface(REG_INTER);
 3849 %}
 3850 
 3851 operand naxRegP() %{
 3852   constraint(ALLOC_IN_RC(nax_reg));
 3853   match(RegP);
 3854   match(eBXRegP);
 3855   match(eDXRegP);
 3856   match(eCXRegP);
 3857   match(eSIRegP);
 3858   match(eDIRegP);
 3859 
 3860   format %{ %}
 3861   interface(REG_INTER);
 3862 %}
 3863 
 3864 operand nabxRegP() %{
 3865   constraint(ALLOC_IN_RC(nabx_reg));
 3866   match(RegP);
 3867   match(eCXRegP);
 3868   match(eDXRegP);
 3869   match(eSIRegP);
 3870   match(eDIRegP);
 3871 
 3872   format %{ %}
 3873   interface(REG_INTER);
 3874 %}
 3875 
 3876 operand pRegP() %{
 3877   constraint(ALLOC_IN_RC(p_reg));
 3878   match(RegP);
 3879   match(eBXRegP);
 3880   match(eDXRegP);
 3881   match(eSIRegP);
 3882   match(eDIRegP);
 3883 
 3884   format %{ %}
 3885   interface(REG_INTER);
 3886 %}
 3887 
 3888 // Special Registers
 3889 // Return a pointer value
 3890 operand eAXRegP(eRegP reg) %{
 3891   constraint(ALLOC_IN_RC(eax_reg));
 3892   match(reg);
 3893   format %{ "EAX" %}
 3894   interface(REG_INTER);
 3895 %}
 3896 
 3897 // Used in AtomicAdd
 3898 operand eBXRegP(eRegP reg) %{
 3899   constraint(ALLOC_IN_RC(ebx_reg));
 3900   match(reg);
 3901   format %{ "EBX" %}
 3902   interface(REG_INTER);
 3903 %}
 3904 
 3905 // Tail-call (interprocedural jump) to interpreter
 3906 operand eCXRegP(eRegP reg) %{
 3907   constraint(ALLOC_IN_RC(ecx_reg));
 3908   match(reg);
 3909   format %{ "ECX" %}
 3910   interface(REG_INTER);
 3911 %}
 3912 
 3913 operand eDXRegP(eRegP reg) %{
 3914   constraint(ALLOC_IN_RC(edx_reg));
 3915   match(reg);
 3916   format %{ "EDX" %}
 3917   interface(REG_INTER);
 3918 %}
 3919 
 3920 operand eSIRegP(eRegP reg) %{
 3921   constraint(ALLOC_IN_RC(esi_reg));
 3922   match(reg);
 3923   format %{ "ESI" %}
 3924   interface(REG_INTER);
 3925 %}
 3926 
 3927 // Used in rep stosw
 3928 operand eDIRegP(eRegP reg) %{
 3929   constraint(ALLOC_IN_RC(edi_reg));
 3930   match(reg);
 3931   format %{ "EDI" %}
 3932   interface(REG_INTER);
 3933 %}
 3934 
 3935 operand eRegL() %{
 3936   constraint(ALLOC_IN_RC(long_reg));
 3937   match(RegL);
 3938   match(eADXRegL);
 3939 
 3940   format %{ %}
 3941   interface(REG_INTER);
 3942 %}
 3943 
 3944 operand eADXRegL( eRegL reg ) %{
 3945   constraint(ALLOC_IN_RC(eadx_reg));
 3946   match(reg);
 3947 
 3948   format %{ "EDX:EAX" %}
 3949   interface(REG_INTER);
 3950 %}
 3951 
 3952 operand eBCXRegL( eRegL reg ) %{
 3953   constraint(ALLOC_IN_RC(ebcx_reg));
 3954   match(reg);
 3955 
 3956   format %{ "EBX:ECX" %}
 3957   interface(REG_INTER);
 3958 %}
 3959 
 3960 operand eBDPRegL( eRegL reg ) %{
 3961   constraint(ALLOC_IN_RC(ebpd_reg));
 3962   match(reg);
 3963 
 3964   format %{ "EBP:EDI" %}
 3965   interface(REG_INTER);
 3966 %}
 3967 // Special case for integer high multiply
 3968 operand eADXRegL_low_only() %{
 3969   constraint(ALLOC_IN_RC(eadx_reg));
 3970   match(RegL);
 3971 
 3972   format %{ "EAX" %}
 3973   interface(REG_INTER);
 3974 %}
 3975 
 3976 // Flags register, used as output of compare instructions
 3977 operand rFlagsReg() %{
 3978   constraint(ALLOC_IN_RC(int_flags));
 3979   match(RegFlags);
 3980 
 3981   format %{ "EFLAGS" %}
 3982   interface(REG_INTER);
 3983 %}
 3984 
 3985 // Flags register, used as output of compare instructions
 3986 operand eFlagsReg() %{
 3987   constraint(ALLOC_IN_RC(int_flags));
 3988   match(RegFlags);
 3989 
 3990   format %{ "EFLAGS" %}
 3991   interface(REG_INTER);
 3992 %}
 3993 
 3994 // Flags register, used as output of FLOATING POINT compare instructions
 3995 operand eFlagsRegU() %{
 3996   constraint(ALLOC_IN_RC(int_flags));
 3997   match(RegFlags);
 3998 
 3999   format %{ "EFLAGS_U" %}
 4000   interface(REG_INTER);
 4001 %}
 4002 
 4003 operand eFlagsRegUCF() %{
 4004   constraint(ALLOC_IN_RC(int_flags));
 4005   match(RegFlags);
 4006   predicate(false);
 4007 
 4008   format %{ "EFLAGS_U_CF" %}
 4009   interface(REG_INTER);
 4010 %}
 4011 
 4012 // Condition Code Register used by long compare
 4013 operand flagsReg_long_LTGE() %{
 4014   constraint(ALLOC_IN_RC(int_flags));
 4015   match(RegFlags);
 4016   format %{ "FLAGS_LTGE" %}
 4017   interface(REG_INTER);
 4018 %}
 4019 operand flagsReg_long_EQNE() %{
 4020   constraint(ALLOC_IN_RC(int_flags));
 4021   match(RegFlags);
 4022   format %{ "FLAGS_EQNE" %}
 4023   interface(REG_INTER);
 4024 %}
 4025 operand flagsReg_long_LEGT() %{
 4026   constraint(ALLOC_IN_RC(int_flags));
 4027   match(RegFlags);
 4028   format %{ "FLAGS_LEGT" %}
 4029   interface(REG_INTER);
 4030 %}
 4031 
 4032 // Condition Code Register used by unsigned long compare
 4033 operand flagsReg_ulong_LTGE() %{
 4034   constraint(ALLOC_IN_RC(int_flags));
 4035   match(RegFlags);
 4036   format %{ "FLAGS_U_LTGE" %}
 4037   interface(REG_INTER);
 4038 %}
 4039 operand flagsReg_ulong_EQNE() %{
 4040   constraint(ALLOC_IN_RC(int_flags));
 4041   match(RegFlags);
 4042   format %{ "FLAGS_U_EQNE" %}
 4043   interface(REG_INTER);
 4044 %}
 4045 operand flagsReg_ulong_LEGT() %{
 4046   constraint(ALLOC_IN_RC(int_flags));
 4047   match(RegFlags);
 4048   format %{ "FLAGS_U_LEGT" %}
 4049   interface(REG_INTER);
 4050 %}
 4051 
 4052 // Float register operands
 4053 operand regDPR() %{
 4054   predicate( UseSSE < 2 );
 4055   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4056   match(RegD);
 4057   match(regDPR1);
 4058   match(regDPR2);
 4059   format %{ %}
 4060   interface(REG_INTER);
 4061 %}
 4062 
 4063 operand regDPR1(regDPR reg) %{
 4064   predicate( UseSSE < 2 );
 4065   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4066   match(reg);
 4067   format %{ "FPR1" %}
 4068   interface(REG_INTER);
 4069 %}
 4070 
 4071 operand regDPR2(regDPR reg) %{
 4072   predicate( UseSSE < 2 );
 4073   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4074   match(reg);
 4075   format %{ "FPR2" %}
 4076   interface(REG_INTER);
 4077 %}
 4078 
 4079 operand regnotDPR1(regDPR reg) %{
 4080   predicate( UseSSE < 2 );
 4081   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4082   match(reg);
 4083   format %{ %}
 4084   interface(REG_INTER);
 4085 %}
 4086 
 4087 // Float register operands
 4088 operand regFPR() %{
 4089   predicate( UseSSE < 2 );
 4090   constraint(ALLOC_IN_RC(fp_flt_reg));
 4091   match(RegF);
 4092   match(regFPR1);
 4093   format %{ %}
 4094   interface(REG_INTER);
 4095 %}
 4096 
 4097 // Float register operands
 4098 operand regFPR1(regFPR reg) %{
 4099   predicate( UseSSE < 2 );
 4100   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4101   match(reg);
 4102   format %{ "FPR1" %}
 4103   interface(REG_INTER);
 4104 %}
 4105 
 4106 // XMM Float register operands
 4107 operand regF() %{
 4108   predicate( UseSSE>=1 );
 4109   constraint(ALLOC_IN_RC(float_reg_legacy));
 4110   match(RegF);
 4111   format %{ %}
 4112   interface(REG_INTER);
 4113 %}
 4114 
 4115 operand legRegF() %{
 4116   predicate( UseSSE>=1 );
 4117   constraint(ALLOC_IN_RC(float_reg_legacy));
 4118   match(RegF);
 4119   format %{ %}
 4120   interface(REG_INTER);
 4121 %}
 4122 
 4123 // Float register operands
 4124 operand vlRegF() %{
 4125    constraint(ALLOC_IN_RC(float_reg_vl));
 4126    match(RegF);
 4127 
 4128    format %{ %}
 4129    interface(REG_INTER);
 4130 %}
 4131 
 4132 // XMM Double register operands
 4133 operand regD() %{
 4134   predicate( UseSSE>=2 );
 4135   constraint(ALLOC_IN_RC(double_reg_legacy));
 4136   match(RegD);
 4137   format %{ %}
 4138   interface(REG_INTER);
 4139 %}
 4140 
 4141 // Double register operands
 4142 operand legRegD() %{
 4143   predicate( UseSSE>=2 );
 4144   constraint(ALLOC_IN_RC(double_reg_legacy));
 4145   match(RegD);
 4146   format %{ %}
 4147   interface(REG_INTER);
 4148 %}
 4149 
 4150 operand vlRegD() %{
 4151    constraint(ALLOC_IN_RC(double_reg_vl));
 4152    match(RegD);
 4153 
 4154    format %{ %}
 4155    interface(REG_INTER);
 4156 %}
 4157 
 4158 //----------Memory Operands----------------------------------------------------
 4159 // Direct Memory Operand
 4160 operand direct(immP addr) %{
 4161   match(addr);
 4162 
 4163   format %{ "[$addr]" %}
 4164   interface(MEMORY_INTER) %{
 4165     base(0xFFFFFFFF);
 4166     index(0x4);
 4167     scale(0x0);
 4168     disp($addr);
 4169   %}
 4170 %}
 4171 
 4172 // Indirect Memory Operand
 4173 operand indirect(eRegP reg) %{
 4174   constraint(ALLOC_IN_RC(int_reg));
 4175   match(reg);
 4176 
 4177   format %{ "[$reg]" %}
 4178   interface(MEMORY_INTER) %{
 4179     base($reg);
 4180     index(0x4);
 4181     scale(0x0);
 4182     disp(0x0);
 4183   %}
 4184 %}
 4185 
 4186 // Indirect Memory Plus Short Offset Operand
 4187 operand indOffset8(eRegP reg, immI8 off) %{
 4188   match(AddP reg off);
 4189 
 4190   format %{ "[$reg + $off]" %}
 4191   interface(MEMORY_INTER) %{
 4192     base($reg);
 4193     index(0x4);
 4194     scale(0x0);
 4195     disp($off);
 4196   %}
 4197 %}
 4198 
 4199 // Indirect Memory Plus Long Offset Operand
 4200 operand indOffset32(eRegP reg, immI off) %{
 4201   match(AddP reg off);
 4202 
 4203   format %{ "[$reg + $off]" %}
 4204   interface(MEMORY_INTER) %{
 4205     base($reg);
 4206     index(0x4);
 4207     scale(0x0);
 4208     disp($off);
 4209   %}
 4210 %}
 4211 
 4212 // Indirect Memory Plus Long Offset Operand
 4213 operand indOffset32X(rRegI reg, immP off) %{
 4214   match(AddP off reg);
 4215 
 4216   format %{ "[$reg + $off]" %}
 4217   interface(MEMORY_INTER) %{
 4218     base($reg);
 4219     index(0x4);
 4220     scale(0x0);
 4221     disp($off);
 4222   %}
 4223 %}
 4224 
 4225 // Indirect Memory Plus Index Register Plus Offset Operand
 4226 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4227   match(AddP (AddP reg ireg) off);
 4228 
 4229   op_cost(10);
 4230   format %{"[$reg + $off + $ireg]" %}
 4231   interface(MEMORY_INTER) %{
 4232     base($reg);
 4233     index($ireg);
 4234     scale(0x0);
 4235     disp($off);
 4236   %}
 4237 %}
 4238 
 4239 // Indirect Memory Plus Index Register Plus Offset Operand
 4240 operand indIndex(eRegP reg, rRegI ireg) %{
 4241   match(AddP reg ireg);
 4242 
 4243   op_cost(10);
 4244   format %{"[$reg + $ireg]" %}
 4245   interface(MEMORY_INTER) %{
 4246     base($reg);
 4247     index($ireg);
 4248     scale(0x0);
 4249     disp(0x0);
 4250   %}
 4251 %}
 4252 
 4253 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
 4255 // // -------------------------------------------------------------------------
 4256 // // Scaled Memory Operands
 4257 // // Indirect Memory Times Scale Plus Offset Operand
 4258 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4259 //   match(AddP off (LShiftI ireg scale));
 4260 //
 4261 //   op_cost(10);
 4262 //   format %{"[$off + $ireg << $scale]" %}
 4263 //   interface(MEMORY_INTER) %{
 4264 //     base(0x4);
 4265 //     index($ireg);
 4266 //     scale($scale);
 4267 //     disp($off);
 4268 //   %}
 4269 // %}
 4270 
 4271 // Indirect Memory Times Scale Plus Index Register
 4272 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4273   match(AddP reg (LShiftI ireg scale));
 4274 
 4275   op_cost(10);
 4276   format %{"[$reg + $ireg << $scale]" %}
 4277   interface(MEMORY_INTER) %{
 4278     base($reg);
 4279     index($ireg);
 4280     scale($scale);
 4281     disp(0x0);
 4282   %}
 4283 %}
 4284 
 4285 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4286 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4287   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4288 
 4289   op_cost(10);
 4290   format %{"[$reg + $off + $ireg << $scale]" %}
 4291   interface(MEMORY_INTER) %{
 4292     base($reg);
 4293     index($ireg);
 4294     scale($scale);
 4295     disp($off);
 4296   %}
 4297 %}
 4298 
 4299 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
 4301 // the first word of the long.  If the load-long destination overlaps with
 4302 // registers used in the addressing expression, the 2nd half will be loaded
 4303 // from a clobbered address.  Fix this by requiring that load-long use
 4304 // address registers that do not overlap with the load-long target.
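
// Illustrative sketch (not ADL) of the hazard this avoids, if the low half of
// the destination were allowed to alias the base register:
//
//   dst_lo = ((int*)base)[0];   // clobbers 'base' when dst_lo aliases it
//   dst_hi = ((int*)base)[1];   // second word now loaded from a bad address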
 4305 
 4306 // load-long support
 4307 operand load_long_RegP() %{
 4308   constraint(ALLOC_IN_RC(esi_reg));
 4309   match(RegP);
 4310   match(eSIRegP);
 4311   op_cost(100);
 4312   format %{  %}
 4313   interface(REG_INTER);
 4314 %}
 4315 
 4316 // Indirect Memory Operand Long
 4317 operand load_long_indirect(load_long_RegP reg) %{
 4318   constraint(ALLOC_IN_RC(esi_reg));
 4319   match(reg);
 4320 
 4321   format %{ "[$reg]" %}
 4322   interface(MEMORY_INTER) %{
 4323     base($reg);
 4324     index(0x4);
 4325     scale(0x0);
 4326     disp(0x0);
 4327   %}
 4328 %}
 4329 
 4330 // Indirect Memory Plus Long Offset Operand
 4331 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4332   match(AddP reg off);
 4333 
 4334   format %{ "[$reg + $off]" %}
 4335   interface(MEMORY_INTER) %{
 4336     base($reg);
 4337     index(0x4);
 4338     scale(0x0);
 4339     disp($off);
 4340   %}
 4341 %}
 4342 
 4343 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4344 
 4345 
 4346 //----------Special Memory Operands--------------------------------------------
 4347 // Stack Slot Operand - This operand is used for loading and storing temporary
 4348 //                      values on the stack where a match requires a value to
 4349 //                      flow through memory.
 4350 operand stackSlotP(sRegP reg) %{
 4351   constraint(ALLOC_IN_RC(stack_slots));
 4352   // No match rule because this operand is only generated in matching
 4353   format %{ "[$reg]" %}
 4354   interface(MEMORY_INTER) %{
 4355     base(0x4);   // ESP
 4356     index(0x4);  // No Index
 4357     scale(0x0);  // No Scale
 4358     disp($reg);  // Stack Offset
 4359   %}
 4360 %}
 4361 
 4362 operand stackSlotI(sRegI reg) %{
 4363   constraint(ALLOC_IN_RC(stack_slots));
 4364   // No match rule because this operand is only generated in matching
 4365   format %{ "[$reg]" %}
 4366   interface(MEMORY_INTER) %{
 4367     base(0x4);   // ESP
 4368     index(0x4);  // No Index
 4369     scale(0x0);  // No Scale
 4370     disp($reg);  // Stack Offset
 4371   %}
 4372 %}
 4373 
 4374 operand stackSlotF(sRegF reg) %{
 4375   constraint(ALLOC_IN_RC(stack_slots));
 4376   // No match rule because this operand is only generated in matching
 4377   format %{ "[$reg]" %}
 4378   interface(MEMORY_INTER) %{
 4379     base(0x4);   // ESP
 4380     index(0x4);  // No Index
 4381     scale(0x0);  // No Scale
 4382     disp($reg);  // Stack Offset
 4383   %}
 4384 %}
 4385 
 4386 operand stackSlotD(sRegD reg) %{
 4387   constraint(ALLOC_IN_RC(stack_slots));
 4388   // No match rule because this operand is only generated in matching
 4389   format %{ "[$reg]" %}
 4390   interface(MEMORY_INTER) %{
 4391     base(0x4);   // ESP
 4392     index(0x4);  // No Index
 4393     scale(0x0);  // No Scale
 4394     disp($reg);  // Stack Offset
 4395   %}
 4396 %}
 4397 
 4398 operand stackSlotL(sRegL reg) %{
 4399   constraint(ALLOC_IN_RC(stack_slots));
 4400   // No match rule because this operand is only generated in matching
 4401   format %{ "[$reg]" %}
 4402   interface(MEMORY_INTER) %{
 4403     base(0x4);   // ESP
 4404     index(0x4);  // No Index
 4405     scale(0x0);  // No Scale
 4406     disp($reg);  // Stack Offset
 4407   %}
 4408 %}
 4409 
 4410 //----------Conditional Branch Operands----------------------------------------
 4411 // Comparison Op  - This is the operation of the comparison, and is limited to
 4412 //                  the following set of codes:
 4413 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4414 //
 4415 // Other attributes of the comparison, such as unsignedness, are specified
 4416 // by the comparison instruction that sets a condition code flags register.
 4417 // That result is represented by a flags operand whose subtype is appropriate
 4418 // to the unsignedness (etc.) of the comparison.
 4419 //
 4420 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4421 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4422 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4423 
 4424 // Comparison Code
 4425 operand cmpOp() %{
 4426   match(Bool);
 4427 
 4428   format %{ "" %}
 4429   interface(COND_INTER) %{
 4430     equal(0x4, "e");
 4431     not_equal(0x5, "ne");
 4432     less(0xC, "l");
 4433     greater_equal(0xD, "ge");
 4434     less_equal(0xE, "le");
 4435     greater(0xF, "g");
 4436     overflow(0x0, "o");
 4437     no_overflow(0x1, "no");
 4438   %}
 4439 %}
 4440 
 4441 // Comparison Code, unsigned compare.  Used by FP also, with
 4442 // C2 (unordered) turned into GT or LT already.  The other bits
 4443 // C0 and C3 are turned into Carry & Zero flags.
 4444 operand cmpOpU() %{
 4445   match(Bool);
 4446 
 4447   format %{ "" %}
 4448   interface(COND_INTER) %{
 4449     equal(0x4, "e");
 4450     not_equal(0x5, "ne");
 4451     less(0x2, "b");
 4452     greater_equal(0x3, "nb");
 4453     less_equal(0x6, "be");
 4454     greater(0x7, "nbe");
 4455     overflow(0x0, "o");
 4456     no_overflow(0x1, "no");
 4457   %}
 4458 %}
 4459 
 4460 // Floating comparisons that don't require any fixup for the unordered case
 4461 operand cmpOpUCF() %{
 4462   match(Bool);
 4463   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4464             n->as_Bool()->_test._test == BoolTest::ge ||
 4465             n->as_Bool()->_test._test == BoolTest::le ||
 4466             n->as_Bool()->_test._test == BoolTest::gt);
 4467   format %{ "" %}
 4468   interface(COND_INTER) %{
 4469     equal(0x4, "e");
 4470     not_equal(0x5, "ne");
 4471     less(0x2, "b");
 4472     greater_equal(0x3, "nb");
 4473     less_equal(0x6, "be");
 4474     greater(0x7, "nbe");
 4475     overflow(0x0, "o");
 4476     no_overflow(0x1, "no");
 4477   %}
 4478 %}
 4479 
 4480 
 4481 // Floating comparisons that can be fixed up with extra conditional jumps
 4482 operand cmpOpUCF2() %{
 4483   match(Bool);
 4484   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4485             n->as_Bool()->_test._test == BoolTest::eq);
 4486   format %{ "" %}
 4487   interface(COND_INTER) %{
 4488     equal(0x4, "e");
 4489     not_equal(0x5, "ne");
 4490     less(0x2, "b");
 4491     greater_equal(0x3, "nb");
 4492     less_equal(0x6, "be");
 4493     greater(0x7, "nbe");
 4494     overflow(0x0, "o");
 4495     no_overflow(0x1, "no");
 4496   %}
 4497 %}
 4498 
 4499 // Comparison Code for FP conditional move
 4500 operand cmpOp_fcmov() %{
 4501   match(Bool);
 4502 
 4503   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4504             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4505   format %{ "" %}
 4506   interface(COND_INTER) %{
 4507     equal        (0x0C8);
 4508     not_equal    (0x1C8);
 4509     less         (0x0C0);
 4510     greater_equal(0x1C0);
 4511     less_equal   (0x0D0);
 4512     greater      (0x1D0);
 4513     overflow(0x0, "o"); // not really supported by the instruction
 4514     no_overflow(0x1, "no"); // not really supported by the instruction
 4515   %}
 4516 %}
 4517 
 4518 // Comparison Code used in long compares
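      // Note that the encodings are the mirror image of cmpOp above (less uses
      // the "g" encoding, greater uses "l", and so on), matching the commuted
      // operand order implied by the name.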
 4519 operand cmpOp_commute() %{
 4520   match(Bool);
 4521 
 4522   format %{ "" %}
 4523   interface(COND_INTER) %{
 4524     equal(0x4, "e");
 4525     not_equal(0x5, "ne");
 4526     less(0xF, "g");
 4527     greater_equal(0xE, "le");
 4528     less_equal(0xD, "ge");
 4529     greater(0xC, "l");
 4530     overflow(0x0, "o");
 4531     no_overflow(0x1, "no");
 4532   %}
 4533 %}
 4534 
 4535 // Comparison Code used in unsigned long compares
 4536 operand cmpOpU_commute() %{
 4537   match(Bool);
 4538 
 4539   format %{ "" %}
 4540   interface(COND_INTER) %{
 4541     equal(0x4, "e");
 4542     not_equal(0x5, "ne");
 4543     less(0x7, "nbe");
 4544     greater_equal(0x6, "be");
 4545     less_equal(0x3, "nb");
 4546     greater(0x2, "b");
 4547     overflow(0x0, "o");
 4548     no_overflow(0x1, "no");
 4549   %}
 4550 %}
 4551 
 4552 //----------OPERAND CLASSES----------------------------------------------------
 4553 // Operand Classes are groups of operands that are used to simplify
 4554 // instruction definitions by not requiring the AD writer to specify separate
 4555 // instructions for every form of operand when the instruction accepts
 4556 // multiple operand types with the same basic encoding and format.  The classic
 4557 // case of this is memory operands.
 4558 
 4559 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4560                indIndex, indIndexScale, indIndexScaleOffset);
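      // For example, a single definition such as
      //   instruct loadI(rRegI dst, memory mem) %{ match(Set dst (LoadI mem)); ... %}
      // (defined further below) covers direct, register-indirect and all
      // index/scale/offset addressing forms with one match rule.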
 4561 
 4562 // Long memory operations are encoded as 2 instructions, the second using a
 4563 // +4 offset.  This means some kind of offset is always required, and you
 4564 // cannot use an oop as the offset (as is done when addressing static globals).
 4565 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4566                     indIndex, indIndexScale, indIndexScaleOffset);
 4567 
 4568 
 4569 //----------PIPELINE-----------------------------------------------------------
 4570 // Rules which define the behavior of the target architecture's pipeline.
 4571 pipeline %{
 4572 
 4573 //----------ATTRIBUTES---------------------------------------------------------
 4574 attributes %{
 4575   variable_size_instructions;        // Variable-sized instructions
 4576   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4577   instruction_unit_size = 1;         // An instruction is 1 byte long
 4578   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4579   instruction_fetch_units = 1;       // of 16 bytes
 4580 
 4581   // List of nop instructions
 4582   nops( MachNop );
 4583 %}
 4584 
 4585 //----------RESOURCES----------------------------------------------------------
 4586 // Resources are the functional units available to the machine
 4587 
 4588 // Generic P2/P3 pipeline
 4589 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4590 // 3 instructions decoded per cycle.
 4591 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4592 // 2 ALU ops; only ALU0 handles mul/div instructions.
 4593 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4594            MS0, MS1, MEM = MS0 | MS1,
 4595            BR, FPU,
 4596            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4597 
 4598 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4599 // Pipeline Description specifies the stages in the machine's pipeline
 4600 
 4601 // Generic P2/P3 pipeline
 4602 pipe_desc(S0, S1, S2, S3, S4, S5);
 4603 
 4604 //----------PIPELINE CLASSES---------------------------------------------------
 4605 // Pipeline Classes describe the stages in which input and output are
 4606 // referenced by the hardware pipeline.
 4607 
 4608 // Naming convention: ialu or fpu
 4609 // Then: _reg
 4610 // Then: _reg if there is a 2nd register
 4611 // Then: _long if it's a pair of instructions implementing a long
 4612 // Then: _fat if it requires the big decoder
 4613 //   Or: _mem if it requires the big decoder and a memory unit.
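      // For example, "ialu_reg_mem" below is an integer ALU op writing a
      // register and reading memory, so it needs the big decoder (D0) plus a
      // MEM unit, while "ialu_reg_reg" needs only any decoder and any ALU.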
 4614 
 4615 // Integer ALU reg operation
 4616 pipe_class ialu_reg(rRegI dst) %{
 4617     single_instruction;
 4618     dst    : S4(write);
 4619     dst    : S3(read);
 4620     DECODE : S0;        // any decoder
 4621     ALU    : S3;        // any alu
 4622 %}
 4623 
 4624 // Long ALU reg operation
 4625 pipe_class ialu_reg_long(eRegL dst) %{
 4626     instruction_count(2);
 4627     dst    : S4(write);
 4628     dst    : S3(read);
 4629     DECODE : S0(2);     // any 2 decoders
 4630     ALU    : S3(2);     // both alus
 4631 %}
 4632 
 4633 // Integer ALU reg operation using big decoder
 4634 pipe_class ialu_reg_fat(rRegI dst) %{
 4635     single_instruction;
 4636     dst    : S4(write);
 4637     dst    : S3(read);
 4638     D0     : S0;        // big decoder only
 4639     ALU    : S3;        // any alu
 4640 %}
 4641 
 4642 // Long ALU reg operation using big decoder
 4643 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4644     instruction_count(2);
 4645     dst    : S4(write);
 4646     dst    : S3(read);
 4647     D0     : S0(2);     // big decoder only; twice
 4648     ALU    : S3(2);     // any 2 alus
 4649 %}
 4650 
 4651 // Integer ALU reg-reg operation
 4652 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4653     single_instruction;
 4654     dst    : S4(write);
 4655     src    : S3(read);
 4656     DECODE : S0;        // any decoder
 4657     ALU    : S3;        // any alu
 4658 %}
 4659 
 4660 // Long ALU reg-reg operation
 4661 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4662     instruction_count(2);
 4663     dst    : S4(write);
 4664     src    : S3(read);
 4665     DECODE : S0(2);     // any 2 decoders
 4666     ALU    : S3(2);     // both alus
 4667 %}
 4668 
 4669 // Integer ALU reg-reg operation
 4670 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4671     single_instruction;
 4672     dst    : S4(write);
 4673     src    : S3(read);
 4674     D0     : S0;        // big decoder only
 4675     ALU    : S3;        // any alu
 4676 %}
 4677 
 4678 // Long ALU reg-reg operation
 4679 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4680     instruction_count(2);
 4681     dst    : S4(write);
 4682     src    : S3(read);
 4683     D0     : S0(2);     // big decoder only; twice
 4684     ALU    : S3(2);     // both alus
 4685 %}
 4686 
 4687 // Integer ALU reg-mem operation
 4688 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4689     single_instruction;
 4690     dst    : S5(write);
 4691     mem    : S3(read);
 4692     D0     : S0;        // big decoder only
 4693     ALU    : S4;        // any alu
 4694     MEM    : S3;        // any mem
 4695 %}
 4696 
 4697 // Long ALU reg-mem operation
 4698 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4699     instruction_count(2);
 4700     dst    : S5(write);
 4701     mem    : S3(read);
 4702     D0     : S0(2);     // big decoder only; twice
 4703     ALU    : S4(2);     // any 2 alus
 4704     MEM    : S3(2);     // both mems
 4705 %}
 4706 
 4707 // Integer mem operation (prefetch)
 4708 pipe_class ialu_mem(memory mem)
 4709 %{
 4710     single_instruction;
 4711     mem    : S3(read);
 4712     D0     : S0;        // big decoder only
 4713     MEM    : S3;        // any mem
 4714 %}
 4715 
 4716 // Integer Store to Memory
 4717 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4718     single_instruction;
 4719     mem    : S3(read);
 4720     src    : S5(read);
 4721     D0     : S0;        // big decoder only
 4722     ALU    : S4;        // any alu
 4723     MEM    : S3;
 4724 %}
 4725 
 4726 // Long Store to Memory
 4727 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4728     instruction_count(2);
 4729     mem    : S3(read);
 4730     src    : S5(read);
 4731     D0     : S0(2);     // big decoder only; twice
 4732     ALU    : S4(2);     // any 2 alus
 4733     MEM    : S3(2);     // Both mems
 4734 %}
 4735 
 4736 // Integer Store to Memory
 4737 pipe_class ialu_mem_imm(memory mem) %{
 4738     single_instruction;
 4739     mem    : S3(read);
 4740     D0     : S0;        // big decoder only
 4741     ALU    : S4;        // any alu
 4742     MEM    : S3;
 4743 %}
 4744 
 4745 // Integer ALU0 reg-reg operation
 4746 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4747     single_instruction;
 4748     dst    : S4(write);
 4749     src    : S3(read);
 4750     D0     : S0;        // Big decoder only
 4751     ALU0   : S3;        // only alu0
 4752 %}
 4753 
 4754 // Integer ALU0 reg-mem operation
 4755 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4756     single_instruction;
 4757     dst    : S5(write);
 4758     mem    : S3(read);
 4759     D0     : S0;        // big decoder only
 4760     ALU0   : S4;        // ALU0 only
 4761     MEM    : S3;        // any mem
 4762 %}
 4763 
 4764 // Integer ALU reg-reg operation
 4765 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4766     single_instruction;
 4767     cr     : S4(write);
 4768     src1   : S3(read);
 4769     src2   : S3(read);
 4770     DECODE : S0;        // any decoder
 4771     ALU    : S3;        // any alu
 4772 %}
 4773 
 4774 // Integer ALU reg-imm operation
 4775 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4776     single_instruction;
 4777     cr     : S4(write);
 4778     src1   : S3(read);
 4779     DECODE : S0;        // any decoder
 4780     ALU    : S3;        // any alu
 4781 %}
 4782 
 4783 // Integer ALU reg-mem operation
 4784 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4785     single_instruction;
 4786     cr     : S4(write);
 4787     src1   : S3(read);
 4788     src2   : S3(read);
 4789     D0     : S0;        // big decoder only
 4790     ALU    : S4;        // any alu
 4791     MEM    : S3;
 4792 %}
 4793 
 4794 // Conditional move reg-reg
 4795 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4796     instruction_count(4);
 4797     y      : S4(read);
 4798     q      : S3(read);
 4799     p      : S3(read);
 4800     DECODE : S0(4);     // any 4 decoders
 4801 %}
 4802 
 4803 // Conditional move reg-reg
 4804 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4805     single_instruction;
 4806     dst    : S4(write);
 4807     src    : S3(read);
 4808     cr     : S3(read);
 4809     DECODE : S0;        // any decoder
 4810 %}
 4811 
 4812 // Conditional move reg-mem
 4813 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4814     single_instruction;
 4815     dst    : S4(write);
 4816     src    : S3(read);
 4817     cr     : S3(read);
 4818     DECODE : S0;        // any decoder
 4819     MEM    : S3;
 4820 %}
 4821 
 4822 // Conditional move reg-reg long
 4823 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4824     single_instruction;
 4825     dst    : S4(write);
 4826     src    : S3(read);
 4827     cr     : S3(read);
 4828     DECODE : S0(2);     // any 2 decoders
 4829 %}
 4830 
 4831 // Conditional move double reg-reg
 4832 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4833     single_instruction;
 4834     dst    : S4(write);
 4835     src    : S3(read);
 4836     cr     : S3(read);
 4837     DECODE : S0;        // any decoder
 4838 %}
 4839 
 4840 // Float reg-reg operation
 4841 pipe_class fpu_reg(regDPR dst) %{
 4842     instruction_count(2);
 4843     dst    : S3(read);
 4844     DECODE : S0(2);     // any 2 decoders
 4845     FPU    : S3;
 4846 %}
 4847 
 4848 // Float reg-reg operation
 4849 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4850     instruction_count(2);
 4851     dst    : S4(write);
 4852     src    : S3(read);
 4853     DECODE : S0(2);     // any 2 decoders
 4854     FPU    : S3;
 4855 %}
 4856 
 4857 // Float reg-reg operation
 4858 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4859     instruction_count(3);
 4860     dst    : S4(write);
 4861     src1   : S3(read);
 4862     src2   : S3(read);
 4863     DECODE : S0(3);     // any 3 decoders
 4864     FPU    : S3(2);
 4865 %}
 4866 
 4867 // Float reg-reg operation
 4868 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4869     instruction_count(4);
 4870     dst    : S4(write);
 4871     src1   : S3(read);
 4872     src2   : S3(read);
 4873     src3   : S3(read);
 4874     DECODE : S0(4);     // any 4 decoders
 4875     FPU    : S3(2);
 4876 %}
 4877 
 4878 // Float reg-reg operation
 4879 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4880     instruction_count(4);
 4881     dst    : S4(write);
 4882     src1   : S3(read);
 4883     src2   : S3(read);
 4884     src3   : S3(read);
 4885     DECODE : S1(3);     // any 3 decoders
 4886     D0     : S0;        // Big decoder only
 4887     FPU    : S3(2);
 4888     MEM    : S3;
 4889 %}
 4890 
 4891 // Float reg-mem operation
 4892 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4893     instruction_count(2);
 4894     dst    : S5(write);
 4895     mem    : S3(read);
 4896     D0     : S0;        // big decoder only
 4897     DECODE : S1;        // any decoder for FPU POP
 4898     FPU    : S4;
 4899     MEM    : S3;        // any mem
 4900 %}
 4901 
 4902 // Float reg-mem operation
 4903 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4904     instruction_count(3);
 4905     dst    : S5(write);
 4906     src1   : S3(read);
 4907     mem    : S3(read);
 4908     D0     : S0;        // big decoder only
 4909     DECODE : S1(2);     // any 2 decoders; one for the FPU POP
 4910     FPU    : S4;
 4911     MEM    : S3;        // any mem
 4912 %}
 4913 
 4914 // Float mem-reg operation
 4915 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4916     instruction_count(2);
 4917     src    : S5(read);
 4918     mem    : S3(read);
 4919     DECODE : S0;        // any decoder for FPU PUSH
 4920     D0     : S1;        // big decoder only
 4921     FPU    : S4;
 4922     MEM    : S3;        // any mem
 4923 %}
 4924 
 4925 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4926     instruction_count(3);
 4927     src1   : S3(read);
 4928     src2   : S3(read);
 4929     mem    : S3(read);
 4930     DECODE : S0(2);     // any 2 decoders; one for the FPU PUSH
 4931     D0     : S1;        // big decoder only
 4932     FPU    : S4;
 4933     MEM    : S3;        // any mem
 4934 %}
 4935 
 4936 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4937     instruction_count(3);
 4938     src1   : S3(read);
 4939     src2   : S3(read);
 4940     mem    : S4(read);
 4941     DECODE : S0;        // any decoder for FPU PUSH
 4942     D0     : S0(2);     // big decoder only
 4943     FPU    : S4;
 4944     MEM    : S3(2);     // any mem
 4945 %}
 4946 
 4947 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4948     instruction_count(2);
 4949     src1   : S3(read);
 4950     dst    : S4(read);
 4951     D0     : S0(2);     // big decoder only
 4952     MEM    : S3(2);     // any mem
 4953 %}
 4954 
 4955 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4956     instruction_count(3);
 4957     src1   : S3(read);
 4958     src2   : S3(read);
 4959     dst    : S4(read);
 4960     D0     : S0(3);     // big decoder only
 4961     FPU    : S4;
 4962     MEM    : S3(3);     // any mem
 4963 %}
 4964 
 4965 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4966     instruction_count(3);
 4967     src1   : S4(read);
 4968     mem    : S4(read);
 4969     DECODE : S0;        // any decoder for FPU PUSH
 4970     D0     : S0(2);     // big decoder only
 4971     FPU    : S4;
 4972     MEM    : S3(2);     // any mem
 4973 %}
 4974 
 4975 // Float load constant
 4976 pipe_class fpu_reg_con(regDPR dst) %{
 4977     instruction_count(2);
 4978     dst    : S5(write);
 4979     D0     : S0;        // big decoder only for the load
 4980     DECODE : S1;        // any decoder for FPU POP
 4981     FPU    : S4;
 4982     MEM    : S3;        // any mem
 4983 %}
 4984 
 4985 // Float load constant
 4986 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4987     instruction_count(3);
 4988     dst    : S5(write);
 4989     src    : S3(read);
 4990     D0     : S0;        // big decoder only for the load
 4991     DECODE : S1(2);     // any 2 decoders; one for the FPU POP
 4992     FPU    : S4;
 4993     MEM    : S3;        // any mem
 4994 %}
 4995 
 4996 // UnConditional branch
 4997 pipe_class pipe_jmp( label labl ) %{
 4998     single_instruction;
 4999     BR   : S3;
 5000 %}
 5001 
 5002 // Conditional branch
 5003 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 5004     single_instruction;
 5005     cr    : S1(read);
 5006     BR    : S3;
 5007 %}
 5008 
 5009 // Allocation idiom
 5010 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 5011     instruction_count(1); force_serialization;
 5012     fixed_latency(6);
 5013     heap_ptr : S3(read);
 5014     DECODE   : S0(3);
 5015     D0       : S2;
 5016     MEM      : S3;
 5017     ALU      : S3(2);
 5018     dst      : S5(write);
 5019     BR       : S5;
 5020 %}
 5021 
 5022 // Generic big/slow expanded idiom
 5023 pipe_class pipe_slow(  ) %{
 5024     instruction_count(10); multiple_bundles; force_serialization;
 5025     fixed_latency(100);
 5026     D0  : S0(2);
 5027     MEM : S3(2);
 5028 %}
 5029 
 5030 // The real do-nothing guy
 5031 pipe_class empty( ) %{
 5032     instruction_count(0);
 5033 %}
 5034 
 5035 // Define the class for the Nop node
 5036 define %{
 5037    MachNop = empty;
 5038 %}
 5039 
 5040 %}
 5041 
 5042 //----------INSTRUCTIONS-------------------------------------------------------
 5043 //
 5044 // match      -- States which machine-independent subtree may be replaced
 5045 //               by this instruction.
 5046 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5047 //               selection to identify a minimum cost tree of machine
 5048 //               instructions that matches a tree of machine-independent
 5049 //               instructions.
 5050 // format     -- A string providing the disassembly for this instruction.
 5051 //               The value of an instruction's operand may be inserted
 5052 //               by referring to it with a '$' prefix.
 5053 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5054 //               to within an encode class as $primary, $secondary, and $tertiary
 5055 //               respectively.  The primary opcode is commonly used to
 5056 //               indicate the type of machine instruction, while secondary
 5057 //               and tertiary are often used for prefix options or addressing
 5058 //               modes.
 5059 // ins_encode -- A list of encode classes with parameters. The encode class
 5060 //               name must have been defined in an 'enc_class' specification
 5061 //               in the encode section of the architecture description.
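      //
      // Putting these pieces together, a typical reg-reg definition in this
      // file has roughly the following shape (an illustrative sketch; the real
      // definition of addI_eReg appears later in the file and may differ in
      // detail):
      //
      //   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
      //     match(Set dst (AddI dst src));      // ideal subtree being replaced
      //     effect(KILL cr);                    // condition codes are clobbered
      //     format %{ "ADD    $dst,$src" %}     // disassembly string
      //     opcode(0x03);                       // $primary opcode
      //     ins_encode( OpcP, RegReg(dst,src) );
      //     ins_pipe( ialu_reg_reg );
      //   %}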
 5062 
 5063 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5064 // Load Float
 5065 instruct MoveF2LEG(legRegF dst, regF src) %{
 5066   match(Set dst src);
 5067   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5068   ins_encode %{
 5069     ShouldNotReachHere();
 5070   %}
 5071   ins_pipe( fpu_reg_reg );
 5072 %}
 5073 
 5074 // Load Float
 5075 instruct MoveLEG2F(regF dst, legRegF src) %{
 5076   match(Set dst src);
 5077   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5078   ins_encode %{
 5079     ShouldNotReachHere();
 5080   %}
 5081   ins_pipe( fpu_reg_reg );
 5082 %}
 5083 
 5084 // Load Float
 5085 instruct MoveF2VL(vlRegF dst, regF src) %{
 5086   match(Set dst src);
 5087   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5088   ins_encode %{
 5089     ShouldNotReachHere();
 5090   %}
 5091   ins_pipe( fpu_reg_reg );
 5092 %}
 5093 
 5094 // Load Float
 5095 instruct MoveVL2F(regF dst, vlRegF src) %{
 5096   match(Set dst src);
 5097   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5098   ins_encode %{
 5099     ShouldNotReachHere();
 5100   %}
 5101   ins_pipe( fpu_reg_reg );
 5102 %}
 5103 
 5104 
 5105 
 5106 // Load Double
 5107 instruct MoveD2LEG(legRegD dst, regD src) %{
 5108   match(Set dst src);
 5109   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5110   ins_encode %{
 5111     ShouldNotReachHere();
 5112   %}
 5113   ins_pipe( fpu_reg_reg );
 5114 %}
 5115 
 5116 // Load Double
 5117 instruct MoveLEG2D(regD dst, legRegD src) %{
 5118   match(Set dst src);
 5119   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5120   ins_encode %{
 5121     ShouldNotReachHere();
 5122   %}
 5123   ins_pipe( fpu_reg_reg );
 5124 %}
 5125 
 5126 // Load Double
 5127 instruct MoveD2VL(vlRegD dst, regD src) %{
 5128   match(Set dst src);
 5129   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5130   ins_encode %{
 5131     ShouldNotReachHere();
 5132   %}
 5133   ins_pipe( fpu_reg_reg );
 5134 %}
 5135 
 5136 // Load Double
 5137 instruct MoveVL2D(regD dst, vlRegD src) %{
 5138   match(Set dst src);
 5139   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5140   ins_encode %{
 5141     ShouldNotReachHere();
 5142   %}
 5143   ins_pipe( fpu_reg_reg );
 5144 %}
 5145 
 5146 //----------BSWAP-Instruction--------------------------------------------------
 5147 instruct bytes_reverse_int(rRegI dst) %{
 5148   match(Set dst (ReverseBytesI dst));
 5149 
 5150   format %{ "BSWAP  $dst" %}
 5151   opcode(0x0F, 0xC8);
 5152   ins_encode( OpcP, OpcSReg(dst) );
 5153   ins_pipe( ialu_reg );
 5154 %}
 5155 
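      // Byte-reversing a 64-bit value on 32-bit x86 takes three steps: byte-swap
      // each 32-bit half, then exchange the two halves, as the format below shows.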
 5156 instruct bytes_reverse_long(eRegL dst) %{
 5157   match(Set dst (ReverseBytesL dst));
 5158 
 5159   format %{ "BSWAP  $dst.lo\n\t"
 5160             "BSWAP  $dst.hi\n\t"
 5161             "XCHG   $dst.lo $dst.hi" %}
 5162 
 5163   ins_cost(125);
 5164   ins_encode( bswap_long_bytes(dst) );
 5165   ins_pipe( ialu_reg_reg);
 5166 %}
 5167 
 5168 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5169   match(Set dst (ReverseBytesUS dst));
 5170   effect(KILL cr);
 5171 
 5172   format %{ "BSWAP  $dst\n\t"
 5173             "SHR    $dst,16\n\t" %}
 5174   ins_encode %{
 5175     __ bswapl($dst$$Register);
 5176     __ shrl($dst$$Register, 16);
 5177   %}
 5178   ins_pipe( ialu_reg );
 5179 %}
 5180 
 5181 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5182   match(Set dst (ReverseBytesS dst));
 5183   effect(KILL cr);
 5184 
 5185   format %{ "BSWAP  $dst\n\t"
 5186             "SAR    $dst,16\n\t" %}
 5187   ins_encode %{
 5188     __ bswapl($dst$$Register);
 5189     __ sarl($dst$$Register, 16);
 5190   %}
 5191   ins_pipe( ialu_reg );
 5192 %}
 5193 
 5194 
 5195 //---------- Zeros Count Instructions ------------------------------------------
 5196 
 5197 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5198   predicate(UseCountLeadingZerosInstruction);
 5199   match(Set dst (CountLeadingZerosI src));
 5200   effect(KILL cr);
 5201 
 5202   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5203   ins_encode %{
 5204     __ lzcntl($dst$$Register, $src$$Register);
 5205   %}
 5206   ins_pipe(ialu_reg);
 5207 %}
 5208 
 5209 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5210   predicate(!UseCountLeadingZerosInstruction);
 5211   match(Set dst (CountLeadingZerosI src));
 5212   effect(KILL cr);
 5213 
 5214   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5215             "JNZ    skip\n\t"
 5216             "MOV    $dst, -1\n"
 5217       "skip:\n\t"
 5218             "NEG    $dst\n\t"
 5219             "ADD    $dst, 31" %}
 5220   ins_encode %{
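          // BSR leaves the destination undefined (and sets ZF) for a zero input,
          // so force -1 in that case.  Otherwise BSR yields the index of the
          // highest set bit, and 31 - index is the leading-zero count, computed
          // below as NEG + ADD 31 (the -1 path then yields 32).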
 5221     Register Rdst = $dst$$Register;
 5222     Register Rsrc = $src$$Register;
 5223     Label skip;
 5224     __ bsrl(Rdst, Rsrc);
 5225     __ jccb(Assembler::notZero, skip);
 5226     __ movl(Rdst, -1);
 5227     __ bind(skip);
 5228     __ negl(Rdst);
 5229     __ addl(Rdst, BitsPerInt - 1);
 5230   %}
 5231   ins_pipe(ialu_reg);
 5232 %}
 5233 
 5234 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5235   predicate(UseCountLeadingZerosInstruction);
 5236   match(Set dst (CountLeadingZerosL src));
 5237   effect(TEMP dst, KILL cr);
 5238 
 5239   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5240             "JNC    done\n\t"
 5241             "LZCNT  $dst, $src.lo\n\t"
 5242             "ADD    $dst, 32\n"
 5243       "done:" %}
 5244   ins_encode %{
 5245     Register Rdst = $dst$$Register;
 5246     Register Rsrc = $src$$Register;
 5247     Label done;
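          // LZCNT sets CF when its source is zero, so carryClear below means the
          // high word produced a valid count on its own; otherwise count the low
          // word and add 32.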
 5248     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5249     __ jccb(Assembler::carryClear, done);
 5250     __ lzcntl(Rdst, Rsrc);
 5251     __ addl(Rdst, BitsPerInt);
 5252     __ bind(done);
 5253   %}
 5254   ins_pipe(ialu_reg);
 5255 %}
 5256 
 5257 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5258   predicate(!UseCountLeadingZerosInstruction);
 5259   match(Set dst (CountLeadingZerosL src));
 5260   effect(TEMP dst, KILL cr);
 5261 
 5262   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5263             "JZ     msw_is_zero\n\t"
 5264             "ADD    $dst, 32\n\t"
 5265             "JMP    not_zero\n"
 5266       "msw_is_zero:\n\t"
 5267             "BSR    $dst, $src.lo\n\t"
 5268             "JNZ    not_zero\n\t"
 5269             "MOV    $dst, -1\n"
 5270       "not_zero:\n\t"
 5271             "NEG    $dst\n\t"
 5272             "ADD    $dst, 63\n" %}
 5273   ins_encode %{
 5274     Register Rdst = $dst$$Register;
 5275     Register Rsrc = $src$$Register;
 5276     Label msw_is_zero;
 5277     Label not_zero;
 5278     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5279     __ jccb(Assembler::zero, msw_is_zero);
 5280     __ addl(Rdst, BitsPerInt);
 5281     __ jmpb(not_zero);
 5282     __ bind(msw_is_zero);
 5283     __ bsrl(Rdst, Rsrc);
 5284     __ jccb(Assembler::notZero, not_zero);
 5285     __ movl(Rdst, -1);
 5286     __ bind(not_zero);
 5287     __ negl(Rdst);
 5288     __ addl(Rdst, BitsPerLong - 1);
 5289   %}
 5290   ins_pipe(ialu_reg);
 5291 %}
 5292 
 5293 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5294   predicate(UseCountTrailingZerosInstruction);
 5295   match(Set dst (CountTrailingZerosI src));
 5296   effect(KILL cr);
 5297 
 5298   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5299   ins_encode %{
 5300     __ tzcntl($dst$$Register, $src$$Register);
 5301   %}
 5302   ins_pipe(ialu_reg);
 5303 %}
 5304 
 5305 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5306   predicate(!UseCountTrailingZerosInstruction);
 5307   match(Set dst (CountTrailingZerosI src));
 5308   effect(KILL cr);
 5309 
 5310   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5311             "JNZ    done\n\t"
 5312             "MOV    $dst, 32\n"
 5313       "done:" %}
 5314   ins_encode %{
 5315     Register Rdst = $dst$$Register;
 5316     Label done;
 5317     __ bsfl(Rdst, $src$$Register);
 5318     __ jccb(Assembler::notZero, done);
 5319     __ movl(Rdst, BitsPerInt);
 5320     __ bind(done);
 5321   %}
 5322   ins_pipe(ialu_reg);
 5323 %}
 5324 
 5325 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5326   predicate(UseCountTrailingZerosInstruction);
 5327   match(Set dst (CountTrailingZerosL src));
 5328   effect(TEMP dst, KILL cr);
 5329 
 5330   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5331             "JNC    done\n\t"
 5332             "TZCNT  $dst, $src.hi\n\t"
 5333             "ADD    $dst, 32\n"
 5334             "done:" %}
 5335   ins_encode %{
 5336     Register Rdst = $dst$$Register;
 5337     Register Rsrc = $src$$Register;
 5338     Label done;
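          // TZCNT sets CF when its source is zero, so carryClear below means the
          // low word produced a valid count on its own; otherwise count the high
          // word and add 32.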
 5339     __ tzcntl(Rdst, Rsrc);
 5340     __ jccb(Assembler::carryClear, done);
 5341     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5342     __ addl(Rdst, BitsPerInt);
 5343     __ bind(done);
 5344   %}
 5345   ins_pipe(ialu_reg);
 5346 %}
 5347 
 5348 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5349   predicate(!UseCountTrailingZerosInstruction);
 5350   match(Set dst (CountTrailingZerosL src));
 5351   effect(TEMP dst, KILL cr);
 5352 
 5353   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5354             "JNZ    done\n\t"
 5355             "BSF    $dst, $src.hi\n\t"
 5356             "JNZ    msw_not_zero\n\t"
 5357             "MOV    $dst, 32\n"
 5358       "msw_not_zero:\n\t"
 5359             "ADD    $dst, 32\n"
 5360       "done:" %}
 5361   ins_encode %{
 5362     Register Rdst = $dst$$Register;
 5363     Register Rsrc = $src$$Register;
 5364     Label msw_not_zero;
 5365     Label done;
 5366     __ bsfl(Rdst, Rsrc);
 5367     __ jccb(Assembler::notZero, done);
 5368     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5369     __ jccb(Assembler::notZero, msw_not_zero);
 5370     __ movl(Rdst, BitsPerInt);
 5371     __ bind(msw_not_zero);
 5372     __ addl(Rdst, BitsPerInt);
 5373     __ bind(done);
 5374   %}
 5375   ins_pipe(ialu_reg);
 5376 %}
 5377 
 5378 
 5379 //---------- Population Count Instructions -------------------------------------
 5380 
 5381 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5382   predicate(UsePopCountInstruction);
 5383   match(Set dst (PopCountI src));
 5384   effect(KILL cr);
 5385 
 5386   format %{ "POPCNT $dst, $src" %}
 5387   ins_encode %{
 5388     __ popcntl($dst$$Register, $src$$Register);
 5389   %}
 5390   ins_pipe(ialu_reg);
 5391 %}
 5392 
 5393 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5394   predicate(UsePopCountInstruction);
 5395   match(Set dst (PopCountI (LoadI mem)));
 5396   effect(KILL cr);
 5397 
 5398   format %{ "POPCNT $dst, $mem" %}
 5399   ins_encode %{
 5400     __ popcntl($dst$$Register, $mem$$Address);
 5401   %}
 5402   ins_pipe(ialu_reg);
 5403 %}
 5404 
 5405 // Note: Long.bitCount(long) returns an int.
 5406 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5407   predicate(UsePopCountInstruction);
 5408   match(Set dst (PopCountL src));
 5409   effect(KILL cr, TEMP tmp, TEMP dst);
 5410 
 5411   format %{ "POPCNT $dst, $src.lo\n\t"
 5412             "POPCNT $tmp, $src.hi\n\t"
 5413             "ADD    $dst, $tmp" %}
 5414   ins_encode %{
 5415     __ popcntl($dst$$Register, $src$$Register);
 5416     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5417     __ addl($dst$$Register, $tmp$$Register);
 5418   %}
 5419   ins_pipe(ialu_reg);
 5420 %}
 5421 
 5422 // Note: Long.bitCount(long) returns an int.
 5423 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5424   predicate(UsePopCountInstruction);
 5425   match(Set dst (PopCountL (LoadL mem)));
 5426   effect(KILL cr, TEMP tmp, TEMP dst);
 5427 
 5428   format %{ "POPCNT $dst, $mem\n\t"
 5429             "POPCNT $tmp, $mem+4\n\t"
 5430             "ADD    $dst, $tmp" %}
 5431   ins_encode %{
 5432     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5433     //__ popcntl($tmp$$Register, $mem$$Address$$second);
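          // Pop-count the two 32-bit halves of the long at $mem and $mem+4
          // separately, then sum the results.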
 5434     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5435     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5436     __ addl($dst$$Register, $tmp$$Register);
 5437   %}
 5438   ins_pipe(ialu_reg);
 5439 %}
 5440 
 5441 
 5442 //----------Load/Store/Move Instructions---------------------------------------
 5443 //----------Load Instructions--------------------------------------------------
 5444 // Load Byte (8bit signed)
 5445 instruct loadB(xRegI dst, memory mem) %{
 5446   match(Set dst (LoadB mem));
 5447 
 5448   ins_cost(125);
 5449   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5450 
 5451   ins_encode %{
 5452     __ movsbl($dst$$Register, $mem$$Address);
 5453   %}
 5454 
 5455   ins_pipe(ialu_reg_mem);
 5456 %}
 5457 
 5458 // Load Byte (8bit signed) into Long Register
 5459 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5460   match(Set dst (ConvI2L (LoadB mem)));
 5461   effect(KILL cr);
 5462 
 5463   ins_cost(375);
 5464   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5465             "MOV    $dst.hi,$dst.lo\n\t"
 5466             "SAR    $dst.hi,7" %}
 5467 
 5468   ins_encode %{
 5469     __ movsbl($dst$$Register, $mem$$Address);
 5470     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5471     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended; shifting by 7 fills $dst.hi with the sign bit.
 5472   %}
 5473 
 5474   ins_pipe(ialu_reg_mem);
 5475 %}
 5476 
 5477 // Load Unsigned Byte (8bit UNsigned)
 5478 instruct loadUB(xRegI dst, memory mem) %{
 5479   match(Set dst (LoadUB mem));
 5480 
 5481   ins_cost(125);
 5482   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5483 
 5484   ins_encode %{
 5485     __ movzbl($dst$$Register, $mem$$Address);
 5486   %}
 5487 
 5488   ins_pipe(ialu_reg_mem);
 5489 %}
 5490 
 5491 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5492 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5493   match(Set dst (ConvI2L (LoadUB mem)));
 5494   effect(KILL cr);
 5495 
 5496   ins_cost(250);
 5497   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5498             "XOR    $dst.hi,$dst.hi" %}
 5499 
 5500   ins_encode %{
 5501     Register Rdst = $dst$$Register;
 5502     __ movzbl(Rdst, $mem$$Address);
 5503     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5504   %}
 5505 
 5506   ins_pipe(ialu_reg_mem);
 5507 %}
 5508 
 5509 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5510 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5511   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5512   effect(KILL cr);
 5513 
 5514   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5515             "XOR    $dst.hi,$dst.hi\n\t"
 5516             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5517   ins_encode %{
 5518     Register Rdst = $dst$$Register;
 5519     __ movzbl(Rdst, $mem$$Address);
 5520     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5521     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5522   %}
 5523   ins_pipe(ialu_reg_mem);
 5524 %}
 5525 
 5526 // Load Short (16bit signed)
 5527 instruct loadS(rRegI dst, memory mem) %{
 5528   match(Set dst (LoadS mem));
 5529 
 5530   ins_cost(125);
 5531   format %{ "MOVSX  $dst,$mem\t# short" %}
 5532 
 5533   ins_encode %{
 5534     __ movswl($dst$$Register, $mem$$Address);
 5535   %}
 5536 
 5537   ins_pipe(ialu_reg_mem);
 5538 %}
 5539 
 5540 // Load Short (16 bit signed) to Byte (8 bit signed)
 5541 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5542   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5543 
 5544   ins_cost(125);
 5545   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5546   ins_encode %{
 5547     __ movsbl($dst$$Register, $mem$$Address);
 5548   %}
 5549   ins_pipe(ialu_reg_mem);
 5550 %}
 5551 
 5552 // Load Short (16bit signed) into Long Register
 5553 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5554   match(Set dst (ConvI2L (LoadS mem)));
 5555   effect(KILL cr);
 5556 
 5557   ins_cost(375);
 5558   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5559             "MOV    $dst.hi,$dst.lo\n\t"
 5560             "SAR    $dst.hi,15" %}
 5561 
 5562   ins_encode %{
 5563     __ movswl($dst$$Register, $mem$$Address);
 5564     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5565     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended; shifting by 15 fills $dst.hi with the sign bit.
 5566   %}
 5567 
 5568   ins_pipe(ialu_reg_mem);
 5569 %}
 5570 
 5571 // Load Unsigned Short/Char (16bit unsigned)
 5572 instruct loadUS(rRegI dst, memory mem) %{
 5573   match(Set dst (LoadUS mem));
 5574 
 5575   ins_cost(125);
 5576   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5577 
 5578   ins_encode %{
 5579     __ movzwl($dst$$Register, $mem$$Address);
 5580   %}
 5581 
 5582   ins_pipe(ialu_reg_mem);
 5583 %}
 5584 
 5585 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5586 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5587   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5588 
 5589   ins_cost(125);
 5590   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5591   ins_encode %{
 5592     __ movsbl($dst$$Register, $mem$$Address);
 5593   %}
 5594   ins_pipe(ialu_reg_mem);
 5595 %}
 5596 
 5597 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5598 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5599   match(Set dst (ConvI2L (LoadUS mem)));
 5600   effect(KILL cr);
 5601 
 5602   ins_cost(250);
 5603   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5604             "XOR    $dst.hi,$dst.hi" %}
 5605 
 5606   ins_encode %{
 5607     __ movzwl($dst$$Register, $mem$$Address);
 5608     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5609   %}
 5610 
 5611   ins_pipe(ialu_reg_mem);
 5612 %}
 5613 
 5614 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5615 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5616   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5617   effect(KILL cr);
 5618 
 5619   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5620             "XOR    $dst.hi,$dst.hi" %}
 5621   ins_encode %{
 5622     Register Rdst = $dst$$Register;
 5623     __ movzbl(Rdst, $mem$$Address);
 5624     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5625   %}
 5626   ins_pipe(ialu_reg_mem);
 5627 %}
 5628 
 5629 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5630 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5631   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5632   effect(KILL cr);
 5633 
 5634   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5635             "XOR    $dst.hi,$dst.hi\n\t"
 5636             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5637   ins_encode %{
 5638     Register Rdst = $dst$$Register;
 5639     __ movzwl(Rdst, $mem$$Address);
 5640     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5641     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5642   %}
 5643   ins_pipe(ialu_reg_mem);
 5644 %}
 5645 
 5646 // Load Integer
 5647 instruct loadI(rRegI dst, memory mem) %{
 5648   match(Set dst (LoadI mem));
 5649 
 5650   ins_cost(125);
 5651   format %{ "MOV    $dst,$mem\t# int" %}
 5652 
 5653   ins_encode %{
 5654     __ movl($dst$$Register, $mem$$Address);
 5655   %}
 5656 
 5657   ins_pipe(ialu_reg_mem);
 5658 %}
 5659 
 5660 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5661 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5662   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5663 
 5664   ins_cost(125);
 5665   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5666   ins_encode %{
 5667     __ movsbl($dst$$Register, $mem$$Address);
 5668   %}
 5669   ins_pipe(ialu_reg_mem);
 5670 %}
 5671 
 5672 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5673 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5674   match(Set dst (AndI (LoadI mem) mask));
 5675 
 5676   ins_cost(125);
 5677   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5678   ins_encode %{
 5679     __ movzbl($dst$$Register, $mem$$Address);
 5680   %}
 5681   ins_pipe(ialu_reg_mem);
 5682 %}
 5683 
 5684 // Load Integer (32 bit signed) to Short (16 bit signed)
 5685 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5686   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5687 
 5688   ins_cost(125);
 5689   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5690   ins_encode %{
 5691     __ movswl($dst$$Register, $mem$$Address);
 5692   %}
 5693   ins_pipe(ialu_reg_mem);
 5694 %}
 5695 
 5696 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5697 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5698   match(Set dst (AndI (LoadI mem) mask));
 5699 
 5700   ins_cost(125);
 5701   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5702   ins_encode %{
 5703     __ movzwl($dst$$Register, $mem$$Address);
 5704   %}
 5705   ins_pipe(ialu_reg_mem);
 5706 %}
 5707 
 5708 // Load Integer into Long Register
 5709 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5710   match(Set dst (ConvI2L (LoadI mem)));
 5711   effect(KILL cr);
 5712 
 5713   ins_cost(375);
 5714   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5715             "MOV    $dst.hi,$dst.lo\n\t"
 5716             "SAR    $dst.hi,31" %}
 5717 
 5718   ins_encode %{
 5719     __ movl($dst$$Register, $mem$$Address);
 5720     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5721     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5722   %}
 5723 
 5724   ins_pipe(ialu_reg_mem);
 5725 %}
 5726 
 5727 // Load Integer with mask 0xFF into Long Register
 5728 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5729   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5730   effect(KILL cr);
 5731 
 5732   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5733             "XOR    $dst.hi,$dst.hi" %}
 5734   ins_encode %{
 5735     Register Rdst = $dst$$Register;
 5736     __ movzbl(Rdst, $mem$$Address);
 5737     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5738   %}
 5739   ins_pipe(ialu_reg_mem);
 5740 %}
 5741 
 5742 // Load Integer with mask 0xFFFF into Long Register
 5743 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5744   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5745   effect(KILL cr);
 5746 
 5747   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5748             "XOR    $dst.hi,$dst.hi" %}
 5749   ins_encode %{
 5750     Register Rdst = $dst$$Register;
 5751     __ movzwl(Rdst, $mem$$Address);
 5752     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5753   %}
 5754   ins_pipe(ialu_reg_mem);
 5755 %}
 5756 
 5757 // Load Integer with 31-bit mask into Long Register
 5758 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5759   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5760   effect(KILL cr);
 5761 
 5762   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5763             "XOR    $dst.hi,$dst.hi\n\t"
 5764             "AND    $dst.lo,$mask" %}
 5765   ins_encode %{
 5766     Register Rdst = $dst$$Register;
 5767     __ movl(Rdst, $mem$$Address);
 5768     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5769     __ andl(Rdst, $mask$$constant);
 5770   %}
 5771   ins_pipe(ialu_reg_mem);
 5772 %}
 5773 
 5774 // Load Unsigned Integer into Long Register
 5775 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5776   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5777   effect(KILL cr);
 5778 
 5779   ins_cost(250);
 5780   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5781             "XOR    $dst.hi,$dst.hi" %}
 5782 
 5783   ins_encode %{
 5784     __ movl($dst$$Register, $mem$$Address);
 5785     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5786   %}
 5787 
 5788   ins_pipe(ialu_reg_mem);
 5789 %}
 5790 
 5791 // Load Long.  Cannot clobber address while loading, so restrict address
 5792 // register to ESI
 5793 instruct loadL(eRegL dst, load_long_memory mem) %{
 5794   predicate(!((LoadLNode*)n)->require_atomic_access());
 5795   match(Set dst (LoadL mem));
 5796 
 5797   ins_cost(250);
 5798   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5799             "MOV    $dst.hi,$mem+4" %}
 5800 
 5801   ins_encode %{
 5802     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5803     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5804     __ movl($dst$$Register, Amemlo);
 5805     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5806   %}
 5807 
 5808   ins_pipe(ialu_reg_long_mem);
 5809 %}
 5810 
 5811 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5812 // then store it down to the stack and reload on the int
 5813 // side.
 5814 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5815   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5816   match(Set dst (LoadL mem));
 5817 
 5818   ins_cost(200);
 5819   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5820             "FISTp  $dst" %}
 5821   ins_encode(enc_loadL_volatile(mem,dst));
 5822   ins_pipe( fpu_reg_mem );
 5823 %}
 5824 
 5825 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5826   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5827   match(Set dst (LoadL mem));
 5828   effect(TEMP tmp);
 5829   ins_cost(180);
 5830   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5831             "MOVSD  $dst,$tmp" %}
 5832   ins_encode %{
 5833     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5834     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5835   %}
 5836   ins_pipe( pipe_slow );
 5837 %}
 5838 
 5839 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5840   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5841   match(Set dst (LoadL mem));
 5842   effect(TEMP tmp);
 5843   ins_cost(160);
 5844   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5845             "MOVD   $dst.lo,$tmp\n\t"
 5846             "PSRLQ  $tmp,32\n\t"
 5847             "MOVD   $dst.hi,$tmp" %}
 5848   ins_encode %{
 5849     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5850     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5851     __ psrlq($tmp$$XMMRegister, 32);
 5852     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5853   %}
 5854   ins_pipe( pipe_slow );
 5855 %}
 5856 
 5857 // Load Range
 5858 instruct loadRange(rRegI dst, memory mem) %{
 5859   match(Set dst (LoadRange mem));
 5860 
 5861   ins_cost(125);
 5862   format %{ "MOV    $dst,$mem" %}
 5863   opcode(0x8B);
 5864   ins_encode( OpcP, RegMem(dst,mem));
 5865   ins_pipe( ialu_reg_mem );
 5866 %}
 5867 
 5868 
 5869 // Load Pointer
 5870 instruct loadP(eRegP dst, memory mem) %{
 5871   match(Set dst (LoadP mem));
 5872 
 5873   ins_cost(125);
 5874   format %{ "MOV    $dst,$mem" %}
 5875   opcode(0x8B);
 5876   ins_encode( OpcP, RegMem(dst,mem));
 5877   ins_pipe( ialu_reg_mem );
 5878 %}
 5879 
 5880 // Load Klass Pointer
 5881 instruct loadKlass(eRegP dst, memory mem) %{
 5882   match(Set dst (LoadKlass mem));
 5883 
 5884   ins_cost(125);
 5885   format %{ "MOV    $dst,$mem" %}
 5886   opcode(0x8B);
 5887   ins_encode( OpcP, RegMem(dst,mem));
 5888   ins_pipe( ialu_reg_mem );
 5889 %}
 5890 
 5891 // Load Double
 5892 instruct loadDPR(regDPR dst, memory mem) %{
 5893   predicate(UseSSE<=1);
 5894   match(Set dst (LoadD mem));
 5895 
 5896   ins_cost(150);
 5897   format %{ "FLD_D  ST,$mem\n\t"
 5898             "FSTP   $dst" %}
 5899   opcode(0xDD);               /* DD /0 */
 5900   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5901               Pop_Reg_DPR(dst) );
 5902   ins_pipe( fpu_reg_mem );
 5903 %}
 5904 
 5905 // Load Double to XMM
 5906 instruct loadD(regD dst, memory mem) %{
 5907   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5908   match(Set dst (LoadD mem));
 5909   ins_cost(145);
 5910   format %{ "MOVSD  $dst,$mem" %}
 5911   ins_encode %{
 5912     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5913   %}
 5914   ins_pipe( pipe_slow );
 5915 %}
 5916 
 5917 instruct loadD_partial(regD dst, memory mem) %{
 5918   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5919   match(Set dst (LoadD mem));
 5920   ins_cost(145);
 5921   format %{ "MOVLPD $dst,$mem" %}
 5922   ins_encode %{
 5923     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5924   %}
 5925   ins_pipe( pipe_slow );
 5926 %}
 5927 
 5928 // Load to XMM register (single-precision floating point)
 5929 // MOVSS instruction
 5930 instruct loadF(regF dst, memory mem) %{
 5931   predicate(UseSSE>=1);
 5932   match(Set dst (LoadF mem));
 5933   ins_cost(145);
 5934   format %{ "MOVSS  $dst,$mem" %}
 5935   ins_encode %{
 5936     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5937   %}
 5938   ins_pipe( pipe_slow );
 5939 %}
 5940 
 5941 // Load Float
 5942 instruct loadFPR(regFPR dst, memory mem) %{
 5943   predicate(UseSSE==0);
 5944   match(Set dst (LoadF mem));
 5945 
 5946   ins_cost(150);
 5947   format %{ "FLD_S  ST,$mem\n\t"
 5948             "FSTP   $dst" %}
 5949   opcode(0xD9);               /* D9 /0 */
 5950   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5951               Pop_Reg_FPR(dst) );
 5952   ins_pipe( fpu_reg_mem );
 5953 %}
 5954 
 5955 // Load Effective Address
 5956 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5957   match(Set dst mem);
 5958 
 5959   ins_cost(110);
 5960   format %{ "LEA    $dst,$mem" %}
 5961   opcode(0x8D);
 5962   ins_encode( OpcP, RegMem(dst,mem));
 5963   ins_pipe( ialu_reg_reg_fat );
 5964 %}
 5965 
 5966 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5967   match(Set dst mem);
 5968 
 5969   ins_cost(110);
 5970   format %{ "LEA    $dst,$mem" %}
 5971   opcode(0x8D);
 5972   ins_encode( OpcP, RegMem(dst,mem));
 5973   ins_pipe( ialu_reg_reg_fat );
 5974 %}
 5975 
 5976 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5977   match(Set dst mem);
 5978 
 5979   ins_cost(110);
 5980   format %{ "LEA    $dst,$mem" %}
 5981   opcode(0x8D);
 5982   ins_encode( OpcP, RegMem(dst,mem));
 5983   ins_pipe( ialu_reg_reg_fat );
 5984 %}
 5985 
 5986 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5987   match(Set dst mem);
 5988 
 5989   ins_cost(110);
 5990   format %{ "LEA    $dst,$mem" %}
 5991   opcode(0x8D);
 5992   ins_encode( OpcP, RegMem(dst,mem));
 5993   ins_pipe( ialu_reg_reg_fat );
 5994 %}
 5995 
 5996 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5997   match(Set dst mem);
 5998 
 5999   ins_cost(110);
 6000   format %{ "LEA    $dst,$mem" %}
 6001   opcode(0x8D);
 6002   ins_encode( OpcP, RegMem(dst,mem));
 6003   ins_pipe( ialu_reg_reg_fat );
 6004 %}
 6005 
 6006 // Load Constant
 6007 instruct loadConI(rRegI dst, immI src) %{
 6008   match(Set dst src);
 6009 
 6010   format %{ "MOV    $dst,$src" %}
 6011   ins_encode( LdImmI(dst, src) );
 6012   ins_pipe( ialu_reg_fat );
 6013 %}
 6014 
 6015 // Load Constant zero
 6016 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 6017   match(Set dst src);
 6018   effect(KILL cr);
 6019 
 6020   ins_cost(50);
 6021   format %{ "XOR    $dst,$dst" %}
 6022   opcode(0x33);  /* + rd */
 6023   ins_encode( OpcP, RegReg( dst, dst ) );
 6024   ins_pipe( ialu_reg );
 6025 %}
 6026 
 6027 instruct loadConP(eRegP dst, immP src) %{
 6028   match(Set dst src);
 6029 
 6030   format %{ "MOV    $dst,$src" %}
 6031   opcode(0xB8);  /* + rd */
 6032   ins_encode( LdImmP(dst, src) );
 6033   ins_pipe( ialu_reg_fat );
 6034 %}
 6035 
 6036 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 6037   match(Set dst src);
 6038   effect(KILL cr);
 6039   ins_cost(200);
 6040   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6041             "MOV    $dst.hi,$src.hi" %}
 6042   opcode(0xB8);
 6043   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6044   ins_pipe( ialu_reg_long_fat );
 6045 %}
 6046 
 6047 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6048   match(Set dst src);
 6049   effect(KILL cr);
 6050   ins_cost(150);
 6051   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6052             "XOR    $dst.hi,$dst.hi" %}
 6053   opcode(0x33,0x33);
 6054   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6055   ins_pipe( ialu_reg_long );
 6056 %}
 6057 
 6058 // The instruction usage is guarded by predicate in operand immFPR().
 6059 instruct loadConFPR(regFPR dst, immFPR con) %{
 6060   match(Set dst con);
 6061   ins_cost(125);
 6062   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6063             "FSTP   $dst" %}
 6064   ins_encode %{
 6065     __ fld_s($constantaddress($con));
 6066     __ fstp_d($dst$$reg);
 6067   %}
 6068   ins_pipe(fpu_reg_con);
 6069 %}
 6070 
 6071 // The instruction usage is guarded by predicate in operand immFPR0().
 6072 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6073   match(Set dst con);
 6074   ins_cost(125);
 6075   format %{ "FLDZ   ST\n\t"
 6076             "FSTP   $dst" %}
 6077   ins_encode %{
 6078     __ fldz();
 6079     __ fstp_d($dst$$reg);
 6080   %}
 6081   ins_pipe(fpu_reg_con);
 6082 %}
 6083 
 6084 // The instruction usage is guarded by predicate in operand immFPR1().
 6085 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6086   match(Set dst con);
 6087   ins_cost(125);
 6088   format %{ "FLD1   ST\n\t"
 6089             "FSTP   $dst" %}
 6090   ins_encode %{
 6091     __ fld1();
 6092     __ fstp_d($dst$$reg);
 6093   %}
 6094   ins_pipe(fpu_reg_con);
 6095 %}
 6096 
 6097 // The instruction usage is guarded by predicate in operand immF().
 6098 instruct loadConF(regF dst, immF con) %{
 6099   match(Set dst con);
 6100   ins_cost(125);
 6101   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6102   ins_encode %{
 6103     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6104   %}
 6105   ins_pipe(pipe_slow);
 6106 %}
 6107 
 6108 // The instruction usage is guarded by predicate in operand immF0().
 6109 instruct loadConF0(regF dst, immF0 src) %{
 6110   match(Set dst src);
 6111   ins_cost(100);
 6112   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6113   ins_encode %{
 6114     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6115   %}
 6116   ins_pipe(pipe_slow);
 6117 %}
 6118 
 6119 // The instruction usage is guarded by predicate in operand immDPR().
 6120 instruct loadConDPR(regDPR dst, immDPR con) %{
 6121   match(Set dst con);
 6122   ins_cost(125);
 6123 
 6124   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6125             "FSTP   $dst" %}
 6126   ins_encode %{
 6127     __ fld_d($constantaddress($con));
 6128     __ fstp_d($dst$$reg);
 6129   %}
 6130   ins_pipe(fpu_reg_con);
 6131 %}
 6132 
 6133 // The instruction usage is guarded by predicate in operand immDPR0().
 6134 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6135   match(Set dst con);
 6136   ins_cost(125);
 6137 
 6138   format %{ "FLDZ   ST\n\t"
 6139             "FSTP   $dst" %}
 6140   ins_encode %{
 6141     __ fldz();
 6142     __ fstp_d($dst$$reg);
 6143   %}
 6144   ins_pipe(fpu_reg_con);
 6145 %}
 6146 
 6147 // The instruction usage is guarded by predicate in operand immDPR1().
 6148 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6149   match(Set dst con);
 6150   ins_cost(125);
 6151 
 6152   format %{ "FLD1   ST\n\t"
 6153             "FSTP   $dst" %}
 6154   ins_encode %{
 6155     __ fld1();
 6156     __ fstp_d($dst$$reg);
 6157   %}
 6158   ins_pipe(fpu_reg_con);
 6159 %}
 6160 
 6161 // The instruction usage is guarded by predicate in operand immD().
 6162 instruct loadConD(regD dst, immD con) %{
 6163   match(Set dst con);
 6164   ins_cost(125);
 6165   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6166   ins_encode %{
 6167     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6168   %}
 6169   ins_pipe(pipe_slow);
 6170 %}
 6171 
 6172 // The instruction usage is guarded by predicate in operand immD0().
 6173 instruct loadConD0(regD dst, immD0 src) %{
 6174   match(Set dst src);
 6175   ins_cost(100);
 6176   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6177   ins_encode %{
 6178     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6179   %}
 6180   ins_pipe( pipe_slow );
 6181 %}
 6182 
 6183 // Load Stack Slot
 6184 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6185   match(Set dst src);
 6186   ins_cost(125);
 6187 
 6188   format %{ "MOV    $dst,$src" %}
 6189   opcode(0x8B);
 6190   ins_encode( OpcP, RegMem(dst,src));
 6191   ins_pipe( ialu_reg_mem );
 6192 %}
 6193 
 6194 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6195   match(Set dst src);
 6196 
 6197   ins_cost(200);
 6198   format %{ "MOV    $dst,$src.lo\n\t"
 6199             "MOV    $dst+4,$src.hi" %}
 6200   opcode(0x8B, 0x8B);
 6201   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6202   ins_pipe( ialu_mem_long_reg );
 6203 %}
 6204 
 6205 // Load Stack Slot
 6206 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6207   match(Set dst src);
 6208   ins_cost(125);
 6209 
 6210   format %{ "MOV    $dst,$src" %}
 6211   opcode(0x8B);
 6212   ins_encode( OpcP, RegMem(dst,src));
 6213   ins_pipe( ialu_reg_mem );
 6214 %}
 6215 
 6216 // Load Stack Slot
 6217 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6218   match(Set dst src);
 6219   ins_cost(125);
 6220 
 6221   format %{ "FLD_S  $src\n\t"
 6222             "FSTP   $dst" %}
 6223   opcode(0xD9);               /* D9 /0, FLD m32real */
 6224   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6225               Pop_Reg_FPR(dst) );
 6226   ins_pipe( fpu_reg_mem );
 6227 %}
 6228 
 6229 // Load Stack Slot
 6230 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6231   match(Set dst src);
 6232   ins_cost(125);
 6233 
 6234   format %{ "FLD_D  $src\n\t"
 6235             "FSTP   $dst" %}
 6236   opcode(0xDD);               /* DD /0, FLD m64real */
 6237   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6238               Pop_Reg_DPR(dst) );
 6239   ins_pipe( fpu_reg_mem );
 6240 %}
 6241 
 6242 // Prefetch instructions for allocation.
 6243 // Must be safe to execute with invalid address (cannot fault).
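      // The flavor emitted is selected by AllocatePrefetchInstr: 0 uses PREFETCHNTA,
      // 1 uses PREFETCHT0, 2 uses PREFETCHT2 and 3 uses PREFETCHW; without SSE the
      // prefetch degenerates to an empty encoding (prefetchAlloc0 below).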
 6244 
 6245 instruct prefetchAlloc0( memory mem ) %{
 6246   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6247   match(PrefetchAllocation mem);
 6248   ins_cost(0);
 6249   size(0);
 6250   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6251   ins_encode();
 6252   ins_pipe(empty);
 6253 %}
 6254 
 6255 instruct prefetchAlloc( memory mem ) %{
 6256   predicate(AllocatePrefetchInstr==3);
 6257   match( PrefetchAllocation mem );
 6258   ins_cost(100);
 6259 
 6260   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6261   ins_encode %{
 6262     __ prefetchw($mem$$Address);
 6263   %}
 6264   ins_pipe(ialu_mem);
 6265 %}
 6266 
 6267 instruct prefetchAllocNTA( memory mem ) %{
 6268   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6269   match(PrefetchAllocation mem);
 6270   ins_cost(100);
 6271 
 6272   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6273   ins_encode %{
 6274     __ prefetchnta($mem$$Address);
 6275   %}
 6276   ins_pipe(ialu_mem);
 6277 %}
 6278 
 6279 instruct prefetchAllocT0( memory mem ) %{
 6280   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6281   match(PrefetchAllocation mem);
 6282   ins_cost(100);
 6283 
 6284   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6285   ins_encode %{
 6286     __ prefetcht0($mem$$Address);
 6287   %}
 6288   ins_pipe(ialu_mem);
 6289 %}
 6290 
 6291 instruct prefetchAllocT2( memory mem ) %{
 6292   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6293   match(PrefetchAllocation mem);
 6294   ins_cost(100);
 6295 
 6296   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6297   ins_encode %{
 6298     __ prefetcht2($mem$$Address);
 6299   %}
 6300   ins_pipe(ialu_mem);
 6301 %}
 6302 
 6303 //----------Store Instructions-------------------------------------------------
 6304 
 6305 // Store Byte
 6306 instruct storeB(memory mem, xRegI src) %{
 6307   match(Set mem (StoreB mem src));
 6308 
 6309   ins_cost(125);
 6310   format %{ "MOV8   $mem,$src" %}
 6311   opcode(0x88);
 6312   ins_encode( OpcP, RegMem( src, mem ) );
 6313   ins_pipe( ialu_mem_reg );
 6314 %}
 6315 
 6316 // Store Char/Short
 6317 instruct storeC(memory mem, rRegI src) %{
 6318   match(Set mem (StoreC mem src));
 6319 
 6320   ins_cost(125);
 6321   format %{ "MOV16  $mem,$src" %}
 6322   opcode(0x89, 0x66);
 6323   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6324   ins_pipe( ialu_mem_reg );
 6325 %}
 6326 
 6327 // Store Integer
 6328 instruct storeI(memory mem, rRegI src) %{
 6329   match(Set mem (StoreI mem src));
 6330 
 6331   ins_cost(125);
 6332   format %{ "MOV    $mem,$src" %}
 6333   opcode(0x89);
 6334   ins_encode( OpcP, RegMem( src, mem ) );
 6335   ins_pipe( ialu_mem_reg );
 6336 %}
 6337 
 6338 // Store Long
 6339 instruct storeL(long_memory mem, eRegL src) %{
 6340   predicate(!((StoreLNode*)n)->require_atomic_access());
 6341   match(Set mem (StoreL mem src));
 6342 
 6343   ins_cost(200);
 6344   format %{ "MOV    $mem,$src.lo\n\t"
 6345             "MOV    $mem+4,$src.hi" %}
 6346   opcode(0x89, 0x89);
 6347   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6348   ins_pipe( ialu_mem_long_reg );
 6349 %}
 6350 
 6351 // Store Long to Integer
 6352 instruct storeL2I(memory mem, eRegL src) %{
 6353   match(Set mem (StoreI mem (ConvL2I src)));
 6354 
 6355   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6356   ins_encode %{
 6357     __ movl($mem$$Address, $src$$Register);
 6358   %}
 6359   ins_pipe(ialu_mem_reg);
 6360 %}
 6361 
 6362 // Volatile Store Long.  Must be atomic, so move it into
 6363 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6364 // target address before the store (for null-ptr checks)
 6365 // so the memory operand is used twice in the encoding.
 6366 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6367   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6368   match(Set mem (StoreL mem src));
 6369   effect( KILL cr );
 6370   ins_cost(400);
 6371   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6372             "FILD   $src\n\t"
 6373             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6374   opcode(0x3B);
 6375   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6376   ins_pipe( fpu_reg_mem );
 6377 %}
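      // With SSE2, the two variants below obtain the same 64-bit atomicity by
      // staging the value in an XMM temporary (MOVSD from the stack slot, or
      // MOVD+PUNPCKLDQ from the register pair) and storing it with one MOVSD;
      // the leading CMP against EAX still probes the address for the implicit
      // null check.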
 6378 
 6379 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6380   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6381   match(Set mem (StoreL mem src));
 6382   effect( TEMP tmp, KILL cr );
 6383   ins_cost(380);
 6384   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6385             "MOVSD  $tmp,$src\n\t"
 6386             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6387   ins_encode %{
 6388     __ cmpl(rax, $mem$$Address);
 6389     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6390     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6391   %}
 6392   ins_pipe( pipe_slow );
 6393 %}
 6394 
 6395 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6396   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6397   match(Set mem (StoreL mem src));
 6398   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6399   ins_cost(360);
 6400   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6401             "MOVD   $tmp,$src.lo\n\t"
 6402             "MOVD   $tmp2,$src.hi\n\t"
 6403             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6404             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6405   ins_encode %{
 6406     __ cmpl(rax, $mem$$Address);
 6407     __ movdl($tmp$$XMMRegister, $src$$Register);
 6408     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6409     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6410     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6411   %}
 6412   ins_pipe( pipe_slow );
 6413 %}
 6414 
 6415 // Store Pointer; for storing unknown oops and raw pointers
 6416 instruct storeP(memory mem, anyRegP src) %{
 6417   match(Set mem (StoreP mem src));
 6418 
 6419   ins_cost(125);
 6420   format %{ "MOV    $mem,$src" %}
 6421   opcode(0x89);
 6422   ins_encode( OpcP, RegMem( src, mem ) );
 6423   ins_pipe( ialu_mem_reg );
 6424 %}
 6425 
 6426 // Store Integer Immediate
 6427 instruct storeImmI(memory mem, immI src) %{
 6428   match(Set mem (StoreI mem src));
 6429 
 6430   ins_cost(150);
 6431   format %{ "MOV    $mem,$src" %}
 6432   opcode(0xC7);               /* C7 /0 */
 6433   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6434   ins_pipe( ialu_mem_imm );
 6435 %}
 6436 
 6437 // Store Short/Char Immediate
 6438 instruct storeImmI16(memory mem, immI16 src) %{
 6439   predicate(UseStoreImmI16);
 6440   match(Set mem (StoreC mem src));
 6441 
 6442   ins_cost(150);
 6443   format %{ "MOV16  $mem,$src" %}
 6444   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6445   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6446   ins_pipe( ialu_mem_imm );
 6447 %}
 6448 
 6449 // Store Pointer Immediate; null pointers or constant oops that do not
 6450 // need card-mark barriers.
 6451 instruct storeImmP(memory mem, immP src) %{
 6452   match(Set mem (StoreP mem src));
 6453 
 6454   ins_cost(150);
 6455   format %{ "MOV    $mem,$src" %}
 6456   opcode(0xC7);               /* C7 /0 */
 6457   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6458   ins_pipe( ialu_mem_imm );
 6459 %}
 6460 
 6461 // Store Byte Immediate
 6462 instruct storeImmB(memory mem, immI8 src) %{
 6463   match(Set mem (StoreB mem src));
 6464 
 6465   ins_cost(150);
 6466   format %{ "MOV8   $mem,$src" %}
 6467   opcode(0xC6);               /* C6 /0 */
 6468   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6469   ins_pipe( ialu_mem_imm );
 6470 %}
 6471 
 6472 // Store CMS card-mark Immediate
 6473 instruct storeImmCM(memory mem, immI8 src) %{
 6474   match(Set mem (StoreCM mem src));
 6475 
 6476   ins_cost(150);
 6477   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6478   opcode(0xC6);               /* C6 /0 */
 6479   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6480   ins_pipe( ialu_mem_imm );
 6481 %}
 6482 
 6483 // Store Double
 6484 instruct storeDPR( memory mem, regDPR1 src) %{
 6485   predicate(UseSSE<=1);
 6486   match(Set mem (StoreD mem src));
 6487 
 6488   ins_cost(100);
 6489   format %{ "FST_D  $mem,$src" %}
 6490   opcode(0xDD);       /* DD /2 */
 6491   ins_encode( enc_FPR_store(mem,src) );
 6492   ins_pipe( fpu_mem_reg );
 6493 %}
 6494 
 6495 // Store double does rounding on x86
 6496 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6497   predicate(UseSSE<=1);
 6498   match(Set mem (StoreD mem (RoundDouble src)));
 6499 
 6500   ins_cost(100);
 6501   format %{ "FST_D  $mem,$src\t# round" %}
 6502   opcode(0xDD);       /* DD /2 */
 6503   ins_encode( enc_FPR_store(mem,src) );
 6504   ins_pipe( fpu_mem_reg );
 6505 %}
 6506 
 6507 // Store XMM register to memory (double-precision floating point)
 6508 // MOVSD instruction
 6509 instruct storeD(memory mem, regD src) %{
 6510   predicate(UseSSE>=2);
 6511   match(Set mem (StoreD mem src));
 6512   ins_cost(95);
 6513   format %{ "MOVSD  $mem,$src" %}
 6514   ins_encode %{
 6515     __ movdbl($mem$$Address, $src$$XMMRegister);
 6516   %}
 6517   ins_pipe( pipe_slow );
 6518 %}
 6519 
 6520 // Store XMM register to memory (single-precision floating point)
 6521 // MOVSS instruction
 6522 instruct storeF(memory mem, regF src) %{
 6523   predicate(UseSSE>=1);
 6524   match(Set mem (StoreF mem src));
 6525   ins_cost(95);
 6526   format %{ "MOVSS  $mem,$src" %}
 6527   ins_encode %{
 6528     __ movflt($mem$$Address, $src$$XMMRegister);
 6529   %}
 6530   ins_pipe( pipe_slow );
 6531 %}
 6532 
 6533 
 6534 // Store Float
 6535 instruct storeFPR( memory mem, regFPR1 src) %{
 6536   predicate(UseSSE==0);
 6537   match(Set mem (StoreF mem src));
 6538 
 6539   ins_cost(100);
 6540   format %{ "FST_S  $mem,$src" %}
 6541   opcode(0xD9);       /* D9 /2 */
 6542   ins_encode( enc_FPR_store(mem,src) );
 6543   ins_pipe( fpu_mem_reg );
 6544 %}
 6545 
 6546 // Store Float does rounding on x86
 6547 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6548   predicate(UseSSE==0);
 6549   match(Set mem (StoreF mem (RoundFloat src)));
 6550 
 6551   ins_cost(100);
 6552   format %{ "FST_S  $mem,$src\t# round" %}
 6553   opcode(0xD9);       /* D9 /2 */
 6554   ins_encode( enc_FPR_store(mem,src) );
 6555   ins_pipe( fpu_mem_reg );
 6556 %}
 6557 
 6558 // Store Float from Double does rounding on x86
 6559 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6560   predicate(UseSSE<=1);
 6561   match(Set mem (StoreF mem (ConvD2F src)));
 6562 
 6563   ins_cost(100);
 6564   format %{ "FST_S  $mem,$src\t# D-round" %}
 6565   opcode(0xD9);       /* D9 /2 */
 6566   ins_encode( enc_FPR_store(mem,src) );
 6567   ins_pipe( fpu_mem_reg );
 6568 %}
 6569 
 6570 // Store immediate Float value (faster than a store from the FPU register)
 6571 // The instruction usage is guarded by predicate in operand immFPR().
 6572 instruct storeFPR_imm( memory mem, immFPR src) %{
 6573   match(Set mem (StoreF mem src));
 6574 
 6575   ins_cost(50);
 6576   format %{ "MOV    $mem,$src\t# store float" %}
 6577   opcode(0xC7);               /* C7 /0 */
 6578   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6579   ins_pipe( ialu_mem_imm );
 6580 %}
 6581 
 6582 // Store immediate Float value (faster than a store from an XMM register)
 6583 // The instruction usage is guarded by predicate in operand immF().
 6584 instruct storeF_imm( memory mem, immF src) %{
 6585   match(Set mem (StoreF mem src));
 6586 
 6587   ins_cost(50);
 6588   format %{ "MOV    $mem,$src\t# store float" %}
 6589   opcode(0xC7);               /* C7 /0 */
 6590   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6591   ins_pipe( ialu_mem_imm );
 6592 %}
 6593 
 6594 // Store Integer to stack slot
 6595 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6596   match(Set dst src);
 6597 
 6598   ins_cost(100);
 6599   format %{ "MOV    $dst,$src" %}
 6600   opcode(0x89);
 6601   ins_encode( OpcPRegSS( dst, src ) );
 6602   ins_pipe( ialu_mem_reg );
 6603 %}
 6604 
 6605 // Store Pointer to stack slot
 6606 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6607   match(Set dst src);
 6608 
 6609   ins_cost(100);
 6610   format %{ "MOV    $dst,$src" %}
 6611   opcode(0x89);
 6612   ins_encode( OpcPRegSS( dst, src ) );
 6613   ins_pipe( ialu_mem_reg );
 6614 %}
 6615 
 6616 // Store Long to stack slot
 6617 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6618   match(Set dst src);
 6619 
 6620   ins_cost(200);
 6621   format %{ "MOV    $dst,$src.lo\n\t"
 6622             "MOV    $dst+4,$src.hi" %}
 6623   opcode(0x89, 0x89);
 6624   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6625   ins_pipe( ialu_mem_long_reg );
 6626 %}
 6627 
 6628 //----------MemBar Instructions-----------------------------------------------
 6629 // Memory barrier flavors
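      // x86 has a strong (TSO) memory model, so acquire, release and store-store
      // barriers need no instruction and are emitted empty.  Only the StoreLoad
      // case (MemBarVolatile) emits code, a locked ADD of zero to the top-of-stack
      // word, which is typically cheaper than MFENCE; even that is elided when the
      // matcher determines a preceding instruction already provides the ordering
      // (see unnecessary_membar_volatile).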
 6630 
 6631 instruct membar_acquire() %{
 6632   match(MemBarAcquire);
 6633   match(LoadFence);
 6634   ins_cost(400);
 6635 
 6636   size(0);
 6637   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6638   ins_encode();
 6639   ins_pipe(empty);
 6640 %}
 6641 
 6642 instruct membar_acquire_lock() %{
 6643   match(MemBarAcquireLock);
 6644   ins_cost(0);
 6645 
 6646   size(0);
 6647   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6648   ins_encode( );
 6649   ins_pipe(empty);
 6650 %}
 6651 
 6652 instruct membar_release() %{
 6653   match(MemBarRelease);
 6654   match(StoreFence);
 6655   ins_cost(400);
 6656 
 6657   size(0);
 6658   format %{ "MEMBAR-release ! (empty encoding)" %}
 6659   ins_encode( );
 6660   ins_pipe(empty);
 6661 %}
 6662 
 6663 instruct membar_release_lock() %{
 6664   match(MemBarReleaseLock);
 6665   ins_cost(0);
 6666 
 6667   size(0);
 6668   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6669   ins_encode( );
 6670   ins_pipe(empty);
 6671 %}
 6672 
 6673 instruct membar_volatile(eFlagsReg cr) %{
 6674   match(MemBarVolatile);
 6675   effect(KILL cr);
 6676   ins_cost(400);
 6677 
 6678   format %{
 6679     $$template
 6680     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6681   %}
 6682   ins_encode %{
 6683     __ membar(Assembler::StoreLoad);
 6684   %}
 6685   ins_pipe(pipe_slow);
 6686 %}
 6687 
 6688 instruct unnecessary_membar_volatile() %{
 6689   match(MemBarVolatile);
 6690   predicate(Matcher::post_store_load_barrier(n));
 6691   ins_cost(0);
 6692 
 6693   size(0);
 6694   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6695   ins_encode( );
 6696   ins_pipe(empty);
 6697 %}
 6698 
 6699 instruct membar_storestore() %{
 6700   match(MemBarStoreStore);
 6701   match(StoreStoreFence);
 6702   ins_cost(0);
 6703 
 6704   size(0);
 6705   format %{ "MEMBAR-storestore (empty encoding)" %}
 6706   ins_encode( );
 6707   ins_pipe(empty);
 6708 %}
 6709 
 6710 //----------Move Instructions--------------------------------------------------
 6711 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6712   match(Set dst (CastX2P src));
 6713   format %{ "# X2P  $dst, $src" %}
 6714   ins_encode( /*empty encoding*/ );
 6715   ins_cost(0);
 6716   ins_pipe(empty);
 6717 %}
 6718 
 6719 instruct castP2X(rRegI dst, eRegP src ) %{
 6720   match(Set dst (CastP2X src));
 6721   ins_cost(50);
 6722   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6723   ins_encode( enc_Copy( dst, src) );
 6724   ins_pipe( ialu_reg_reg );
 6725 %}
 6726 
 6727 //----------Conditional Move---------------------------------------------------
 6728 // Conditional move
 6729 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6730   predicate(!VM_Version::supports_cmov() );
 6731   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6732   ins_cost(200);
 6733   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6734             "MOV    $dst,$src\n"
 6735       "skip:" %}
 6736   ins_encode %{
 6737     Label Lskip;
 6738     // Invert sense of branch from sense of CMOV
 6739     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6740     __ movl($dst$$Register, $src$$Register);
 6741     __ bind(Lskip);
 6742   %}
 6743   ins_pipe( pipe_cmov_reg );
 6744 %}
 6745 
 6746 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6747   predicate(!VM_Version::supports_cmov() );
 6748   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6749   ins_cost(200);
 6750   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6751             "MOV    $dst,$src\n"
 6752       "skip:" %}
 6753   ins_encode %{
 6754     Label Lskip;
 6755     // Invert sense of branch from sense of CMOV
 6756     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6757     __ movl($dst$$Register, $src$$Register);
 6758     __ bind(Lskip);
 6759   %}
 6760   ins_pipe( pipe_cmov_reg );
 6761 %}
 6762 
 6763 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6764   predicate(VM_Version::supports_cmov() );
 6765   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6766   ins_cost(200);
 6767   format %{ "CMOV$cop $dst,$src" %}
 6768   opcode(0x0F,0x40);
 6769   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6770   ins_pipe( pipe_cmov_reg );
 6771 %}
 6772 
 6773 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6774   predicate(VM_Version::supports_cmov() );
 6775   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6776   ins_cost(200);
 6777   format %{ "CMOV$cop $dst,$src" %}
 6778   opcode(0x0F,0x40);
 6779   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6780   ins_pipe( pipe_cmov_reg );
 6781 %}
 6782 
 6783 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6784   predicate(VM_Version::supports_cmov() );
 6785   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6786   ins_cost(200);
 6787   expand %{
 6788     cmovI_regU(cop, cr, dst, src);
 6789   %}
 6790 %}
 6791 
 6792 // Conditional move
 6793 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6794   predicate(VM_Version::supports_cmov() );
 6795   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6796   ins_cost(250);
 6797   format %{ "CMOV$cop $dst,$src" %}
 6798   opcode(0x0F,0x40);
 6799   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6800   ins_pipe( pipe_cmov_mem );
 6801 %}
 6802 
 6803 // Conditional move
 6804 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6805   predicate(VM_Version::supports_cmov() );
 6806   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6807   ins_cost(250);
 6808   format %{ "CMOV$cop $dst,$src" %}
 6809   opcode(0x0F,0x40);
 6810   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6811   ins_pipe( pipe_cmov_mem );
 6812 %}
 6813 
 6814 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6815   predicate(VM_Version::supports_cmov() );
 6816   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6817   ins_cost(250);
 6818   expand %{
 6819     cmovI_memU(cop, cr, dst, src);
 6820   %}
 6821 %}
 6822 
 6823 // Conditional move
 6824 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6825   predicate(VM_Version::supports_cmov() );
 6826   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6827   ins_cost(200);
 6828   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6829   opcode(0x0F,0x40);
 6830   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6831   ins_pipe( pipe_cmov_reg );
 6832 %}
 6833 
 6834 // Conditional move (non-P6 version)
 6835 // Note:  a CMoveP is generated for  stubs and native wrappers
 6836 //        regardless of whether we are on a P6, so we
 6837 //        emulate a cmov here
 6838 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6839   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6840   ins_cost(300);
 6841   format %{ "Jn$cop   skip\n\t"
 6842           "MOV    $dst,$src\t# pointer\n"
 6843       "skip:" %}
 6844   opcode(0x8b);
 6845   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6846   ins_pipe( pipe_cmov_reg );
 6847 %}
 6848 
 6849 // Conditional move
 6850 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6851   predicate(VM_Version::supports_cmov() );
 6852   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6853   ins_cost(200);
 6854   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6855   opcode(0x0F,0x40);
 6856   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6857   ins_pipe( pipe_cmov_reg );
 6858 %}
 6859 
 6860 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6861   predicate(VM_Version::supports_cmov() );
 6862   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6863   ins_cost(200);
 6864   expand %{
 6865     cmovP_regU(cop, cr, dst, src);
 6866   %}
 6867 %}
 6868 
 6869 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6870 // correctly meets the two pointer arguments; one is an incoming
 6871 // register but the other is a memory operand.  ALSO appears to
 6872 // be buggy with implicit null checks.
 6873 //
 6874 //// Conditional move
 6875 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6876 //  predicate(VM_Version::supports_cmov() );
 6877 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6878 //  ins_cost(250);
 6879 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6880 //  opcode(0x0F,0x40);
 6881 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6882 //  ins_pipe( pipe_cmov_mem );
 6883 //%}
 6884 //
 6885 //// Conditional move
 6886 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6887 //  predicate(VM_Version::supports_cmov() );
 6888 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6889 //  ins_cost(250);
 6890 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6891 //  opcode(0x0F,0x40);
 6892 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6893 //  ins_pipe( pipe_cmov_mem );
 6894 //%}
 6895 
 6896 // Conditional move
 6897 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6898   predicate(UseSSE<=1);
 6899   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6900   ins_cost(200);
 6901   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6902   opcode(0xDA);
 6903   ins_encode( enc_cmov_dpr(cop,src) );
 6904   ins_pipe( pipe_cmovDPR_reg );
 6905 %}
 6906 
 6907 // Conditional move
 6908 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6909   predicate(UseSSE==0);
 6910   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6911   ins_cost(200);
 6912   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6913   opcode(0xDA);
 6914   ins_encode( enc_cmov_dpr(cop,src) );
 6915   ins_pipe( pipe_cmovDPR_reg );
 6916 %}
 6917 
 6918 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
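      // (FCMOVcc tests only the carry, zero and parity flags, i.e. the unsigned
      // conditions, so signed compares fall back to a branch around a move.)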
 6919 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6920   predicate(UseSSE<=1);
 6921   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6922   ins_cost(200);
 6923   format %{ "Jn$cop   skip\n\t"
 6924             "MOV    $dst,$src\t# double\n"
 6925       "skip:" %}
 6926   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6927   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6928   ins_pipe( pipe_cmovDPR_reg );
 6929 %}
 6930 
 6931 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6932 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6933   predicate(UseSSE==0);
 6934   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6935   ins_cost(200);
 6936   format %{ "Jn$cop    skip\n\t"
 6937             "MOV    $dst,$src\t# float\n"
 6938       "skip:" %}
 6939   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6940   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6941   ins_pipe( pipe_cmovDPR_reg );
 6942 %}
 6943 
 6944 // No CMOVE with SSE/SSE2
 6945 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6946   predicate (UseSSE>=1);
 6947   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6948   ins_cost(200);
 6949   format %{ "Jn$cop   skip\n\t"
 6950             "MOVSS  $dst,$src\t# float\n"
 6951       "skip:" %}
 6952   ins_encode %{
 6953     Label skip;
 6954     // Invert sense of branch from sense of CMOV
 6955     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6956     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6957     __ bind(skip);
 6958   %}
 6959   ins_pipe( pipe_slow );
 6960 %}
 6961 
 6962 // No CMOVE with SSE/SSE2
 6963 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6964   predicate (UseSSE>=2);
 6965   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6966   ins_cost(200);
 6967   format %{ "Jn$cop   skip\n\t"
 6968             "MOVSD  $dst,$src\t# double\n"
 6969       "skip:" %}
 6970   ins_encode %{
 6971     Label skip;
 6972     // Invert sense of branch from sense of CMOV
 6973     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6974     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6975     __ bind(skip);
 6976   %}
 6977   ins_pipe( pipe_slow );
 6978 %}
 6979 
 6980 // unsigned version
 6981 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6982   predicate (UseSSE>=1);
 6983   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6984   ins_cost(200);
 6985   format %{ "Jn$cop   skip\n\t"
 6986             "MOVSS  $dst,$src\t# float\n"
 6987       "skip:" %}
 6988   ins_encode %{
 6989     Label skip;
 6990     // Invert sense of branch from sense of CMOV
 6991     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6992     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6993     __ bind(skip);
 6994   %}
 6995   ins_pipe( pipe_slow );
 6996 %}
 6997 
 6998 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6999   predicate (UseSSE>=1);
 7000   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7001   ins_cost(200);
 7002   expand %{
 7003     fcmovF_regU(cop, cr, dst, src);
 7004   %}
 7005 %}
 7006 
 7007 // unsigned version
 7008 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 7009   predicate (UseSSE>=2);
 7010   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7011   ins_cost(200);
 7012   format %{ "Jn$cop   skip\n\t"
 7013             "MOVSD  $dst,$src\t# double\n"
 7014       "skip:" %}
 7015   ins_encode %{
 7016     Label skip;
 7017     // Invert sense of branch from sense of CMOV
 7018     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 7019     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7020     __ bind(skip);
 7021   %}
 7022   ins_pipe( pipe_slow );
 7023 %}
 7024 
 7025 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 7026   predicate (UseSSE>=2);
 7027   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7028   ins_cost(200);
 7029   expand %{
 7030     fcmovD_regU(cop, cr, dst, src);
 7031   %}
 7032 %}
 7033 
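      // A long conditional move is two 32-bit CMOVs on the same condition; CMOV
      // does not modify the flags, so the second CMOV still sees the original
      // condition.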
 7034 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 7035   predicate(VM_Version::supports_cmov() );
 7036   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7037   ins_cost(200);
 7038   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7039             "CMOV$cop $dst.hi,$src.hi" %}
 7040   opcode(0x0F,0x40);
 7041   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7042   ins_pipe( pipe_cmov_reg_long );
 7043 %}
 7044 
 7045 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7046   predicate(VM_Version::supports_cmov() );
 7047   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7048   ins_cost(200);
 7049   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7050             "CMOV$cop $dst.hi,$src.hi" %}
 7051   opcode(0x0F,0x40);
 7052   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7053   ins_pipe( pipe_cmov_reg_long );
 7054 %}
 7055 
 7056 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7057   predicate(VM_Version::supports_cmov() );
 7058   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7059   ins_cost(200);
 7060   expand %{
 7061     cmovL_regU(cop, cr, dst, src);
 7062   %}
 7063 %}
 7064 
 7065 //----------Arithmetic Instructions--------------------------------------------
 7066 //----------Addition Instructions----------------------------------------------
 7067 
 7068 // Integer Addition Instructions
 7069 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7070   match(Set dst (AddI dst src));
 7071   effect(KILL cr);
 7072 
 7073   size(2);
 7074   format %{ "ADD    $dst,$src" %}
 7075   opcode(0x03);
 7076   ins_encode( OpcP, RegReg( dst, src) );
 7077   ins_pipe( ialu_reg_reg );
 7078 %}
 7079 
 7080 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7081   match(Set dst (AddI dst src));
 7082   effect(KILL cr);
 7083 
 7084   format %{ "ADD    $dst,$src" %}
 7085   opcode(0x81, 0x00); /* /0 id */
 7086   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7087   ins_pipe( ialu_reg );
 7088 %}
 7089 
 7090 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7091   predicate(UseIncDec);
 7092   match(Set dst (AddI dst src));
 7093   effect(KILL cr);
 7094 
 7095   size(1);
 7096   format %{ "INC    $dst" %}
 7097   opcode(0x40); /* 0x40 + rd => INC r32 */
 7098   ins_encode( Opc_plus( primary, dst ) );
 7099   ins_pipe( ialu_reg );
 7100 %}
 7101 
 7102 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7103   match(Set dst (AddI src0 src1));
 7104   ins_cost(110);
 7105 
 7106   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7107   opcode(0x8D); /* 0x8D /r */
 7108   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7109   ins_pipe( ialu_reg_reg );
 7110 %}
 7111 
 7112 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7113   match(Set dst (AddP src0 src1));
 7114   ins_cost(110);
 7115 
 7116   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7117   opcode(0x8D); /* 0x8D /r */
 7118   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7119   ins_pipe( ialu_reg_reg );
 7120 %}
 7121 
 7122 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7123   predicate(UseIncDec);
 7124   match(Set dst (AddI dst src));
 7125   effect(KILL cr);
 7126 
 7127   size(1);
 7128   format %{ "DEC    $dst" %}
 7129   opcode(0x48); /* 0x48 + rd => DEC r32 */
 7130   ins_encode( Opc_plus( primary, dst ) );
 7131   ins_pipe( ialu_reg );
 7132 %}
 7133 
 7134 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7135   match(Set dst (AddP dst src));
 7136   effect(KILL cr);
 7137 
 7138   size(2);
 7139   format %{ "ADD    $dst,$src" %}
 7140   opcode(0x03);
 7141   ins_encode( OpcP, RegReg( dst, src) );
 7142   ins_pipe( ialu_reg_reg );
 7143 %}
 7144 
 7145 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7146   match(Set dst (AddP dst src));
 7147   effect(KILL cr);
 7148 
 7149   format %{ "ADD    $dst,$src" %}
 7150   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7151   // ins_encode( RegImm( dst, src) );
 7152   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7153   ins_pipe( ialu_reg );
 7154 %}
 7155 
 7156 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7157   match(Set dst (AddI dst (LoadI src)));
 7158   effect(KILL cr);
 7159 
 7160   ins_cost(150);
 7161   format %{ "ADD    $dst,$src" %}
 7162   opcode(0x03);
 7163   ins_encode( OpcP, RegMem( dst, src) );
 7164   ins_pipe( ialu_reg_mem );
 7165 %}
 7166 
 7167 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7168   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7169   effect(KILL cr);
 7170 
 7171   ins_cost(150);
 7172   format %{ "ADD    $dst,$src" %}
 7173   opcode(0x01);  /* Opcode 01 /r */
 7174   ins_encode( OpcP, RegMem( src, dst ) );
 7175   ins_pipe( ialu_mem_reg );
 7176 %}
 7177 
 7178 // Add Memory with Immediate
 7179 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7180   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7181   effect(KILL cr);
 7182 
 7183   ins_cost(125);
 7184   format %{ "ADD    $dst,$src" %}
 7185   opcode(0x81);               /* Opcode 81 /0 id */
 7186   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7187   ins_pipe( ialu_mem_imm );
 7188 %}
 7189 
 7190 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7191   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7192   effect(KILL cr);
 7193 
 7194   ins_cost(125);
 7195   format %{ "INC    $dst" %}
 7196   opcode(0xFF);               /* Opcode FF /0 */
 7197   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7198   ins_pipe( ialu_mem_imm );
 7199 %}
 7200 
 7201 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7202   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7203   effect(KILL cr);
 7204 
 7205   ins_cost(125);
 7206   format %{ "DEC    $dst" %}
 7207   opcode(0xFF);               /* Opcode FF /1 */
 7208   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7209   ins_pipe( ialu_mem_imm );
 7210 %}
 7211 
 7212 
 7213 instruct checkCastPP( eRegP dst ) %{
 7214   match(Set dst (CheckCastPP dst));
 7215 
 7216   size(0);
 7217   format %{ "#checkcastPP of $dst" %}
 7218   ins_encode( /*empty encoding*/ );
 7219   ins_pipe( empty );
 7220 %}
 7221 
 7222 instruct castPP( eRegP dst ) %{
 7223   match(Set dst (CastPP dst));
 7224   format %{ "#castPP of $dst" %}
 7225   ins_encode( /*empty encoding*/ );
 7226   ins_pipe( empty );
 7227 %}
 7228 
 7229 instruct castII( rRegI dst ) %{
 7230   match(Set dst (CastII dst));
 7231   format %{ "#castII of $dst" %}
 7232   ins_encode( /*empty encoding*/ );
 7233   ins_cost(0);
 7234   ins_pipe( empty );
 7235 %}
 7236 
 7237 instruct castLL( eRegL dst ) %{
 7238   match(Set dst (CastLL dst));
 7239   format %{ "#castLL of $dst" %}
 7240   ins_encode( /*empty encoding*/ );
 7241   ins_cost(0);
 7242   ins_pipe( empty );
 7243 %}
 7244 
 7245 instruct castFF( regF dst ) %{
 7246   predicate(UseSSE >= 1);
 7247   match(Set dst (CastFF dst));
 7248   format %{ "#castFF of $dst" %}
 7249   ins_encode( /*empty encoding*/ );
 7250   ins_cost(0);
 7251   ins_pipe( empty );
 7252 %}
 7253 
 7254 instruct castDD( regD dst ) %{
 7255   predicate(UseSSE >= 2);
 7256   match(Set dst (CastDD dst));
 7257   format %{ "#castDD of $dst" %}
 7258   ins_encode( /*empty encoding*/ );
 7259   ins_cost(0);
 7260   ins_pipe( empty );
 7261 %}
 7262 
 7263 instruct castFF_PR( regFPR dst ) %{
 7264   predicate(UseSSE < 1);
 7265   match(Set dst (CastFF dst));
 7266   format %{ "#castFF of $dst" %}
 7267   ins_encode( /*empty encoding*/ );
 7268   ins_cost(0);
 7269   ins_pipe( empty );
 7270 %}
 7271 
 7272 instruct castDD_PR( regDPR dst ) %{
 7273   predicate(UseSSE < 2);
 7274   match(Set dst (CastDD dst));
 7275   format %{ "#castDD of $dst" %}
 7276   ins_encode( /*empty encoding*/ );
 7277   ins_cost(0);
 7278   ins_pipe( empty );
 7279 %}
 7280 
 7281 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7282 
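      // CMPXCHG8B requires fixed registers: the expected value in EDX:EAX and the
      // new value in ECX:EBX, hence the eADXRegL/eBCXRegL operands below.  The
      // boolean result is recovered from ZF by enc_flags_ne_to_boolean.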
 7283 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7284   predicate(VM_Version::supports_cx8());
 7285   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7286   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7287   effect(KILL cr, KILL oldval);
 7288   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7289             "MOV    $res,0\n\t"
 7290             "JNE,s  fail\n\t"
 7291             "MOV    $res,1\n"
 7292           "fail:" %}
 7293   ins_encode( enc_cmpxchg8(mem_ptr),
 7294               enc_flags_ne_to_boolean(res) );
 7295   ins_pipe( pipe_cmpxchg );
 7296 %}
 7297 
 7298 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7299   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7300   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7301   effect(KILL cr, KILL oldval);
 7302   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7303             "MOV    $res,0\n\t"
 7304             "JNE,s  fail\n\t"
 7305             "MOV    $res,1\n"
 7306           "fail:" %}
 7307   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7308   ins_pipe( pipe_cmpxchg );
 7309 %}
 7310 
 7311 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7312   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7313   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7314   effect(KILL cr, KILL oldval);
 7315   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7316             "MOV    $res,0\n\t"
 7317             "JNE,s  fail\n\t"
 7318             "MOV    $res,1\n"
 7319           "fail:" %}
 7320   ins_encode( enc_cmpxchgb(mem_ptr),
 7321               enc_flags_ne_to_boolean(res) );
 7322   ins_pipe( pipe_cmpxchg );
 7323 %}
 7324 
 7325 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7326   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7327   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7328   effect(KILL cr, KILL oldval);
 7329   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7330             "MOV    $res,0\n\t"
 7331             "JNE,s  fail\n\t"
 7332             "MOV    $res,1\n"
 7333           "fail:" %}
 7334   ins_encode( enc_cmpxchgw(mem_ptr),
 7335               enc_flags_ne_to_boolean(res) );
 7336   ins_pipe( pipe_cmpxchg );
 7337 %}
 7338 
 7339 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7340   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7341   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7342   effect(KILL cr, KILL oldval);
 7343   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7344             "MOV    $res,0\n\t"
 7345             "JNE,s  fail\n\t"
 7346             "MOV    $res,1\n"
 7347           "fail:" %}
 7348   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7349   ins_pipe( pipe_cmpxchg );
 7350 %}
 7351 
 7352 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7353   predicate(VM_Version::supports_cx8());
 7354   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7355   effect(KILL cr);
 7356   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7357   ins_encode( enc_cmpxchg8(mem_ptr) );
 7358   ins_pipe( pipe_cmpxchg );
 7359 %}
 7360 
 7361 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7362   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7363   effect(KILL cr);
 7364   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7365   ins_encode( enc_cmpxchg(mem_ptr) );
 7366   ins_pipe( pipe_cmpxchg );
 7367 %}
 7368 
 7369 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7370   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7371   effect(KILL cr);
 7372   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7373   ins_encode( enc_cmpxchgb(mem_ptr) );
 7374   ins_pipe( pipe_cmpxchg );
 7375 %}
 7376 
 7377 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7378   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7379   effect(KILL cr);
 7380   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7381   ins_encode( enc_cmpxchgw(mem_ptr) );
 7382   ins_pipe( pipe_cmpxchg );
 7383 %}
 7384 
 7385 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7386   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7387   effect(KILL cr);
 7388   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7389   ins_encode( enc_cmpxchg(mem_ptr) );
 7390   ins_pipe( pipe_cmpxchg );
 7391 %}
 7392 
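      // When the fetched value is not used (result_not_used), a locked ADD is
      // emitted instead of XADD: only the atomic update of memory is needed.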
 7393 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7394   predicate(n->as_LoadStore()->result_not_used());
 7395   match(Set dummy (GetAndAddB mem add));
 7396   effect(KILL cr);
 7397   format %{ "LOCK ADDB  [$mem],$add" %}
 7398   ins_encode %{
 7399     __ lock();
 7400     __ addb($mem$$Address, $add$$constant);
 7401   %}
 7402   ins_pipe( pipe_cmpxchg );
 7403 %}
 7404 
 7405 // Important to match to xRegI: only 8-bit regs.
 7406 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7407   match(Set newval (GetAndAddB mem newval));
 7408   effect(KILL cr);
 7409   format %{ "LOCK XADDB  [$mem],$newval" %}
 7410   ins_encode %{
 7411     __ lock();
 7412     __ xaddb($mem$$Address, $newval$$Register);
 7413   %}
 7414   ins_pipe( pipe_cmpxchg );
 7415 %}
 7416 
 7417 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7418   predicate(n->as_LoadStore()->result_not_used());
 7419   match(Set dummy (GetAndAddS mem add));
 7420   effect(KILL cr);
 7421   format %{ "LOCK ADDS  [$mem],$add" %}
 7422   ins_encode %{
 7423     __ lock();
 7424     __ addw($mem$$Address, $add$$constant);
 7425   %}
 7426   ins_pipe( pipe_cmpxchg );
 7427 %}
 7428 
 7429 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7430   match(Set newval (GetAndAddS mem newval));
 7431   effect(KILL cr);
 7432   format %{ "LOCK XADDS  [$mem],$newval" %}
 7433   ins_encode %{
 7434     __ lock();
 7435     __ xaddw($mem$$Address, $newval$$Register);
 7436   %}
 7437   ins_pipe( pipe_cmpxchg );
 7438 %}
 7439 
 7440 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7441   predicate(n->as_LoadStore()->result_not_used());
 7442   match(Set dummy (GetAndAddI mem add));
 7443   effect(KILL cr);
 7444   format %{ "LOCK ADDL  [$mem],$add" %}
 7445   ins_encode %{
 7446     __ lock();
 7447     __ addl($mem$$Address, $add$$constant);
 7448   %}
 7449   ins_pipe( pipe_cmpxchg );
 7450 %}
 7451 
 7452 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7453   match(Set newval (GetAndAddI mem newval));
 7454   effect(KILL cr);
 7455   format %{ "LOCK XADDL  [$mem],$newval" %}
 7456   ins_encode %{
 7457     __ lock();
 7458     __ xaddl($mem$$Address, $newval$$Register);
 7459   %}
 7460   ins_pipe( pipe_cmpxchg );
 7461 %}
 7462 
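      // XCHG with a memory operand is implicitly locked, so the exchange forms
      // below do not emit an explicit LOCK prefix.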
 7463 // Important to match to xRegI: only 8-bit regs.
 7464 instruct xchgB( memory mem, xRegI newval) %{
 7465   match(Set newval (GetAndSetB mem newval));
 7466   format %{ "XCHGB  $newval,[$mem]" %}
 7467   ins_encode %{
 7468     __ xchgb($newval$$Register, $mem$$Address);
 7469   %}
 7470   ins_pipe( pipe_cmpxchg );
 7471 %}
 7472 
 7473 instruct xchgS( memory mem, rRegI newval) %{
 7474   match(Set newval (GetAndSetS mem newval));
 7475   format %{ "XCHGW  $newval,[$mem]" %}
 7476   ins_encode %{
 7477     __ xchgw($newval$$Register, $mem$$Address);
 7478   %}
 7479   ins_pipe( pipe_cmpxchg );
 7480 %}
 7481 
 7482 instruct xchgI( memory mem, rRegI newval) %{
 7483   match(Set newval (GetAndSetI mem newval));
 7484   format %{ "XCHGL  $newval,[$mem]" %}
 7485   ins_encode %{
 7486     __ xchgl($newval$$Register, $mem$$Address);
 7487   %}
 7488   ins_pipe( pipe_cmpxchg );
 7489 %}
 7490 
 7491 instruct xchgP( memory mem, pRegP newval) %{
 7492   match(Set newval (GetAndSetP mem newval));
 7493   format %{ "XCHGL  $newval,[$mem]" %}
 7494   ins_encode %{
 7495     __ xchgl($newval$$Register, $mem$$Address);
 7496   %}
 7497   ins_pipe( pipe_cmpxchg );
 7498 %}
 7499 
 7500 //----------Subtraction Instructions-------------------------------------------
 7501 
 7502 // Integer Subtraction Instructions
 7503 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7504   match(Set dst (SubI dst src));
 7505   effect(KILL cr);
 7506 
 7507   size(2);
 7508   format %{ "SUB    $dst,$src" %}
 7509   opcode(0x2B);
 7510   ins_encode( OpcP, RegReg( dst, src) );
 7511   ins_pipe( ialu_reg_reg );
 7512 %}
 7513 
 7514 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7515   match(Set dst (SubI dst src));
 7516   effect(KILL cr);
 7517 
 7518   format %{ "SUB    $dst,$src" %}
 7519   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7520   // ins_encode( RegImm( dst, src) );
 7521   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7522   ins_pipe( ialu_reg );
 7523 %}
 7524 
 7525 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7526   match(Set dst (SubI dst (LoadI src)));
 7527   effect(KILL cr);
 7528 
 7529   ins_cost(150);
 7530   format %{ "SUB    $dst,$src" %}
 7531   opcode(0x2B);
 7532   ins_encode( OpcP, RegMem( dst, src) );
 7533   ins_pipe( ialu_reg_mem );
 7534 %}
 7535 
 7536 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7537   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7538   effect(KILL cr);
 7539 
 7540   ins_cost(150);
 7541   format %{ "SUB    $dst,$src" %}
 7542   opcode(0x29);  /* Opcode 29 /r */
 7543   ins_encode( OpcP, RegMem( src, dst ) );
 7544   ins_pipe( ialu_mem_reg );
 7545 %}
 7546 
 7547 // Subtract from a pointer
 7548 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7549   match(Set dst (AddP dst (SubI zero src)));
 7550   effect(KILL cr);
 7551 
 7552   size(2);
 7553   format %{ "SUB    $dst,$src" %}
 7554   opcode(0x2B);
 7555   ins_encode( OpcP, RegReg( dst, src) );
 7556   ins_pipe( ialu_reg_reg );
 7557 %}
 7558 
 7559 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7560   match(Set dst (SubI zero dst));
 7561   effect(KILL cr);
 7562 
 7563   size(2);
 7564   format %{ "NEG    $dst" %}
 7565   opcode(0xF7,0x03);  // Opcode F7 /3
 7566   ins_encode( OpcP, RegOpc( dst ) );
 7567   ins_pipe( ialu_reg );
 7568 %}
 7569 
 7570 //----------Multiplication/Division Instructions-------------------------------
 7571 // Integer Multiplication Instructions
 7572 // Multiply Register
 7573 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7574   match(Set dst (MulI dst src));
 7575   effect(KILL cr);
 7576 
 7577   size(3);
 7578   ins_cost(300);
 7579   format %{ "IMUL   $dst,$src" %}
 7580   opcode(0xAF, 0x0F);
 7581   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7582   ins_pipe( ialu_reg_reg_alu0 );
 7583 %}
 7584 
 7585 // Multiply 32-bit Immediate
 7586 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7587   match(Set dst (MulI src imm));
 7588   effect(KILL cr);
 7589 
 7590   ins_cost(300);
 7591   format %{ "IMUL   $dst,$src,$imm" %}
 7592   opcode(0x69);  /* 69 /r id */
 7593   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7594   ins_pipe( ialu_reg_reg_alu0 );
 7595 %}
 7596 
 7597 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7598   match(Set dst src);
 7599   effect(KILL cr);
 7600 
 7601   // Note that this is artificially increased to make it more expensive than loadConL
 7602   ins_cost(250);
 7603   format %{ "MOV    EAX,$src\t// low word only" %}
 7604   opcode(0xB8);
 7605   ins_encode( LdImmL_Lo(dst, src) );
 7606   ins_pipe( ialu_reg_fat );
 7607 %}
 7608 
 7609 // Multiply by 32-bit Immediate, taking the shifted high order results
 7610 //  (special case for shift by 32)
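      // The widening one-operand IMUL leaves the 64-bit product in EDX:EAX, so a
      // right shift by exactly 32 is simply a read of EDX; shifts greater than 32
      // additionally emit SAR EDX,$cnt-32 (second form below).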
 7611 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7612   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7613   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7614              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7615              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7616   effect(USE src1, KILL cr);
 7617 
 7618   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7619   ins_cost(0*100 + 1*400 - 150);
 7620   format %{ "IMUL   EDX:EAX,$src1" %}
 7621   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7622   ins_pipe( pipe_slow );
 7623 %}
 7624 
 7625 // Multiply by 32-bit Immediate, taking the shifted high order results
 7626 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7627   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7628   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7629              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7630              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7631   effect(USE src1, KILL cr);
 7632 
 7633   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7634   ins_cost(1*100 + 1*400 - 150);
 7635   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7636             "SAR    EDX,$cnt-32" %}
 7637   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7638   ins_pipe( pipe_slow );
 7639 %}
 7640 
 7641 // Multiply Memory 32-bit Immediate
 7642 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7643   match(Set dst (MulI (LoadI src) imm));
 7644   effect(KILL cr);
 7645 
 7646   ins_cost(300);
 7647   format %{ "IMUL   $dst,$src,$imm" %}
 7648   opcode(0x69);  /* 69 /r id */
 7649   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7650   ins_pipe( ialu_reg_mem_alu0 );
 7651 %}
 7652 
 7653 // Multiply Memory
 7654 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7655   match(Set dst (MulI dst (LoadI src)));
 7656   effect(KILL cr);
 7657 
 7658   ins_cost(350);
 7659   format %{ "IMUL   $dst,$src" %}
 7660   opcode(0xAF, 0x0F);
 7661   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7662   ins_pipe( ialu_reg_mem_alu0 );
 7663 %}
 7664 
 7665 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7666 %{
 7667   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7668   effect(KILL cr, KILL src2);
 7669 
 7670   expand %{ mulI_eReg(dst, src1, cr);
 7671            mulI_eReg(src2, src3, cr);
 7672            addI_eReg(dst, src2, cr); %}
 7673 %}
 7674 
 7675 // Multiply Register Int to Long
 7676 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7677   // Basic Idea: long = (long)int * (long)int
 7678   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7679   effect(DEF dst, USE src, USE src1, KILL flags);
 7680 
 7681   ins_cost(300);
 7682   format %{ "IMUL   $dst,$src1" %}
 7683 
 7684   ins_encode( long_int_multiply( dst, src1 ) );
 7685   ins_pipe( ialu_reg_reg_alu0 );
 7686 %}
 7687 
 7688 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7689   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7690   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7691   effect(KILL flags);
 7692 
 7693   ins_cost(300);
 7694   format %{ "MUL    $dst,$src1" %}
 7695 
 7696   ins_encode( long_uint_multiply(dst, src1) );
 7697   ins_pipe( ialu_reg_reg_alu0 );
 7698 %}
 7699 
 7700 // Multiply Register Long
 7701 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7702   match(Set dst (MulL dst src));
 7703   effect(KILL cr, TEMP tmp);
 7704   ins_cost(4*100+3*400);
 7705 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7706 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
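      // This follows from expanding (x_hi*2^32 + x_lo)*(y_hi*2^32 + y_lo) mod 2^64:
      // x_hi*y_hi contributes nothing, and only the low 32 bits of each cross
      // product survive in the high word of the result.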
 7707   format %{ "MOV    $tmp,$src.lo\n\t"
 7708             "IMUL   $tmp,EDX\n\t"
 7709             "MOV    EDX,$src.hi\n\t"
 7710             "IMUL   EDX,EAX\n\t"
 7711             "ADD    $tmp,EDX\n\t"
 7712             "MUL    EDX:EAX,$src.lo\n\t"
 7713             "ADD    EDX,$tmp" %}
 7714   ins_encode( long_multiply( dst, src, tmp ) );
 7715   ins_pipe( pipe_slow );
 7716 %}
 7717 
 7718 // Multiply Register Long where the left operand's high 32 bits are zero
 7719 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7720   predicate(is_operand_hi32_zero(n->in(1)));
 7721   match(Set dst (MulL dst src));
 7722   effect(KILL cr, TEMP tmp);
 7723   ins_cost(2*100+2*400);
 7724 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7725 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7726   format %{ "MOV    $tmp,$src.hi\n\t"
 7727             "IMUL   $tmp,EAX\n\t"
 7728             "MUL    EDX:EAX,$src.lo\n\t"
 7729             "ADD    EDX,$tmp" %}
 7730   ins_encode %{
 7731     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7732     __ imull($tmp$$Register, rax);
 7733     __ mull($src$$Register);
 7734     __ addl(rdx, $tmp$$Register);
 7735   %}
 7736   ins_pipe( pipe_slow );
 7737 %}
 7738 
 7739 // Multiply Register Long where the right operand's high 32 bits are zero
 7740 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7741   predicate(is_operand_hi32_zero(n->in(2)));
 7742   match(Set dst (MulL dst src));
 7743   effect(KILL cr, TEMP tmp);
 7744   ins_cost(2*100+2*400);
 7745 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7746 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7747   format %{ "MOV    $tmp,$src.lo\n\t"
 7748             "IMUL   $tmp,EDX\n\t"
 7749             "MUL    EDX:EAX,$src.lo\n\t"
 7750             "ADD    EDX,$tmp" %}
 7751   ins_encode %{
 7752     __ movl($tmp$$Register, $src$$Register);
 7753     __ imull($tmp$$Register, rdx);
 7754     __ mull($src$$Register);
 7755     __ addl(rdx, $tmp$$Register);
 7756   %}
 7757   ins_pipe( pipe_slow );
 7758 %}
 7759 
 7760 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7761 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7762   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7763   match(Set dst (MulL dst src));
 7764   effect(KILL cr);
 7765   ins_cost(1*400);
 7766 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7767 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7768   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7769   ins_encode %{
 7770     __ mull($src$$Register);
 7771   %}
 7772   ins_pipe( pipe_slow );
 7773 %}
 7774 
 7775 // Multiply Register Long by small constant
 7776 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7777   match(Set dst (MulL dst src));
 7778   effect(KILL cr, TEMP tmp);
 7779   ins_cost(2*100+2*400);
 7780   size(12);
 7781 // Basic idea: lo(result) = lo(src * EAX)
 7782 //             hi(result) = hi(src * EAX) + lo(src * EDX)
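      //             i.e. (EDX:EAX) * src = (EAX * src) + 2^32 * lo(EDX * src), truncated to 64 bits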
 7783   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7784             "MOV    EDX,$src\n\t"
 7785             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7786             "ADD    EDX,$tmp" %}
 7787   ins_encode( long_multiply_con( dst, src, tmp ) );
 7788   ins_pipe( pipe_slow );
 7789 %}
 7790 
 7791 // Integer DIV with Register
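      // IDIV raises a hardware #DE fault when the quotient does not fit in 32 bits, so the one
      // problematic case (min_jint / -1) is checked explicitly and yields min_jint with a zero
      // remainder.  CDQ sign-extends EAX into EDX:EAX before the divide.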
 7792 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7793   match(Set rax (DivI rax div));
 7794   effect(KILL rdx, KILL cr);
 7795   size(26);
 7796   ins_cost(30*100+10*100);
 7797   format %{ "CMP    EAX,0x80000000\n\t"
 7798             "JNE,s  normal\n\t"
 7799             "XOR    EDX,EDX\n\t"
 7800             "CMP    ECX,-1\n\t"
 7801             "JE,s   done\n"
 7802     "normal: CDQ\n\t"
 7803             "IDIV   $div\n\t"
 7804     "done:"        %}
 7805   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7806   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7807   ins_pipe( ialu_reg_reg_alu0 );
 7808 %}
 7809 
 7810 // Divide Register Long
 7811 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7812   match(Set dst (DivL src1 src2));
 7813   effect(CALL);
 7814   ins_cost(10000);
 7815   format %{ "PUSH   $src1.hi\n\t"
 7816             "PUSH   $src1.lo\n\t"
 7817             "PUSH   $src2.hi\n\t"
 7818             "PUSH   $src2.lo\n\t"
 7819             "CALL   SharedRuntime::ldiv\n\t"
 7820             "ADD    ESP,16" %}
 7821   ins_encode( long_div(src1,src2) );
 7822   ins_pipe( pipe_slow );
 7823 %}
 7824 
 7825 // Integer DIVMOD with Register, both quotient and mod results
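      // Same min_jint / -1 special case as divI_eReg above; the quotient is left in EAX and
      // the remainder in EDX.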
 7826 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7827   match(DivModI rax div);
 7828   effect(KILL cr);
 7829   size(26);
 7830   ins_cost(30*100+10*100);
 7831   format %{ "CMP    EAX,0x80000000\n\t"
 7832             "JNE,s  normal\n\t"
 7833             "XOR    EDX,EDX\n\t"
 7834             "CMP    ECX,-1\n\t"
 7835             "JE,s   done\n"
 7836     "normal: CDQ\n\t"
 7837             "IDIV   $div\n\t"
 7838     "done:"        %}
 7839   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7840   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7841   ins_pipe( pipe_slow );
 7842 %}
 7843 
 7844 // Integer MOD with Register
 7845 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7846   match(Set rdx (ModI rax div));
 7847   effect(KILL rax, KILL cr);
 7848 
 7849   size(26);
 7850   ins_cost(300);
 7851   format %{ "CDQ\n\t"
 7852             "IDIV   $div" %}
 7853   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7854   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7855   ins_pipe( ialu_reg_reg_alu0 );
 7856 %}
 7857 
 7858 // Remainder Register Long
 7859 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7860   match(Set dst (ModL src1 src2));
 7861   effect(CALL);
 7862   ins_cost(10000);
 7863   format %{ "PUSH   $src1.hi\n\t"
 7864             "PUSH   $src1.lo\n\t"
 7865             "PUSH   $src2.hi\n\t"
 7866             "PUSH   $src2.lo\n\t"
 7867             "CALL   SharedRuntime::lrem\n\t"
 7868             "ADD    ESP,16" %}
 7869   ins_encode( long_mod(src1,src2) );
 7870   ins_pipe( pipe_slow );
 7871 %}
 7872 
 7873 // Divide Register Long (no special case since divisor != -1)
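      // The 64-bit dividend in EDX:EAX is divided by |imm| using at most two unsigned 32-bit
      // DIVs: first the high word, then its remainder combined with the low word.  A negative
      // dividend is negated up front and the quotient negated afterwards; the fast path (high
      // word already below |imm|) needs only a single DIV.  The final NEG applies the
      // divisor's sign.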
 7874 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7875   match(Set dst (DivL dst imm));
 7876   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7877   ins_cost(1000);
 7878   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7879             "XOR    $tmp2,$tmp2\n\t"
 7880             "CMP    $tmp,EDX\n\t"
 7881             "JA,s   fast\n\t"
 7882             "MOV    $tmp2,EAX\n\t"
 7883             "MOV    EAX,EDX\n\t"
 7884             "MOV    EDX,0\n\t"
 7885             "JLE,s  pos\n\t"
 7886             "LNEG   EAX : $tmp2\n\t"
 7887             "DIV    $tmp # unsigned division\n\t"
 7888             "XCHG   EAX,$tmp2\n\t"
 7889             "DIV    $tmp\n\t"
 7890             "LNEG   $tmp2 : EAX\n\t"
 7891             "JMP,s  done\n"
 7892     "pos:\n\t"
 7893             "DIV    $tmp\n\t"
 7894             "XCHG   EAX,$tmp2\n"
 7895     "fast:\n\t"
 7896             "DIV    $tmp\n"
 7897     "done:\n\t"
 7898             "MOV    EDX,$tmp2\n\t"
 7899             "NEG    EDX:EAX # if $imm < 0" %}
 7900   ins_encode %{
 7901     int con = (int)$imm$$constant;
 7902     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7903     int pcon = (con > 0) ? con : -con;
 7904     Label Lfast, Lpos, Ldone;
 7905 
 7906     __ movl($tmp$$Register, pcon);
 7907     __ xorl($tmp2$$Register,$tmp2$$Register);
 7908     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7909     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7910 
 7911     __ movl($tmp2$$Register, $dst$$Register); // save
 7912     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7913     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7914     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7915 
 7916     // Negative dividend.
 7917     // convert value to positive to use unsigned division
 7918     __ lneg($dst$$Register, $tmp2$$Register);
 7919     __ divl($tmp$$Register);
 7920     __ xchgl($dst$$Register, $tmp2$$Register);
 7921     __ divl($tmp$$Register);
 7922     // revert result back to negative
 7923     __ lneg($tmp2$$Register, $dst$$Register);
 7924     __ jmpb(Ldone);
 7925 
 7926     __ bind(Lpos);
 7927     __ divl($tmp$$Register); // Use unsigned division
 7928     __ xchgl($dst$$Register, $tmp2$$Register);
 7929     // Fall through to the final divide; tmp2 holds the 32-bit hi result
 7930 
 7931     __ bind(Lfast);
 7932     // fast path: src is positive
 7933     __ divl($tmp$$Register); // Use unsigned division
 7934 
 7935     __ bind(Ldone);
 7936     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7937     if (con < 0) {
 7938       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7939     }
 7940   %}
 7941   ins_pipe( pipe_slow );
 7942 %}
 7943 
 7944 // Remainder Register Long (remainder fits into 32 bits)
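      // Same two-step unsigned division as divL_eReg_imm32 above, but the value kept is the
      // remainder left in EDX by the last DIV; it takes the dividend's sign and is
      // sign-extended into the high word with SAR 31.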
 7945 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7946   match(Set dst (ModL dst imm));
 7947   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7948   ins_cost(1000);
 7949   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7950             "CMP    $tmp,EDX\n\t"
 7951             "JA,s   fast\n\t"
 7952             "MOV    $tmp2,EAX\n\t"
 7953             "MOV    EAX,EDX\n\t"
 7954             "MOV    EDX,0\n\t"
 7955             "JLE,s  pos\n\t"
 7956             "LNEG   EAX : $tmp2\n\t"
 7957             "DIV    $tmp # unsigned division\n\t"
 7958             "MOV    EAX,$tmp2\n\t"
 7959             "DIV    $tmp\n\t"
 7960             "NEG    EDX\n\t"
 7961             "JMP,s  done\n"
 7962     "pos:\n\t"
 7963             "DIV    $tmp\n\t"
 7964             "MOV    EAX,$tmp2\n"
 7965     "fast:\n\t"
 7966             "DIV    $tmp\n"
 7967     "done:\n\t"
 7968             "MOV    EAX,EDX\n\t"
 7969             "SAR    EDX,31\n\t" %}
 7970   ins_encode %{
 7971     int con = (int)$imm$$constant;
 7972     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7973     int pcon = (con > 0) ? con : -con;
 7974     Label  Lfast, Lpos, Ldone;
 7975 
 7976     __ movl($tmp$$Register, pcon);
 7977     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7978     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7979 
 7980     __ movl($tmp2$$Register, $dst$$Register); // save
 7981     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7982     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7983     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7984 
 7985     // Negative dividend.
 7986     // convert value to positive to use unsigned division
 7987     __ lneg($dst$$Register, $tmp2$$Register);
 7988     __ divl($tmp$$Register);
 7989     __ movl($dst$$Register, $tmp2$$Register);
 7990     __ divl($tmp$$Register);
 7991     // revert remainder back to negative
 7992     __ negl(HIGH_FROM_LOW($dst$$Register));
 7993     __ jmpb(Ldone);
 7994 
 7995     __ bind(Lpos);
 7996     __ divl($tmp$$Register);
 7997     __ movl($dst$$Register, $tmp2$$Register);
 7998 
 7999     __ bind(Lfast);
 8000     // fast path: src is positive
 8001     __ divl($tmp$$Register);
 8002 
 8003     __ bind(Ldone);
 8004     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 8005     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 8006 
 8007   %}
 8008   ins_pipe( pipe_slow );
 8009 %}
 8010 
 8011 // Integer Shift Instructions
 8012 // Shift Left by one
 8013 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8014   match(Set dst (LShiftI dst shift));
 8015   effect(KILL cr);
 8016 
 8017   size(2);
 8018   format %{ "SHL    $dst,$shift" %}
 8019   opcode(0xD1, 0x4);  /* D1 /4 */
 8020   ins_encode( OpcP, RegOpc( dst ) );
 8021   ins_pipe( ialu_reg );
 8022 %}
 8023 
 8024 // Shift Left by 8-bit immediate
 8025 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8026   match(Set dst (LShiftI dst shift));
 8027   effect(KILL cr);
 8028 
 8029   size(3);
 8030   format %{ "SHL    $dst,$shift" %}
 8031   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8032   ins_encode( RegOpcImm( dst, shift) );
 8033   ins_pipe( ialu_reg );
 8034 %}
 8035 
 8036 // Shift Left by variable
 8037 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8038   match(Set dst (LShiftI dst shift));
 8039   effect(KILL cr);
 8040 
 8041   size(2);
 8042   format %{ "SHL    $dst,$shift" %}
 8043   opcode(0xD3, 0x4);  /* D3 /4 */
 8044   ins_encode( OpcP, RegOpc( dst ) );
 8045   ins_pipe( ialu_reg_reg );
 8046 %}
 8047 
 8048 // Arithmetic shift right by one
 8049 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8050   match(Set dst (RShiftI dst shift));
 8051   effect(KILL cr);
 8052 
 8053   size(2);
 8054   format %{ "SAR    $dst,$shift" %}
 8055   opcode(0xD1, 0x7);  /* D1 /7 */
 8056   ins_encode( OpcP, RegOpc( dst ) );
 8057   ins_pipe( ialu_reg );
 8058 %}
 8059 
 8060 // Arithmetic shift right memory operand by one
 8061 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8062   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8063   effect(KILL cr);
 8064   format %{ "SAR    $dst,$shift" %}
 8065   opcode(0xD1, 0x7);  /* D1 /7 */
 8066   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8067   ins_pipe( ialu_mem_imm );
 8068 %}
 8069 
 8070 // Arithmetic Shift Right by 8-bit immediate
 8071 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8072   match(Set dst (RShiftI dst shift));
 8073   effect(KILL cr);
 8074 
 8075   size(3);
 8076   format %{ "SAR    $dst,$shift" %}
 8077   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8078   ins_encode( RegOpcImm( dst, shift ) );
 8079   ins_pipe( ialu_mem_imm );
 8080 %}
 8081 
 8082 // Arithmetic Shift Right memory operand by 8-bit immediate
 8083 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8084   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8085   effect(KILL cr);
 8086 
 8087   format %{ "SAR    $dst,$shift" %}
 8088   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8089   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8090   ins_pipe( ialu_mem_imm );
 8091 %}
 8092 
 8093 // Arithmetic Shift Right by variable
 8094 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8095   match(Set dst (RShiftI dst shift));
 8096   effect(KILL cr);
 8097 
 8098   size(2);
 8099   format %{ "SAR    $dst,$shift" %}
 8100   opcode(0xD3, 0x7);  /* D3 /7 */
 8101   ins_encode( OpcP, RegOpc( dst ) );
 8102   ins_pipe( ialu_reg_reg );
 8103 %}
 8104 
 8105 // Logical shift right by one
 8106 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8107   match(Set dst (URShiftI dst shift));
 8108   effect(KILL cr);
 8109 
 8110   size(2);
 8111   format %{ "SHR    $dst,$shift" %}
 8112   opcode(0xD1, 0x5);  /* D1 /5 */
 8113   ins_encode( OpcP, RegOpc( dst ) );
 8114   ins_pipe( ialu_reg );
 8115 %}
 8116 
 8117 // Logical Shift Right by 8-bit immediate
 8118 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8119   match(Set dst (URShiftI dst shift));
 8120   effect(KILL cr);
 8121 
 8122   size(3);
 8123   format %{ "SHR    $dst,$shift" %}
 8124   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8125   ins_encode( RegOpcImm( dst, shift) );
 8126   ins_pipe( ialu_reg );
 8127 %}
 8128 
 8129 
 8130 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8131 // This idiom sign-extends the low byte and is used by the compiler for the i2b bytecode.
 8132 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8133   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8134 
 8135   size(3);
 8136   format %{ "MOVSX  $dst,$src :8" %}
 8137   ins_encode %{
 8138     __ movsbl($dst$$Register, $src$$Register);
 8139   %}
 8140   ins_pipe(ialu_reg_reg);
 8141 %}
 8142 
 8143 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 8144 // This idiom sign-extends the low 16 bits and is used by the compiler for the i2s bytecode.
 8145 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8146   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8147 
 8148   size(3);
 8149   format %{ "MOVSX  $dst,$src :16" %}
 8150   ins_encode %{
 8151     __ movswl($dst$$Register, $src$$Register);
 8152   %}
 8153   ins_pipe(ialu_reg_reg);
 8154 %}
 8155 
 8156 
 8157 // Logical Shift Right by variable
 8158 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8159   match(Set dst (URShiftI dst shift));
 8160   effect(KILL cr);
 8161 
 8162   size(2);
 8163   format %{ "SHR    $dst,$shift" %}
 8164   opcode(0xD3, 0x5);  /* D3 /5 */
 8165   ins_encode( OpcP, RegOpc( dst ) );
 8166   ins_pipe( ialu_reg_reg );
 8167 %}
 8168 
 8169 
 8170 //----------Logical Instructions-----------------------------------------------
 8171 //----------Integer Logical Instructions---------------------------------------
 8172 // And Instructions
 8173 // And Register with Register
 8174 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8175   match(Set dst (AndI dst src));
 8176   effect(KILL cr);
 8177 
 8178   size(2);
 8179   format %{ "AND    $dst,$src" %}
 8180   opcode(0x23);
 8181   ins_encode( OpcP, RegReg( dst, src) );
 8182   ins_pipe( ialu_reg_reg );
 8183 %}
 8184 
 8185 // And Register with Immediate
 8186 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8187   match(Set dst (AndI dst src));
 8188   effect(KILL cr);
 8189 
 8190   format %{ "AND    $dst,$src" %}
 8191   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8192   // ins_encode( RegImm( dst, src) );
 8193   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8194   ins_pipe( ialu_reg );
 8195 %}
 8196 
 8197 // And Register with Memory
 8198 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8199   match(Set dst (AndI dst (LoadI src)));
 8200   effect(KILL cr);
 8201 
 8202   ins_cost(150);
 8203   format %{ "AND    $dst,$src" %}
 8204   opcode(0x23);
 8205   ins_encode( OpcP, RegMem( dst, src) );
 8206   ins_pipe( ialu_reg_mem );
 8207 %}
 8208 
 8209 // And Memory with Register
 8210 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8211   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8212   effect(KILL cr);
 8213 
 8214   ins_cost(150);
 8215   format %{ "AND    $dst,$src" %}
 8216   opcode(0x21);  /* Opcode 21 /r */
 8217   ins_encode( OpcP, RegMem( src, dst ) );
 8218   ins_pipe( ialu_mem_reg );
 8219 %}
 8220 
 8221 // And Memory with Immediate
 8222 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8223   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8224   effect(KILL cr);
 8225 
 8226   ins_cost(125);
 8227   format %{ "AND    $dst,$src" %}
 8228   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8229   // ins_encode( MemImm( dst, src) );
 8230   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8231   ins_pipe( ialu_mem_imm );
 8232 %}
 8233 
 8234 // BMI1 instructions
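      // The match rules below recognize the bit-manipulation identities these instructions
      // implement:
      //   ANDN   dst = ~src1 & src2
      //   BLSI   dst = src & -src          (isolate lowest set bit)
      //   BLSMSK dst = src ^ (src - 1)     (mask up to and including lowest set bit)
      //   BLSR   dst = src & (src - 1)     (clear lowest set bit)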
 8235 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8236   match(Set dst (AndI (XorI src1 minus_1) src2));
 8237   predicate(UseBMI1Instructions);
 8238   effect(KILL cr);
 8239 
 8240   format %{ "ANDNL  $dst, $src1, $src2" %}
 8241 
 8242   ins_encode %{
 8243     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8244   %}
 8245   ins_pipe(ialu_reg);
 8246 %}
 8247 
 8248 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8249   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8250   predicate(UseBMI1Instructions);
 8251   effect(KILL cr);
 8252 
 8253   ins_cost(125);
 8254   format %{ "ANDNL  $dst, $src1, $src2" %}
 8255 
 8256   ins_encode %{
 8257     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8258   %}
 8259   ins_pipe(ialu_reg_mem);
 8260 %}
 8261 
 8262 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8263   match(Set dst (AndI (SubI imm_zero src) src));
 8264   predicate(UseBMI1Instructions);
 8265   effect(KILL cr);
 8266 
 8267   format %{ "BLSIL  $dst, $src" %}
 8268 
 8269   ins_encode %{
 8270     __ blsil($dst$$Register, $src$$Register);
 8271   %}
 8272   ins_pipe(ialu_reg);
 8273 %}
 8274 
 8275 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8276   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8277   predicate(UseBMI1Instructions);
 8278   effect(KILL cr);
 8279 
 8280   ins_cost(125);
 8281   format %{ "BLSIL  $dst, $src" %}
 8282 
 8283   ins_encode %{
 8284     __ blsil($dst$$Register, $src$$Address);
 8285   %}
 8286   ins_pipe(ialu_reg_mem);
 8287 %}
 8288 
 8289 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8290 %{
 8291   match(Set dst (XorI (AddI src minus_1) src));
 8292   predicate(UseBMI1Instructions);
 8293   effect(KILL cr);
 8294 
 8295   format %{ "BLSMSKL $dst, $src" %}
 8296 
 8297   ins_encode %{
 8298     __ blsmskl($dst$$Register, $src$$Register);
 8299   %}
 8300 
 8301   ins_pipe(ialu_reg);
 8302 %}
 8303 
 8304 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8305 %{
 8306   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8307   predicate(UseBMI1Instructions);
 8308   effect(KILL cr);
 8309 
 8310   ins_cost(125);
 8311   format %{ "BLSMSKL $dst, $src" %}
 8312 
 8313   ins_encode %{
 8314     __ blsmskl($dst$$Register, $src$$Address);
 8315   %}
 8316 
 8317   ins_pipe(ialu_reg_mem);
 8318 %}
 8319 
 8320 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8321 %{
 8322   match(Set dst (AndI (AddI src minus_1) src) );
 8323   predicate(UseBMI1Instructions);
 8324   effect(KILL cr);
 8325 
 8326   format %{ "BLSRL  $dst, $src" %}
 8327 
 8328   ins_encode %{
 8329     __ blsrl($dst$$Register, $src$$Register);
 8330   %}
 8331 
 8332   ins_pipe(ialu_reg);
 8333 %}
 8334 
 8335 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8336 %{
 8337   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8338   predicate(UseBMI1Instructions);
 8339   effect(KILL cr);
 8340 
 8341   ins_cost(125);
 8342   format %{ "BLSRL  $dst, $src" %}
 8343 
 8344   ins_encode %{
 8345     __ blsrl($dst$$Register, $src$$Address);
 8346   %}
 8347 
 8348   ins_pipe(ialu_reg_mem);
 8349 %}
 8350 
 8351 // Or Instructions
 8352 // Or Register with Register
 8353 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8354   match(Set dst (OrI dst src));
 8355   effect(KILL cr);
 8356 
 8357   size(2);
 8358   format %{ "OR     $dst,$src" %}
 8359   opcode(0x0B);
 8360   ins_encode( OpcP, RegReg( dst, src) );
 8361   ins_pipe( ialu_reg_reg );
 8362 %}
 8363 
 8364 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8365   match(Set dst (OrI dst (CastP2X src)));
 8366   effect(KILL cr);
 8367 
 8368   size(2);
 8369   format %{ "OR     $dst,$src" %}
 8370   opcode(0x0B);
 8371   ins_encode( OpcP, RegReg( dst, src) );
 8372   ins_pipe( ialu_reg_reg );
 8373 %}
 8374 
 8375 
 8376 // Or Register with Immediate
 8377 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8378   match(Set dst (OrI dst src));
 8379   effect(KILL cr);
 8380 
 8381   format %{ "OR     $dst,$src" %}
 8382   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8383   // ins_encode( RegImm( dst, src) );
 8384   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8385   ins_pipe( ialu_reg );
 8386 %}
 8387 
 8388 // Or Register with Memory
 8389 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8390   match(Set dst (OrI dst (LoadI src)));
 8391   effect(KILL cr);
 8392 
 8393   ins_cost(150);
 8394   format %{ "OR     $dst,$src" %}
 8395   opcode(0x0B);
 8396   ins_encode( OpcP, RegMem( dst, src) );
 8397   ins_pipe( ialu_reg_mem );
 8398 %}
 8399 
 8400 // Or Memory with Register
 8401 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8402   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8403   effect(KILL cr);
 8404 
 8405   ins_cost(150);
 8406   format %{ "OR     $dst,$src" %}
 8407   opcode(0x09);  /* Opcode 09 /r */
 8408   ins_encode( OpcP, RegMem( src, dst ) );
 8409   ins_pipe( ialu_mem_reg );
 8410 %}
 8411 
 8412 // Or Memory with Immediate
 8413 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8414   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8415   effect(KILL cr);
 8416 
 8417   ins_cost(125);
 8418   format %{ "OR     $dst,$src" %}
 8419   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8420   // ins_encode( MemImm( dst, src) );
 8421   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8422   ins_pipe( ialu_mem_imm );
 8423 %}
 8424 
 8425 // ROL/ROR
 8426 // ROL expand
 8427 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8428   effect(USE_DEF dst, USE shift, KILL cr);
 8429 
 8430   format %{ "ROL    $dst, $shift" %}
 8431   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8432   ins_encode( OpcP, RegOpc( dst ));
 8433   ins_pipe( ialu_reg );
 8434 %}
 8435 
 8436 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8437   effect(USE_DEF dst, USE shift, KILL cr);
 8438 
 8439   format %{ "ROL    $dst, $shift" %}
 8440   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
 8441   ins_encode( RegOpcImm(dst, shift) );
 8442   ins_pipe(ialu_reg);
 8443 %}
 8444 
 8445 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8446   effect(USE_DEF dst, USE shift, KILL cr);
 8447 
 8448   format %{ "ROL    $dst, $shift" %}
 8449   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8450   ins_encode(OpcP, RegOpc(dst));
 8451   ins_pipe( ialu_reg_reg );
 8452 %}
 8453 // end of ROL expand
 8454 
 8455 // ROL 32-bit by one
 8456 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8457   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8458 
 8459   expand %{
 8460     rolI_eReg_imm1(dst, lshift, cr);
 8461   %}
 8462 %}
 8463 
 8464 // ROL 32-bit by 8-bit immediate (the two shift counts must sum to 0 mod 32)
 8465 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8466   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8467   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8468 
 8469   expand %{
 8470     rolI_eReg_imm8(dst, lshift, cr);
 8471   %}
 8472 %}
 8473 
 8474 // ROL 32-bit by variable (complementary count written as 0 - shift)
 8475 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8476   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8477 
 8478   expand %{
 8479     rolI_eReg_CL(dst, shift, cr);
 8480   %}
 8481 %}
 8482 
 8483 // ROL 32-bit by variable (complementary count written as 32 - shift)
 8484 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8485   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8486 
 8487   expand %{
 8488     rolI_eReg_CL(dst, shift, cr);
 8489   %}
 8490 %}
 8491 
 8492 // ROR expand
 8493 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8494   effect(USE_DEF dst, USE shift, KILL cr);
 8495 
 8496   format %{ "ROR    $dst, $shift" %}
 8497   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8498   ins_encode( OpcP, RegOpc( dst ) );
 8499   ins_pipe( ialu_reg );
 8500 %}
 8501 
 8502 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8503   effect (USE_DEF dst, USE shift, KILL cr);
 8504 
 8505   format %{ "ROR    $dst, $shift" %}
 8506   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
 8507   ins_encode( RegOpcImm(dst, shift) );
 8508   ins_pipe( ialu_reg );
 8509 %}
 8510 
 8511 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8512   effect(USE_DEF dst, USE shift, KILL cr);
 8513 
 8514   format %{ "ROR    $dst, $shift" %}
 8515   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8516   ins_encode(OpcP, RegOpc(dst));
 8517   ins_pipe( ialu_reg_reg );
 8518 %}
 8519 // end of ROR expand
 8520 
 8521 // ROR 32-bit by one
 8522 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8523   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8524 
 8525   expand %{
 8526     rorI_eReg_imm1(dst, rshift, cr);
 8527   %}
 8528 %}
 8529 
 8530 // ROR 32-bit by 8-bit immediate (the two shift counts must sum to 0 mod 32)
 8531 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8532   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8533   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8534 
 8535   expand %{
 8536     rorI_eReg_imm8(dst, rshift, cr);
 8537   %}
 8538 %}
 8539 
 8540 // ROR 32-bit by variable (complementary count written as 0 - shift)
 8541 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8542   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8543 
 8544   expand %{
 8545     rorI_eReg_CL(dst, shift, cr);
 8546   %}
 8547 %}
 8548 
 8549 // ROR 32-bit by variable (complementary count written as 32 - shift)
 8550 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8551   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8552 
 8553   expand %{
 8554     rorI_eReg_CL(dst, shift, cr);
 8555   %}
 8556 %}
 8557 
 8558 // Xor Instructions
 8559 // Xor Register with Register
 8560 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8561   match(Set dst (XorI dst src));
 8562   effect(KILL cr);
 8563 
 8564   size(2);
 8565   format %{ "XOR    $dst,$src" %}
 8566   opcode(0x33);
 8567   ins_encode( OpcP, RegReg( dst, src) );
 8568   ins_pipe( ialu_reg_reg );
 8569 %}
 8570 
 8571 // Xor Register with Immediate -1
 8572 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8573   match(Set dst (XorI dst imm));
 8574 
 8575   size(2);
 8576   format %{ "NOT    $dst" %}
 8577   ins_encode %{
 8578      __ notl($dst$$Register);
 8579   %}
 8580   ins_pipe( ialu_reg );
 8581 %}
 8582 
 8583 // Xor Register with Immediate
 8584 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8585   match(Set dst (XorI dst src));
 8586   effect(KILL cr);
 8587 
 8588   format %{ "XOR    $dst,$src" %}
 8589   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8590   // ins_encode( RegImm( dst, src) );
 8591   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8592   ins_pipe( ialu_reg );
 8593 %}
 8594 
 8595 // Xor Register with Memory
 8596 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8597   match(Set dst (XorI dst (LoadI src)));
 8598   effect(KILL cr);
 8599 
 8600   ins_cost(150);
 8601   format %{ "XOR    $dst,$src" %}
 8602   opcode(0x33);
 8603   ins_encode( OpcP, RegMem(dst, src) );
 8604   ins_pipe( ialu_reg_mem );
 8605 %}
 8606 
 8607 // Xor Memory with Register
 8608 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8609   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8610   effect(KILL cr);
 8611 
 8612   ins_cost(150);
 8613   format %{ "XOR    $dst,$src" %}
 8614   opcode(0x31);  /* Opcode 31 /r */
 8615   ins_encode( OpcP, RegMem( src, dst ) );
 8616   ins_pipe( ialu_mem_reg );
 8617 %}
 8618 
 8619 // Xor Memory with Immediate
 8620 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8621   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8622   effect(KILL cr);
 8623 
 8624   ins_cost(125);
 8625   format %{ "XOR    $dst,$src" %}
 8626   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8627   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8628   ins_pipe( ialu_mem_imm );
 8629 %}
 8630 
 8631 //----------Convert Int to Boolean---------------------------------------------
 8632 
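      // Conv2B computes (src != 0) ? 1 : 0.  The expansion copies src into dst, then NEG dst
      // sets CF exactly when src is non-zero, and ADC dst,src leaves dst = -src + src + CF = CF.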
 8633 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8634   effect( DEF dst, USE src );
 8635   format %{ "MOV    $dst,$src" %}
 8636   ins_encode( enc_Copy( dst, src) );
 8637   ins_pipe( ialu_reg_reg );
 8638 %}
 8639 
 8640 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8641   effect( USE_DEF dst, USE src, KILL cr );
 8642 
 8643   size(4);
 8644   format %{ "NEG    $dst\n\t"
 8645             "ADC    $dst,$src" %}
 8646   ins_encode( neg_reg(dst),
 8647               OpcRegReg(0x13,dst,src) );
 8648   ins_pipe( ialu_reg_reg_long );
 8649 %}
 8650 
 8651 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8652   match(Set dst (Conv2B src));
 8653 
 8654   expand %{
 8655     movI_nocopy(dst,src);
 8656     ci2b(dst,src,cr);
 8657   %}
 8658 %}
 8659 
 8660 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8661   effect( DEF dst, USE src );
 8662   format %{ "MOV    $dst,$src" %}
 8663   ins_encode( enc_Copy( dst, src) );
 8664   ins_pipe( ialu_reg_reg );
 8665 %}
 8666 
 8667 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8668   effect( USE_DEF dst, USE src, KILL cr );
 8669   format %{ "NEG    $dst\n\t"
 8670             "ADC    $dst,$src" %}
 8671   ins_encode( neg_reg(dst),
 8672               OpcRegReg(0x13,dst,src) );
 8673   ins_pipe( ialu_reg_reg_long );
 8674 %}
 8675 
 8676 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8677   match(Set dst (Conv2B src));
 8678 
 8679   expand %{
 8680     movP_nocopy(dst,src);
 8681     cp2b(dst,src,cr);
 8682   %}
 8683 %}
 8684 
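      // CmpLTMask produces an all-ones mask when p < q and zero otherwise, i.e. dst = -(p < q).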
 8685 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8686   match(Set dst (CmpLTMask p q));
 8687   effect(KILL cr);
 8688   ins_cost(400);
 8689 
 8690   // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
 8691   format %{ "XOR    $dst,$dst\n\t"
 8692             "CMP    $p,$q\n\t"
 8693             "SETlt  $dst\n\t"
 8694             "NEG    $dst" %}
 8695   ins_encode %{
 8696     Register Rp = $p$$Register;
 8697     Register Rq = $q$$Register;
 8698     Register Rd = $dst$$Register;
 8700     __ xorl(Rd, Rd);
 8701     __ cmpl(Rp, Rq);
 8702     __ setb(Assembler::less, Rd);
 8703     __ negl(Rd);
 8704   %}
 8705 
 8706   ins_pipe(pipe_slow);
 8707 %}
 8708 
 8709 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8710   match(Set dst (CmpLTMask dst zero));
 8711   effect(DEF dst, KILL cr);
 8712   ins_cost(100);
 8713 
 8714   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8715   ins_encode %{
 8716     __ sarl($dst$$Register, 31);
 8717   %}
 8718   ins_pipe(ialu_reg);
 8719 %}
 8720 
 8721 /* better to save a register than avoid a branch */
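      // Computes p = (p - q) + ((p < q) ? y : 0) with a compare-and-branch instead of
      // materializing the CmpLTMask value.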
 8722 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8723   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8724   effect(KILL cr);
 8725   ins_cost(400);
 8726   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8727             "JGE    done\n\t"
 8728             "ADD    $p,$y\n"
 8729             "done:  " %}
 8730   ins_encode %{
 8731     Register Rp = $p$$Register;
 8732     Register Rq = $q$$Register;
 8733     Register Ry = $y$$Register;
 8734     Label done;
 8735     __ subl(Rp, Rq);
 8736     __ jccb(Assembler::greaterEqual, done);
 8737     __ addl(Rp, Ry);
 8738     __ bind(done);
 8739   %}
 8740 
 8741   ins_pipe(pipe_cmplt);
 8742 %}
 8743 
 8744 /* better to save a register than avoid a branch */
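      // Computes y = (p < q) ? y : 0, again with a branch rather than the mask.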
 8745 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8746   match(Set y (AndI (CmpLTMask p q) y));
 8747   effect(KILL cr);
 8748 
 8749   ins_cost(300);
 8750 
 8751   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8752             "JLT      done\n\t"
 8753             "XORL     $y, $y\n"
 8754             "done:  " %}
 8755   ins_encode %{
 8756     Register Rp = $p$$Register;
 8757     Register Rq = $q$$Register;
 8758     Register Ry = $y$$Register;
 8759     Label done;
 8760     __ cmpl(Rp, Rq);
 8761     __ jccb(Assembler::less, done);
 8762     __ xorl(Ry, Ry);
 8763     __ bind(done);
 8764   %}
 8765 
 8766   ins_pipe(pipe_cmplt);
 8767 %}
 8768 
 8769 /* Disabled: enabling this encourages spilling in the inner loop of compress.
 8770 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8771   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8772 */
 8773 //----------Overflow Math Instructions-----------------------------------------
 8774 
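      // These instructions exist only to set the condition codes for an overflow check: the
      // numeric result is either discarded (CMP) or computed into a killed or temporary
      // register, and the consumer of cr tests the overflow flag.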
 8775 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8776 %{
 8777   match(Set cr (OverflowAddI op1 op2));
 8778   effect(DEF cr, USE_KILL op1, USE op2);
 8779 
 8780   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8781 
 8782   ins_encode %{
 8783     __ addl($op1$$Register, $op2$$Register);
 8784   %}
 8785   ins_pipe(ialu_reg_reg);
 8786 %}
 8787 
 8788 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8789 %{
 8790   match(Set cr (OverflowAddI op1 op2));
 8791   effect(DEF cr, USE_KILL op1, USE op2);
 8792 
 8793   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8794 
 8795   ins_encode %{
 8796     __ addl($op1$$Register, $op2$$constant);
 8797   %}
 8798   ins_pipe(ialu_reg_reg);
 8799 %}
 8800 
 8801 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8802 %{
 8803   match(Set cr (OverflowSubI op1 op2));
 8804 
 8805   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8806   ins_encode %{
 8807     __ cmpl($op1$$Register, $op2$$Register);
 8808   %}
 8809   ins_pipe(ialu_reg_reg);
 8810 %}
 8811 
 8812 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8813 %{
 8814   match(Set cr (OverflowSubI op1 op2));
 8815 
 8816   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8817   ins_encode %{
 8818     __ cmpl($op1$$Register, $op2$$constant);
 8819   %}
 8820   ins_pipe(ialu_reg_reg);
 8821 %}
 8822 
 8823 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8824 %{
 8825   match(Set cr (OverflowSubI zero op2));
 8826   effect(DEF cr, USE_KILL op2);
 8827 
 8828   format %{ "NEG    $op2\t# overflow check int" %}
 8829   ins_encode %{
 8830     __ negl($op2$$Register);
 8831   %}
 8832   ins_pipe(ialu_reg_reg);
 8833 %}
 8834 
 8835 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8836 %{
 8837   match(Set cr (OverflowMulI op1 op2));
 8838   effect(DEF cr, USE_KILL op1, USE op2);
 8839 
 8840   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8841   ins_encode %{
 8842     __ imull($op1$$Register, $op2$$Register);
 8843   %}
 8844   ins_pipe(ialu_reg_reg_alu0);
 8845 %}
 8846 
 8847 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8848 %{
 8849   match(Set cr (OverflowMulI op1 op2));
 8850   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8851 
 8852   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8853   ins_encode %{
 8854     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8855   %}
 8856   ins_pipe(ialu_reg_reg_alu0);
 8857 %}
 8858 
 8859 // Integer Absolute Instructions
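      // Branchless abs: tmp = src >> 31 is 0 for non-negative src and -1 for negative src,
      // so (src ^ tmp) - tmp leaves src unchanged or negates it, respectively.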
 8860 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8861 %{
 8862   match(Set dst (AbsI src));
 8863   effect(TEMP dst, TEMP tmp, KILL cr);
 8864   format %{ "movl $tmp, $src\n\t"
 8865             "sarl $tmp, 31\n\t"
 8866             "movl $dst, $src\n\t"
 8867             "xorl $dst, $tmp\n\t"
 8868             "subl $dst, $tmp\n"
 8869           %}
 8870   ins_encode %{
 8871     __ movl($tmp$$Register, $src$$Register);
 8872     __ sarl($tmp$$Register, 31);
 8873     __ movl($dst$$Register, $src$$Register);
 8874     __ xorl($dst$$Register, $tmp$$Register);
 8875     __ subl($dst$$Register, $tmp$$Register);
 8876   %}
 8877 
 8878   ins_pipe(ialu_reg_reg);
 8879 %}
 8880 
 8881 //----------Long Instructions------------------------------------------------
 8882 // Add Long Register with Register
 8883 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8884   match(Set dst (AddL dst src));
 8885   effect(KILL cr);
 8886   ins_cost(200);
 8887   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8888             "ADC    $dst.hi,$src.hi" %}
 8889   opcode(0x03, 0x13);
 8890   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8891   ins_pipe( ialu_reg_reg_long );
 8892 %}
 8893 
 8894 // Add Long Register with Immediate
 8895 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8896   match(Set dst (AddL dst src));
 8897   effect(KILL cr);
 8898   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8899             "ADC    $dst.hi,$src.hi" %}
 8900   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8901   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8902   ins_pipe( ialu_reg_long );
 8903 %}
 8904 
 8905 // Add Long Register with Memory
 8906 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8907   match(Set dst (AddL dst (LoadL mem)));
 8908   effect(KILL cr);
 8909   ins_cost(125);
 8910   format %{ "ADD    $dst.lo,$mem\n\t"
 8911             "ADC    $dst.hi,$mem+4" %}
 8912   opcode(0x03, 0x13);
 8913   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8914   ins_pipe( ialu_reg_long_mem );
 8915 %}
 8916 
 8917 // Subtract Long Register with Register.
 8918 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8919   match(Set dst (SubL dst src));
 8920   effect(KILL cr);
 8921   ins_cost(200);
 8922   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8923             "SBB    $dst.hi,$src.hi" %}
 8924   opcode(0x2B, 0x1B);
 8925   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8926   ins_pipe( ialu_reg_reg_long );
 8927 %}
 8928 
 8929 // Subtract Long Register with Immediate
 8930 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8931   match(Set dst (SubL dst src));
 8932   effect(KILL cr);
 8933   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8934             "SBB    $dst.hi,$src.hi" %}
 8935   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8936   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8937   ins_pipe( ialu_reg_long );
 8938 %}
 8939 
 8940 // Subtract Long Register with Memory
 8941 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8942   match(Set dst (SubL dst (LoadL mem)));
 8943   effect(KILL cr);
 8944   ins_cost(125);
 8945   format %{ "SUB    $dst.lo,$mem\n\t"
 8946             "SBB    $dst.hi,$mem+4" %}
 8947   opcode(0x2B, 0x1B);
 8948   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8949   ins_pipe( ialu_reg_long_mem );
 8950 %}
 8951 
 8952 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8953   match(Set dst (SubL zero dst));
 8954   effect(KILL cr);
 8955   ins_cost(300);
 8956   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8957   ins_encode( neg_long(dst) );
 8958   ins_pipe( ialu_reg_reg_long );
 8959 %}
 8960 
 8961 // And Long Register with Register
 8962 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8963   match(Set dst (AndL dst src));
 8964   effect(KILL cr);
 8965   format %{ "AND    $dst.lo,$src.lo\n\t"
 8966             "AND    $dst.hi,$src.hi" %}
 8967   opcode(0x23,0x23);
 8968   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8969   ins_pipe( ialu_reg_reg_long );
 8970 %}
 8971 
 8972 // And Long Register with Immediate
 8973 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8974   match(Set dst (AndL dst src));
 8975   effect(KILL cr);
 8976   format %{ "AND    $dst.lo,$src.lo\n\t"
 8977             "AND    $dst.hi,$src.hi" %}
 8978   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8979   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8980   ins_pipe( ialu_reg_long );
 8981 %}
 8982 
 8983 // And Long Register with Memory
 8984 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8985   match(Set dst (AndL dst (LoadL mem)));
 8986   effect(KILL cr);
 8987   ins_cost(125);
 8988   format %{ "AND    $dst.lo,$mem\n\t"
 8989             "AND    $dst.hi,$mem+4" %}
 8990   opcode(0x23, 0x23);
 8991   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8992   ins_pipe( ialu_reg_long_mem );
 8993 %}
 8994 
 8995 // BMI1 instructions
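      // The 64-bit BMI1 forms apply the 32-bit instruction to each half.  The low word is
      // processed first; the flags it produces (result ZF for BLSI, CF for BLSMSK/BLSR) tell
      // whether the lowest set bit was already found there, and only if it was not is the
      // same instruction applied to the high word.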
 8996 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8997   match(Set dst (AndL (XorL src1 minus_1) src2));
 8998   predicate(UseBMI1Instructions);
 8999   effect(KILL cr, TEMP dst);
 9000 
 9001   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 9002             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 9003          %}
 9004 
 9005   ins_encode %{
 9006     Register Rdst = $dst$$Register;
 9007     Register Rsrc1 = $src1$$Register;
 9008     Register Rsrc2 = $src2$$Register;
 9009     __ andnl(Rdst, Rsrc1, Rsrc2);
 9010     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 9011   %}
 9012   ins_pipe(ialu_reg_reg_long);
 9013 %}
 9014 
 9015 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 9016   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9017   predicate(UseBMI1Instructions);
 9018   effect(KILL cr, TEMP dst);
 9019 
 9020   ins_cost(125);
 9021   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9022             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9023          %}
 9024 
 9025   ins_encode %{
 9026     Register Rdst = $dst$$Register;
 9027     Register Rsrc1 = $src1$$Register;
 9028     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9029 
 9030     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9031     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9032   %}
 9033   ins_pipe(ialu_reg_mem);
 9034 %}
 9035 
 9036 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9037   match(Set dst (AndL (SubL imm_zero src) src));
 9038   predicate(UseBMI1Instructions);
 9039   effect(KILL cr, TEMP dst);
 9040 
 9041   format %{ "MOVL   $dst.hi, 0\n\t"
 9042             "BLSIL  $dst.lo, $src.lo\n\t"
 9043             "JNZ    done\n\t"
 9044             "BLSIL  $dst.hi, $src.hi\n"
 9045             "done:"
 9046          %}
 9047 
 9048   ins_encode %{
 9049     Label done;
 9050     Register Rdst = $dst$$Register;
 9051     Register Rsrc = $src$$Register;
 9052     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9053     __ blsil(Rdst, Rsrc);
 9054     __ jccb(Assembler::notZero, done);
 9055     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9056     __ bind(done);
 9057   %}
 9058   ins_pipe(ialu_reg);
 9059 %}
 9060 
 9061 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9062   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9063   predicate(UseBMI1Instructions);
 9064   effect(KILL cr, TEMP dst);
 9065 
 9066   ins_cost(125);
 9067   format %{ "MOVL   $dst.hi, 0\n\t"
 9068             "BLSIL  $dst.lo, $src\n\t"
 9069             "JNZ    done\n\t"
 9070             "BLSIL  $dst.hi, $src+4\n"
 9071             "done:"
 9072          %}
 9073 
 9074   ins_encode %{
 9075     Label done;
 9076     Register Rdst = $dst$$Register;
 9077     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9078 
 9079     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9080     __ blsil(Rdst, $src$$Address);
 9081     __ jccb(Assembler::notZero, done);
 9082     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9083     __ bind(done);
 9084   %}
 9085   ins_pipe(ialu_reg_mem);
 9086 %}
 9087 
 9088 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9089 %{
 9090   match(Set dst (XorL (AddL src minus_1) src));
 9091   predicate(UseBMI1Instructions);
 9092   effect(KILL cr, TEMP dst);
 9093 
 9094   format %{ "MOVL    $dst.hi, 0\n\t"
 9095             "BLSMSKL $dst.lo, $src.lo\n\t"
 9096             "JNC     done\n\t"
 9097             "BLSMSKL $dst.hi, $src.hi\n"
 9098             "done:"
 9099          %}
 9100 
 9101   ins_encode %{
 9102     Label done;
 9103     Register Rdst = $dst$$Register;
 9104     Register Rsrc = $src$$Register;
 9105     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9106     __ blsmskl(Rdst, Rsrc);
 9107     __ jccb(Assembler::carryClear, done);
 9108     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9109     __ bind(done);
 9110   %}
 9111 
 9112   ins_pipe(ialu_reg);
 9113 %}
 9114 
 9115 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9116 %{
 9117   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9118   predicate(UseBMI1Instructions);
 9119   effect(KILL cr, TEMP dst);
 9120 
 9121   ins_cost(125);
 9122   format %{ "MOVL    $dst.hi, 0\n\t"
 9123             "BLSMSKL $dst.lo, $src\n\t"
 9124             "JNC     done\n\t"
 9125             "BLSMSKL $dst.hi, $src+4\n"
 9126             "done:"
 9127          %}
 9128 
 9129   ins_encode %{
 9130     Label done;
 9131     Register Rdst = $dst$$Register;
 9132     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9133 
 9134     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9135     __ blsmskl(Rdst, $src$$Address);
 9136     __ jccb(Assembler::carryClear, done);
 9137     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9138     __ bind(done);
 9139   %}
 9140 
 9141   ins_pipe(ialu_reg_mem);
 9142 %}
 9143 
 9144 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9145 %{
 9146   match(Set dst (AndL (AddL src minus_1) src) );
 9147   predicate(UseBMI1Instructions);
 9148   effect(KILL cr, TEMP dst);
 9149 
 9150   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9151             "BLSRL  $dst.lo, $src.lo\n\t"
 9152             "JNC    done\n\t"
 9153             "BLSRL  $dst.hi, $src.hi\n"
 9154             "done:"
 9155   %}
 9156 
 9157   ins_encode %{
 9158     Label done;
 9159     Register Rdst = $dst$$Register;
 9160     Register Rsrc = $src$$Register;
 9161     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9162     __ blsrl(Rdst, Rsrc);
 9163     __ jccb(Assembler::carryClear, done);
 9164     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9165     __ bind(done);
 9166   %}
 9167 
 9168   ins_pipe(ialu_reg);
 9169 %}
 9170 
 9171 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9172 %{
 9173   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9174   predicate(UseBMI1Instructions);
 9175   effect(KILL cr, TEMP dst);
 9176 
 9177   ins_cost(125);
 9178   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9179             "BLSRL  $dst.lo, $src\n\t"
 9180             "JNC    done\n\t"
 9181             "BLSRL  $dst.hi, $src+4\n"
 9182             "done:"
 9183   %}
 9184 
 9185   ins_encode %{
 9186     Label done;
 9187     Register Rdst = $dst$$Register;
 9188     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9189     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9190     __ blsrl(Rdst, $src$$Address);
 9191     __ jccb(Assembler::carryClear, done);
 9192     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9193     __ bind(done);
 9194   %}
 9195 
 9196   ins_pipe(ialu_reg_mem);
 9197 %}
 9198 
 9199 // Or Long Register with Register
 9200 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9201   match(Set dst (OrL dst src));
 9202   effect(KILL cr);
 9203   format %{ "OR     $dst.lo,$src.lo\n\t"
 9204             "OR     $dst.hi,$src.hi" %}
 9205   opcode(0x0B,0x0B);
 9206   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9207   ins_pipe( ialu_reg_reg_long );
 9208 %}
 9209 
 9210 // Or Long Register with Immediate
 9211 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9212   match(Set dst (OrL dst src));
 9213   effect(KILL cr);
 9214   format %{ "OR     $dst.lo,$src.lo\n\t"
 9215             "OR     $dst.hi,$src.hi" %}
 9216   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9217   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9218   ins_pipe( ialu_reg_long );
 9219 %}
 9220 
 9221 // Or Long Register with Memory
 9222 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9223   match(Set dst (OrL dst (LoadL mem)));
 9224   effect(KILL cr);
 9225   ins_cost(125);
 9226   format %{ "OR     $dst.lo,$mem\n\t"
 9227             "OR     $dst.hi,$mem+4" %}
 9228   opcode(0x0B,0x0B);
 9229   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9230   ins_pipe( ialu_reg_long_mem );
 9231 %}
 9232 
 9233 // Xor Long Register with Register
 9234 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9235   match(Set dst (XorL dst src));
 9236   effect(KILL cr);
 9237   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9238             "XOR    $dst.hi,$src.hi" %}
 9239   opcode(0x33,0x33);
 9240   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9241   ins_pipe( ialu_reg_reg_long );
 9242 %}
 9243 
 9244 // Xor Long Register with Immediate -1
 9245 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9246   match(Set dst (XorL dst imm));
 9247   format %{ "NOT    $dst.lo\n\t"
 9248             "NOT    $dst.hi" %}
 9249   ins_encode %{
 9250      __ notl($dst$$Register);
 9251      __ notl(HIGH_FROM_LOW($dst$$Register));
 9252   %}
 9253   ins_pipe( ialu_reg_long );
 9254 %}
 9255 
 9256 // Xor Long Register with Immediate
 9257 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9258   match(Set dst (XorL dst src));
 9259   effect(KILL cr);
 9260   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9261             "XOR    $dst.hi,$src.hi" %}
 9262   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9263   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9264   ins_pipe( ialu_reg_long );
 9265 %}
 9266 
 9267 // Xor Long Register with Memory
 9268 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9269   match(Set dst (XorL dst (LoadL mem)));
 9270   effect(KILL cr);
 9271   ins_cost(125);
 9272   format %{ "XOR    $dst.lo,$mem\n\t"
 9273             "XOR    $dst.hi,$mem+4" %}
 9274   opcode(0x33,0x33);
 9275   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9276   ins_pipe( ialu_reg_long_mem );
 9277 %}
 9278 
 9279 // Shift Left Long by 1
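      // A 64-bit shift left by one is an ADD of the low word to itself plus an ADC of the
      // high word to itself, which carries the bit shifted out of the low word into the high
      // word; the by-2 and by-3 forms below simply repeat the pair.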
 9280 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9281   predicate(UseNewLongLShift);
 9282   match(Set dst (LShiftL dst cnt));
 9283   effect(KILL cr);
 9284   ins_cost(100);
 9285   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9286             "ADC    $dst.hi,$dst.hi" %}
 9287   ins_encode %{
 9288     __ addl($dst$$Register,$dst$$Register);
 9289     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9290   %}
 9291   ins_pipe( ialu_reg_long );
 9292 %}
 9293 
 9294 // Shift Left Long by 2
 9295 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9296   predicate(UseNewLongLShift);
 9297   match(Set dst (LShiftL dst cnt));
 9298   effect(KILL cr);
 9299   ins_cost(100);
 9300   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9301             "ADC    $dst.hi,$dst.hi\n\t"
 9302             "ADD    $dst.lo,$dst.lo\n\t"
 9303             "ADC    $dst.hi,$dst.hi" %}
 9304   ins_encode %{
 9305     __ addl($dst$$Register,$dst$$Register);
 9306     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9307     __ addl($dst$$Register,$dst$$Register);
 9308     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9309   %}
 9310   ins_pipe( ialu_reg_long );
 9311 %}
 9312 
 9313 // Shift Left Long by 3
 9314 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9315   predicate(UseNewLongLShift);
 9316   match(Set dst (LShiftL dst cnt));
 9317   effect(KILL cr);
 9318   ins_cost(100);
 9319   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9320             "ADC    $dst.hi,$dst.hi\n\t"
 9321             "ADD    $dst.lo,$dst.lo\n\t"
 9322             "ADC    $dst.hi,$dst.hi\n\t"
 9323             "ADD    $dst.lo,$dst.lo\n\t"
 9324             "ADC    $dst.hi,$dst.hi" %}
 9325   ins_encode %{
 9326     __ addl($dst$$Register,$dst$$Register);
 9327     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9328     __ addl($dst$$Register,$dst$$Register);
 9329     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9330     __ addl($dst$$Register,$dst$$Register);
 9331     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9332   %}
 9333   ins_pipe( ialu_reg_long );
 9334 %}
 9335 
 9336 // Shift Left Long by 1-31
 9337 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9338   match(Set dst (LShiftL dst cnt));
 9339   effect(KILL cr);
 9340   ins_cost(200);
 9341   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9342             "SHL    $dst.lo,$cnt" %}
 9343   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9344   ins_encode( move_long_small_shift(dst,cnt) );
 9345   ins_pipe( ialu_reg_long );
 9346 %}
 9347 
 9348 // Shift Left Long by 32-63
 9349 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9350   match(Set dst (LShiftL dst cnt));
 9351   effect(KILL cr);
 9352   ins_cost(300);
 9353   format %{ "MOV    $dst.hi,$dst.lo\n"
 9354           "\tSHL    $dst.hi,$cnt-32\n"
 9355           "\tXOR    $dst.lo,$dst.lo" %}
 9356   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9357   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9358   ins_pipe( ialu_reg_long );
 9359 %}
 9360 
 9361 // Shift Left Long by variable
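      // A CL shift count is masked to 5 bits by the hardware, so counts of 32-63 are handled
      // by first moving the low word into the high word and clearing the low word; the
      // SHLD/SHL pair then performs the remaining (count mod 32) shift.  The logical and
      // arithmetic right-shift variants below mirror this with SHRD/SHR and SAR.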
 9362 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9363   match(Set dst (LShiftL dst shift));
 9364   effect(KILL cr);
 9365   ins_cost(500+200);
 9366   size(17);
 9367   format %{ "TEST   $shift,32\n\t"
 9368             "JEQ,s  small\n\t"
 9369             "MOV    $dst.hi,$dst.lo\n\t"
 9370             "XOR    $dst.lo,$dst.lo\n"
 9371     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9372             "SHL    $dst.lo,$shift" %}
 9373   ins_encode( shift_left_long( dst, shift ) );
 9374   ins_pipe( pipe_slow );
 9375 %}
 9376 
 9377 // Shift Right Long by 1-31
 9378 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9379   match(Set dst (URShiftL dst cnt));
 9380   effect(KILL cr);
 9381   ins_cost(200);
 9382   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9383             "SHR    $dst.hi,$cnt" %}
 9384   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9385   ins_encode( move_long_small_shift(dst,cnt) );
 9386   ins_pipe( ialu_reg_long );
 9387 %}
 9388 
 9389 // Shift Right Long by 32-63
 9390 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9391   match(Set dst (URShiftL dst cnt));
 9392   effect(KILL cr);
 9393   ins_cost(300);
 9394   format %{ "MOV    $dst.lo,$dst.hi\n"
 9395           "\tSHR    $dst.lo,$cnt-32\n"
 9396           "\tXOR    $dst.hi,$dst.hi" %}
 9397   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9398   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9399   ins_pipe( ialu_reg_long );
 9400 %}
 9401 
 9402 // Shift Right Long by variable
 9403 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9404   match(Set dst (URShiftL dst shift));
 9405   effect(KILL cr);
 9406   ins_cost(600);
 9407   size(17);
 9408   format %{ "TEST   $shift,32\n\t"
 9409             "JEQ,s  small\n\t"
 9410             "MOV    $dst.lo,$dst.hi\n\t"
 9411             "XOR    $dst.hi,$dst.hi\n"
 9412     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9413             "SHR    $dst.hi,$shift" %}
 9414   ins_encode( shift_right_long( dst, shift ) );
 9415   ins_pipe( pipe_slow );
 9416 %}
 9417 
// Shift Right arithmetic Long by 1-31
 9419 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9420   match(Set dst (RShiftL dst cnt));
 9421   effect(KILL cr);
 9422   ins_cost(200);
 9423   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9424             "SAR    $dst.hi,$cnt" %}
 9425   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9426   ins_encode( move_long_small_shift(dst,cnt) );
 9427   ins_pipe( ialu_reg_long );
 9428 %}
 9429 
// Shift Right arithmetic Long by 32-63
 9431 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9432   match(Set dst (RShiftL dst cnt));
 9433   effect(KILL cr);
 9434   ins_cost(300);
 9435   format %{ "MOV    $dst.lo,$dst.hi\n"
 9436           "\tSAR    $dst.lo,$cnt-32\n"
 9437           "\tSAR    $dst.hi,31" %}
 9438   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9439   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9440   ins_pipe( ialu_reg_long );
 9441 %}
 9442 
 9443 // Shift Right arithmetic Long by variable
 9444 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9445   match(Set dst (RShiftL dst shift));
 9446   effect(KILL cr);
 9447   ins_cost(600);
 9448   size(18);
 9449   format %{ "TEST   $shift,32\n\t"
 9450             "JEQ,s  small\n\t"
 9451             "MOV    $dst.lo,$dst.hi\n\t"
 9452             "SAR    $dst.hi,31\n"
 9453     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9454             "SAR    $dst.hi,$shift" %}
 9455   ins_encode( shift_right_arith_long( dst, shift ) );
 9456   ins_pipe( pipe_slow );
 9457 %}
 9458 
 9459 
 9460 //----------Double Instructions------------------------------------------------
 9461 // Double Math
 9462 
 9463 // Compare & branch
 9464 
// P6 version of double compare, sets condition codes in EFLAGS
 9466 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9467   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9468   match(Set cr (CmpD src1 src2));
 9469   effect(KILL rax);
 9470   ins_cost(150);
 9471   format %{ "FLD    $src1\n\t"
 9472             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9473             "JNP    exit\n\t"
 9474             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9475             "SAHF\n"
 9476      "exit:\tNOP               // avoid branch to branch" %}
 9477   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9478   ins_encode( Push_Reg_DPR(src1),
 9479               OpcP, RegOpc(src2),
 9480               cmpF_P6_fixup );
 9481   ins_pipe( pipe_slow );
 9482 %}
 9483 
 9484 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9485   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9486   match(Set cr (CmpD src1 src2));
 9487   ins_cost(150);
 9488   format %{ "FLD    $src1\n\t"
 9489             "FUCOMIP ST,$src2  // P6 instruction" %}
 9490   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9491   ins_encode( Push_Reg_DPR(src1),
 9492               OpcP, RegOpc(src2));
 9493   ins_pipe( pipe_slow );
 9494 %}
 9495 
 9496 // Compare & branch
 9497 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9498   predicate(UseSSE<=1);
 9499   match(Set cr (CmpD src1 src2));
 9500   effect(KILL rax);
 9501   ins_cost(200);
 9502   format %{ "FLD    $src1\n\t"
 9503             "FCOMp  $src2\n\t"
 9504             "FNSTSW AX\n\t"
 9505             "TEST   AX,0x400\n\t"
 9506             "JZ,s   flags\n\t"
 9507             "MOV    AH,1\t# unordered treat as LT\n"
 9508     "flags:\tSAHF" %}
 9509   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9510   ins_encode( Push_Reg_DPR(src1),
 9511               OpcP, RegOpc(src2),
 9512               fpu_flags);
 9513   ins_pipe( pipe_slow );
 9514 %}
 9515 
 9516 // Compare vs zero into -1,0,1
 9517 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9518   predicate(UseSSE<=1);
 9519   match(Set dst (CmpD3 src1 zero));
 9520   effect(KILL cr, KILL rax);
 9521   ins_cost(280);
 9522   format %{ "FTSTD  $dst,$src1" %}
 9523   opcode(0xE4, 0xD9);
 9524   ins_encode( Push_Reg_DPR(src1),
 9525               OpcS, OpcP, PopFPU,
 9526               CmpF_Result(dst));
 9527   ins_pipe( pipe_slow );
 9528 %}
 9529 
 9530 // Compare into -1,0,1
 9531 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9532   predicate(UseSSE<=1);
 9533   match(Set dst (CmpD3 src1 src2));
 9534   effect(KILL cr, KILL rax);
 9535   ins_cost(300);
 9536   format %{ "FCMPD  $dst,$src1,$src2" %}
 9537   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9538   ins_encode( Push_Reg_DPR(src1),
 9539               OpcP, RegOpc(src2),
 9540               CmpF_Result(dst));
 9541   ins_pipe( pipe_slow );
 9542 %}
 9543 
// double compare and set condition codes in EFLAGS by XMM regs
 9545 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9546   predicate(UseSSE>=2);
 9547   match(Set cr (CmpD src1 src2));
 9548   ins_cost(145);
 9549   format %{ "UCOMISD $src1,$src2\n\t"
 9550             "JNP,s   exit\n\t"
 9551             "PUSHF\t# saw NaN, set CF\n\t"
 9552             "AND     [rsp], #0xffffff2b\n\t"
 9553             "POPF\n"
 9554     "exit:" %}
 9555   ins_encode %{
 9556     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9557     emit_cmpfp_fixup(_masm);
 9558   %}
 9559   ins_pipe( pipe_slow );
 9560 %}
 9561 
 9562 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9563   predicate(UseSSE>=2);
 9564   match(Set cr (CmpD src1 src2));
 9565   ins_cost(100);
 9566   format %{ "UCOMISD $src1,$src2" %}
 9567   ins_encode %{
 9568     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9569   %}
 9570   ins_pipe( pipe_slow );
 9571 %}
 9572 
// double compare and set condition codes in EFLAGS by XMM regs
 9574 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9575   predicate(UseSSE>=2);
 9576   match(Set cr (CmpD src1 (LoadD src2)));
 9577   ins_cost(145);
 9578   format %{ "UCOMISD $src1,$src2\n\t"
 9579             "JNP,s   exit\n\t"
 9580             "PUSHF\t# saw NaN, set CF\n\t"
 9581             "AND     [rsp], #0xffffff2b\n\t"
 9582             "POPF\n"
 9583     "exit:" %}
 9584   ins_encode %{
 9585     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9586     emit_cmpfp_fixup(_masm);
 9587   %}
 9588   ins_pipe( pipe_slow );
 9589 %}
 9590 
 9591 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9592   predicate(UseSSE>=2);
 9593   match(Set cr (CmpD src1 (LoadD src2)));
 9594   ins_cost(100);
 9595   format %{ "UCOMISD $src1,$src2" %}
 9596   ins_encode %{
 9597     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9598   %}
 9599   ins_pipe( pipe_slow );
 9600 %}
 9601 
 9602 // Compare into -1,0,1 in XMM
 9603 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9604   predicate(UseSSE>=2);
 9605   match(Set dst (CmpD3 src1 src2));
 9606   effect(KILL cr);
 9607   ins_cost(255);
 9608   format %{ "UCOMISD $src1, $src2\n\t"
 9609             "MOV     $dst, #-1\n\t"
 9610             "JP,s    done\n\t"
 9611             "JB,s    done\n\t"
 9612             "SETNE   $dst\n\t"
 9613             "MOVZB   $dst, $dst\n"
 9614     "done:" %}
 9615   ins_encode %{
 9616     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9617     emit_cmpfp3(_masm, $dst$$Register);
 9618   %}
 9619   ins_pipe( pipe_slow );
 9620 %}
 9621 
 9622 // Compare into -1,0,1 in XMM and memory
 9623 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9624   predicate(UseSSE>=2);
 9625   match(Set dst (CmpD3 src1 (LoadD src2)));
 9626   effect(KILL cr);
 9627   ins_cost(275);
 9628   format %{ "UCOMISD $src1, $src2\n\t"
 9629             "MOV     $dst, #-1\n\t"
 9630             "JP,s    done\n\t"
 9631             "JB,s    done\n\t"
 9632             "SETNE   $dst\n\t"
 9633             "MOVZB   $dst, $dst\n"
 9634     "done:" %}
 9635   ins_encode %{
 9636     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9637     emit_cmpfp3(_masm, $dst$$Register);
 9638   %}
 9639   ins_pipe( pipe_slow );
 9640 %}
 9641 
 9642 
 9643 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9644   predicate (UseSSE <=1);
 9645   match(Set dst (SubD dst src));
 9646 
 9647   format %{ "FLD    $src\n\t"
 9648             "DSUBp  $dst,ST" %}
 9649   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9650   ins_cost(150);
 9651   ins_encode( Push_Reg_DPR(src),
 9652               OpcP, RegOpc(dst) );
 9653   ins_pipe( fpu_reg_reg );
 9654 %}
 9655 
 9656 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9657   predicate (UseSSE <=1);
 9658   match(Set dst (RoundDouble (SubD src1 src2)));
 9659   ins_cost(250);
 9660 
 9661   format %{ "FLD    $src2\n\t"
 9662             "DSUB   ST,$src1\n\t"
 9663             "FSTP_D $dst\t# D-round" %}
 9664   opcode(0xD8, 0x5);
 9665   ins_encode( Push_Reg_DPR(src2),
 9666               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9667   ins_pipe( fpu_mem_reg_reg );
 9668 %}
 9669 
 9670 
 9671 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9672   predicate (UseSSE <=1);
 9673   match(Set dst (SubD dst (LoadD src)));
 9674   ins_cost(150);
 9675 
 9676   format %{ "FLD    $src\n\t"
 9677             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9679   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9680               OpcP, RegOpc(dst) );
 9681   ins_pipe( fpu_reg_mem );
 9682 %}
 9683 
 9684 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9685   predicate (UseSSE<=1);
 9686   match(Set dst (AbsD src));
 9687   ins_cost(100);
 9688   format %{ "FABS" %}
 9689   opcode(0xE1, 0xD9);
 9690   ins_encode( OpcS, OpcP );
 9691   ins_pipe( fpu_reg_reg );
 9692 %}
 9693 
 9694 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9695   predicate(UseSSE<=1);
 9696   match(Set dst (NegD src));
 9697   ins_cost(100);
 9698   format %{ "FCHS" %}
 9699   opcode(0xE0, 0xD9);
 9700   ins_encode( OpcS, OpcP );
 9701   ins_pipe( fpu_reg_reg );
 9702 %}
 9703 
 9704 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9705   predicate(UseSSE<=1);
 9706   match(Set dst (AddD dst src));
 9707   format %{ "FLD    $src\n\t"
 9708             "DADD   $dst,ST" %}
 9709   size(4);
 9710   ins_cost(150);
 9711   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9712   ins_encode( Push_Reg_DPR(src),
 9713               OpcP, RegOpc(dst) );
 9714   ins_pipe( fpu_reg_reg );
 9715 %}
 9716 
 9717 
 9718 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9719   predicate(UseSSE<=1);
 9720   match(Set dst (RoundDouble (AddD src1 src2)));
 9721   ins_cost(250);
 9722 
 9723   format %{ "FLD    $src2\n\t"
 9724             "DADD   ST,$src1\n\t"
 9725             "FSTP_D $dst\t# D-round" %}
 9726   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9727   ins_encode( Push_Reg_DPR(src2),
 9728               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9729   ins_pipe( fpu_mem_reg_reg );
 9730 %}
 9731 
 9732 
 9733 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9734   predicate(UseSSE<=1);
 9735   match(Set dst (AddD dst (LoadD src)));
 9736   ins_cost(150);
 9737 
 9738   format %{ "FLD    $src\n\t"
 9739             "DADDp  $dst,ST" %}
 9740   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9741   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9742               OpcP, RegOpc(dst) );
 9743   ins_pipe( fpu_reg_mem );
 9744 %}
 9745 
 9746 // add-to-memory
 9747 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9748   predicate(UseSSE<=1);
 9749   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9750   ins_cost(150);
 9751 
 9752   format %{ "FLD_D  $dst\n\t"
 9753             "DADD   ST,$src\n\t"
 9754             "FST_D  $dst" %}
 9755   opcode(0xDD, 0x0);
 9756   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9757               Opcode(0xD8), RegOpc(src),
 9758               set_instruction_start,
 9759               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9760   ins_pipe( fpu_reg_mem );
 9761 %}
 9762 
 9763 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9764   predicate(UseSSE<=1);
 9765   match(Set dst (AddD dst con));
 9766   ins_cost(125);
 9767   format %{ "FLD1\n\t"
 9768             "DADDp  $dst,ST" %}
 9769   ins_encode %{
 9770     __ fld1();
 9771     __ faddp($dst$$reg);
 9772   %}
 9773   ins_pipe(fpu_reg);
 9774 %}
 9775 
 9776 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9777   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9778   match(Set dst (AddD dst con));
 9779   ins_cost(200);
 9780   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9781             "DADDp  $dst,ST" %}
 9782   ins_encode %{
 9783     __ fld_d($constantaddress($con));
 9784     __ faddp($dst$$reg);
 9785   %}
 9786   ins_pipe(fpu_reg_mem);
 9787 %}
 9788 
 9789 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9790   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9791   match(Set dst (RoundDouble (AddD src con)));
 9792   ins_cost(200);
 9793   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9794             "DADD   ST,$src\n\t"
 9795             "FSTP_D $dst\t# D-round" %}
 9796   ins_encode %{
 9797     __ fld_d($constantaddress($con));
 9798     __ fadd($src$$reg);
 9799     __ fstp_d(Address(rsp, $dst$$disp));
 9800   %}
 9801   ins_pipe(fpu_mem_reg_con);
 9802 %}
 9803 
 9804 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9805   predicate(UseSSE<=1);
 9806   match(Set dst (MulD dst src));
 9807   format %{ "FLD    $src\n\t"
 9808             "DMULp  $dst,ST" %}
 9809   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9810   ins_cost(150);
 9811   ins_encode( Push_Reg_DPR(src),
 9812               OpcP, RegOpc(dst) );
 9813   ins_pipe( fpu_reg_reg );
 9814 %}
 9815 
 9816 // Strict FP instruction biases argument before multiply then
 9817 // biases result to avoid double rounding of subnormals.
 9818 //
 9819 // scale arg1 by multiplying arg1 by 2^(-15360)
 9820 // load arg2
 9821 // multiply scaled arg1 by arg2
 9822 // rescale product by 2^(15360)
 9823 //
 9824 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9825   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9826   match(Set dst (MulD dst src));
 9827   ins_cost(1);   // Select this instruction for all FP double multiplies
 9828 
 9829   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9830             "DMULp  $dst,ST\n\t"
 9831             "FLD    $src\n\t"
 9832             "DMULp  $dst,ST\n\t"
 9833             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9834             "DMULp  $dst,ST\n\t" %}
 9835   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9836   ins_encode( strictfp_bias1(dst),
 9837               Push_Reg_DPR(src),
 9838               OpcP, RegOpc(dst),
 9839               strictfp_bias2(dst) );
 9840   ins_pipe( fpu_reg_reg );
 9841 %}
 9842 
 9843 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9844   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9845   match(Set dst (MulD dst con));
 9846   ins_cost(200);
 9847   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9848             "DMULp  $dst,ST" %}
 9849   ins_encode %{
 9850     __ fld_d($constantaddress($con));
 9851     __ fmulp($dst$$reg);
 9852   %}
 9853   ins_pipe(fpu_reg_mem);
 9854 %}
 9855 
 9856 
 9857 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9858   predicate( UseSSE<=1 );
 9859   match(Set dst (MulD dst (LoadD src)));
 9860   ins_cost(200);
 9861   format %{ "FLD_D  $src\n\t"
 9862             "DMULp  $dst,ST" %}
 9863   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9864   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9865               OpcP, RegOpc(dst) );
 9866   ins_pipe( fpu_reg_mem );
 9867 %}
 9868 
 9869 //
 9870 // Cisc-alternate to reg-reg multiply
 9871 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9872   predicate( UseSSE<=1 );
 9873   match(Set dst (MulD src (LoadD mem)));
 9874   ins_cost(250);
 9875   format %{ "FLD_D  $mem\n\t"
 9876             "DMUL   ST,$src\n\t"
 9877             "FSTP_D $dst" %}
 9878   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9879   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9880               OpcReg_FPR(src),
 9881               Pop_Reg_DPR(dst) );
 9882   ins_pipe( fpu_reg_reg_mem );
 9883 %}
 9884 
 9885 
 9886 // MACRO3 -- addDPR a mulDPR
 9887 // This instruction is a '2-address' instruction in that the result goes
 9888 // back to src2.  This eliminates a move from the macro; possibly the
 9889 // register allocator will have to add it back (and maybe not).
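//
// Illustrative only: the subtree matched below corresponds to a source-level
// multiply-accumulate such as (plain Java/C doubles assumed)
//
//   acc = x * y + acc;   // (AddD (MulD src0 src1) src2), result back in src2
//
// so FLD/FMUL/FADDP can leave the sum directly in $src2 with no extra move.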
 9890 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9891   predicate( UseSSE<=1 );
 9892   match(Set src2 (AddD (MulD src0 src1) src2));
 9893   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9894             "DMUL   ST,$src1\n\t"
 9895             "DADDp  $src2,ST" %}
 9896   ins_cost(250);
 9897   opcode(0xDD); /* LoadD DD /0 */
 9898   ins_encode( Push_Reg_FPR(src0),
 9899               FMul_ST_reg(src1),
 9900               FAddP_reg_ST(src2) );
 9901   ins_pipe( fpu_reg_reg_reg );
 9902 %}
 9903 
 9904 
 9905 // MACRO3 -- subDPR a mulDPR
 9906 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9907   predicate( UseSSE<=1 );
 9908   match(Set src2 (SubD (MulD src0 src1) src2));
 9909   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9910             "DMUL   ST,$src1\n\t"
 9911             "DSUBRp $src2,ST" %}
 9912   ins_cost(250);
 9913   ins_encode( Push_Reg_FPR(src0),
 9914               FMul_ST_reg(src1),
 9915               Opcode(0xDE), Opc_plus(0xE0,src2));
 9916   ins_pipe( fpu_reg_reg_reg );
 9917 %}
 9918 
 9919 
 9920 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9921   predicate( UseSSE<=1 );
 9922   match(Set dst (DivD dst src));
 9923 
 9924   format %{ "FLD    $src\n\t"
 9925             "FDIVp  $dst,ST" %}
 9926   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9927   ins_cost(150);
 9928   ins_encode( Push_Reg_DPR(src),
 9929               OpcP, RegOpc(dst) );
 9930   ins_pipe( fpu_reg_reg );
 9931 %}
 9932 
 9933 // Strict FP instruction biases argument before division then
 9934 // biases result, to avoid double rounding of subnormals.
 9935 //
 9936 // scale dividend by multiplying dividend by 2^(-15360)
 9937 // load divisor
 9938 // divide scaled dividend by divisor
 9939 // rescale quotient by 2^(15360)
 9940 //
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divides
 9946 
 9947   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9948             "DMULp  $dst,ST\n\t"
 9949             "FLD    $src\n\t"
 9950             "FDIVp  $dst,ST\n\t"
 9951             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9952             "DMULp  $dst,ST\n\t" %}
 9953   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9954   ins_encode( strictfp_bias1(dst),
 9955               Push_Reg_DPR(src),
 9956               OpcP, RegOpc(dst),
 9957               strictfp_bias2(dst) );
 9958   ins_pipe( fpu_reg_reg );
 9959 %}
 9960 
 9961 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9962   predicate(UseSSE<=1);
 9963   match(Set dst (ModD dst src));
 9964   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9965 
 9966   format %{ "DMOD   $dst,$src" %}
 9967   ins_cost(250);
 9968   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9969               emitModDPR(),
 9970               Push_Result_Mod_DPR(src),
 9971               Pop_Reg_DPR(dst));
 9972   ins_pipe( pipe_slow );
 9973 %}
 9974 
 9975 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9976   predicate(UseSSE>=2);
 9977   match(Set dst (ModD src0 src1));
 9978   effect(KILL rax, KILL cr);
 9979 
 9980   format %{ "SUB    ESP,8\t # DMOD\n"
 9981           "\tMOVSD  [ESP+0],$src1\n"
 9982           "\tFLD_D  [ESP+0]\n"
 9983           "\tMOVSD  [ESP+0],$src0\n"
 9984           "\tFLD_D  [ESP+0]\n"
 9985      "loop:\tFPREM\n"
 9986           "\tFWAIT\n"
 9987           "\tFNSTSW AX\n"
 9988           "\tSAHF\n"
 9989           "\tJP     loop\n"
 9990           "\tFSTP_D [ESP+0]\n"
 9991           "\tMOVSD  $dst,[ESP+0]\n"
 9992           "\tADD    ESP,8\n"
 9993           "\tFSTP   ST0\t # Restore FPU Stack"
 9994     %}
 9995   ins_cost(250);
 9996   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9997   ins_pipe( pipe_slow );
 9998 %}
 9999 
10000 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10001   predicate (UseSSE<=1);
10002   match(Set dst(AtanD dst src));
10003   format %{ "DATA   $dst,$src" %}
10004   opcode(0xD9, 0xF3);
10005   ins_encode( Push_Reg_DPR(src),
10006               OpcP, OpcS, RegOpc(dst) );
10007   ins_pipe( pipe_slow );
10008 %}
10009 
10010 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10011   predicate (UseSSE>=2);
10012   match(Set dst(AtanD dst src));
10013   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10014   format %{ "DATA   $dst,$src" %}
10015   opcode(0xD9, 0xF3);
10016   ins_encode( Push_SrcD(src),
10017               OpcP, OpcS, Push_ResultD(dst) );
10018   ins_pipe( pipe_slow );
10019 %}
10020 
10021 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10022   predicate (UseSSE<=1);
10023   match(Set dst (SqrtD src));
10024   format %{ "DSQRT  $dst,$src" %}
10025   opcode(0xFA, 0xD9);
10026   ins_encode( Push_Reg_DPR(src),
10027               OpcS, OpcP, Pop_Reg_DPR(dst) );
10028   ins_pipe( pipe_slow );
10029 %}
10030 
10031 //-------------Float Instructions-------------------------------
10032 // Float Math
10033 
10034 // Code for float compare:
10035 //     fcompp();
10036 //     fwait(); fnstsw_ax();
10037 //     sahf();
10038 //     movl(dst, unordered_result);
10039 //     jcc(Assembler::parity, exit);
10040 //     movl(dst, less_result);
10041 //     jcc(Assembler::below, exit);
10042 //     movl(dst, equal_result);
10043 //     jcc(Assembler::equal, exit);
10044 //     movl(dst, greater_result);
10045 //   exit:
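//
// A rough C sketch of the -1/0/1 value this computes (illustrative only; as the
// format strings in this file note, an unordered (NaN) compare is treated as
// "less than", i.e. -1):
//
//   int cmp3(float a, float b) {
//     if (a != a || b != b) return -1;  // NaN => unordered => treat as less
//     if (a <  b)           return -1;
//     if (a == b)           return  0;
//     return 1;                         // a > b
//   }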
10046 
10047 // P6 version of float compare, sets condition codes in EFLAGS
10048 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10049   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10050   match(Set cr (CmpF src1 src2));
10051   effect(KILL rax);
10052   ins_cost(150);
10053   format %{ "FLD    $src1\n\t"
10054             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10055             "JNP    exit\n\t"
10056             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10057             "SAHF\n"
10058      "exit:\tNOP               // avoid branch to branch" %}
10059   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10060   ins_encode( Push_Reg_DPR(src1),
10061               OpcP, RegOpc(src2),
10062               cmpF_P6_fixup );
10063   ins_pipe( pipe_slow );
10064 %}
10065 
10066 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10067   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10068   match(Set cr (CmpF src1 src2));
10069   ins_cost(100);
10070   format %{ "FLD    $src1\n\t"
10071             "FUCOMIP ST,$src2  // P6 instruction" %}
10072   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10073   ins_encode( Push_Reg_DPR(src1),
10074               OpcP, RegOpc(src2));
10075   ins_pipe( pipe_slow );
10076 %}
10077 
10078 
10079 // Compare & branch
10080 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10081   predicate(UseSSE == 0);
10082   match(Set cr (CmpF src1 src2));
10083   effect(KILL rax);
10084   ins_cost(200);
10085   format %{ "FLD    $src1\n\t"
10086             "FCOMp  $src2\n\t"
10087             "FNSTSW AX\n\t"
10088             "TEST   AX,0x400\n\t"
10089             "JZ,s   flags\n\t"
10090             "MOV    AH,1\t# unordered treat as LT\n"
10091     "flags:\tSAHF" %}
10092   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10093   ins_encode( Push_Reg_DPR(src1),
10094               OpcP, RegOpc(src2),
10095               fpu_flags);
10096   ins_pipe( pipe_slow );
10097 %}
10098 
10099 // Compare vs zero into -1,0,1
10100 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10101   predicate(UseSSE == 0);
10102   match(Set dst (CmpF3 src1 zero));
10103   effect(KILL cr, KILL rax);
10104   ins_cost(280);
10105   format %{ "FTSTF  $dst,$src1" %}
10106   opcode(0xE4, 0xD9);
10107   ins_encode( Push_Reg_DPR(src1),
10108               OpcS, OpcP, PopFPU,
10109               CmpF_Result(dst));
10110   ins_pipe( pipe_slow );
10111 %}
10112 
10113 // Compare into -1,0,1
10114 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10115   predicate(UseSSE == 0);
10116   match(Set dst (CmpF3 src1 src2));
10117   effect(KILL cr, KILL rax);
10118   ins_cost(300);
10119   format %{ "FCMPF  $dst,$src1,$src2" %}
10120   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10121   ins_encode( Push_Reg_DPR(src1),
10122               OpcP, RegOpc(src2),
10123               CmpF_Result(dst));
10124   ins_pipe( pipe_slow );
10125 %}
10126 
10127 // float compare and set condition codes in EFLAGS by XMM regs
10128 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10129   predicate(UseSSE>=1);
10130   match(Set cr (CmpF src1 src2));
10131   ins_cost(145);
10132   format %{ "UCOMISS $src1,$src2\n\t"
10133             "JNP,s   exit\n\t"
10134             "PUSHF\t# saw NaN, set CF\n\t"
10135             "AND     [rsp], #0xffffff2b\n\t"
10136             "POPF\n"
10137     "exit:" %}
10138   ins_encode %{
10139     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10140     emit_cmpfp_fixup(_masm);
10141   %}
10142   ins_pipe( pipe_slow );
10143 %}
10144 
10145 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10146   predicate(UseSSE>=1);
10147   match(Set cr (CmpF src1 src2));
10148   ins_cost(100);
10149   format %{ "UCOMISS $src1,$src2" %}
10150   ins_encode %{
10151     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10152   %}
10153   ins_pipe( pipe_slow );
10154 %}
10155 
10156 // float compare and set condition codes in EFLAGS by XMM regs
10157 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10158   predicate(UseSSE>=1);
10159   match(Set cr (CmpF src1 (LoadF src2)));
10160   ins_cost(165);
10161   format %{ "UCOMISS $src1,$src2\n\t"
10162             "JNP,s   exit\n\t"
10163             "PUSHF\t# saw NaN, set CF\n\t"
10164             "AND     [rsp], #0xffffff2b\n\t"
10165             "POPF\n"
10166     "exit:" %}
10167   ins_encode %{
10168     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10169     emit_cmpfp_fixup(_masm);
10170   %}
10171   ins_pipe( pipe_slow );
10172 %}
10173 
10174 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10175   predicate(UseSSE>=1);
10176   match(Set cr (CmpF src1 (LoadF src2)));
10177   ins_cost(100);
10178   format %{ "UCOMISS $src1,$src2" %}
10179   ins_encode %{
10180     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10181   %}
10182   ins_pipe( pipe_slow );
10183 %}
10184 
10185 // Compare into -1,0,1 in XMM
10186 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10187   predicate(UseSSE>=1);
10188   match(Set dst (CmpF3 src1 src2));
10189   effect(KILL cr);
10190   ins_cost(255);
10191   format %{ "UCOMISS $src1, $src2\n\t"
10192             "MOV     $dst, #-1\n\t"
10193             "JP,s    done\n\t"
10194             "JB,s    done\n\t"
10195             "SETNE   $dst\n\t"
10196             "MOVZB   $dst, $dst\n"
10197     "done:" %}
10198   ins_encode %{
10199     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10200     emit_cmpfp3(_masm, $dst$$Register);
10201   %}
10202   ins_pipe( pipe_slow );
10203 %}
10204 
10205 // Compare into -1,0,1 in XMM and memory
10206 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10207   predicate(UseSSE>=1);
10208   match(Set dst (CmpF3 src1 (LoadF src2)));
10209   effect(KILL cr);
10210   ins_cost(275);
10211   format %{ "UCOMISS $src1, $src2\n\t"
10212             "MOV     $dst, #-1\n\t"
10213             "JP,s    done\n\t"
10214             "JB,s    done\n\t"
10215             "SETNE   $dst\n\t"
10216             "MOVZB   $dst, $dst\n"
10217     "done:" %}
10218   ins_encode %{
10219     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10220     emit_cmpfp3(_masm, $dst$$Register);
10221   %}
10222   ins_pipe( pipe_slow );
10223 %}
10224 
10225 // Spill to obtain 24-bit precision
10226 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10227   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10228   match(Set dst (SubF src1 src2));
10229 
10230   format %{ "FSUB   $dst,$src1 - $src2" %}
10231   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10232   ins_encode( Push_Reg_FPR(src1),
10233               OpcReg_FPR(src2),
10234               Pop_Mem_FPR(dst) );
10235   ins_pipe( fpu_mem_reg_reg );
10236 %}
10237 //
10238 // This instruction does not round to 24-bits
10239 instruct subFPR_reg(regFPR dst, regFPR src) %{
10240   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10241   match(Set dst (SubF dst src));
10242 
10243   format %{ "FSUB   $dst,$src" %}
10244   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10245   ins_encode( Push_Reg_FPR(src),
10246               OpcP, RegOpc(dst) );
10247   ins_pipe( fpu_reg_reg );
10248 %}
10249 
10250 // Spill to obtain 24-bit precision
10251 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10252   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10253   match(Set dst (AddF src1 src2));
10254 
10255   format %{ "FADD   $dst,$src1,$src2" %}
10256   opcode(0xD8, 0x0); /* D8 C0+i */
10257   ins_encode( Push_Reg_FPR(src2),
10258               OpcReg_FPR(src1),
10259               Pop_Mem_FPR(dst) );
10260   ins_pipe( fpu_mem_reg_reg );
10261 %}
10262 //
10263 // This instruction does not round to 24-bits
10264 instruct addFPR_reg(regFPR dst, regFPR src) %{
10265   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10266   match(Set dst (AddF dst src));
10267 
10268   format %{ "FLD    $src\n\t"
10269             "FADDp  $dst,ST" %}
10270   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10271   ins_encode( Push_Reg_FPR(src),
10272               OpcP, RegOpc(dst) );
10273   ins_pipe( fpu_reg_reg );
10274 %}
10275 
10276 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10277   predicate(UseSSE==0);
10278   match(Set dst (AbsF src));
10279   ins_cost(100);
10280   format %{ "FABS" %}
10281   opcode(0xE1, 0xD9);
10282   ins_encode( OpcS, OpcP );
10283   ins_pipe( fpu_reg_reg );
10284 %}
10285 
10286 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10287   predicate(UseSSE==0);
10288   match(Set dst (NegF src));
10289   ins_cost(100);
10290   format %{ "FCHS" %}
10291   opcode(0xE0, 0xD9);
10292   ins_encode( OpcS, OpcP );
10293   ins_pipe( fpu_reg_reg );
10294 %}
10295 
10296 // Cisc-alternate to addFPR_reg
10297 // Spill to obtain 24-bit precision
10298 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10299   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10300   match(Set dst (AddF src1 (LoadF src2)));
10301 
10302   format %{ "FLD    $src2\n\t"
10303             "FADD   ST,$src1\n\t"
10304             "FSTP_S $dst" %}
10305   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10306   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10307               OpcReg_FPR(src1),
10308               Pop_Mem_FPR(dst) );
10309   ins_pipe( fpu_mem_reg_mem );
10310 %}
10311 //
10312 // Cisc-alternate to addFPR_reg
10313 // This instruction does not round to 24-bits
10314 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10315   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10316   match(Set dst (AddF dst (LoadF src)));
10317 
10318   format %{ "FADD   $dst,$src" %}
10319   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10320   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10321               OpcP, RegOpc(dst) );
10322   ins_pipe( fpu_reg_mem );
10323 %}
10324 
10325 // // Following two instructions for _222_mpegaudio
10326 // Spill to obtain 24-bit precision
10327 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10328   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10329   match(Set dst (AddF src1 src2));
10330 
10331   format %{ "FADD   $dst,$src1,$src2" %}
10332   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10333   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10334               OpcReg_FPR(src2),
10335               Pop_Mem_FPR(dst) );
10336   ins_pipe( fpu_mem_reg_mem );
10337 %}
10338 
10339 // Cisc-spill variant
10340 // Spill to obtain 24-bit precision
10341 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10342   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10343   match(Set dst (AddF src1 (LoadF src2)));
10344 
10345   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10346   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10347   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10348               set_instruction_start,
10349               OpcP, RMopc_Mem(secondary,src1),
10350               Pop_Mem_FPR(dst) );
10351   ins_pipe( fpu_mem_mem_mem );
10352 %}
10353 
10354 // Spill to obtain 24-bit precision
10355 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10356   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10357   match(Set dst (AddF src1 src2));
10358 
10359   format %{ "FADD   $dst,$src1,$src2" %}
10360   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10361   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10362               set_instruction_start,
10363               OpcP, RMopc_Mem(secondary,src1),
10364               Pop_Mem_FPR(dst) );
10365   ins_pipe( fpu_mem_mem_mem );
10366 %}
10367 
10368 
10369 // Spill to obtain 24-bit precision
10370 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10371   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10372   match(Set dst (AddF src con));
10373   format %{ "FLD    $src\n\t"
10374             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10375             "FSTP_S $dst"  %}
10376   ins_encode %{
10377     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10378     __ fadd_s($constantaddress($con));
10379     __ fstp_s(Address(rsp, $dst$$disp));
10380   %}
10381   ins_pipe(fpu_mem_reg_con);
10382 %}
10383 //
10384 // This instruction does not round to 24-bits
10385 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10386   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10387   match(Set dst (AddF src con));
10388   format %{ "FLD    $src\n\t"
10389             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10390             "FSTP   $dst"  %}
10391   ins_encode %{
10392     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10393     __ fadd_s($constantaddress($con));
10394     __ fstp_d($dst$$reg);
10395   %}
10396   ins_pipe(fpu_reg_reg_con);
10397 %}
10398 
10399 // Spill to obtain 24-bit precision
10400 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10401   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10402   match(Set dst (MulF src1 src2));
10403 
10404   format %{ "FLD    $src1\n\t"
10405             "FMUL   $src2\n\t"
10406             "FSTP_S $dst"  %}
10407   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10408   ins_encode( Push_Reg_FPR(src1),
10409               OpcReg_FPR(src2),
10410               Pop_Mem_FPR(dst) );
10411   ins_pipe( fpu_mem_reg_reg );
10412 %}
10413 //
10414 // This instruction does not round to 24-bits
10415 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10416   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10417   match(Set dst (MulF src1 src2));
10418 
10419   format %{ "FLD    $src1\n\t"
10420             "FMUL   $src2\n\t"
10421             "FSTP_S $dst"  %}
10422   opcode(0xD8, 0x1); /* D8 C8+i */
10423   ins_encode( Push_Reg_FPR(src2),
10424               OpcReg_FPR(src1),
10425               Pop_Reg_FPR(dst) );
10426   ins_pipe( fpu_reg_reg_reg );
10427 %}
10428 
10429 
10430 // Spill to obtain 24-bit precision
10431 // Cisc-alternate to reg-reg multiply
10432 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10433   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10434   match(Set dst (MulF src1 (LoadF src2)));
10435 
10436   format %{ "FLD_S  $src2\n\t"
10437             "FMUL   $src1\n\t"
10438             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10440   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10441               OpcReg_FPR(src1),
10442               Pop_Mem_FPR(dst) );
10443   ins_pipe( fpu_mem_reg_mem );
10444 %}
10445 //
10446 // This instruction does not round to 24-bits
10447 // Cisc-alternate to reg-reg multiply
10448 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10449   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10450   match(Set dst (MulF src1 (LoadF src2)));
10451 
10452   format %{ "FMUL   $dst,$src1,$src2" %}
10453   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10454   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10455               OpcReg_FPR(src1),
10456               Pop_Reg_FPR(dst) );
10457   ins_pipe( fpu_reg_reg_mem );
10458 %}
10459 
10460 // Spill to obtain 24-bit precision
10461 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10462   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10463   match(Set dst (MulF src1 src2));
10464 
10465   format %{ "FMUL   $dst,$src1,$src2" %}
10466   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10467   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10468               set_instruction_start,
10469               OpcP, RMopc_Mem(secondary,src1),
10470               Pop_Mem_FPR(dst) );
10471   ins_pipe( fpu_mem_mem_mem );
10472 %}
10473 
10474 // Spill to obtain 24-bit precision
10475 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10476   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10477   match(Set dst (MulF src con));
10478 
10479   format %{ "FLD    $src\n\t"
10480             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10481             "FSTP_S $dst"  %}
10482   ins_encode %{
10483     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10484     __ fmul_s($constantaddress($con));
10485     __ fstp_s(Address(rsp, $dst$$disp));
10486   %}
10487   ins_pipe(fpu_mem_reg_con);
10488 %}
10489 //
10490 // This instruction does not round to 24-bits
10491 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10492   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10493   match(Set dst (MulF src con));
10494 
10495   format %{ "FLD    $src\n\t"
10496             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10497             "FSTP   $dst"  %}
10498   ins_encode %{
10499     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10500     __ fmul_s($constantaddress($con));
10501     __ fstp_d($dst$$reg);
10502   %}
10503   ins_pipe(fpu_reg_reg_con);
10504 %}
10505 
10506 
10507 //
10508 // MACRO1 -- subsume unshared load into mulFPR
10509 // This instruction does not round to 24-bits
10510 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10511   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10512   match(Set dst (MulF (LoadF mem1) src));
10513 
10514   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10515             "FMUL   ST,$src\n\t"
10516             "FSTP   $dst" %}
10517   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10518   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10519               OpcReg_FPR(src),
10520               Pop_Reg_FPR(dst) );
10521   ins_pipe( fpu_reg_reg_mem );
10522 %}
10523 //
10524 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10525 // This instruction does not round to 24-bits
10526 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10527   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10528   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10529   ins_cost(95);
10530 
10531   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10532             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10533             "FADD   ST,$src2\n\t"
10534             "FSTP   $dst" %}
10535   opcode(0xD9); /* LoadF D9 /0 */
10536   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10537               FMul_ST_reg(src1),
10538               FAdd_ST_reg(src2),
10539               Pop_Reg_FPR(dst) );
10540   ins_pipe( fpu_reg_mem_reg_reg );
10541 %}
10542 
10543 // MACRO3 -- addFPR a mulFPR
10544 // This instruction does not round to 24-bits.  It is a '2-address'
10545 // instruction in that the result goes back to src2.  This eliminates
10546 // a move from the macro; possibly the register allocator will have
10547 // to add it back (and maybe not).
10548 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10549   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10550   match(Set src2 (AddF (MulF src0 src1) src2));
10551 
10552   format %{ "FLD    $src0     ===MACRO3===\n\t"
10553             "FMUL   ST,$src1\n\t"
10554             "FADDP  $src2,ST" %}
10555   opcode(0xD9); /* LoadF D9 /0 */
10556   ins_encode( Push_Reg_FPR(src0),
10557               FMul_ST_reg(src1),
10558               FAddP_reg_ST(src2) );
10559   ins_pipe( fpu_reg_reg_reg );
10560 %}
10561 
10562 // MACRO4 -- divFPR subFPR
10563 // This instruction does not round to 24-bits
10564 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10565   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10566   match(Set dst (DivF (SubF src2 src1) src3));
10567 
10568   format %{ "FLD    $src2   ===MACRO4===\n\t"
10569             "FSUB   ST,$src1\n\t"
10570             "FDIV   ST,$src3\n\t"
10571             "FSTP  $dst" %}
10572   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10573   ins_encode( Push_Reg_FPR(src2),
10574               subFPR_divFPR_encode(src1,src3),
10575               Pop_Reg_FPR(dst) );
10576   ins_pipe( fpu_reg_reg_reg_reg );
10577 %}
10578 
10579 // Spill to obtain 24-bit precision
10580 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10581   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10582   match(Set dst (DivF src1 src2));
10583 
10584   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10586   ins_encode( Push_Reg_FPR(src1),
10587               OpcReg_FPR(src2),
10588               Pop_Mem_FPR(dst) );
10589   ins_pipe( fpu_mem_reg_reg );
10590 %}
10591 //
10592 // This instruction does not round to 24-bits
10593 instruct divFPR_reg(regFPR dst, regFPR src) %{
10594   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10595   match(Set dst (DivF dst src));
10596 
10597   format %{ "FDIV   $dst,$src" %}
10598   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10599   ins_encode( Push_Reg_FPR(src),
10600               OpcP, RegOpc(dst) );
10601   ins_pipe( fpu_reg_reg );
10602 %}
10603 
10604 
10605 // Spill to obtain 24-bit precision
10606 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10607   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10608   match(Set dst (ModF src1 src2));
10609   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10610 
10611   format %{ "FMOD   $dst,$src1,$src2" %}
10612   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10613               emitModDPR(),
10614               Push_Result_Mod_DPR(src2),
10615               Pop_Mem_FPR(dst));
10616   ins_pipe( pipe_slow );
10617 %}
10618 //
10619 // This instruction does not round to 24-bits
10620 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10621   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10622   match(Set dst (ModF dst src));
10623   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10624 
10625   format %{ "FMOD   $dst,$src" %}
10626   ins_encode(Push_Reg_Mod_DPR(dst, src),
10627               emitModDPR(),
10628               Push_Result_Mod_DPR(src),
10629               Pop_Reg_FPR(dst));
10630   ins_pipe( pipe_slow );
10631 %}
10632 
10633 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10634   predicate(UseSSE>=1);
10635   match(Set dst (ModF src0 src1));
10636   effect(KILL rax, KILL cr);
10637   format %{ "SUB    ESP,4\t # FMOD\n"
10638           "\tMOVSS  [ESP+0],$src1\n"
10639           "\tFLD_S  [ESP+0]\n"
10640           "\tMOVSS  [ESP+0],$src0\n"
10641           "\tFLD_S  [ESP+0]\n"
10642      "loop:\tFPREM\n"
10643           "\tFWAIT\n"
10644           "\tFNSTSW AX\n"
10645           "\tSAHF\n"
10646           "\tJP     loop\n"
10647           "\tFSTP_S [ESP+0]\n"
10648           "\tMOVSS  $dst,[ESP+0]\n"
10649           "\tADD    ESP,4\n"
10650           "\tFSTP   ST0\t # Restore FPU Stack"
10651     %}
10652   ins_cost(250);
10653   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10654   ins_pipe( pipe_slow );
10655 %}
10656 
10657 
10658 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
10660 
10661 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10662   predicate(UseSSE==0);
10663   match(Set dst (RoundFloat src));
10664   ins_cost(125);
10665   format %{ "FST_S  $dst,$src\t# F-round" %}
10666   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10667   ins_pipe( fpu_mem_reg );
10668 %}
10669 
10670 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10671   predicate(UseSSE<=1);
10672   match(Set dst (RoundDouble src));
10673   ins_cost(125);
10674   format %{ "FST_D  $dst,$src\t# D-round" %}
10675   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10676   ins_pipe( fpu_mem_reg );
10677 %}
10678 
// Force rounding to 24-bit precision and 8-bit exponent
10680 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10681   predicate(UseSSE==0);
10682   match(Set dst (ConvD2F src));
10683   format %{ "FST_S  $dst,$src\t# F-round" %}
10684   expand %{
10685     roundFloat_mem_reg(dst,src);
10686   %}
10687 %}
10688 
// Force rounding to 24-bit precision and 8-bit exponent
10690 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10691   predicate(UseSSE==1);
10692   match(Set dst (ConvD2F src));
10693   effect( KILL cr );
10694   format %{ "SUB    ESP,4\n\t"
10695             "FST_S  [ESP],$src\t# F-round\n\t"
10696             "MOVSS  $dst,[ESP]\n\t"
10697             "ADD ESP,4" %}
10698   ins_encode %{
10699     __ subptr(rsp, 4);
10700     if ($src$$reg != FPR1L_enc) {
10701       __ fld_s($src$$reg-1);
10702       __ fstp_s(Address(rsp, 0));
10703     } else {
10704       __ fst_s(Address(rsp, 0));
10705     }
10706     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10707     __ addptr(rsp, 4);
10708   %}
10709   ins_pipe( pipe_slow );
10710 %}
10711 
10712 // Force rounding double precision to single precision
10713 instruct convD2F_reg(regF dst, regD src) %{
10714   predicate(UseSSE>=2);
10715   match(Set dst (ConvD2F src));
10716   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10717   ins_encode %{
10718     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10719   %}
10720   ins_pipe( pipe_slow );
10721 %}
10722 
10723 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10724   predicate(UseSSE==0);
10725   match(Set dst (ConvF2D src));
10726   format %{ "FST_S  $dst,$src\t# D-round" %}
10727   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10728   ins_pipe( fpu_reg_reg );
10729 %}
10730 
10731 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10732   predicate(UseSSE==1);
10733   match(Set dst (ConvF2D src));
10734   format %{ "FST_D  $dst,$src\t# D-round" %}
10735   expand %{
10736     roundDouble_mem_reg(dst,src);
10737   %}
10738 %}
10739 
10740 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10741   predicate(UseSSE==1);
10742   match(Set dst (ConvF2D src));
10743   effect( KILL cr );
10744   format %{ "SUB    ESP,4\n\t"
10745             "MOVSS  [ESP] $src\n\t"
10746             "FLD_S  [ESP]\n\t"
10747             "ADD    ESP,4\n\t"
10748             "FSTP   $dst\t# D-round" %}
10749   ins_encode %{
10750     __ subptr(rsp, 4);
10751     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10752     __ fld_s(Address(rsp, 0));
10753     __ addptr(rsp, 4);
10754     __ fstp_d($dst$$reg);
10755   %}
10756   ins_pipe( pipe_slow );
10757 %}
10758 
10759 instruct convF2D_reg(regD dst, regF src) %{
10760   predicate(UseSSE>=2);
10761   match(Set dst (ConvF2D src));
10762   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10763   ins_encode %{
10764     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10765   %}
10766   ins_pipe( pipe_slow );
10767 %}
10768 
10769 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10770 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10771   predicate(UseSSE<=1);
10772   match(Set dst (ConvD2I src));
10773   effect( KILL tmp, KILL cr );
10774   format %{ "FLD    $src\t# Convert double to int \n\t"
10775             "FLDCW  trunc mode\n\t"
10776             "SUB    ESP,4\n\t"
10777             "FISTp  [ESP + #0]\n\t"
10778             "FLDCW  std/24-bit mode\n\t"
10779             "POP    EAX\n\t"
10780             "CMP    EAX,0x80000000\n\t"
10781             "JNE,s  fast\n\t"
10782             "FLD_D  $src\n\t"
10783             "CALL   d2i_wrapper\n"
10784       "fast:" %}
10785   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10786   ins_pipe( pipe_slow );
10787 %}
10788 
10789 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10790 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10791   predicate(UseSSE>=2);
10792   match(Set dst (ConvD2I src));
10793   effect( KILL tmp, KILL cr );
10794   format %{ "CVTTSD2SI $dst, $src\n\t"
10795             "CMP    $dst,0x80000000\n\t"
10796             "JNE,s  fast\n\t"
10797             "SUB    ESP, 8\n\t"
10798             "MOVSD  [ESP], $src\n\t"
10799             "FLD_D  [ESP]\n\t"
10800             "ADD    ESP, 8\n\t"
10801             "CALL   d2i_wrapper\n"
10802       "fast:" %}
10803   ins_encode %{
10804     Label fast;
10805     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10806     __ cmpl($dst$$Register, 0x80000000);
10807     __ jccb(Assembler::notEqual, fast);
10808     __ subptr(rsp, 8);
10809     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10810     __ fld_d(Address(rsp, 0));
10811     __ addptr(rsp, 8);
10812     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10813     __ post_call_nop();
10814     __ bind(fast);
10815   %}
10816   ins_pipe( pipe_slow );
10817 %}
10818 
10819 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10820   predicate(UseSSE<=1);
10821   match(Set dst (ConvD2L src));
10822   effect( KILL cr );
10823   format %{ "FLD    $src\t# Convert double to long\n\t"
10824             "FLDCW  trunc mode\n\t"
10825             "SUB    ESP,8\n\t"
10826             "FISTp  [ESP + #0]\n\t"
10827             "FLDCW  std/24-bit mode\n\t"
10828             "POP    EAX\n\t"
10829             "POP    EDX\n\t"
10830             "CMP    EDX,0x80000000\n\t"
10831             "JNE,s  fast\n\t"
10832             "TEST   EAX,EAX\n\t"
10833             "JNE,s  fast\n\t"
10834             "FLD    $src\n\t"
10835             "CALL   d2l_wrapper\n"
10836       "fast:" %}
10837   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10838   ins_pipe( pipe_slow );
10839 %}
10840 
10841 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10842 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10843   predicate (UseSSE>=2);
10844   match(Set dst (ConvD2L src));
10845   effect( KILL cr );
10846   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10847             "MOVSD  [ESP],$src\n\t"
10848             "FLD_D  [ESP]\n\t"
10849             "FLDCW  trunc mode\n\t"
10850             "FISTp  [ESP + #0]\n\t"
10851             "FLDCW  std/24-bit mode\n\t"
10852             "POP    EAX\n\t"
10853             "POP    EDX\n\t"
10854             "CMP    EDX,0x80000000\n\t"
10855             "JNE,s  fast\n\t"
10856             "TEST   EAX,EAX\n\t"
10857             "JNE,s  fast\n\t"
10858             "SUB    ESP,8\n\t"
10859             "MOVSD  [ESP],$src\n\t"
10860             "FLD_D  [ESP]\n\t"
10861             "ADD    ESP,8\n\t"
10862             "CALL   d2l_wrapper\n"
10863       "fast:" %}
10864   ins_encode %{
10865     Label fast;
10866     __ subptr(rsp, 8);
10867     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10868     __ fld_d(Address(rsp, 0));
10869     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10870     __ fistp_d(Address(rsp, 0));
10871     // Restore the rounding mode, mask the exception
10872     if (Compile::current()->in_24_bit_fp_mode()) {
10873       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10874     } else {
10875       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10876     }
10877     // Load the converted long, adjust CPU stack
10878     __ pop(rax);
10879     __ pop(rdx);
10880     __ cmpl(rdx, 0x80000000);
10881     __ jccb(Assembler::notEqual, fast);
10882     __ testl(rax, rax);
10883     __ jccb(Assembler::notEqual, fast);
10884     __ subptr(rsp, 8);
10885     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10886     __ fld_d(Address(rsp, 0));
10887     __ addptr(rsp, 8);
10888     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10889     __ post_call_nop();
10890     __ bind(fast);
10891   %}
10892   ins_pipe( pipe_slow );
10893 %}
10894 
10895 // Convert a double to an int.  Java semantics require we do complex
10896 // manglations in the corner cases.  So we set the rounding mode to
10897 // 'zero', store the darned double down as an int, and reset the
10898 // rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
10901 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10902   predicate(UseSSE==0);
10903   match(Set dst (ConvF2I src));
10904   effect( KILL tmp, KILL cr );
10905   format %{ "FLD    $src\t# Convert float to int \n\t"
10906             "FLDCW  trunc mode\n\t"
10907             "SUB    ESP,4\n\t"
10908             "FISTp  [ESP + #0]\n\t"
10909             "FLDCW  std/24-bit mode\n\t"
10910             "POP    EAX\n\t"
10911             "CMP    EAX,0x80000000\n\t"
10912             "JNE,s  fast\n\t"
10913             "FLD    $src\n\t"
10914             "CALL   d2i_wrapper\n"
10915       "fast:" %}
10916   // DPR2I_encoding works for FPR2I
10917   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10918   ins_pipe( pipe_slow );
10919 %}
10920 
10921 // Convert a float in xmm to an int reg.
10922 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10923   predicate(UseSSE>=1);
10924   match(Set dst (ConvF2I src));
10925   effect( KILL tmp, KILL cr );
10926   format %{ "CVTTSS2SI $dst, $src\n\t"
10927             "CMP    $dst,0x80000000\n\t"
10928             "JNE,s  fast\n\t"
10929             "SUB    ESP, 4\n\t"
10930             "MOVSS  [ESP], $src\n\t"
10931             "FLD    [ESP]\n\t"
10932             "ADD    ESP, 4\n\t"
10933             "CALL   d2i_wrapper\n"
10934       "fast:" %}
10935   ins_encode %{
10936     Label fast;
10937     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
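    // CVTTSS2SI returns the "integer indefinite" 0x80000000 on overflow or NaN, so only that
    // value needs the slow-path call to recover exact Java semantics.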
10938     __ cmpl($dst$$Register, 0x80000000);
10939     __ jccb(Assembler::notEqual, fast);
10940     __ subptr(rsp, 4);
10941     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10942     __ fld_s(Address(rsp, 0));
10943     __ addptr(rsp, 4);
10944     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10945     __ post_call_nop();
10946     __ bind(fast);
10947   %}
10948   ins_pipe( pipe_slow );
10949 %}
10950 
10951 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10952   predicate(UseSSE==0);
10953   match(Set dst (ConvF2L src));
10954   effect( KILL cr );
10955   format %{ "FLD    $src\t# Convert float to long\n\t"
10956             "FLDCW  trunc mode\n\t"
10957             "SUB    ESP,8\n\t"
10958             "FISTp  [ESP + #0]\n\t"
10959             "FLDCW  std/24-bit mode\n\t"
10960             "POP    EAX\n\t"
10961             "POP    EDX\n\t"
10962             "CMP    EDX,0x80000000\n\t"
10963             "JNE,s  fast\n\t"
10964             "TEST   EAX,EAX\n\t"
10965             "JNE,s  fast\n\t"
10966             "FLD    $src\n\t"
10967             "CALL   d2l_wrapper\n"
10968       "fast:" %}
10969   // DPR2L_encoding works for FPR2L
10970   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10971   ins_pipe( pipe_slow );
10972 %}
10973 
10974 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10975 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10976   predicate (UseSSE>=1);
10977   match(Set dst (ConvF2L src));
10978   effect( KILL cr );
10979   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10980             "MOVSS  [ESP],$src\n\t"
10981             "FLD_S  [ESP]\n\t"
10982             "FLDCW  trunc mode\n\t"
10983             "FISTp  [ESP + #0]\n\t"
10984             "FLDCW  std/24-bit mode\n\t"
10985             "POP    EAX\n\t"
10986             "POP    EDX\n\t"
10987             "CMP    EDX,0x80000000\n\t"
10988             "JNE,s  fast\n\t"
10989             "TEST   EAX,EAX\n\t"
10990             "JNE,s  fast\n\t"
10991             "SUB    ESP,4\t# Convert float to long\n\t"
10992             "MOVSS  [ESP],$src\n\t"
10993             "FLD_S  [ESP]\n\t"
10994             "ADD    ESP,4\n\t"
10995             "CALL   d2l_wrapper\n"
10996       "fast:" %}
10997   ins_encode %{
10998     Label fast;
10999     __ subptr(rsp, 8);
11000     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11001     __ fld_s(Address(rsp, 0));
11002     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
11003     __ fistp_d(Address(rsp, 0));
11004     // Restore the rounding mode, mask the exception
11005     if (Compile::current()->in_24_bit_fp_mode()) {
11006       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
11007     } else {
11008       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
11009     }
11010     // Load the converted long, adjust CPU stack
11011     __ pop(rax);
11012     __ pop(rdx);
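    // As in the double case, 0x8000000000000000 is the x87 "integer indefinite" sentinel;
    // any other result is already correct and takes the fast path.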
11013     __ cmpl(rdx, 0x80000000);
11014     __ jccb(Assembler::notEqual, fast);
11015     __ testl(rax, rax);
11016     __ jccb(Assembler::notEqual, fast);
11017     __ subptr(rsp, 4);
11018     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11019     __ fld_s(Address(rsp, 0));
11020     __ addptr(rsp, 4);
11021     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11022     __ post_call_nop();
11023     __ bind(fast);
11024   %}
11025   ins_pipe( pipe_slow );
11026 %}
11027 
11028 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11029   predicate( UseSSE<=1 );
11030   match(Set dst (ConvI2D src));
11031   format %{ "FILD   $src\n\t"
11032             "FSTP   $dst" %}
11033   opcode(0xDB, 0x0);  /* DB /0 */
11034   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11035   ins_pipe( fpu_reg_mem );
11036 %}
11037 
11038 instruct convI2D_reg(regD dst, rRegI src) %{
11039   predicate( UseSSE>=2 && !UseXmmI2D );
11040   match(Set dst (ConvI2D src));
11041   format %{ "CVTSI2SD $dst,$src" %}
11042   ins_encode %{
11043     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11044   %}
11045   ins_pipe( pipe_slow );
11046 %}
11047 
11048 instruct convI2D_mem(regD dst, memory mem) %{
11049   predicate( UseSSE>=2 );
11050   match(Set dst (ConvI2D (LoadI mem)));
11051   format %{ "CVTSI2SD $dst,$mem" %}
11052   ins_encode %{
11053     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11054   %}
11055   ins_pipe( pipe_slow );
11056 %}
11057 
11058 instruct convXI2D_reg(regD dst, rRegI src)
11059 %{
11060   predicate( UseSSE>=2 && UseXmmI2D );
11061   match(Set dst (ConvI2D src));
11062 
11063   format %{ "MOVD  $dst,$src\n\t"
11064             "CVTDQ2PD $dst,$dst\t# i2d" %}
11065   ins_encode %{
11066     __ movdl($dst$$XMMRegister, $src$$Register);
11067     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11068   %}
11069   ins_pipe(pipe_slow); // XXX
11070 %}
11071 
11072 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11073   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11074   match(Set dst (ConvI2D (LoadI mem)));
11075   format %{ "FILD   $mem\n\t"
11076             "FSTP   $dst" %}
11077   opcode(0xDB);      /* DB /0 */
11078   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11079               Pop_Reg_DPR(dst));
11080   ins_pipe( fpu_reg_mem );
11081 %}
11082 
11083 // Convert a byte to a float; no rounding step needed.
11084 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11085   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11086   match(Set dst (ConvI2F src));
11087   format %{ "FILD   $src\n\t"
11088             "FSTP   $dst" %}
11089 
11090   opcode(0xDB, 0x0);  /* DB /0 */
11091   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11092   ins_pipe( fpu_reg_mem );
11093 %}
11094 
11095 // In 24-bit mode, force exponent rounding by storing back out
11096 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11097   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11098   match(Set dst (ConvI2F src));
11099   ins_cost(200);
11100   format %{ "FILD   $src\n\t"
11101             "FSTP_S $dst" %}
11102   opcode(0xDB, 0x0);  /* DB /0 */
11103   ins_encode( Push_Mem_I(src),
11104               Pop_Mem_FPR(dst));
11105   ins_pipe( fpu_mem_mem );
11106 %}
11107 
11108 // In 24-bit mode, force exponent rounding by storing back out
11109 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11110   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11111   match(Set dst (ConvI2F (LoadI mem)));
11112   ins_cost(200);
11113   format %{ "FILD   $mem\n\t"
11114             "FSTP_S $dst" %}
11115   opcode(0xDB);  /* DB /0 */
11116   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11117               Pop_Mem_FPR(dst));
11118   ins_pipe( fpu_mem_mem );
11119 %}
11120 
// This instruction does not round to 24 bits
11122 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11123   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11124   match(Set dst (ConvI2F src));
11125   format %{ "FILD   $src\n\t"
11126             "FSTP   $dst" %}
11127   opcode(0xDB, 0x0);  /* DB /0 */
11128   ins_encode( Push_Mem_I(src),
11129               Pop_Reg_FPR(dst));
11130   ins_pipe( fpu_reg_mem );
11131 %}
11132 
// This instruction does not round to 24 bits
11134 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11135   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11136   match(Set dst (ConvI2F (LoadI mem)));
11137   format %{ "FILD   $mem\n\t"
11138             "FSTP   $dst" %}
11139   opcode(0xDB);      /* DB /0 */
11140   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11141               Pop_Reg_FPR(dst));
11142   ins_pipe( fpu_reg_mem );
11143 %}
11144 
11145 // Convert an int to a float in xmm; no rounding step needed.
11146 instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11148   match(Set dst (ConvI2F src));
11149   format %{ "CVTSI2SS $dst, $src" %}
11150   ins_encode %{
11151     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11152   %}
11153   ins_pipe( pipe_slow );
11154 %}
11155 
instruct convXI2F_reg(regF dst, rRegI src)
11157 %{
11158   predicate( UseSSE>=2 && UseXmmI2F );
11159   match(Set dst (ConvI2F src));
11160 
11161   format %{ "MOVD  $dst,$src\n\t"
11162             "CVTDQ2PS $dst,$dst\t# i2f" %}
11163   ins_encode %{
11164     __ movdl($dst$$XMMRegister, $src$$Register);
11165     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11166   %}
11167   ins_pipe(pipe_slow); // XXX
11168 %}
11169 
11170 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11171   match(Set dst (ConvI2L src));
11172   effect(KILL cr);
11173   ins_cost(375);
11174   format %{ "MOV    $dst.lo,$src\n\t"
11175             "MOV    $dst.hi,$src\n\t"
11176             "SAR    $dst.hi,31" %}
11177   ins_encode(convert_int_long(dst,src));
11178   ins_pipe( ialu_reg_reg_long );
11179 %}
11180 
11181 // Zero-extend convert int to long
11182 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11183   match(Set dst (AndL (ConvI2L src) mask) );
11184   effect( KILL flags );
11185   ins_cost(250);
11186   format %{ "MOV    $dst.lo,$src\n\t"
11187             "XOR    $dst.hi,$dst.hi" %}
11188   opcode(0x33); // XOR
11189   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11190   ins_pipe( ialu_reg_reg_long );
11191 %}
11192 
11193 // Zero-extend long
11194 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11195   match(Set dst (AndL src mask) );
11196   effect( KILL flags );
11197   ins_cost(250);
11198   format %{ "MOV    $dst.lo,$src.lo\n\t"
11199             "XOR    $dst.hi,$dst.hi\n\t" %}
11200   opcode(0x33); // XOR
11201   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11202   ins_pipe( ialu_reg_reg_long );
11203 %}
11204 
11205 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11206   predicate (UseSSE<=1);
11207   match(Set dst (ConvL2D src));
11208   effect( KILL cr );
11209   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11210             "PUSH   $src.lo\n\t"
11211             "FILD   ST,[ESP + #0]\n\t"
11212             "ADD    ESP,8\n\t"
11213             "FSTP_D $dst\t# D-round" %}
11214   opcode(0xDF, 0x5);  /* DF /5 */
11215   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11216   ins_pipe( pipe_slow );
11217 %}
11218 
11219 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11220   predicate (UseSSE>=2);
11221   match(Set dst (ConvL2D src));
11222   effect( KILL cr );
11223   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11224             "PUSH   $src.lo\n\t"
11225             "FILD_D [ESP]\n\t"
11226             "FSTP_D [ESP]\n\t"
11227             "MOVSD  $dst,[ESP]\n\t"
11228             "ADD    ESP,8" %}
11229   opcode(0xDF, 0x5);  /* DF /5 */
11230   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11231   ins_pipe( pipe_slow );
11232 %}
11233 
11234 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11235   predicate (UseSSE>=1);
11236   match(Set dst (ConvL2F src));
11237   effect( KILL cr );
11238   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11239             "PUSH   $src.lo\n\t"
11240             "FILD_D [ESP]\n\t"
11241             "FSTP_S [ESP]\n\t"
11242             "MOVSS  $dst,[ESP]\n\t"
11243             "ADD    ESP,8" %}
11244   opcode(0xDF, 0x5);  /* DF /5 */
11245   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11246   ins_pipe( pipe_slow );
11247 %}
11248 
11249 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11250   match(Set dst (ConvL2F src));
11251   effect( KILL cr );
11252   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11253             "PUSH   $src.lo\n\t"
11254             "FILD   ST,[ESP + #0]\n\t"
11255             "ADD    ESP,8\n\t"
11256             "FSTP_S $dst\t# F-round" %}
11257   opcode(0xDF, 0x5);  /* DF /5 */
11258   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11259   ins_pipe( pipe_slow );
11260 %}
11261 
11262 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11263   match(Set dst (ConvL2I src));
11264   effect( DEF dst, USE src );
11265   format %{ "MOV    $dst,$src.lo" %}
11266   ins_encode(enc_CopyL_Lo(dst,src));
11267   ins_pipe( ialu_reg_reg );
11268 %}
11269 
11270 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11271   match(Set dst (MoveF2I src));
11272   effect( DEF dst, USE src );
11273   ins_cost(100);
11274   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11275   ins_encode %{
11276     __ movl($dst$$Register, Address(rsp, $src$$disp));
11277   %}
11278   ins_pipe( ialu_reg_mem );
11279 %}
11280 
11281 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11282   predicate(UseSSE==0);
11283   match(Set dst (MoveF2I src));
11284   effect( DEF dst, USE src );
11285 
11286   ins_cost(125);
11287   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11288   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11289   ins_pipe( fpu_mem_reg );
11290 %}
11291 
11292 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11293   predicate(UseSSE>=1);
11294   match(Set dst (MoveF2I src));
11295   effect( DEF dst, USE src );
11296 
11297   ins_cost(95);
11298   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11299   ins_encode %{
11300     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11301   %}
11302   ins_pipe( pipe_slow );
11303 %}
11304 
11305 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11306   predicate(UseSSE>=2);
11307   match(Set dst (MoveF2I src));
11308   effect( DEF dst, USE src );
11309   ins_cost(85);
11310   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11311   ins_encode %{
11312     __ movdl($dst$$Register, $src$$XMMRegister);
11313   %}
11314   ins_pipe( pipe_slow );
11315 %}
11316 
11317 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11318   match(Set dst (MoveI2F src));
11319   effect( DEF dst, USE src );
11320 
11321   ins_cost(100);
11322   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11323   ins_encode %{
11324     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11325   %}
11326   ins_pipe( ialu_mem_reg );
11327 %}
11328 
11329 
11330 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11331   predicate(UseSSE==0);
11332   match(Set dst (MoveI2F src));
11333   effect(DEF dst, USE src);
11334 
11335   ins_cost(125);
11336   format %{ "FLD_S  $src\n\t"
11337             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11338   opcode(0xD9);               /* D9 /0, FLD m32real */
11339   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11340               Pop_Reg_FPR(dst) );
11341   ins_pipe( fpu_reg_mem );
11342 %}
11343 
11344 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11345   predicate(UseSSE>=1);
11346   match(Set dst (MoveI2F src));
11347   effect( DEF dst, USE src );
11348 
11349   ins_cost(95);
11350   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11351   ins_encode %{
11352     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11353   %}
11354   ins_pipe( pipe_slow );
11355 %}
11356 
11357 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11358   predicate(UseSSE>=2);
11359   match(Set dst (MoveI2F src));
11360   effect( DEF dst, USE src );
11361 
11362   ins_cost(85);
11363   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11364   ins_encode %{
11365     __ movdl($dst$$XMMRegister, $src$$Register);
11366   %}
11367   ins_pipe( pipe_slow );
11368 %}
11369 
11370 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11371   match(Set dst (MoveD2L src));
11372   effect(DEF dst, USE src);
11373 
11374   ins_cost(250);
11375   format %{ "MOV    $dst.lo,$src\n\t"
11376             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11377   opcode(0x8B, 0x8B);
11378   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11379   ins_pipe( ialu_mem_long_reg );
11380 %}
11381 
11382 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11383   predicate(UseSSE<=1);
11384   match(Set dst (MoveD2L src));
11385   effect(DEF dst, USE src);
11386 
11387   ins_cost(125);
11388   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11389   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11390   ins_pipe( fpu_mem_reg );
11391 %}
11392 
11393 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11394   predicate(UseSSE>=2);
11395   match(Set dst (MoveD2L src));
11396   effect(DEF dst, USE src);
11397   ins_cost(95);
11398   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11399   ins_encode %{
11400     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11401   %}
11402   ins_pipe( pipe_slow );
11403 %}
11404 
11405 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11406   predicate(UseSSE>=2);
11407   match(Set dst (MoveD2L src));
11408   effect(DEF dst, USE src, TEMP tmp);
11409   ins_cost(85);
11410   format %{ "MOVD   $dst.lo,$src\n\t"
11411             "PSHUFLW $tmp,$src,0x4E\n\t"
11412             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11413   ins_encode %{
11414     __ movdl($dst$$Register, $src$$XMMRegister);
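    // PSHUFLW with 0x4E swaps the two 32-bit halves of the low quadword, so the next MOVD
    // can pick up the high 32 bits of the double's bit pattern.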
11415     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11416     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11417   %}
11418   ins_pipe( pipe_slow );
11419 %}
11420 
11421 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11422   match(Set dst (MoveL2D src));
11423   effect(DEF dst, USE src);
11424 
11425   ins_cost(200);
11426   format %{ "MOV    $dst,$src.lo\n\t"
11427             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11428   opcode(0x89, 0x89);
11429   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11430   ins_pipe( ialu_mem_long_reg );
11431 %}
11432 
11433 
11434 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11435   predicate(UseSSE<=1);
11436   match(Set dst (MoveL2D src));
11437   effect(DEF dst, USE src);
11438   ins_cost(125);
11439 
11440   format %{ "FLD_D  $src\n\t"
11441             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11442   opcode(0xDD);               /* DD /0, FLD m64real */
11443   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11444               Pop_Reg_DPR(dst) );
11445   ins_pipe( fpu_reg_mem );
11446 %}
11447 
11448 
11449 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11450   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11451   match(Set dst (MoveL2D src));
11452   effect(DEF dst, USE src);
11453 
11454   ins_cost(95);
11455   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11456   ins_encode %{
11457     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11458   %}
11459   ins_pipe( pipe_slow );
11460 %}
11461 
11462 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11463   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11464   match(Set dst (MoveL2D src));
11465   effect(DEF dst, USE src);
11466 
11467   ins_cost(95);
11468   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11469   ins_encode %{
11470     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11471   %}
11472   ins_pipe( pipe_slow );
11473 %}
11474 
11475 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11476   predicate(UseSSE>=2);
11477   match(Set dst (MoveL2D src));
11478   effect(TEMP dst, USE src, TEMP tmp);
11479   ins_cost(85);
11480   format %{ "MOVD   $dst,$src.lo\n\t"
11481             "MOVD   $tmp,$src.hi\n\t"
11482             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11483   ins_encode %{
11484     __ movdl($dst$$XMMRegister, $src$$Register);
11485     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
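    // PUNPCKLDQ interleaves the low dwords: dst[31:0] stays $src.lo and dst[63:32] becomes
    // $src.hi, reassembling the 64-bit bit pattern of the double.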
11486     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11487   %}
11488   ins_pipe( pipe_slow );
11489 %}
11490 
11491 //----------------------------- CompressBits/ExpandBits ------------------------
11492 
11493 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11494   predicate(n->bottom_type()->isa_long());
11495   match(Set dst (CompressBits src mask));
11496   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11497   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11498   ins_encode %{
    Label exit, partial_result;
    // Extract the upper and lower 32 bits of the source into the destination register pair in parallel.
    // Then merge the two halves so that the upper destination bits are laid out contiguously
    // after the lower destination bits.
11503     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11504     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11505     __ popcntl($rtmp$$Register, $mask$$Register);
11506     // Skip merging if bit count of lower mask register is equal to 32 (register size).
11507     __ cmpl($rtmp$$Register, 32);
11508     __ jccb(Assembler::equal, exit);
    // Due to the limited number of GPRs on a 32-bit target, use an XMM register as a spill slot.
11510     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11511     // Shift left the contents of upper destination register by true bit count of lower mask register
11512     // and merge with lower destination register.
11513     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11514     __ orl($dst$$Register, $rtmp$$Register);
11515     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    // Zero out the upper destination register if the true bit count of the lower 32-bit mask is
    // zero, since the contents of the upper destination have already been copied into the lower
    // destination register.
11519     __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
11521     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11522     __ jmp(exit);
    __ bind(partial_result);
11524     // Perform right shift over upper destination register to move out bits already copied
11525     // to lower destination register.
11526     __ subl($rtmp$$Register, 32);
11527     __ negl($rtmp$$Register);
11528     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11529     __ bind(exit);
11530   %}
11531   ins_pipe( pipe_slow );
11532 %}
11533 
11534 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11535   predicate(n->bottom_type()->isa_long());
11536   match(Set dst (ExpandBits src mask));
11537   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11538   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11539   ins_encode %{
    // The expand (deposit) operation reads bits from the source register sequentially, starting
    // at the LSB, and places them in the destination register at the bit positions corresponding
    // to the true bits in the mask register. The number of source bits consumed therefore equals
    // the combined true bit count of the mask register pair.
11544     Label exit, mask_clipping;
11545     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11546     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11547     __ popcntl($rtmp$$Register, $mask$$Register);
    // If the true bit count of the lower mask register is 32, no bits of the lower source
    // register feed into the upper destination register.
11550     __ cmpl($rtmp$$Register, 32);
11551     __ jccb(Assembler::equal, exit);
    // Due to the limited number of GPRs on a 32-bit target, use an XMM register as a spill slot.
11553     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11554     // Shift right the contents of lower source register to remove already consumed bits.
11555     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11556     // Extract the bits from lower source register starting from LSB under the influence
11557     // of upper mask register.
11558     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11559     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11560     __ subl($rtmp$$Register, 32);
11561     __ negl($rtmp$$Register);
11562     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11563     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
11564     // Clear the set bits in upper mask register which have been used to extract the contents
11565     // from lower source register.
11566     __ bind(mask_clipping);
11567     __ blsrl($mask$$Register, $mask$$Register);
11568     __ decrementl($rtmp$$Register, 1);
11569     __ jccb(Assembler::greater, mask_clipping);
11570     // Starting from LSB extract the bits from upper source register under the influence of
11571     // remaining set bits in upper mask register.
11572     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11573     // Merge the partial results extracted from lower and upper source register bits.
11574     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11575     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11576     __ bind(exit);
11577   %}
11578   ins_pipe( pipe_slow );
11579 %}
11580 
11581 // =======================================================================
11582 // fast clearing of an array
11583 // Small ClearArray non-AVX512.
11584 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11585   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11586   match(Set dummy (ClearArray cnt base));
11587   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11588 
11589   format %{ $$template
11590     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11591     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11592     $$emit$$"JG     LARGE\n\t"
11593     $$emit$$"SHL    ECX, 1\n\t"
11594     $$emit$$"DEC    ECX\n\t"
11595     $$emit$$"JS     DONE\t# Zero length\n\t"
11596     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11597     $$emit$$"DEC    ECX\n\t"
11598     $$emit$$"JGE    LOOP\n\t"
11599     $$emit$$"JMP    DONE\n\t"
11600     $$emit$$"# LARGE:\n\t"
11601     if (UseFastStosb) {
11602        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11603        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11604     } else if (UseXMMForObjInit) {
11605        $$emit$$"MOV     RDI,RAX\n\t"
11606        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11607        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11608        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11609        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11610        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11611        $$emit$$"ADD     0x40,RAX\n\t"
11612        $$emit$$"# L_zero_64_bytes:\n\t"
11613        $$emit$$"SUB     0x8,RCX\n\t"
11614        $$emit$$"JGE     L_loop\n\t"
11615        $$emit$$"ADD     0x4,RCX\n\t"
11616        $$emit$$"JL      L_tail\n\t"
11617        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11618        $$emit$$"ADD     0x20,RAX\n\t"
11619        $$emit$$"SUB     0x4,RCX\n\t"
11620        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11621        $$emit$$"ADD     0x4,RCX\n\t"
11622        $$emit$$"JLE     L_end\n\t"
11623        $$emit$$"DEC     RCX\n\t"
11624        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11625        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11626        $$emit$$"ADD     0x8,RAX\n\t"
11627        $$emit$$"DEC     RCX\n\t"
11628        $$emit$$"JGE     L_sloop\n\t"
11629        $$emit$$"# L_end:\n\t"
11630     } else {
11631        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11632        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11633     }
11634     $$emit$$"# DONE"
11635   %}
11636   ins_encode %{
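    // 'false' selects the small-block path, matching the !is_large() predicate above;
    // 'knoreg' means no AVX-512 mask register is used on this non-EVEX variant.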
11637     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11638                  $tmp$$XMMRegister, false, knoreg);
11639   %}
11640   ins_pipe( pipe_slow );
11641 %}
11642 
11643 // Small ClearArray AVX512 non-constant length.
11644 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11645   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11646   match(Set dummy (ClearArray cnt base));
11647   ins_cost(125);
11648   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11649 
11650   format %{ $$template
11651     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11652     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11653     $$emit$$"JG     LARGE\n\t"
11654     $$emit$$"SHL    ECX, 1\n\t"
11655     $$emit$$"DEC    ECX\n\t"
11656     $$emit$$"JS     DONE\t# Zero length\n\t"
11657     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11658     $$emit$$"DEC    ECX\n\t"
11659     $$emit$$"JGE    LOOP\n\t"
11660     $$emit$$"JMP    DONE\n\t"
11661     $$emit$$"# LARGE:\n\t"
11662     if (UseFastStosb) {
11663        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11664        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11665     } else if (UseXMMForObjInit) {
11666        $$emit$$"MOV     RDI,RAX\n\t"
11667        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11668        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11669        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11670        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11671        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11672        $$emit$$"ADD     0x40,RAX\n\t"
11673        $$emit$$"# L_zero_64_bytes:\n\t"
11674        $$emit$$"SUB     0x8,RCX\n\t"
11675        $$emit$$"JGE     L_loop\n\t"
11676        $$emit$$"ADD     0x4,RCX\n\t"
11677        $$emit$$"JL      L_tail\n\t"
11678        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11679        $$emit$$"ADD     0x20,RAX\n\t"
11680        $$emit$$"SUB     0x4,RCX\n\t"
11681        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11682        $$emit$$"ADD     0x4,RCX\n\t"
11683        $$emit$$"JLE     L_end\n\t"
11684        $$emit$$"DEC     RCX\n\t"
11685        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11686        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11687        $$emit$$"ADD     0x8,RAX\n\t"
11688        $$emit$$"DEC     RCX\n\t"
11689        $$emit$$"JGE     L_sloop\n\t"
11690        $$emit$$"# L_end:\n\t"
11691     } else {
11692        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11693        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11694     }
11695     $$emit$$"# DONE"
11696   %}
11697   ins_encode %{
11698     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11699                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11700   %}
11701   ins_pipe( pipe_slow );
11702 %}
11703 
11704 // Large ClearArray non-AVX512.
11705 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11706   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11707   match(Set dummy (ClearArray cnt base));
11708   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11709   format %{ $$template
11710     if (UseFastStosb) {
11711        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11712        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11713        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11714     } else if (UseXMMForObjInit) {
11715        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11716        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11717        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11718        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11719        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11720        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11721        $$emit$$"ADD     0x40,RAX\n\t"
11722        $$emit$$"# L_zero_64_bytes:\n\t"
11723        $$emit$$"SUB     0x8,RCX\n\t"
11724        $$emit$$"JGE     L_loop\n\t"
11725        $$emit$$"ADD     0x4,RCX\n\t"
11726        $$emit$$"JL      L_tail\n\t"
11727        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11728        $$emit$$"ADD     0x20,RAX\n\t"
11729        $$emit$$"SUB     0x4,RCX\n\t"
11730        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11731        $$emit$$"ADD     0x4,RCX\n\t"
11732        $$emit$$"JLE     L_end\n\t"
11733        $$emit$$"DEC     RCX\n\t"
11734        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11735        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11736        $$emit$$"ADD     0x8,RAX\n\t"
11737        $$emit$$"DEC     RCX\n\t"
11738        $$emit$$"JGE     L_sloop\n\t"
11739        $$emit$$"# L_end:\n\t"
11740     } else {
11741        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11742        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11743        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11744     }
11745     $$emit$$"# DONE"
11746   %}
11747   ins_encode %{
11748     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11749                  $tmp$$XMMRegister, true, knoreg);
11750   %}
11751   ins_pipe( pipe_slow );
11752 %}
11753 
11754 // Large ClearArray AVX512.
11755 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11756   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11757   match(Set dummy (ClearArray cnt base));
11758   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11759   format %{ $$template
11760     if (UseFastStosb) {
11761        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11762        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11763        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11764     } else if (UseXMMForObjInit) {
11765        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11766        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11767        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11768        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11769        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11770        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11771        $$emit$$"ADD     0x40,RAX\n\t"
11772        $$emit$$"# L_zero_64_bytes:\n\t"
11773        $$emit$$"SUB     0x8,RCX\n\t"
11774        $$emit$$"JGE     L_loop\n\t"
11775        $$emit$$"ADD     0x4,RCX\n\t"
11776        $$emit$$"JL      L_tail\n\t"
11777        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11778        $$emit$$"ADD     0x20,RAX\n\t"
11779        $$emit$$"SUB     0x4,RCX\n\t"
11780        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11781        $$emit$$"ADD     0x4,RCX\n\t"
11782        $$emit$$"JLE     L_end\n\t"
11783        $$emit$$"DEC     RCX\n\t"
11784        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11785        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11786        $$emit$$"ADD     0x8,RAX\n\t"
11787        $$emit$$"DEC     RCX\n\t"
11788        $$emit$$"JGE     L_sloop\n\t"
11789        $$emit$$"# L_end:\n\t"
11790     } else {
11791        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11792        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11793        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11794     }
11795     $$emit$$"# DONE"
11796   %}
11797   ins_encode %{
11798     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11799                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11800   %}
11801   ins_pipe( pipe_slow );
11802 %}
11803 
11804 // Small ClearArray AVX512 constant length.
11805 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11806 %{
11807   predicate(!((ClearArrayNode*)n)->is_large() &&
11808                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11809   match(Set dummy (ClearArray cnt base));
11810   ins_cost(100);
11811   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11812   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11813   ins_encode %{
11814    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11815   %}
11816   ins_pipe(pipe_slow);
11817 %}
11818 
11819 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11820                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11821   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11822   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11823   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11824 
11825   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11826   ins_encode %{
11827     __ string_compare($str1$$Register, $str2$$Register,
11828                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11829                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11830   %}
11831   ins_pipe( pipe_slow );
11832 %}
11833 
11834 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11835                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11836   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11837   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11838   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11839 
11840   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11841   ins_encode %{
11842     __ string_compare($str1$$Register, $str2$$Register,
11843                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11844                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11845   %}
11846   ins_pipe( pipe_slow );
11847 %}
11848 
11849 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11850                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11851   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11852   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11853   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11854 
11855   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11856   ins_encode %{
11857     __ string_compare($str1$$Register, $str2$$Register,
11858                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11859                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11860   %}
11861   ins_pipe( pipe_slow );
11862 %}
11863 
11864 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11865                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11866   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11867   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11868   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11869 
11870   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11871   ins_encode %{
11872     __ string_compare($str1$$Register, $str2$$Register,
11873                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11874                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11875   %}
11876   ins_pipe( pipe_slow );
11877 %}
11878 
11879 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11880                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11881   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11882   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11883   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11884 
11885   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11886   ins_encode %{
11887     __ string_compare($str1$$Register, $str2$$Register,
11888                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11889                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11890   %}
11891   ins_pipe( pipe_slow );
11892 %}
11893 
11894 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11895                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11896   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11897   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11898   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11899 
11900   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11901   ins_encode %{
11902     __ string_compare($str1$$Register, $str2$$Register,
11903                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11904                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11905   %}
11906   ins_pipe( pipe_slow );
11907 %}
11908 
11909 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11910                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11911   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11912   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11913   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11914 
11915   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11916   ins_encode %{
11917     __ string_compare($str2$$Register, $str1$$Register,
11918                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11919                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11920   %}
11921   ins_pipe( pipe_slow );
11922 %}
11923 
11924 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11925                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11926   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11927   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11928   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11929 
11930   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11931   ins_encode %{
11932     __ string_compare($str2$$Register, $str1$$Register,
11933                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11934                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11935   %}
11936   ins_pipe( pipe_slow );
11937 %}
11938 
11939 // fast string equals
11940 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11941                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11942   predicate(!VM_Version::supports_avx512vlbw());
11943   match(Set result (StrEquals (Binary str1 str2) cnt));
11944   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11945 
11946   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11947   ins_encode %{
11948     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11949                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11950                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11951   %}
11952 
11953   ins_pipe( pipe_slow );
11954 %}
11955 
11956 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11957                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11958   predicate(VM_Version::supports_avx512vlbw());
11959   match(Set result (StrEquals (Binary str1 str2) cnt));
11960   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11961 
11962   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11963   ins_encode %{
11964     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11965                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11966                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11967   %}
11968 
11969   ins_pipe( pipe_slow );
11970 %}
11971 
11972 
11973 // fast search of substring with known size.
11974 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11975                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11976   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11977   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11978   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11979 
11980   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11981   ins_encode %{
11982     int icnt2 = (int)$int_cnt2$$constant;
11983     if (icnt2 >= 16) {
11984       // IndexOf for constant substrings with size >= 16 elements
11985       // which don't need to be loaded through stack.
11986       __ string_indexofC8($str1$$Register, $str2$$Register,
11987                           $cnt1$$Register, $cnt2$$Register,
11988                           icnt2, $result$$Register,
11989                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11990     } else {
11991       // Small strings are loaded through stack if they cross page boundary.
11992       __ string_indexof($str1$$Register, $str2$$Register,
11993                         $cnt1$$Register, $cnt2$$Register,
11994                         icnt2, $result$$Register,
11995                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11996     }
11997   %}
11998   ins_pipe( pipe_slow );
11999 %}
12000 
12001 // fast search of substring with known size.
12002 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12003                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12004   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12005   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12006   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12007 
12008   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12009   ins_encode %{
12010     int icnt2 = (int)$int_cnt2$$constant;
12011     if (icnt2 >= 8) {
12012       // IndexOf for constant substrings with size >= 8 elements
12013       // which don't need to be loaded through stack.
12014       __ string_indexofC8($str1$$Register, $str2$$Register,
12015                           $cnt1$$Register, $cnt2$$Register,
12016                           icnt2, $result$$Register,
12017                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12018     } else {
12019       // Small strings are loaded through stack if they cross page boundary.
12020       __ string_indexof($str1$$Register, $str2$$Register,
12021                         $cnt1$$Register, $cnt2$$Register,
12022                         icnt2, $result$$Register,
12023                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12024     }
12025   %}
12026   ins_pipe( pipe_slow );
12027 %}
12028 
12029 // fast search of substring with known size.
12030 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12031                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12032   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12033   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12034   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12035 
12036   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12037   ins_encode %{
12038     int icnt2 = (int)$int_cnt2$$constant;
12039     if (icnt2 >= 8) {
12040       // IndexOf for constant substrings with size >= 8 elements
12041       // which don't need to be loaded through stack.
12042       __ string_indexofC8($str1$$Register, $str2$$Register,
12043                           $cnt1$$Register, $cnt2$$Register,
12044                           icnt2, $result$$Register,
12045                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12046     } else {
12047       // Small strings are loaded through stack if they cross page boundary.
12048       __ string_indexof($str1$$Register, $str2$$Register,
12049                         $cnt1$$Register, $cnt2$$Register,
12050                         icnt2, $result$$Register,
12051                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12052     }
12053   %}
12054   ins_pipe( pipe_slow );
12055 %}
12056 
12057 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12058                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12059   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12060   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12061   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12062 
12063   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12064   ins_encode %{
12065     __ string_indexof($str1$$Register, $str2$$Register,
12066                       $cnt1$$Register, $cnt2$$Register,
12067                       (-1), $result$$Register,
12068                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12069   %}
12070   ins_pipe( pipe_slow );
12071 %}
12072 
12073 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12074                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12075   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12076   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12077   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12078 
12079   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12080   ins_encode %{
12081     __ string_indexof($str1$$Register, $str2$$Register,
12082                       $cnt1$$Register, $cnt2$$Register,
12083                       (-1), $result$$Register,
12084                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12085   %}
12086   ins_pipe( pipe_slow );
12087 %}
12088 
12089 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12090                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12091   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12092   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12093   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12094 
12095   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12096   ins_encode %{
12097     __ string_indexof($str1$$Register, $str2$$Register,
12098                       $cnt1$$Register, $cnt2$$Register,
12099                       (-1), $result$$Register,
12100                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12101   %}
12102   ins_pipe( pipe_slow );
12103 %}
12104 
12105 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12106                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12107   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12108   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12109   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12110   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12111   ins_encode %{
12112     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12113                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12114   %}
12115   ins_pipe( pipe_slow );
12116 %}
12117 
12118 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12119                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12120   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12121   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12122   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12123   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12124   ins_encode %{
12125     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12126                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12127   %}
12128   ins_pipe( pipe_slow );
12129 %}
12130 
12131 
12132 // fast array equals
12133 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12134                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12135 %{
12136   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12137   match(Set result (AryEq ary1 ary2));
12138   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12139   //ins_cost(300);
12140 
12141   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12142   ins_encode %{
12143     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12144                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12145                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12146   %}
12147   ins_pipe( pipe_slow );
12148 %}
12149 
12150 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12151                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12152 %{
12153   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12154   match(Set result (AryEq ary1 ary2));
12155   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12156   //ins_cost(300);
12157 
12158   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12159   ins_encode %{
12160     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12161                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12162                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12163   %}
12164   ins_pipe( pipe_slow );
12165 %}
12166 
12167 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12168                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12169 %{
12170   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12171   match(Set result (AryEq ary1 ary2));
12172   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12173   //ins_cost(300);
12174 
12175   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12176   ins_encode %{
12177     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12178                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12179                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12180   %}
12181   ins_pipe( pipe_slow );
12182 %}
12183 
12184 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12185                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12186 %{
12187   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12188   match(Set result (AryEq ary1 ary2));
12189   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12190   //ins_cost(300);
12191 
12192   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12193   ins_encode %{
12194     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12195                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12196                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12197   %}
12198   ins_pipe( pipe_slow );
12199 %}
12200 
12201 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12202                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12203 %{
12204   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12205   match(Set result (CountPositives ary1 len));
12206   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12207 
12208   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12209   ins_encode %{
12210     __ count_positives($ary1$$Register, $len$$Register,
12211                        $result$$Register, $tmp3$$Register,
12212                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12213   %}
12214   ins_pipe( pipe_slow );
12215 %}
12216 
12217 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12218                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12219 %{
12220   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12221   match(Set result (CountPositives ary1 len));
12222   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12223 
12224   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12225   ins_encode %{
12226     __ count_positives($ary1$$Register, $len$$Register,
12227                        $result$$Register, $tmp3$$Register,
12228                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12229   %}
12230   ins_pipe( pipe_slow );
12231 %}
12232 
12233 
12234 // fast char[] to byte[] compression
12235 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12236                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12237   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12238   match(Set result (StrCompressedCopy src (Binary dst len)));
12239   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12240 
12241   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12242   ins_encode %{
12243     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12244                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12245                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12246                            knoreg, knoreg);
12247   %}
12248   ins_pipe( pipe_slow );
12249 %}
12250 
12251 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12252                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12253   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12254   match(Set result (StrCompressedCopy src (Binary dst len)));
12255   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12256 
12257   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12258   ins_encode %{
12259     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12260                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12261                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12262                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12263   %}
12264   ins_pipe( pipe_slow );
12265 %}
12266 
12267 // fast byte[] to char[] inflation
12268 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12269                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12270   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12271   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12272   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12273 
12274   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12275   ins_encode %{
12276     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12277                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12278   %}
12279   ins_pipe( pipe_slow );
12280 %}
12281 
12282 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12283                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12284   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12285   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12286   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12287 
12288   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12289   ins_encode %{
12290     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12291                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12292   %}
12293   ins_pipe( pipe_slow );
12294 %}
12295 
12296 // encode char[] to byte[] in ISO_8859_1
12297 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12298                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12299                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12300   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12301   match(Set result (EncodeISOArray src (Binary dst len)));
12302   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12303 
12304   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12305   ins_encode %{
12306     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12307                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12308                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12309   %}
12310   ins_pipe( pipe_slow );
12311 %}
12312 
12313 // encode char[] to byte[] in ASCII
12314 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12315                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12316                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12317   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12318   match(Set result (EncodeISOArray src (Binary dst len)));
12319   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12320 
12321   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12322   ins_encode %{
12323     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12324                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12325                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12326   %}
12327   ins_pipe( pipe_slow );
12328 %}
12329 
12330 //----------Control Flow Instructions------------------------------------------
12331 // Signed compare Instructions
12332 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12333   match(Set cr (CmpI op1 op2));
12334   effect( DEF cr, USE op1, USE op2 );
12335   format %{ "CMP    $op1,$op2" %}
12336   opcode(0x3B);  /* Opcode 3B /r */
12337   ins_encode( OpcP, RegReg( op1, op2) );
12338   ins_pipe( ialu_cr_reg_reg );
12339 %}
12340 
12341 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12342   match(Set cr (CmpI op1 op2));
12343   effect( DEF cr, USE op1 );
12344   format %{ "CMP    $op1,$op2" %}
12345   opcode(0x81,0x07);  /* Opcode 81 /7 */
12346   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12347   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12348   ins_pipe( ialu_cr_reg_imm );
12349 %}
12350 
12351 // Cisc-spilled version of cmpI_eReg
12352 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12353   match(Set cr (CmpI op1 (LoadI op2)));
12354 
12355   format %{ "CMP    $op1,$op2" %}
12356   ins_cost(500);
12357   opcode(0x3B);  /* Opcode 3B /r */
12358   ins_encode( OpcP, RegMem( op1, op2) );
12359   ins_pipe( ialu_cr_reg_mem );
12360 %}
12361 
12362 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12363   match(Set cr (CmpI src zero));
12364   effect( DEF cr, USE src );
12365 
12366   format %{ "TEST   $src,$src" %}
12367   opcode(0x85);
12368   ins_encode( OpcP, RegReg( src, src ) );
12369   ins_pipe( ialu_cr_reg_imm );
12370 %}
12371 
12372 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12373   match(Set cr (CmpI (AndI src con) zero));
12374 
12375   format %{ "TEST   $src,$con" %}
12376   opcode(0xF7,0x00);
12377   ins_encode( OpcP, RegOpc(src), Con32(con) );
12378   ins_pipe( ialu_cr_reg_imm );
12379 %}
12380 
12381 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12382   match(Set cr (CmpI (AndI src mem) zero));
12383 
12384   format %{ "TEST   $src,$mem" %}
12385   opcode(0x85);
12386   ins_encode( OpcP, RegMem( src, mem ) );
12387   ins_pipe( ialu_cr_reg_mem );
12388 %}
12389 
12390 // Unsigned compare Instructions; really, same as signed except they
12391 // produce an eFlagsRegU instead of eFlagsReg.
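// (The encodings are identical to the signed forms; only the conditions later
// applied to the flags differ: unsigned compares use the carry-based
// below/above conditions rather than the sign/overflow-based less/greater
// conditions.)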
12392 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12393   match(Set cr (CmpU op1 op2));
12394 
12395   format %{ "CMPu   $op1,$op2" %}
12396   opcode(0x3B);  /* Opcode 3B /r */
12397   ins_encode( OpcP, RegReg( op1, op2) );
12398   ins_pipe( ialu_cr_reg_reg );
12399 %}
12400 
12401 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12402   match(Set cr (CmpU op1 op2));
12403 
12404   format %{ "CMPu   $op1,$op2" %}
12405   opcode(0x81,0x07);  /* Opcode 81 /7 */
12406   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12407   ins_pipe( ialu_cr_reg_imm );
12408 %}
12409 
12410 // // Cisc-spilled version of cmpU_eReg
12411 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12412   match(Set cr (CmpU op1 (LoadI op2)));
12413 
12414   format %{ "CMPu   $op1,$op2" %}
12415   ins_cost(500);
12416   opcode(0x3B);  /* Opcode 3B /r */
12417   ins_encode( OpcP, RegMem( op1, op2) );
12418   ins_pipe( ialu_cr_reg_mem );
12419 %}
12420 
12421 // // Cisc-spilled version of cmpU_eReg
12422 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12423 //  match(Set cr (CmpU (LoadI op1) op2));
12424 //
12425 //  format %{ "CMPu   $op1,$op2" %}
12426 //  ins_cost(500);
12427 //  opcode(0x39);  /* Opcode 39 /r */
12428 //  ins_encode( OpcP, RegMem( op1, op2) );
12429 //%}
12430 
12431 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12432   match(Set cr (CmpU src zero));
12433 
12434   format %{ "TESTu  $src,$src" %}
12435   opcode(0x85);
12436   ins_encode( OpcP, RegReg( src, src ) );
12437   ins_pipe( ialu_cr_reg_imm );
12438 %}
12439 
12440 // Unsigned pointer compare Instructions
12441 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12442   match(Set cr (CmpP op1 op2));
12443 
12444   format %{ "CMPu   $op1,$op2" %}
12445   opcode(0x3B);  /* Opcode 3B /r */
12446   ins_encode( OpcP, RegReg( op1, op2) );
12447   ins_pipe( ialu_cr_reg_reg );
12448 %}
12449 
12450 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12451   match(Set cr (CmpP op1 op2));
12452 
12453   format %{ "CMPu   $op1,$op2" %}
12454   opcode(0x81,0x07);  /* Opcode 81 /7 */
12455   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12456   ins_pipe( ialu_cr_reg_imm );
12457 %}
12458 
12459 // // Cisc-spilled version of cmpP_eReg
12460 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12461   match(Set cr (CmpP op1 (LoadP op2)));
12462 
12463   format %{ "CMPu   $op1,$op2" %}
12464   ins_cost(500);
12465   opcode(0x3B);  /* Opcode 3B /r */
12466   ins_encode( OpcP, RegMem( op1, op2) );
12467   ins_pipe( ialu_cr_reg_mem );
12468 %}
12469 
12470 // // Cisc-spilled version of cmpP_eReg
12471 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12472 //  match(Set cr (CmpP (LoadP op1) op2));
12473 //
12474 //  format %{ "CMPu   $op1,$op2" %}
12475 //  ins_cost(500);
12476 //  opcode(0x39);  /* Opcode 39 /r */
12477 //  ins_encode( OpcP, RegMem( op1, op2) );
12478 //%}
12479 
12480 // Compare raw pointer (used in out-of-heap check).
12481 // Only works because non-oop pointers must be raw pointers
12482 // and raw pointers have no anti-dependencies.
12483 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12484   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12485   match(Set cr (CmpP op1 (LoadP op2)));
12486 
12487   format %{ "CMPu   $op1,$op2" %}
12488   opcode(0x3B);  /* Opcode 3B /r */
12489   ins_encode( OpcP, RegMem( op1, op2) );
12490   ins_pipe( ialu_cr_reg_mem );
12491 %}
12492 
12493 //
// This will generate a signed flags result. This should be ok
// since any compare against zero should be eq/neq.
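// (TEST of a register against itself ANDs the value with itself, so ZF is set
// exactly when the pointer is null; the sign and overflow flags carry no
// useful ordering information for pointers, hence the eq/neq-only expectation.)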
12496 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12497   match(Set cr (CmpP src zero));
12498 
12499   format %{ "TEST   $src,$src" %}
12500   opcode(0x85);
12501   ins_encode( OpcP, RegReg( src, src ) );
12502   ins_pipe( ialu_cr_reg_imm );
12503 %}
12504 
12505 // Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare against zero should be eq/neq.
12508 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12509   match(Set cr (CmpP (LoadP op) zero));
12510 
12511   format %{ "TEST   $op,0xFFFFFFFF" %}
12512   ins_cost(500);
12513   opcode(0xF7);               /* Opcode F7 /0 */
12514   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12515   ins_pipe( ialu_cr_reg_imm );
12516 %}
12517 
12518 // Yanked all unsigned pointer compare operations.
12519 // Pointer compares are done with CmpP which is already unsigned.
12520 
12521 //----------Max and Min--------------------------------------------------------
12522 // Min Instructions
12523 ////
12524 //   *** Min and Max using the conditional move are slower than the
12525 //   *** branch version on a Pentium III.
12526 // // Conditional move for min
12527 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12528 //  effect( USE_DEF op2, USE op1, USE cr );
12529 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12530 //  opcode(0x4C,0x0F);
12531 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12532 //  ins_pipe( pipe_cmov_reg );
12533 //%}
12534 //
12535 //// Min Register with Register (P6 version)
12536 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12537 //  predicate(VM_Version::supports_cmov() );
12538 //  match(Set op2 (MinI op1 op2));
12539 //  ins_cost(200);
12540 //  expand %{
12541 //    eFlagsReg cr;
12542 //    compI_eReg(cr,op1,op2);
12543 //    cmovI_reg_lt(op2,op1,cr);
12544 //  %}
12545 //%}
12546 
12547 // Min Register with Register (generic version)
12548 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12549   match(Set dst (MinI dst src));
12550   effect(KILL flags);
12551   ins_cost(300);
12552 
12553   format %{ "MIN    $dst,$src" %}
12554   opcode(0xCC);
12555   ins_encode( min_enc(dst,src) );
12556   ins_pipe( pipe_slow );
12557 %}
12558 
12559 // Max Register with Register
12560 //   *** Min and Max using the conditional move are slower than the
12561 //   *** branch version on a Pentium III.
12562 // // Conditional move for max
12563 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12564 //  effect( USE_DEF op2, USE op1, USE cr );
12565 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12566 //  opcode(0x4F,0x0F);
12567 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12568 //  ins_pipe( pipe_cmov_reg );
12569 //%}
12570 //
12571 // // Max Register with Register (P6 version)
12572 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12573 //  predicate(VM_Version::supports_cmov() );
12574 //  match(Set op2 (MaxI op1 op2));
12575 //  ins_cost(200);
12576 //  expand %{
12577 //    eFlagsReg cr;
12578 //    compI_eReg(cr,op1,op2);
12579 //    cmovI_reg_gt(op2,op1,cr);
12580 //  %}
12581 //%}
12582 
12583 // Max Register with Register (generic version)
12584 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12585   match(Set dst (MaxI dst src));
12586   effect(KILL flags);
12587   ins_cost(300);
12588 
12589   format %{ "MAX    $dst,$src" %}
12590   opcode(0xCC);
12591   ins_encode( max_enc(dst,src) );
12592   ins_pipe( pipe_slow );
12593 %}
12594 
12595 // ============================================================================
12596 // Counted Loop limit node which represents exact final iterator value.
12597 // Note: the resulting value should fit into integer range since
// counted loops have a limit check on overflow.
12599 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12600   match(Set limit (LoopLimit (Binary init limit) stride));
12601   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12602   ins_cost(300);
12603 
12604   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12605   ins_encode %{
12606     int strd = (int)$stride$$constant;
12607     assert(strd != 1 && strd != -1, "sanity");
12608     int m1 = (strd > 0) ? 1 : -1;
12609     // Convert limit to long (EAX:EDX)
12610     __ cdql();
12611     // Convert init to long (init:tmp)
12612     __ movl($tmp$$Register, $init$$Register);
12613     __ sarl($tmp$$Register, 31);
12614     // $limit - $init
12615     __ subl($limit$$Register, $init$$Register);
12616     __ sbbl($limit_hi$$Register, $tmp$$Register);
12617     // + ($stride - 1)
12618     if (strd > 0) {
12619       __ addl($limit$$Register, (strd - 1));
12620       __ adcl($limit_hi$$Register, 0);
12621       __ movl($tmp$$Register, strd);
12622     } else {
12623       __ addl($limit$$Register, (strd + 1));
12624       __ adcl($limit_hi$$Register, -1);
12625       __ lneg($limit_hi$$Register, $limit$$Register);
12626       __ movl($tmp$$Register, -strd);
12627     }
12628     // signed division: (EAX:EDX) / pos_stride
12629     __ idivl($tmp$$Register);
12630     if (strd < 0) {
12631       // restore sign
12632       __ negl($tmp$$Register);
12633     }
12634     // (EAX) * stride
12635     __ mull($tmp$$Register);
12636     // + init (ignore upper bits)
12637     __ addl($limit$$Register, $init$$Register);
12638   %}
12639   ins_pipe( pipe_slow );
12640 %}
12641 
12642 // ============================================================================
12643 // Branch Instructions
12644 // Jump Table
12645 instruct jumpXtnd(rRegI switch_val) %{
12646   match(Jump switch_val);
12647   ins_cost(350);
12648   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12649   ins_encode %{
12650     // Jump to Address(table_base + switch_reg)
12651     Address index(noreg, $switch_val$$Register, Address::times_1);
12652     __ jump(ArrayAddress($constantaddress, index), noreg);
12653   %}
12654   ins_pipe(pipe_jmp);
12655 %}
12656 
12657 // Jump Direct - Label defines a relative address from JMP+1
12658 instruct jmpDir(label labl) %{
12659   match(Goto);
12660   effect(USE labl);
12661 
12662   ins_cost(300);
12663   format %{ "JMP    $labl" %}
12664   size(5);
12665   ins_encode %{
12666     Label* L = $labl$$label;
12667     __ jmp(*L, false); // Always long jump
12668   %}
12669   ins_pipe( pipe_jmp );
12670 %}
12671 
12672 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12673 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12674   match(If cop cr);
12675   effect(USE labl);
12676 
12677   ins_cost(300);
12678   format %{ "J$cop    $labl" %}
12679   size(6);
12680   ins_encode %{
12681     Label* L = $labl$$label;
12682     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12683   %}
12684   ins_pipe( pipe_jcc );
12685 %}
12686 
12687 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12688 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12689   match(CountedLoopEnd cop cr);
12690   effect(USE labl);
12691 
12692   ins_cost(300);
12693   format %{ "J$cop    $labl\t# Loop end" %}
12694   size(6);
12695   ins_encode %{
12696     Label* L = $labl$$label;
12697     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12698   %}
12699   ins_pipe( pipe_jcc );
12700 %}
12701 
12702 // Jump Direct Conditional - using unsigned comparison
12703 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12704   match(If cop cmp);
12705   effect(USE labl);
12706 
12707   ins_cost(300);
12708   format %{ "J$cop,u  $labl" %}
12709   size(6);
12710   ins_encode %{
12711     Label* L = $labl$$label;
12712     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12713   %}
12714   ins_pipe(pipe_jcc);
12715 %}
12716 
12717 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12718   match(If cop cmp);
12719   effect(USE labl);
12720 
12721   ins_cost(200);
12722   format %{ "J$cop,u  $labl" %}
12723   size(6);
12724   ins_encode %{
12725     Label* L = $labl$$label;
12726     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12727   %}
12728   ins_pipe(pipe_jcc);
12729 %}
12730 
12731 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12732   match(If cop cmp);
12733   effect(USE labl);
12734 
12735   ins_cost(200);
12736   format %{ $$template
12737     if ($cop$$cmpcode == Assembler::notEqual) {
12738       $$emit$$"JP,u   $labl\n\t"
12739       $$emit$$"J$cop,u   $labl"
12740     } else {
12741       $$emit$$"JP,u   done\n\t"
12742       $$emit$$"J$cop,u   $labl\n\t"
12743       $$emit$$"done:"
12744     }
12745   %}
12746   ins_encode %{
12747     Label* l = $labl$$label;
12748     if ($cop$$cmpcode == Assembler::notEqual) {
12749       __ jcc(Assembler::parity, *l, false);
12750       __ jcc(Assembler::notEqual, *l, false);
12751     } else if ($cop$$cmpcode == Assembler::equal) {
12752       Label done;
12753       __ jccb(Assembler::parity, done);
12754       __ jcc(Assembler::equal, *l, false);
12755       __ bind(done);
12756     } else {
12757        ShouldNotReachHere();
12758     }
12759   %}
12760   ins_pipe(pipe_jcc);
12761 %}
12762 
12763 // ============================================================================
// The second (slow) half of a subtype check.  Scan the subklass's secondary
// superclass array for an instance of the superklass.  Set a hidden internal
// cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
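// Roughly equivalent pseudo-code for the scan performed below (a readability
// sketch, not literal source):
//   for (i = 0; i < sub->secondary_supers()->length(); i++) {
//     if (sub->secondary_supers()->at(i) == super) {
//       sub->secondary_super_cache = super;   // remember the hit
//       return 0;                             // hit: result zero, flags Z
//     }
//   }
//   return non_zero;                          // miss: flags NZ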
12768 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12769   match(Set result (PartialSubtypeCheck sub super));
12770   effect( KILL rcx, KILL cr );
12771 
12772   ins_cost(1100);  // slightly larger than the next version
12773   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12774             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12775             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12776             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12777             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12778             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12779             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12780      "miss:\t" %}
12781 
12782   opcode(0x1); // Force a XOR of EDI
12783   ins_encode( enc_PartialSubtypeCheck() );
12784   ins_pipe( pipe_slow );
12785 %}
12786 
12787 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12788   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12789   effect( KILL rcx, KILL result );
12790 
12791   ins_cost(1000);
12792   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12793             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12794             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12795             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12796             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12797             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12798      "miss:\t" %}
12799 
12800   opcode(0x0);  // No need to XOR EDI
12801   ins_encode( enc_PartialSubtypeCheck() );
12802   ins_pipe( pipe_slow );
12803 %}
12804 
12805 // ============================================================================
12806 // Branch Instructions -- short offset versions
12807 //
12808 // These instructions are used to replace jumps of a long offset (the default
12809 // match) with jumps of a shorter offset.  These instructions are all tagged
12810 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12811 // match rules in general matching.  Instead, the ADLC generates a conversion
12812 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether the
// short variant can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
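// For example, an unconditional long jump uses the 5-byte E9 rel32 encoding
// (size(5) above) while the short form uses the 2-byte EB rel8 encoding
// (size(2) below); conditional jumps likewise shrink from the 6-byte
// 0F 8x rel32 form to the 2-byte 7x rel8 form.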
12816 
12817 // Jump Direct - Label defines a relative address from JMP+1
12818 instruct jmpDir_short(label labl) %{
12819   match(Goto);
12820   effect(USE labl);
12821 
12822   ins_cost(300);
12823   format %{ "JMP,s  $labl" %}
12824   size(2);
12825   ins_encode %{
12826     Label* L = $labl$$label;
12827     __ jmpb(*L);
12828   %}
12829   ins_pipe( pipe_jmp );
12830   ins_short_branch(1);
12831 %}
12832 
12833 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12834 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12835   match(If cop cr);
12836   effect(USE labl);
12837 
12838   ins_cost(300);
12839   format %{ "J$cop,s  $labl" %}
12840   size(2);
12841   ins_encode %{
12842     Label* L = $labl$$label;
12843     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12844   %}
12845   ins_pipe( pipe_jcc );
12846   ins_short_branch(1);
12847 %}
12848 
12849 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12850 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12851   match(CountedLoopEnd cop cr);
12852   effect(USE labl);
12853 
12854   ins_cost(300);
12855   format %{ "J$cop,s  $labl\t# Loop end" %}
12856   size(2);
12857   ins_encode %{
12858     Label* L = $labl$$label;
12859     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12860   %}
12861   ins_pipe( pipe_jcc );
12862   ins_short_branch(1);
12863 %}
12864 
12865 // Jump Direct Conditional - using unsigned comparison
12866 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12867   match(If cop cmp);
12868   effect(USE labl);
12869 
12870   ins_cost(300);
12871   format %{ "J$cop,us $labl" %}
12872   size(2);
12873   ins_encode %{
12874     Label* L = $labl$$label;
12875     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12876   %}
12877   ins_pipe( pipe_jcc );
12878   ins_short_branch(1);
12879 %}
12880 
12881 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12882   match(If cop cmp);
12883   effect(USE labl);
12884 
12885   ins_cost(300);
12886   format %{ "J$cop,us $labl" %}
12887   size(2);
12888   ins_encode %{
12889     Label* L = $labl$$label;
12890     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12891   %}
12892   ins_pipe( pipe_jcc );
12893   ins_short_branch(1);
12894 %}
12895 
12896 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12897   match(If cop cmp);
12898   effect(USE labl);
12899 
12900   ins_cost(300);
12901   format %{ $$template
12902     if ($cop$$cmpcode == Assembler::notEqual) {
12903       $$emit$$"JP,u,s   $labl\n\t"
12904       $$emit$$"J$cop,u,s   $labl"
12905     } else {
12906       $$emit$$"JP,u,s   done\n\t"
12907       $$emit$$"J$cop,u,s  $labl\n\t"
12908       $$emit$$"done:"
12909     }
12910   %}
12911   size(4);
12912   ins_encode %{
12913     Label* l = $labl$$label;
12914     if ($cop$$cmpcode == Assembler::notEqual) {
12915       __ jccb(Assembler::parity, *l);
12916       __ jccb(Assembler::notEqual, *l);
12917     } else if ($cop$$cmpcode == Assembler::equal) {
12918       Label done;
12919       __ jccb(Assembler::parity, done);
12920       __ jccb(Assembler::equal, *l);
12921       __ bind(done);
12922     } else {
12923        ShouldNotReachHere();
12924     }
12925   %}
12926   ins_pipe(pipe_jcc);
12927   ins_short_branch(1);
12928 %}
12929 
12930 // ============================================================================
12931 // Long Compare
12932 //
12933 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12934 // is tricky.  The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands and using the GE test (LE(x,y) == GE(y,x)); likewise, the GT
// test is the commuted LT test.  The EQ test is done by OR'ing the high and
// low halves together and testing the result for zero; the NE test is the
// negation of that.
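// Sketch of the flag trick used by the LT/GE forms further below (illustrative
// only, not literal source): with the 64-bit values x and y held in register
// pairs,
//   CMP  x.lo, y.lo      // borrow (CF) set if x.lo < y.lo (unsigned)
//   MOV  tmp, x.hi
//   SBB  tmp, y.hi       // tmp = x.hi - y.hi - CF
// after which the signed less / greater-or-equal conditions reflect the full
// 64-bit comparison, while ZF does not -- which is why those forms are no
// good for EQ/NE.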
12940 
12941 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12942 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12943 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12944 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12945 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf.  One fix is to not match both
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the ternary form of long-compare and both are very useful
// on Intel, which has so few registers.
12950 
12951 // Manifest a CmpL result in an integer register.  Very painful.
12952 // This is the test to avoid.
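// The result is -1, 0 or +1, matching the three-way result expected by the
// lcmp bytecode (and Long.compare).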
12953 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12954   match(Set dst (CmpL3 src1 src2));
12955   effect( KILL flags );
12956   ins_cost(1000);
12957   format %{ "XOR    $dst,$dst\n\t"
12958             "CMP    $src1.hi,$src2.hi\n\t"
12959             "JLT,s  m_one\n\t"
12960             "JGT,s  p_one\n\t"
12961             "CMP    $src1.lo,$src2.lo\n\t"
12962             "JB,s   m_one\n\t"
12963             "JEQ,s  done\n"
12964     "p_one:\tINC    $dst\n\t"
12965             "JMP,s  done\n"
12966     "m_one:\tDEC    $dst\n"
12967      "done:" %}
12968   ins_encode %{
12969     Label p_one, m_one, done;
12970     __ xorptr($dst$$Register, $dst$$Register);
12971     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12972     __ jccb(Assembler::less,    m_one);
12973     __ jccb(Assembler::greater, p_one);
12974     __ cmpl($src1$$Register, $src2$$Register);
12975     __ jccb(Assembler::below,   m_one);
12976     __ jccb(Assembler::equal,   done);
12977     __ bind(p_one);
12978     __ incrementl($dst$$Register);
12979     __ jmpb(done);
12980     __ bind(m_one);
12981     __ decrementl($dst$$Register);
12982     __ bind(done);
12983   %}
12984   ins_pipe( pipe_slow );
12985 %}
12986 
12987 //======
12988 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12989 // compares.  Can be used for LE or GT compares by reversing arguments.
12990 // NOT GOOD FOR EQ/NE tests.
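// (Testing only the high word works because the sign of a two-register long
// lives entirely in src.hi, so SF after the TEST gives LT/GE versus zero;
// ZF says nothing about the low word, hence the EQ/NE restriction.)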
12991 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12992   match( Set flags (CmpL src zero ));
12993   ins_cost(100);
12994   format %{ "TEST   $src.hi,$src.hi" %}
12995   opcode(0x85);
12996   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12997   ins_pipe( ialu_cr_reg_reg );
12998 %}
12999 
13000 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13001 // compares.  Can be used for LE or GT compares by reversing arguments.
13002 // NOT GOOD FOR EQ/NE tests.
13003 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13004   match( Set flags (CmpL src1 src2 ));
13005   effect( TEMP tmp );
13006   ins_cost(300);
13007   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13008             "MOV    $tmp,$src1.hi\n\t"
13009             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13010   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13011   ins_pipe( ialu_cr_reg_reg );
13012 %}
13013 
// Long compares reg < zero/reg OR reg >= zero/reg.
13015 // Just a wrapper for a normal branch, plus the predicate test.
13016 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13017   match(If cmp flags);
13018   effect(USE labl);
13019   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13020   expand %{
13021     jmpCon(cmp,flags,labl);    // JLT or JGE...
13022   %}
13023 %}
13024 
13025 //======
13026 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13027 // compares.  Can be used for LE or GT compares by reversing arguments.
13028 // NOT GOOD FOR EQ/NE tests.
13029 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13030   match(Set flags (CmpUL src zero));
13031   ins_cost(100);
13032   format %{ "TEST   $src.hi,$src.hi" %}
13033   opcode(0x85);
13034   ins_encode(OpcP, RegReg_Hi2(src, src));
13035   ins_pipe(ialu_cr_reg_reg);
13036 %}
13037 
13038 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13039 // compares.  Can be used for LE or GT compares by reversing arguments.
13040 // NOT GOOD FOR EQ/NE tests.
13041 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13042   match(Set flags (CmpUL src1 src2));
13043   effect(TEMP tmp);
13044   ins_cost(300);
13045   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13046             "MOV    $tmp,$src1.hi\n\t"
13047             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13048   ins_encode(long_cmp_flags2(src1, src2, tmp));
13049   ins_pipe(ialu_cr_reg_reg);
13050 %}
13051 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13053 // Just a wrapper for a normal branch, plus the predicate test.
13054 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13055   match(If cmp flags);
13056   effect(USE labl);
13057   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13058   expand %{
13059     jmpCon(cmp, flags, labl);    // JLT or JGE...
13060   %}
13061 %}
13062 
13063 // Compare 2 longs and CMOVE longs.
13064 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13065   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13066   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13067   ins_cost(400);
13068   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13069             "CMOV$cmp $dst.hi,$src.hi" %}
13070   opcode(0x0F,0x40);
13071   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13072   ins_pipe( pipe_cmov_reg_long );
13073 %}
13074 
13075 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13076   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13077   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13078   ins_cost(500);
13079   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13080             "CMOV$cmp $dst.hi,$src.hi" %}
13081   opcode(0x0F,0x40);
13082   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13083   ins_pipe( pipe_cmov_reg_long );
13084 %}
13085 
13086 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
13087   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13088   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13089   ins_cost(400);
13090   expand %{
13091     cmovLL_reg_LTGE(cmp, flags, dst, src);
13092   %}
13093 %}
13094 
13095 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
13096   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13097   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13098   ins_cost(500);
13099   expand %{
13100     cmovLL_mem_LTGE(cmp, flags, dst, src);
13101   %}
13102 %}
13103 
13104 // Compare 2 longs and CMOVE ints.
13105 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13106   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13107   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13108   ins_cost(200);
13109   format %{ "CMOV$cmp $dst,$src" %}
13110   opcode(0x0F,0x40);
13111   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13112   ins_pipe( pipe_cmov_reg );
13113 %}
13114 
13115 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13116   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13117   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13118   ins_cost(250);
13119   format %{ "CMOV$cmp $dst,$src" %}
13120   opcode(0x0F,0x40);
13121   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13122   ins_pipe( pipe_cmov_mem );
13123 %}
13124 
13125 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
13126   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13127   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13128   ins_cost(200);
13129   expand %{
13130     cmovII_reg_LTGE(cmp, flags, dst, src);
13131   %}
13132 %}
13133 
13134 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
13135   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13136   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13137   ins_cost(250);
13138   expand %{
13139     cmovII_mem_LTGE(cmp, flags, dst, src);
13140   %}
13141 %}
13142 
13143 // Compare 2 longs and CMOVE ptrs.
13144 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13145   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13146   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13147   ins_cost(200);
13148   format %{ "CMOV$cmp $dst,$src" %}
13149   opcode(0x0F,0x40);
13150   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13151   ins_pipe( pipe_cmov_reg );
13152 %}
13153 
13154 // Compare 2 unsigned longs and CMOVE ptrs.
13155 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13156   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13157   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13158   ins_cost(200);
13159   expand %{
13160     cmovPP_reg_LTGE(cmp,flags,dst,src);
13161   %}
13162 %}
13163 
13164 // Compare 2 longs and CMOVE doubles
13165 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13167   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13168   ins_cost(200);
13169   expand %{
13170     fcmovDPR_regS(cmp,flags,dst,src);
13171   %}
13172 %}
13173 
13174 // Compare 2 longs and CMOVE doubles
13175 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13177   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13178   ins_cost(200);
13179   expand %{
13180     fcmovD_regS(cmp,flags,dst,src);
13181   %}
13182 %}
13183 
13184 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13186   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13187   ins_cost(200);
13188   expand %{
13189     fcmovFPR_regS(cmp,flags,dst,src);
13190   %}
13191 %}
13192 
13193 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13195   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13196   ins_cost(200);
13197   expand %{
13198     fcmovF_regS(cmp,flags,dst,src);
13199   %}
13200 %}
13201 
13202 //======
13203 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
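// (OR'ing the two halves into tmp yields zero exactly when the whole long is
// zero, so ZF gives EQ/NE versus zero; the ordering flags after an OR are
// meaningless for a 64-bit comparison.)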
13204 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13205   match( Set flags (CmpL src zero ));
13206   effect(TEMP tmp);
13207   ins_cost(200);
13208   format %{ "MOV    $tmp,$src.lo\n\t"
13209             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13210   ins_encode( long_cmp_flags0( src, tmp ) );
13211   ins_pipe( ialu_reg_reg_long );
13212 %}
13213 
13214 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13215 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13216   match( Set flags (CmpL src1 src2 ));
13217   ins_cost(200+300);
13218   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13219             "JNE,s  skip\n\t"
13220             "CMP    $src1.hi,$src2.hi\n\t"
13221      "skip:\t" %}
13222   ins_encode( long_cmp_flags1( src1, src2 ) );
13223   ins_pipe( ialu_cr_reg_reg );
13224 %}
13225 
13226 // Long compare reg == zero/reg OR reg != zero/reg
13227 // Just a wrapper for a normal branch, plus the predicate test.
13228 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13229   match(If cmp flags);
13230   effect(USE labl);
13231   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13232   expand %{
13233     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13234   %}
13235 %}
13236 
13237 //======
13238 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13239 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13240   match(Set flags (CmpUL src zero));
13241   effect(TEMP tmp);
13242   ins_cost(200);
13243   format %{ "MOV    $tmp,$src.lo\n\t"
13244             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13245   ins_encode(long_cmp_flags0(src, tmp));
13246   ins_pipe(ialu_reg_reg_long);
13247 %}
13248 
13249 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13250 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13251   match(Set flags (CmpUL src1 src2));
13252   ins_cost(200+300);
13253   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13254             "JNE,s  skip\n\t"
13255             "CMP    $src1.hi,$src2.hi\n\t"
13256      "skip:\t" %}
13257   ins_encode(long_cmp_flags1(src1, src2));
13258   ins_pipe(ialu_cr_reg_reg);
13259 %}
13260 
13261 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13262 // Just a wrapper for a normal branch, plus the predicate test.
13263 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13264   match(If cmp flags);
13265   effect(USE labl);
13266   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13267   expand %{
13268     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13269   %}
13270 %}
13271 
13272 // Compare 2 longs and CMOVE longs.
13273 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13274   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13275   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13276   ins_cost(400);
13277   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13278             "CMOV$cmp $dst.hi,$src.hi" %}
13279   opcode(0x0F,0x40);
13280   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13281   ins_pipe( pipe_cmov_reg_long );
13282 %}
13283 
13284 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13285   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13286   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13287   ins_cost(500);
13288   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13289             "CMOV$cmp $dst.hi,$src.hi" %}
13290   opcode(0x0F,0x40);
13291   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13292   ins_pipe( pipe_cmov_reg_long );
13293 %}
13294 
13295 // Compare 2 longs and CMOVE ints.
13296 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13297   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13298   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13299   ins_cost(200);
13300   format %{ "CMOV$cmp $dst,$src" %}
13301   opcode(0x0F,0x40);
13302   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13303   ins_pipe( pipe_cmov_reg );
13304 %}
13305 
13306 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13307   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13308   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13309   ins_cost(250);
13310   format %{ "CMOV$cmp $dst,$src" %}
13311   opcode(0x0F,0x40);
13312   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13313   ins_pipe( pipe_cmov_mem );
13314 %}
13315 
13316 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13317   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13318   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13319   ins_cost(200);
13320   expand %{
13321     cmovII_reg_EQNE(cmp, flags, dst, src);
13322   %}
13323 %}
13324 
13325 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13326   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13327   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13328   ins_cost(250);
13329   expand %{
13330     cmovII_mem_EQNE(cmp, flags, dst, src);
13331   %}
13332 %}
13333 
13334 // Compare 2 longs and CMOVE ptrs.
13335 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13336   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13337   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13338   ins_cost(200);
13339   format %{ "CMOV$cmp $dst,$src" %}
13340   opcode(0x0F,0x40);
13341   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13342   ins_pipe( pipe_cmov_reg );
13343 %}
13344 
13345 // Compare 2 unsigned longs and CMOVE ptrs.
13346 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13347   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13348   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13349   ins_cost(200);
13350   expand %{
13351     cmovPP_reg_EQNE(cmp,flags,dst,src);
13352   %}
13353 %}
13354 
13355 // Compare 2 longs and CMOVE doubles
13356 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13358   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13359   ins_cost(200);
13360   expand %{
13361     fcmovDPR_regS(cmp,flags,dst,src);
13362   %}
13363 %}
13364 
13365 // Compare 2 longs and CMOVE doubles
13366 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13368   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13369   ins_cost(200);
13370   expand %{
13371     fcmovD_regS(cmp,flags,dst,src);
13372   %}
13373 %}
13374 
13375 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13377   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13378   ins_cost(200);
13379   expand %{
13380     fcmovFPR_regS(cmp,flags,dst,src);
13381   %}
13382 %}
13383 
13384 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13386   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13387   ins_cost(200);
13388   expand %{
13389     fcmovF_regS(cmp,flags,dst,src);
13390   %}
13391 %}
13392 
13393 //======
13394 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13395 // Same as cmpL_reg_flags_LEGT except must negate src
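// (The XOR/CMP/SBB sequence below computes 0 - src, i.e. it compares zero
// against src; combined with the commuted condition selected via
// cmpOp_commute, that yields the LE/GT result for src versus zero.)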
13396 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13397   match( Set flags (CmpL src zero ));
13398   effect( TEMP tmp );
13399   ins_cost(300);
13400   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13401             "CMP    $tmp,$src.lo\n\t"
13402             "SBB    $tmp,$src.hi\n\t" %}
13403   ins_encode( long_cmp_flags3(src, tmp) );
13404   ins_pipe( ialu_reg_reg_long );
13405 %}
13406 
13407 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13408 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13409 // requires a commuted test to get the same result.
13410 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13411   match( Set flags (CmpL src1 src2 ));
13412   effect( TEMP tmp );
13413   ins_cost(300);
13414   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13415             "MOV    $tmp,$src2.hi\n\t"
13416             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13417   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13418   ins_pipe( ialu_cr_reg_reg );
13419 %}
13420 
// Long compares reg < zero/reg OR reg >= zero/reg.
13422 // Just a wrapper for a normal branch, plus the predicate test
13423 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13424   match(If cmp flags);
13425   effect(USE labl);
13426   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13427   ins_cost(300);
13428   expand %{
13429     jmpCon(cmp,flags,labl);    // JGT or JLE...
13430   %}
13431 %}
13432 
13433 //======
13434 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13435 // Same as cmpUL_reg_flags_LEGT except must negate src
13436 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13437   match(Set flags (CmpUL src zero));
13438   effect(TEMP tmp);
13439   ins_cost(300);
13440   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13441             "CMP    $tmp,$src.lo\n\t"
13442             "SBB    $tmp,$src.hi" %}
13443   ins_encode(long_cmp_flags3(src, tmp));
13444   ins_pipe(ialu_reg_reg_long);
13445 %}
13446 
13447 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13448 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13449 // requires a commuted test to get the same result.
13450 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13451   match(Set flags (CmpUL src1 src2));
13452   effect(TEMP tmp);
13453   ins_cost(300);
13454   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13455             "MOV    $tmp,$src2.hi\n\t"
13456             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13457   ins_encode(long_cmp_flags2( src2, src1, tmp));
13458   ins_pipe(ialu_cr_reg_reg);
13459 %}
13460 
13461 // Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13462 // Just a wrapper for a normal branch, plus the predicate test
13463 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13464   match(If cmp flags);
13465   effect(USE labl);
13466   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13467   ins_cost(300);
13468   expand %{
13469     jmpCon(cmp, flags, labl);    // JGT or JLE...
13470   %}
13471 %}
13472 
13473 // Compare 2 longs and CMOVE longs.
13474 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13475   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13476   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13477   ins_cost(400);
13478   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13479             "CMOV$cmp $dst.hi,$src.hi" %}
13480   opcode(0x0F,0x40);
13481   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13482   ins_pipe( pipe_cmov_reg_long );
13483 %}
13484 
13485 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13486   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13487   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13488   ins_cost(500);
13489   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13490             "CMOV$cmp $dst.hi,$src.hi+4" %}
13491   opcode(0x0F,0x40);
13492   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13493   ins_pipe( pipe_cmov_reg_long );
13494 %}
13495 
13496 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13497   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13498   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13499   ins_cost(400);
13500   expand %{
13501     cmovLL_reg_LEGT(cmp, flags, dst, src);
13502   %}
13503 %}
13504 
13505 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13506   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13507   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13508   ins_cost(500);
13509   expand %{
13510     cmovLL_mem_LEGT(cmp, flags, dst, src);
13511   %}
13512 %}
13513 
13514 // Compare 2 longs and CMOVE ints.
13515 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13516   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13517   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13518   ins_cost(200);
13519   format %{ "CMOV$cmp $dst,$src" %}
13520   opcode(0x0F,0x40);
13521   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13522   ins_pipe( pipe_cmov_reg );
13523 %}
13524 
13525 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13526   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13527   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13528   ins_cost(250);
13529   format %{ "CMOV$cmp $dst,$src" %}
13530   opcode(0x0F,0x40);
13531   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13532   ins_pipe( pipe_cmov_mem );
13533 %}
13534 
13535 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13536   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13537   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13538   ins_cost(200);
13539   expand %{
13540     cmovII_reg_LEGT(cmp, flags, dst, src);
13541   %}
13542 %}
13543 
13544 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13545   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13546   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13547   ins_cost(250);
13548   expand %{
13549     cmovII_mem_LEGT(cmp, flags, dst, src);
13550   %}
13551 %}
13552 
13553 // Compare 2 longs and CMOVE ptrs.
13554 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13555   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13556   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13557   ins_cost(200);
13558   format %{ "CMOV$cmp $dst,$src" %}
13559   opcode(0x0F,0x40);
13560   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13561   ins_pipe( pipe_cmov_reg );
13562 %}
13563 
13564 // Compare 2 unsigned longs and CMOVE ptrs.
13565 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13566   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13567   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13568   ins_cost(200);
13569   expand %{
13570     cmovPP_reg_LEGT(cmp,flags,dst,src);
13571   %}
13572 %}
13573 
13574 // Compare 2 longs and CMOVE doubles
13575 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13576   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13577   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13578   ins_cost(200);
13579   expand %{
13580     fcmovDPR_regS(cmp,flags,dst,src);
13581   %}
13582 %}
13583 
13584 // Compare 2 longs and CMOVE doubles
13585 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13586   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13587   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13588   ins_cost(200);
13589   expand %{
13590     fcmovD_regS(cmp,flags,dst,src);
13591   %}
13592 %}
13593 
13594 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13595   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13596   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13597   ins_cost(200);
13598   expand %{
13599     fcmovFPR_regS(cmp,flags,dst,src);
13600   %}
13601 %}
13602 
13603 
13604 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13605   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13606   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13607   ins_cost(200);
13608   expand %{
13609     fcmovF_regS(cmp,flags,dst,src);
13610   %}
13611 %}
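// Note: the UseSSE predicates above select between the x87 and XMM CMove
// expansions -- UseSSE==0/<=1 keeps floats/doubles on the FPU stack
// (regFPR/regDPR via fcmovFPR_regS/fcmovDPR_regS), while UseSSE>=1/>=2 uses
// XMM registers (regF/regD via fcmovF_regS/fcmovD_regS).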
13612 
13613 
13614 // ============================================================================
13615 // Procedure Call/Return Instructions
13616 // Call Java Static Instruction
13617 // Note: If this code changes, the corresponding ret_addr_offset() and
13618 //       compute_padding() functions will have to be adjusted.
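//       (ret_addr_offset() reports where the return address lands relative
//       to the start of the call; compute_padding() together with
//       ins_alignment(4) presumably keeps the 4-byte call displacement
//       word-aligned so it can be patched atomically.  Both depend on the
//       exact bytes emitted here.)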
13619 instruct CallStaticJavaDirect(method meth) %{
13620   match(CallStaticJava);
13621   effect(USE meth);
13622 
13623   ins_cost(300);
13624   format %{ "CALL,static " %}
13625   opcode(0xE8); /* E8 cd */
13626   ins_encode( pre_call_resets,
13627               Java_Static_Call( meth ),
13628               call_epilog,
13629               post_call_FPU );
13630   ins_pipe( pipe_slow );
13631   ins_alignment(4);
13632 %}
13633 
13634 // Call Java Dynamic Instruction
13635 // Note: If this code changes, the corresponding ret_addr_offset() and
13636 //       compute_padding() functions will have to be adjusted.
13637 instruct CallDynamicJavaDirect(method meth) %{
13638   match(CallDynamicJava);
13639   effect(USE meth);
13640 
13641   ins_cost(300);
13642   format %{ "MOV    EAX,(oop)-1\n\t"
13643             "CALL,dynamic" %}
13644   opcode(0xE8); /* E8 cd */
13645   ins_encode( pre_call_resets,
13646               Java_Dynamic_Call( meth ),
13647               call_epilog,
13648               post_call_FPU );
13649   ins_pipe( pipe_slow );
13650   ins_alignment(4);
13651 %}
13652 
13653 // Call Runtime Instruction
13654 instruct CallRuntimeDirect(method meth) %{
13655   match(CallRuntime);
13656   effect(USE meth);
13657 
13658   ins_cost(300);
13659   format %{ "CALL,runtime " %}
13660   opcode(0xE8); /* E8 cd */
13661   // Use FFREEs to clear entries in float stack
13662   ins_encode( pre_call_resets,
13663               FFree_Float_Stack_All,
13664               Java_To_Runtime( meth ),
13665               post_call_FPU );
13666   ins_pipe( pipe_slow );
13667 %}
13668 
13669 // Call runtime without safepoint
13670 instruct CallLeafDirect(method meth) %{
13671   match(CallLeaf);
13672   effect(USE meth);
13673 
13674   ins_cost(300);
13675   format %{ "CALL_LEAF,runtime " %}
13676   opcode(0xE8); /* E8 cd */
13677   ins_encode( pre_call_resets,
13678               FFree_Float_Stack_All,
13679               Java_To_Runtime( meth ),
13680               Verify_FPU_For_Leaf, post_call_FPU );
13681   ins_pipe( pipe_slow );
13682 %}
13683 
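// Call runtime without safepoint, NoFP variant: unlike CallLeafDirect above,
// no x87 cleanup (FFree_Float_Stack_All / post_call_FPU) is emitted,
// presumably because the callee is known not to touch the FPU state.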
13684 instruct CallLeafNoFPDirect(method meth) %{
13685   match(CallLeafNoFP);
13686   effect(USE meth);
13687 
13688   ins_cost(300);
13689   format %{ "CALL_LEAF_NOFP,runtime " %}
13690   opcode(0xE8); /* E8 cd */
13691   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13692   ins_pipe( pipe_slow );
13693 %}
13694 
13695 
13696 // Return Instruction
13697 // Remove the return address & jump to it.
13698 instruct Ret() %{
13699   match(Return);
13700   format %{ "RET" %}
13701   opcode(0xC3);
13702   ins_encode(OpcP);
13703   ins_pipe( pipe_jmp );
13704 %}
13705 
13706 // Tail Call; Jump from runtime stub to Java code.
13707 // Also known as an 'interprocedural jump'.
13708 // Target of jump will eventually return to caller.
13709 // TailJump below removes the return address.
13710 // Don't use ebp for 'jump_target' because a MachEpilogNode has already been
13711 // emitted just above the TailCall which has reset ebp to the caller state.
13712 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13713   match(TailCall jump_target method_ptr);
13714   ins_cost(300);
13715   format %{ "JMP    $jump_target \t# EBX holds method" %}
13716   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13717   ins_encode( OpcP, RegOpc(jump_target) );
13718   ins_pipe( pipe_jmp );
13719 %}
13720 
13721 
13722 // Tail Jump; remove the return address; jump to target.
13723 // TailCall above leaves the return address around.
13724 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13725   match( TailJump jump_target ex_oop );
13726   ins_cost(300);
13727   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13728             "JMP    $jump_target " %}
13729   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13730   ins_encode( enc_pop_rdx,
13731               OpcP, RegOpc(jump_target) );
13732   ins_pipe( pipe_jmp );
13733 %}
13734 
13735 // Create exception oop: created by stack-crawling runtime code.
13736 // Created exception is now available to this handler, and is setup
13737 // just prior to jumping to this handler.  No code emitted.
13738 instruct CreateException( eAXRegP ex_oop )
13739 %{
13740   match(Set ex_oop (CreateEx));
13741 
13742   size(0);
13743   // use the following format syntax
13744   format %{ "# exception oop is in EAX; no code emitted" %}
13745   ins_encode();
13746   ins_pipe( empty );
13747 %}
13748 
13749 
13750 // Rethrow exception:
13751 // The exception oop will come in the first argument position.
13752 // Then JUMP (not call) to the rethrow stub code.
13753 instruct RethrowException()
13754 %{
13755   match(Rethrow);
13756 
13757   // use the following format syntax
13758   format %{ "JMP    rethrow_stub" %}
13759   ins_encode(enc_rethrow);
13760   ins_pipe( pipe_jmp );
13761 %}
13762 
13763 // inlined locking and unlocking
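// Two fast-lock variants follow: the RTM form needs extra temps (cx1, cx2)
// and passes the RTM/profiling counters to fast_lock(), while the non-RTM
// form passes noreg/nullptr for those arguments.  Both kill the box register
// and first load the current thread into a TEMP register.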
13764 
13765 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
13766   predicate(Compile::current()->use_rtm());
13767   match(Set cr (FastLock object box));
13768   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
13769   ins_cost(300);
13770   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13771   ins_encode %{
13772     __ get_thread($thread$$Register);
13773     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13774                  $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
13775                  _rtm_counters, _stack_rtm_counters,
13776                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13777                  true, ra_->C->profile_rtm());
13778   %}
13779   ins_pipe(pipe_slow);
13780 %}
13781 
13782 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
13783   predicate(!Compile::current()->use_rtm());
13784   match(Set cr (FastLock object box));
13785   effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
13786   ins_cost(300);
13787   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13788   ins_encode %{
13789     __ get_thread($thread$$Register);
13790     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13791                  $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
13792   %}
13793   ins_pipe(pipe_slow);
13794 %}
13795 
13796 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13797   match(Set cr (FastUnlock object box));
13798   effect(TEMP tmp, USE_KILL box);
13799   ins_cost(300);
13800   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13801   ins_encode %{
13802     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13803   %}
13804   ins_pipe(pipe_slow);
13805 %}
13806 
13807 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13808   predicate(Matcher::vector_length(n) <= 32);
13809   match(Set dst (MaskAll src));
13810   format %{ "mask_all_evexL_LT32 $dst, $src" %}
13811   ins_encode %{
13812     int mask_len = Matcher::vector_length(this);
13813     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13814   %}
13815   ins_pipe( pipe_slow );
13816 %}
13817 
13818 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13819   predicate(Matcher::vector_length(n) > 32);
13820   match(Set dst (MaskAll src));
13821   effect(TEMP ktmp);
13822   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13823   ins_encode %{
13824     int mask_len = Matcher::vector_length(this);
13825     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13826   %}
13827   ins_pipe( pipe_slow );
13828 %}
13829 
13830 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13831   predicate(Matcher::vector_length(n) > 32);
13832   match(Set dst (MaskAll src));
13833   effect(TEMP ktmp);
13834   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13835   ins_encode %{
13836     int mask_len = Matcher::vector_length(this);
13837     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13838   %}
13839   ins_pipe( pipe_slow );
13840 %}
13841 
13842 // ============================================================================
13843 // Safepoint Instruction
13844 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13845   match(SafePoint poll);
13846   effect(KILL cr, USE poll);
13847 
13848   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13849   ins_cost(125);
13850   // EBP would need size(3)
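  // Encoding sketch (informational): TEST r/m32,r32 is 0x85 /r, so with EAX
  // and a plain [reg] base (mod=00) the instruction is 2 bytes; a [EBP] base
  // cannot be encoded with mod=00 and would need a disp8, giving 3 bytes.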
13851   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13852   ins_encode %{
13853     __ relocate(relocInfo::poll_type);
13854     address pre_pc = __ pc();
13855     __ testl(rax, Address($poll$$Register, 0));
13856     address post_pc = __ pc();
13857     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13858   %}
13859   ins_pipe(ialu_reg_mem);
13860 %}
13861 
13862 
13863 // ============================================================================
13864 // This name is KNOWN by the ADLC and cannot be changed.
13865 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13866 // for this node.
13867 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13868   match(Set dst (ThreadLocal));
13869   effect(DEF dst, KILL cr);
13870 
13871   format %{ "MOV    $dst, Thread::current()" %}
13872   ins_encode %{
13873     Register dstReg = as_Register($dst$$reg);
13874     __ get_thread(dstReg);
13875   %}
13876   ins_pipe( ialu_reg_fat );
13877 %}
13878 
13879 
13880 
13881 //----------PEEPHOLE RULES-----------------------------------------------------
13882 // These must follow all instruction definitions as they use the names
13883 // defined in the instruction definitions.
13884 //
13885 // peepmatch ( root_instr_name [preceding_instruction]* );
13886 //
13887 // peepconstraint %{
13888 // (instruction_number.operand_name relational_op instruction_number.operand_name
13889 //  [, ...] );
13890 // // instruction numbers are zero-based using left to right order in peepmatch
13891 //
13892 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13893 // // provide an instruction_number.operand_name for each operand that appears
13894 // // in the replacement instruction's match rule
13895 //
13896 // ---------VM FLAGS---------------------------------------------------------
13897 //
13898 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13899 //
13900 // Each peephole rule is given an identifying number starting with zero and
13901 // increasing by one in the order seen by the parser.  An individual peephole
13902 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13903 // on the command-line.
13904 //
13905 // ---------CURRENT LIMITATIONS----------------------------------------------
13906 //
13907 // Only match adjacent instructions in same basic block
13908 // Only equality constraints
13909 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13910 // Only one replacement instruction
13911 //
13912 // ---------EXAMPLE----------------------------------------------------------
13913 //
13914 // // pertinent parts of existing instructions in architecture description
13915 // instruct movI(rRegI dst, rRegI src) %{
13916 //   match(Set dst (CopyI src));
13917 // %}
13918 //
13919 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13920 //   match(Set dst (AddI dst src));
13921 //   effect(KILL cr);
13922 // %}
13923 //
13924 // // Change (inc mov) to lea
13925 // peephole %{
13926 //   // increment preceded by register-register move
13927 //   peepmatch ( incI_eReg movI );
13928 //   // require that the destination register of the increment
13929 //   // match the destination register of the move
13930 //   peepconstraint ( 0.dst == 1.dst );
13931 //   // construct a replacement instruction that sets
13932 //   // the destination to ( move's source register + one )
13933 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13934 // %}
13935 //
13936 // The implementation no longer uses movX instructions, since the
13937 // machine-independent system no longer uses CopyX nodes.
13938 //
13939 // peephole %{
13940 //   peepmatch ( incI_eReg movI );
13941 //   peepconstraint ( 0.dst == 1.dst );
13942 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13943 // %}
13944 //
13945 // peephole %{
13946 //   peepmatch ( decI_eReg movI );
13947 //   peepconstraint ( 0.dst == 1.dst );
13948 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13949 // %}
13950 //
13951 // peephole %{
13952 //   peepmatch ( addI_eReg_imm movI );
13953 //   peepconstraint ( 0.dst == 1.dst );
13954 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13955 // %}
13956 //
13957 // peephole %{
13958 //   peepmatch ( addP_eReg_imm movP );
13959 //   peepconstraint ( 0.dst == 1.dst );
13960 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13961 // %}
13962 
13963 // // Change load of spilled value to only a spill
13964 // instruct storeI(memory mem, rRegI src) %{
13965 //   match(Set mem (StoreI mem src));
13966 // %}
13967 //
13968 // instruct loadI(rRegI dst, memory mem) %{
13969 //   match(Set dst (LoadI mem));
13970 // %}
13971 //
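// In effect: when a loadI immediately follows a storeI to the same address
// and would reload the just-stored value into the register that already
// holds it (1.src == 0.dst, 1.mem == 0.mem), the pair is collapsed to the
// store alone, since the load is redundant.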
13972 peephole %{
13973   peepmatch ( loadI storeI );
13974   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13975   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13976 %}
13977 
13978 //----------SMARTSPILL RULES---------------------------------------------------
13979 // These must follow all instruction definitions as they use the names
13980 // defined in the instruction definitions.