1 //
    2 // Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
// EBX, ESI, and EDI were previously set as save-on-entry for Java code, but
// SOE was turned off because of the frequent use of uncommon traps.  Now that
// the allocator is better, ESI and EDI are SOE registers again.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
// Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here's the trick: FPR1 is really st(0), except in the midst of emitting
// assembly for a machnode.  During emission the FPU stack is pushed, making
// FPR1 == st(1) temporarily.  However, at any safepoint the stack will not
// have this extra element, so FPR1 == st(0) from the oopMap viewpoint.  This
// numbering weirdness forces the instruction encoding to play games with the
// register encode to correct for the 0/1 issue.  See
// MachSpillCopyNode::implementation, where it does flt->flt moves, for an
// example.
//
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
  208 // Class of integer register pairs (excluding EBP and EDI);
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Class of integer register pairs that aligns with calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  220 // Not AX or DX (and neither EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  222 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
  225 // Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
  247 // Must be visible to the DFA in dfa_x86_32.cpp
  248 extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
  252 #define   RELOC_IMM32    Assembler::imm_operand
  253 #define   RELOC_DISP32   Assembler::disp32_operand
  254 
  255 #define __ _masm.
  256 
  257 // How to find the high register of a Long pair, given the low register
  258 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
  259 #define   HIGH_FROM_LOW_ENC(x) ((x)+2)
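// (For example, with the encodings defined above: EAX (0) -> EDX (2),
// ECX (1) -> EBX (3), and EBP (5) -> EDI (7), matching the long pairs
// EDX:EAX, EBX:ECX, and EDI:EBP noted in the alloc_class comment above.)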
  260 
  261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  262 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  263 // fast versions of NegF/NegD and AbsF/AbsD.
  264 
  265 void reg_mask_init() {}
  266 
// Note: 'double' and 'long long' have only 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value into the 128-bit operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
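// (A worked example of the alignment: each pool pointer below starts a
// multiple of 16 bytes into fp_signmask_pool; if that address happens to be
// only 8-byte aligned, masking with ~0xF steps it back by 8 bytes, and the
// extra 128 bits reserved in fp_signmask_pool keep the rounded-down operand
// inside the array.)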
  277 
// Buffer for 128-bit masks used by SSE instructions.
  279 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  280 
  281 // Static initialization during VM startup.
  282 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  284 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
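// (For example, an AbsF can be implemented by ANDing the XMM value with
// *float_signmask_pool, which clears the sign bit of each 32-bit lane, and a
// NegF by XORing with *float_signflip_pool; the double_* pools do the same
// for 64-bit lanes.)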
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
  299 
// !!!!! Special hack to get all types of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
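// (The 10 bytes are the 5-byte MOV used for the inline cache plus the 5-byte
// CALL itself; compare CallDynamicJavaDirectNode::compute_padding() below,
// which skips the same MOV and the call opcode byte.)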
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned to
  323 // ensure that it does not span a cache line so that it can be patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned to
  331 // ensure that it does not span a cache line so that it can be patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
  338 
  339 // EMIT_RM()
  340 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   cbuf.insts()->emit_int8(c);
  343 }
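// (The three fields map onto the x86 ModRM/SIB byte layout: bits 7..6 are mod
// (or the SIB scale), bits 5..3 are reg (or the SIB index), and bits 2..0 are
// r/m (or the SIB base).  For instance, emit_rm(cbuf, 0x3, dst, src) produces
// a register-direct ModRM byte.)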
  344 
  345 // EMIT_CC()
  346 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   cbuf.insts()->emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(CodeBuffer &cbuf, int code) {
  353   cbuf.insts()->emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  358   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  359   emit_opcode(cbuf, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(CodeBuffer &cbuf, int d8) {
  364   cbuf.insts()->emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(CodeBuffer &cbuf, int d16) {
  369   cbuf.insts()->emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(CodeBuffer &cbuf, int d32) {
  374   cbuf.insts()->emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  381   cbuf.insts()->emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  393   cbuf.insts()->emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  398   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);                      // 8-bit displacement
  403   } else {
  404     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);                      // 32-bit displacement
  407   }
  408 }
  409 
// Emit a register/memory operand: ModRM (and SIB) byte(s) plus displacement
// for [base + index*scale + displace].
  411 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // If there is no index (index encoding 0x4 means "none") and no scale, and
  // the base is not ESP, use the form without a SIB byte.
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(cbuf, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(cbuf, 0x1, reg_encoding, base);
  423         emit_d8(cbuf, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (cbuf, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(cbuf, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (cbuf, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  450       emit_rm(cbuf, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  456         emit_rm(cbuf, scale, index, base);
  457         emit_d8(cbuf, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  462           emit_rm(cbuf, scale, index, 0x04);
  463         } else {
  464           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  465           emit_rm(cbuf, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (cbuf, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
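// (Example, as used by impl_helper() below: encode_RegMem(cbuf, reg, ESP_enc,
// 0x4, 0, offset, relocInfo::none) encodes [ESP + offset]; because the base is
// ESP a SIB byte is always emitted, followed by no displacement, an 8-bit, or
// a 32-bit displacement depending on the offset.)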
  476 
  477 
  478 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( cbuf, 0x8B );
  483     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
  486 
  487 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss instructions set ZF,PF,CF flags and
  493   // zero OF,AF,SF for NaN values.
  494   // Fixup flags by zeroing ZF,PF so that compare of NaN
  495   // values returns 'less than' result (CF is set).
  496   // Leave the rest of flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
  506 
  507 static void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
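// (The value left in dst follows the Java fcmpl/dcmpl convention after a
// ucomiss/ucomisd: -1 for unordered (NaN) or less-than, 0 for equal, and +1
// for greater-than.)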
  516 
  517 
  518 //=============================================================================
  519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612   C2_MacroAssembler _masm(&cbuf);
  613 
  614   int framesize = C->output()->frame_size_in_bytes();
  615   int bangsize = C->output()->bang_size_in_bytes();
  616 
  617   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != nullptr);
  618 
  619   C->output()->set_frame_complete(cbuf.insts_size());
  620 
  621   if (C->has_mach_constant_base_node()) {
  622     // NOTE: We set the table base offset here because users might be
  623     // emitted before MachConstantBaseNode.
  624     ConstantTable& constant_table = C->output()->constant_table();
  625     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  626   }
  627 }
  628 
  629 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  630   return MachNode::size(ra_); // too many variables; just compute it the hard way
  631 }
  632 
  633 int MachPrologNode::reloc() const {
  634   return 0; // a large enough number
  635 }
  636 
  637 //=============================================================================
  638 #ifndef PRODUCT
  639 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  640   Compile *C = ra_->C;
  641   int framesize = C->output()->frame_size_in_bytes();
  642   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
  644   framesize -= 2*wordSize;
  645 
  646   if (C->max_vector_size() > 16) {
  647     st->print("VZEROUPPER");
  648     st->cr(); st->print("\t");
  649   }
  650   if (C->in_24_bit_fp_mode()) {
  651     st->print("FLDCW  standard control word");
  652     st->cr(); st->print("\t");
  653   }
  654   if (framesize) {
  655     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  656     st->cr(); st->print("\t");
  657   }
  658   st->print_cr("POPL   EBP"); st->print("\t");
  659   if (do_polling() && C->is_method_compilation()) {
  660     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  661               "JA      #safepoint_stub\t"
  662               "# Safepoint: poll for GC");
  663   }
  664 }
  665 #endif
  666 
  667 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  668   Compile *C = ra_->C;
  669   MacroAssembler _masm(&cbuf);
  670 
  671   if (C->max_vector_size() > 16) {
  672     // Clear upper bits of YMM registers when current compiled code uses
  673     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  674     _masm.vzeroupper();
  675   }
  676   // If method set FPU control word, restore to standard control word
  677   if (C->in_24_bit_fp_mode()) {
  678     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  679   }
  680 
  681   int framesize = C->output()->frame_size_in_bytes();
  682   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
  684   framesize -= 2*wordSize;
  685 
  686   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  687 
  688   if (framesize >= 128) {
  689     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  690     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  691     emit_d32(cbuf, framesize);
  692   } else if (framesize) {
  693     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  695     emit_d8(cbuf, framesize);
  696   }
  697 
  698   emit_opcode(cbuf, 0x58 | EBP_enc);
  699 
  700   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  701     __ reserved_stack_check();
  702   }
  703 
  704   if (do_polling() && C->is_method_compilation()) {
  705     Register thread = as_Register(EBX_enc);
  706     MacroAssembler masm(&cbuf);
  707     __ get_thread(thread);
  708     Label dummy_label;
  709     Label* code_stub = &dummy_label;
  710     if (!C->output()->in_scratch_emit_size()) {
  711       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  712       C->output()->add_stub(stub);
  713       code_stub = &stub->entry();
  714     }
  715     __ relocate(relocInfo::poll_return_type);
  716     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  717   }
  718 }
  719 
  720 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  721   return MachNode::size(ra_); // too many variables; just compute it
  722                               // the hard way
  723 }
  724 
  725 int MachEpilogNode::reloc() const {
  726   return 0; // a large enough number
  727 }
  728 
  729 const Pipeline * MachEpilogNode::pipeline() const {
  730   return MachNode::pipeline_class();
  731 }
  732 
  733 //=============================================================================
  734 
  735 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  736 static enum RC rc_class( OptoReg::Name reg ) {
  737 
  738   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  739   if (OptoReg::is_stack(reg)) return rc_stack;
  740 
  741   VMReg r = OptoReg::as_VMReg(reg);
  742   if (r->is_Register()) return rc_int;
  743   if (r->is_FloatRegister()) {
  744     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  745     return rc_float;
  746   }
  747   if (r->is_KRegister()) return rc_kreg;
  748   assert(r->is_XMMRegister(), "must be");
  749   return rc_xmm;
  750 }
  751 
  752 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  753                         int opcode, const char *op_str, int size, outputStream* st ) {
  754   if( cbuf ) {
  755     emit_opcode  (*cbuf, opcode );
  756     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  757 #ifndef PRODUCT
  758   } else if( !do_size ) {
  759     if( size != 0 ) st->print("\n\t");
  760     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  761       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  762       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  763     } else { // FLD, FST, PUSH, POP
  764       st->print("%s [ESP + #%d]",op_str,offset);
  765     }
  766 #endif
  767   }
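  // Size accounting: 3 bytes = opcode + ModRM + SIB (the base is ESP), plus
  // 0, 1, or 4 bytes of displacement.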
  768   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  769   return size+3+offset_size;
  770 }
  771 
  772 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  773 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  774                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  775   int in_size_in_bits = Assembler::EVEX_32bit;
  776   int evex_encoding = 0;
  777   if (reg_lo+1 == reg_hi) {
  778     in_size_in_bits = Assembler::EVEX_64bit;
  779     evex_encoding = Assembler::VEX_W;
  780   }
  781   if (cbuf) {
  782     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX for spill memory operations,
    //                          since it maps more cases to a single-byte displacement.
  785     _masm.set_managed();
  786     if (reg_lo+1 == reg_hi) { // double move?
  787       if (is_load) {
  788         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  789       } else {
  790         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  791       }
  792     } else {
  793       if (is_load) {
  794         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  795       } else {
  796         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  797       }
  798     }
  799 #ifndef PRODUCT
  800   } else if (!do_size) {
  801     if (size != 0) st->print("\n\t");
  802     if (reg_lo+1 == reg_hi) { // double move?
  803       if (is_load) st->print("%s %s,[ESP + #%d]",
  804                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  805                               Matcher::regName[reg_lo], offset);
  806       else         st->print("MOVSD  [ESP + #%d],%s",
  807                               offset, Matcher::regName[reg_lo]);
  808     } else {
  809       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  810                               Matcher::regName[reg_lo], offset);
  811       else         st->print("MOVSS  [ESP + #%d],%s",
  812                               offset, Matcher::regName[reg_lo]);
  813     }
  814 #endif
  815   }
  816   bool is_single_byte = false;
  817   if ((UseAVX > 2) && (offset != 0)) {
  818     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  819   }
  820   int offset_size = 0;
  821   if (UseAVX > 2 ) {
  822     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  823   } else {
  824     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  825   }
  826   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  827   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  828   return size+5+offset_size;
  829 }
  830 
  831 
  832 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  833                             int src_hi, int dst_hi, int size, outputStream* st ) {
  834   if (cbuf) {
  835     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic between full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  837     _masm.set_managed();
  838     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  839       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  840                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  841     } else {
  842       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  843                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  844     }
  845 #ifndef PRODUCT
  846   } else if (!do_size) {
  847     if (size != 0) st->print("\n\t");
  848     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
  849       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  850         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  851       } else {
  852         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  853       }
  854     } else {
  855       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  856         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  857       } else {
  858         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  859       }
  860     }
  861 #endif
  862   }
  863   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  864   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  865   int sz = (UseAVX > 2) ? 6 : 4;
  866   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  867       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  868   return size + sz;
  869 }
  870 
  871 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  872                             int src_hi, int dst_hi, int size, outputStream* st ) {
  873   // 32-bit
  874   if (cbuf) {
  875     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic between full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  877     _masm.set_managed();
  878     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  879              as_Register(Matcher::_regEncode[src_lo]));
  880 #ifndef PRODUCT
  881   } else if (!do_size) {
  882     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  883 #endif
  884   }
  885   return (UseAVX> 2) ? 6 : 4;
  886 }
  887 
  888 
  889 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  890                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  891   // 32-bit
  892   if (cbuf) {
  893     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic between full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  895     _masm.set_managed();
  896     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  897              as_XMMRegister(Matcher::_regEncode[src_lo]));
  898 #ifndef PRODUCT
  899   } else if (!do_size) {
  900     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  901 #endif
  902   }
  903   return (UseAVX> 2) ? 6 : 4;
  904 }
  905 
  906 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  907   if( cbuf ) {
  908     emit_opcode(*cbuf, 0x8B );
  909     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  910 #ifndef PRODUCT
  911   } else if( !do_size ) {
  912     if( size != 0 ) st->print("\n\t");
  913     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  914 #endif
  915   }
  916   return size+2;
  917 }
  918 
  919 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  920                                  int offset, int size, outputStream* st ) {
  921   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  922     if( cbuf ) {
  923       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  924       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  925 #ifndef PRODUCT
  926     } else if( !do_size ) {
  927       if( size != 0 ) st->print("\n\t");
  928       st->print("FLD    %s",Matcher::regName[src_lo]);
  929 #endif
  930     }
  931     size += 2;
  932   }
  933 
  934   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  935   const char *op_str;
  936   int op;
  937   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  938     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  939     op = 0xDD;
  940   } else {                   // 32-bit store
  941     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  942     op = 0xD9;
  943     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  944   }
  945 
  946   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  947 }
  948 
  949 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  950 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  951                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  952 
  953 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  954                             int stack_offset, int reg, uint ireg, outputStream* st);
  955 
  956 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  957                                      int dst_offset, uint ireg, outputStream* st) {
  958   if (cbuf) {
  959     MacroAssembler _masm(cbuf);
  960     switch (ireg) {
  961     case Op_VecS:
  962       __ pushl(Address(rsp, src_offset));
  963       __ popl (Address(rsp, dst_offset));
  964       break;
  965     case Op_VecD:
  966       __ pushl(Address(rsp, src_offset));
  967       __ popl (Address(rsp, dst_offset));
  968       __ pushl(Address(rsp, src_offset+4));
  969       __ popl (Address(rsp, dst_offset+4));
  970       break;
  971     case Op_VecX:
  972       __ movdqu(Address(rsp, -16), xmm0);
  973       __ movdqu(xmm0, Address(rsp, src_offset));
  974       __ movdqu(Address(rsp, dst_offset), xmm0);
  975       __ movdqu(xmm0, Address(rsp, -16));
  976       break;
  977     case Op_VecY:
  978       __ vmovdqu(Address(rsp, -32), xmm0);
  979       __ vmovdqu(xmm0, Address(rsp, src_offset));
  980       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  981       __ vmovdqu(xmm0, Address(rsp, -32));
  982       break;
  983     case Op_VecZ:
  984       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  985       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  986       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  987       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  988       break;
  989     default:
  990       ShouldNotReachHere();
  991     }
  992 #ifndef PRODUCT
  993   } else {
  994     switch (ireg) {
  995     case Op_VecS:
  996       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  997                 "popl    [rsp + #%d]",
  998                 src_offset, dst_offset);
  999       break;
 1000     case Op_VecD:
 1001       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
 1005                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1006       break;
 1007      case Op_VecX:
 1008       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1009                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1010                 "movdqu  [rsp + #%d], xmm0\n\t"
 1011                 "movdqu  xmm0, [rsp - #16]",
 1012                 src_offset, dst_offset);
 1013       break;
 1014     case Op_VecY:
 1015       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1016                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1017                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1018                 "vmovdqu xmm0, [rsp - #32]",
 1019                 src_offset, dst_offset);
 1020       break;
 1021     case Op_VecZ:
      st->print("evmovdquq [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "evmovdquq xmm0, [rsp + #%d]\n\t"
                "evmovdquq [rsp + #%d], xmm0\n\t"
                "evmovdquq xmm0, [rsp - #64]",
 1026                 src_offset, dst_offset);
 1027       break;
 1028     default:
 1029       ShouldNotReachHere();
 1030     }
 1031 #endif
 1032   }
 1033 }
 1034 
 1035 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1036   // Get registers to move
 1037   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1038   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1039   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1040   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1041 
 1042   enum RC src_second_rc = rc_class(src_second);
 1043   enum RC src_first_rc = rc_class(src_first);
 1044   enum RC dst_second_rc = rc_class(dst_second);
 1045   enum RC dst_first_rc = rc_class(dst_first);
 1046 
 1047   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1048 
 1049   // Generate spill code!
 1050   int size = 0;
 1051 
 1052   if( src_first == dst_first && src_second == dst_second )
 1053     return size;            // Self copy, no move
 1054 
 1055   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 1056     uint ireg = ideal_reg();
 1057     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1058     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1059     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1060     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1061       // mem -> mem
 1062       int src_offset = ra_->reg2offset(src_first);
 1063       int dst_offset = ra_->reg2offset(dst_first);
 1064       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1065     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1066       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1067     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1068       int stack_offset = ra_->reg2offset(dst_first);
 1069       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1070     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1071       int stack_offset = ra_->reg2offset(src_first);
 1072       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1073     } else {
 1074       ShouldNotReachHere();
 1075     }
 1076     return 0;
 1077   }
 1078 
 1079   // --------------------------------------
 1080   // Check for mem-mem move.  push/pop to move.
 1081   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1082     if( src_second == dst_first ) { // overlapping stack copy ranges
 1083       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1084       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1085       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1086       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1087     }
 1088     // move low bits
 1089     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1090     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1091     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1092       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1093       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1094     }
 1095     return size;
 1096   }
 1097 
 1098   // --------------------------------------
 1099   // Check for integer reg-reg copy
 1100   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1101     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1102 
 1103   // Check for integer store
 1104   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1105     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1106 
 1107   // Check for integer load
 1108   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1109     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1110 
 1111   // Check for integer reg-xmm reg copy
 1112   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1113     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1114             "no 64 bit integer-float reg moves" );
 1115     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1116   }
 1117   // --------------------------------------
 1118   // Check for float reg-reg copy
 1119   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1120     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1121             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1122     if( cbuf ) {
 1123 
 1124       // Note the mucking with the register encode to compensate for the 0/1
 1125       // indexing issue mentioned in a comment in the reg_def sections
 1126       // for FPR registers many lines above here.
 1127 
 1128       if( src_first != FPR1L_num ) {
 1129         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1130         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1131         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1132         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1133      } else {
 1134         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1135         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1136      }
 1137 #ifndef PRODUCT
 1138     } else if( !do_size ) {
 1139       if( size != 0 ) st->print("\n\t");
 1140       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1141       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1142 #endif
 1143     }
 1144     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1145   }
 1146 
 1147   // Check for float store
 1148   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1149     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1150   }
 1151 
 1152   // Check for float load
 1153   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1154     int offset = ra_->reg2offset(src_first);
 1155     const char *op_str;
 1156     int op;
 1157     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1158       op_str = "FLD_D";
 1159       op = 0xDD;
 1160     } else {                   // 32-bit load
 1161       op_str = "FLD_S";
 1162       op = 0xD9;
 1163       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1164     }
 1165     if( cbuf ) {
 1166       emit_opcode  (*cbuf, op );
 1167       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1168       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1169       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1170 #ifndef PRODUCT
 1171     } else if( !do_size ) {
 1172       if( size != 0 ) st->print("\n\t");
 1173       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1174 #endif
 1175     }
 1176     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1177     return size + 3+offset_size+2;
 1178   }
 1179 
 1180   // Check for xmm reg-reg copy
 1181   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1182     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1183             (src_first+1 == src_second && dst_first+1 == dst_second),
 1184             "no non-adjacent float-moves" );
 1185     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1186   }
 1187 
 1188   // Check for xmm reg-integer reg copy
 1189   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1190     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1191             "no 64 bit float-integer reg moves" );
 1192     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1193   }
 1194 
 1195   // Check for xmm store
 1196   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1197     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1198   }
 1199 
 1200   // Check for float xmm load
 1201   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1202     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1203   }
 1204 
 1205   // Copy from float reg to xmm reg
 1206   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1207     // copy to the top of stack from floating point reg
 1208     // and use LEA to preserve flags
 1209     if( cbuf ) {
 1210       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1211       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1212       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1213       emit_d8(*cbuf,0xF8);
 1214 #ifndef PRODUCT
 1215     } else if( !do_size ) {
 1216       if( size != 0 ) st->print("\n\t");
 1217       st->print("LEA    ESP,[ESP-8]");
 1218 #endif
 1219     }
 1220     size += 4;
 1221 
 1222     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1223 
 1224     // Copy from the temp memory to the xmm reg.
 1225     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1226 
 1227     if( cbuf ) {
 1228       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1229       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1230       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1231       emit_d8(*cbuf,0x08);
 1232 #ifndef PRODUCT
 1233     } else if( !do_size ) {
 1234       if( size != 0 ) st->print("\n\t");
 1235       st->print("LEA    ESP,[ESP+8]");
 1236 #endif
 1237     }
 1238     size += 4;
 1239     return size;
 1240   }
 1241 
 1242   // AVX-512 opmask specific spilling.
 1243   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1244     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1245     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1246     int offset = ra_->reg2offset(src_first);
 1247     if (cbuf != nullptr) {
 1248       MacroAssembler _masm(cbuf);
 1249       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1250 #ifndef PRODUCT
 1251     } else {
 1252       st->print("KMOV    %s, [ESP + %d]", Matcher::regName[dst_first], offset);
 1253 #endif
 1254     }
 1255     return 0;
 1256   }
 1257 
 1258   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1259     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1260     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1261     int offset = ra_->reg2offset(dst_first);
 1262     if (cbuf != nullptr) {
 1263       MacroAssembler _masm(cbuf);
 1264       __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1265 #ifndef PRODUCT
 1266     } else {
 1267       st->print("KMOV    [ESP + %d], %s", offset, Matcher::regName[src_first]);
 1268 #endif
 1269     }
 1270     return 0;
 1271   }
 1272 
 1273   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1274     Unimplemented();
 1275     return 0;
 1276   }
 1277 
 1278   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1279     Unimplemented();
 1280     return 0;
 1281   }
 1282 
 1283   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1284     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1285     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1286     if (cbuf != nullptr) {
 1287       MacroAssembler _masm(cbuf);
 1288       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1289 #ifndef PRODUCT
 1290     } else {
 1291       st->print("KMOV    %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
 1292 #endif
 1293     }
 1294     return 0;
 1295   }
 1296 
 1297   assert( size > 0, "missed a case" );
 1298 
 1299   // --------------------------------------------------------------------
  // Check for a second word still needing to be moved.
 1301   if( src_second == dst_second )
 1302     return size;               // Self copy; no move
 1303   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1304 
 1305   // Check for second word int-int move
 1306   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1307     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1308 
 1309   // Check for second word integer store
 1310   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1311     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1312 
 1313   // Check for second word integer load
 1314   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1315     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1316 
 1317   Unimplemented();
 1318   return 0; // Mute compiler
 1319 }
 1320 
 1321 #ifndef PRODUCT
 1322 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1323   implementation( nullptr, ra_, false, st );
 1324 }
 1325 #endif
 1326 
 1327 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1328   implementation( &cbuf, ra_, false, nullptr );
 1329 }
 1330 
 1331 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1332   return MachNode::size(ra_);
 1333 }
 1334 
 1335 
 1336 //=============================================================================
 1337 #ifndef PRODUCT
 1338 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1339   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1340   int reg = ra_->get_reg_first(this);
 1341   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1342 }
 1343 #endif
 1344 
 1345 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1346   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1347   int reg = ra_->get_encode(this);
 1348   if( offset >= 128 ) {
 1349     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1350     emit_rm(cbuf, 0x2, reg, 0x04);
 1351     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1352     emit_d32(cbuf, offset);
 1353   }
 1354   else {
 1355     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1356     emit_rm(cbuf, 0x1, reg, 0x04);
 1357     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1358     emit_d8(cbuf, offset);
 1359   }
 1360 }
 1361 
 1362 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1363   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1364   if( offset >= 128 ) {
 1365     return 7;
 1366   }
 1367   else {
 1368     return 4;
 1369   }
 1370 }
 1371 
 1372 //=============================================================================
 1373 #ifndef PRODUCT
 1374 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1375   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1376   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1377   st->print_cr("\tNOP");
 1378   st->print_cr("\tNOP");
 1379   if( !OptoBreakpoint )
 1380     st->print_cr("\tNOP");
 1381 }
 1382 #endif
 1383 
 1384 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1385   MacroAssembler masm(&cbuf);
 1386   masm.ic_check(CodeEntryAlignment);
 1387 }
 1388 
 1389 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1390   return MachNode::size(ra_); // too many variables; just compute it
 1391                               // the hard way
 1392 }
 1393 
 1394 
 1395 //=============================================================================
 1396 
 1397 // Vector calling convention not supported.
 1398 bool Matcher::supports_vector_calling_convention() {
 1399   return false;
 1400 }
 1401 
 1402 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1403   Unimplemented();
 1404   return OptoRegPair(0, 0);
 1405 }
 1406 
 1407 // Is this branch offset short enough that a short branch can be used?
 1408 //
 1409 // NOTE: If the platform does not provide any short branch variants, then
 1410 //       this method should return false for offset 0.
 1411 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 1415   offset -= br_size;
 1416 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly smaller.
 1419   if (rule == jmpConUCF2_rule)
 1420     return (-126 <= offset && offset <= 125);
 1421   return (-128 <= offset && offset <= 127);
 1422 }
 1423 
 1424 // Return whether or not this register is ever used as an argument.  This
 1425 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1426 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1428 bool Matcher::can_be_java_arg( int reg ) {
 1429   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1430   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1431   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1432   return false;
 1433 }
 1434 
 1435 bool Matcher::is_spillable_arg( int reg ) {
 1436   return can_be_java_arg(reg);
 1437 }
 1438 
 1439 uint Matcher::int_pressure_limit()
 1440 {
 1441   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1442 }
 1443 
 1444 uint Matcher::float_pressure_limit()
 1445 {
 1446   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1447 }
 1448 
 1449 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when it is faster than
  // code which uses a multiply, but only when the constant divisor
  // fits into 32 bits (min_jint is excluded because negating it does
  // not yield a correct positive 32-bit value).
 1455   return VM_Version::has_fast_idiv() &&
 1456          (divisor == (int)divisor && divisor != min_jint);
 1457 }
 1458 
 1459 // Register for DIVI projection of divmodI
 1460 RegMask Matcher::divI_proj_mask() {
 1461   return EAX_REG_mask();
 1462 }
 1463 
 1464 // Register for MODI projection of divmodI
 1465 RegMask Matcher::modI_proj_mask() {
 1466   return EDX_REG_mask();
 1467 }
 1468 
 1469 // Register for DIVL projection of divmodL
 1470 RegMask Matcher::divL_proj_mask() {
 1471   ShouldNotReachHere();
 1472   return RegMask();
 1473 }
 1474 
 1475 // Register for MODL projection of divmodL
 1476 RegMask Matcher::modL_proj_mask() {
 1477   ShouldNotReachHere();
 1478   return RegMask();
 1479 }
 1480 
 1481 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1482   return NO_REG_mask();
 1483 }
 1484 
// Returns true if the high 32 bits of the value are known to be zero.
 1486 bool is_operand_hi32_zero(Node* n) {
 1487   int opc = n->Opcode();
 1488   if (opc == Op_AndL) {
 1489     Node* o2 = n->in(2);
 1490     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1491       return true;
 1492     }
 1493   }
 1494   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1495     return true;
 1496   }
 1497   return false;
 1498 }
 1499 
 1500 %}
 1501 
 1502 //----------ENCODING BLOCK-----------------------------------------------------
 1503 // This block specifies the encoding classes used by the compiler to output
 1504 // byte streams.  Encoding classes generate functions which are called by
 1505 // Machine Instruction Nodes in order to generate the bit encoding of the
 1506 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently four supported interfaces:
 1508 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1509 // operand to generate a function which returns its register number when
 1510 // queried.   CONST_INTER causes an operand to generate a function which
 1511 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1512 // operand to generate four functions which return the Base Register, the
 1513 // Index Register, the Scale Value, and the Offset Value of the operand when
 1514 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
 1516 // associated with each basic boolean condition for a conditional instruction.
 1517 // Instructions specify two basic values for encoding.  They use the
 1518 // ins_encode keyword to specify their encoding class (which must be one of
 1519 // the class names specified in the encoding block), and they use the
 1520 // opcode keyword to specify, in order, their primary, secondary, and
 1521 // tertiary opcode.  Only the opcode sections which a particular instruction
 1522 // needs for encoding need to be specified.
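//
// For example (illustrative only), a register-register ADD rule would pair the
// encoding classes defined below with its opcode like this:
//   opcode(0x03);                           // primary opcode for ADD r32,r/m32
//   ins_encode( OpcP, RegReg( dst, src ) ); // emit opcode byte, then ModRM byte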
 1523 encode %{
 1524   // Build emit functions for each basic byte or larger field in the intel
 1525   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1526   // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In the future, we could generalize this by
  // adding a syntax that specifies the sizes of the fields in order,
  // so that the adlc can build the emit functions automagically.
 1530 
 1531   // Emit primary opcode
 1532   enc_class OpcP %{
 1533     emit_opcode(cbuf, $primary);
 1534   %}
 1535 
 1536   // Emit secondary opcode
 1537   enc_class OpcS %{
 1538     emit_opcode(cbuf, $secondary);
 1539   %}
 1540 
 1541   // Emit opcode directly
 1542   enc_class Opcode(immI d8) %{
 1543     emit_opcode(cbuf, $d8$$constant);
 1544   %}
 1545 
 1546   enc_class SizePrefix %{
 1547     emit_opcode(cbuf,0x66);
 1548   %}
 1549 
 1550   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1551     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1552   %}
 1553 
 1554   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1555     emit_opcode(cbuf,$opcode$$constant);
 1556     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1557   %}
 1558 
 1559   enc_class mov_r32_imm0( rRegI dst ) %{
 1560     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1561     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1562   %}
 1563 
 1564   enc_class cdq_enc %{
 1565     // Full implementation of Java idiv and irem; checks for
 1566     // special case as described in JVM spec., p.243 & p.271.
 1567     //
 1568     //         normal case                           special case
 1569     //
    // input : eax: dividend                          min_int
    //         reg: divisor                           -1
    //
    // output: eax: quotient  (= eax idiv reg)        min_int
    //         edx: remainder (= eax irem reg)        0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        eax,ecx
 1586     //                  done:
 1587     //
 1588     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1589     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp eax,80000000h
 1591     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1592     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1593     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,0FFh
 1596     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1597     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1598     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1599     // normal_case:
 1600     emit_opcode(cbuf,0x99);                                         // cdq
 1601     // idiv (note: must be emitted by the user of this rule)
 1602     // normal:
 1603   %}
 1604 
 1605   // Dense encoding for older common ops
 1606   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1607     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1608   %}
 1609 
 1610 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
 1612   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1613     // Check for 8-bit immediate, and set sign extend bit in opcode
 1614     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1615       emit_opcode(cbuf, $primary | 0x02);
 1616     }
 1617     else {                          // If 32-bit immediate
 1618       emit_opcode(cbuf, $primary);
 1619     }
 1620   %}
 1621 
 1622   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1623     // Emit primary opcode and set sign-extend bit
 1624     // Check for 8-bit immediate, and set sign extend bit in opcode
 1625     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
 1627     else {                          // If 32-bit immediate
 1628       emit_opcode(cbuf, $primary);
 1629     }
 1630     // Emit r/m byte with secondary opcode, after primary opcode.
 1631     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1632   %}
 1633 
 1634   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1635     // Check for 8-bit immediate, and set sign extend bit in opcode
 1636     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1637       $$$emit8$imm$$constant;
 1638     }
 1639     else {                          // If 32-bit immediate
 1640       // Output immediate
 1641       $$$emit32$imm$$constant;
 1642     }
 1643   %}
 1644 
 1645   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1646     // Emit primary opcode and set sign-extend bit
 1647     // Check for 8-bit immediate, and set sign extend bit in opcode
 1648     int con = (int)$imm$$constant; // Throw away top bits
 1649     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1650     // Emit r/m byte with secondary opcode, after primary opcode.
 1651     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1652     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1653     else                               emit_d32(cbuf,con);
 1654   %}
 1655 
 1656   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1657     // Emit primary opcode and set sign-extend bit
 1658     // Check for 8-bit immediate, and set sign extend bit in opcode
 1659     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1660     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1661     // Emit r/m byte with tertiary opcode, after primary opcode.
 1662     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1663     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1664     else                               emit_d32(cbuf,con);
 1665   %}
 1666 
 1667   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1668     emit_cc(cbuf, $secondary, $dst$$reg );
 1669   %}
 1670 
 1671   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1672     int destlo = $dst$$reg;
 1673     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1674     // bswap lo
 1675     emit_opcode(cbuf, 0x0F);
 1676     emit_cc(cbuf, 0xC8, destlo);
 1677     // bswap hi
 1678     emit_opcode(cbuf, 0x0F);
 1679     emit_cc(cbuf, 0xC8, desthi);
 1680     // xchg lo and hi
 1681     emit_opcode(cbuf, 0x87);
 1682     emit_rm(cbuf, 0x3, destlo, desthi);
 1683   %}
 1684 
 1685   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1686     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1687   %}
 1688 
 1689   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1690     $$$emit8$primary;
 1691     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1692   %}
 1693 
 1694   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1695     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1696     emit_d8(cbuf, op >> 8 );
 1697     emit_d8(cbuf, op & 255);
 1698   %}
 1699 
 1700   // emulate a CMOV with a conditional branch around a MOV
 1701   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1702     // Invert sense of branch from sense of CMOV
 1703     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1704     emit_d8( cbuf, $brOffs$$constant );
 1705   %}
 1706 
 1707   enc_class enc_PartialSubtypeCheck( ) %{
 1708     Register Redi = as_Register(EDI_enc); // result register
 1709     Register Reax = as_Register(EAX_enc); // super class
 1710     Register Recx = as_Register(ECX_enc); // killed
 1711     Register Resi = as_Register(ESI_enc); // sub class
 1712     Label miss;
 1713 
 1714     MacroAssembler _masm(&cbuf);
 1715     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1716                                      nullptr, &miss,
 1717                                      /*set_cond_codes:*/ true);
 1718     if ($primary) {
 1719       __ xorptr(Redi, Redi);
 1720     }
 1721     __ bind(miss);
 1722   %}
 1723 
 1724   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1725     MacroAssembler masm(&cbuf);
 1726     int start = masm.offset();
 1727     if (UseSSE >= 2) {
 1728       if (VerifyFPU) {
 1729         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1730       }
 1731     } else {
 1732       // External c_calling_convention expects the FPU stack to be 'clean'.
 1733       // Compiled code leaves it dirty.  Do cleanup now.
 1734       masm.empty_FPU_stack();
 1735     }
 1736     if (sizeof_FFree_Float_Stack_All == -1) {
 1737       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1738     } else {
 1739       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1740     }
 1741   %}
 1742 
 1743   enc_class Verify_FPU_For_Leaf %{
 1744     if( VerifyFPU ) {
 1745       MacroAssembler masm(&cbuf);
 1746       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1747     }
 1748   %}
 1749 
 1750   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1751     // This is the instruction starting address for relocation info.
 1752     MacroAssembler _masm(&cbuf);
 1753     cbuf.set_insts_mark();
 1754     $$$emit8$primary;
 1755     // CALL directly to the runtime
 1756     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1757                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1758     __ post_call_nop();
 1759 
 1760     if (UseSSE >= 2) {
 1761       MacroAssembler _masm(&cbuf);
 1762       BasicType rt = tf()->return_type();
 1763 
 1764       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1765         // A C runtime call where the return value is unused.  In SSE2+
 1766         // mode the result needs to be removed from the FPU stack.  It's
 1767         // likely that this function call could be removed by the
 1768         // optimizer if the C function is a pure function.
 1769         __ ffree(0);
 1770       } else if (rt == T_FLOAT) {
 1771         __ lea(rsp, Address(rsp, -4));
 1772         __ fstp_s(Address(rsp, 0));
 1773         __ movflt(xmm0, Address(rsp, 0));
 1774         __ lea(rsp, Address(rsp,  4));
 1775       } else if (rt == T_DOUBLE) {
 1776         __ lea(rsp, Address(rsp, -8));
 1777         __ fstp_d(Address(rsp, 0));
 1778         __ movdbl(xmm0, Address(rsp, 0));
 1779         __ lea(rsp, Address(rsp,  8));
 1780       }
 1781     }
 1782   %}
 1783 
 1784   enc_class pre_call_resets %{
 1785     // If method sets FPU control word restore it here
 1786     debug_only(int off0 = cbuf.insts_size());
 1787     if (ra_->C->in_24_bit_fp_mode()) {
 1788       MacroAssembler _masm(&cbuf);
 1789       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1790     }
 1791     // Clear upper bits of YMM registers when current compiled code uses
 1792     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1793     MacroAssembler _masm(&cbuf);
 1794     __ vzeroupper();
 1795     debug_only(int off1 = cbuf.insts_size());
 1796     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1797   %}
 1798 
 1799   enc_class post_call_FPU %{
 1800     // If method sets FPU control word do it here also
 1801     if (Compile::current()->in_24_bit_fp_mode()) {
 1802       MacroAssembler masm(&cbuf);
 1803       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1804     }
 1805   %}
 1806 
 1807   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1808     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1809     // who we intended to call.
 1810     MacroAssembler _masm(&cbuf);
 1811     cbuf.set_insts_mark();
 1812     $$$emit8$primary;
 1813 
 1814     if (!_method) {
 1815       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1816                      runtime_call_Relocation::spec(),
 1817                      RELOC_IMM32);
 1818       __ post_call_nop();
 1819     } else {
 1820       int method_index = resolved_method_index(cbuf);
 1821       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1822                                                   : static_call_Relocation::spec(method_index);
 1823       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1824                      rspec, RELOC_DISP32);
 1825       __ post_call_nop();
 1826       address mark = cbuf.insts_mark();
 1827       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1828         // Calls of the same statically bound method can share
 1829         // a stub to the interpreter.
 1830         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 1831       } else {
 1832         // Emit stubs for static call.
 1833         address stub = CompiledDirectCall::emit_to_interp_stub(cbuf, mark);
 1834         if (stub == nullptr) {
 1835           ciEnv::current()->record_failure("CodeCache is full");
 1836           return;
 1837         }
 1838       }
 1839     }
 1840   %}
 1841 
 1842   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1843     MacroAssembler _masm(&cbuf);
 1844     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1845     __ post_call_nop();
 1846   %}
 1847 
 1848   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1849     int disp = in_bytes(Method::from_compiled_offset());
 1850     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1851 
 1852     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
 1853     MacroAssembler _masm(&cbuf);
 1854     cbuf.set_insts_mark();
 1855     $$$emit8$primary;
 1856     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1857     emit_d8(cbuf, disp);             // Displacement
 1858     __ post_call_nop();
 1859   %}
 1860 
 1861 //   Following encoding is no longer used, but may be restored if calling
 1862 //   convention changes significantly.
 1863 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1864 //
 1865 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1866 //     // int ic_reg     = Matcher::inline_cache_reg();
 1867 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1868 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1869 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1870 //
 1871 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1872 //     // // so we load it immediately before the call
 1873 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1874 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1875 //
 1876 //     // xor rbp,ebp
 1877 //     emit_opcode(cbuf, 0x33);
 1878 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1879 //
 1880 //     // CALL to interpreter.
 1881 //     cbuf.set_insts_mark();
 1882 //     $$$emit8$primary;
 1883 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1884 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1885 //   %}
 1886 
 1887   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1888     $$$emit8$primary;
 1889     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1890     $$$emit8$shift$$constant;
 1891   %}
 1892 
 1893   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1894     // Load immediate does not have a zero or sign extended version
 1895     // for 8-bit immediates
 1896     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1897     $$$emit32$src$$constant;
 1898   %}
 1899 
 1900   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1901     // Load immediate does not have a zero or sign extended version
 1902     // for 8-bit immediates
 1903     emit_opcode(cbuf, $primary + $dst$$reg);
 1904     $$$emit32$src$$constant;
 1905   %}
 1906 
 1907   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1908     // Load immediate does not have a zero or sign extended version
 1909     // for 8-bit immediates
 1910     int dst_enc = $dst$$reg;
 1911     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1912     if (src_con == 0) {
 1913       // xor dst, dst
 1914       emit_opcode(cbuf, 0x33);
 1915       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1916     } else {
 1917       emit_opcode(cbuf, $primary + dst_enc);
 1918       emit_d32(cbuf, src_con);
 1919     }
 1920   %}
 1921 
 1922   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1923     // Load immediate does not have a zero or sign extended version
 1924     // for 8-bit immediates
 1925     int dst_enc = $dst$$reg + 2;
 1926     int src_con = ((julong)($src$$constant)) >> 32;
 1927     if (src_con == 0) {
 1928       // xor dst, dst
 1929       emit_opcode(cbuf, 0x33);
 1930       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1931     } else {
 1932       emit_opcode(cbuf, $primary + dst_enc);
 1933       emit_d32(cbuf, src_con);
 1934     }
 1935   %}
 1936 
 1937 
 1938   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1939   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1940     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1941   %}
 1942 
 1943   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1944     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1945   %}
 1946 
 1947   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1948     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1949   %}
 1950 
 1951   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1952     $$$emit8$primary;
 1953     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1954   %}
 1955 
 1956   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1957     $$$emit8$secondary;
 1958     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1959   %}
 1960 
 1961   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1962     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1963   %}
 1964 
 1965   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1966     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1967   %}
 1968 
 1969   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1970     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1971   %}
 1972 
 1973   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1974     // Output immediate
 1975     $$$emit32$src$$constant;
 1976   %}
 1977 
 1978   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1979     // Output Float immediate bits
 1980     jfloat jf = $src$$constant;
 1981     int    jf_as_bits = jint_cast( jf );
 1982     emit_d32(cbuf, jf_as_bits);
 1983   %}
 1984 
 1985   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1986     // Output Float immediate bits
 1987     jfloat jf = $src$$constant;
 1988     int    jf_as_bits = jint_cast( jf );
 1989     emit_d32(cbuf, jf_as_bits);
 1990   %}
 1991 
 1992   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1993     // Output immediate
 1994     $$$emit16$src$$constant;
 1995   %}
 1996 
 1997   enc_class Con_d32(immI src) %{
 1998     emit_d32(cbuf,$src$$constant);
 1999   %}
 2000 
 2001   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 2002     // Output immediate memory reference
 2003     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 2004     emit_d32(cbuf, 0x00);
 2005   %}
 2006 
 2007   enc_class lock_prefix( ) %{
 2008     emit_opcode(cbuf,0xF0);         // [Lock]
 2009   %}
 2010 
 2011   // Cmp-xchg long value.
  // Note: we need to swap ebx and ecx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       ecx as the high order word of the new value to store but
  //       our register encoding uses ebx.
 2016   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2017 
    // XCHG  ebx,ecx
 2019     emit_opcode(cbuf,0x87);
 2020     emit_opcode(cbuf,0xD9);
 2021     // [Lock]
 2022     emit_opcode(cbuf,0xF0);
 2023     // CMPXCHG8 [Eptr]
 2024     emit_opcode(cbuf,0x0F);
 2025     emit_opcode(cbuf,0xC7);
 2026     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  ebx,ecx
 2028     emit_opcode(cbuf,0x87);
 2029     emit_opcode(cbuf,0xD9);
 2030   %}
 2031 
 2032   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2033     // [Lock]
 2034     emit_opcode(cbuf,0xF0);
 2035 
 2036     // CMPXCHG [Eptr]
 2037     emit_opcode(cbuf,0x0F);
 2038     emit_opcode(cbuf,0xB1);
 2039     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2040   %}
 2041 
 2042   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2043     // [Lock]
 2044     emit_opcode(cbuf,0xF0);
 2045 
 2046     // CMPXCHGB [Eptr]
 2047     emit_opcode(cbuf,0x0F);
 2048     emit_opcode(cbuf,0xB0);
 2049     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2050   %}
 2051 
 2052   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2053     // [Lock]
 2054     emit_opcode(cbuf,0xF0);
 2055 
 2056     // 16-bit mode
 2057     emit_opcode(cbuf, 0x66);
 2058 
 2059     // CMPXCHGW [Eptr]
 2060     emit_opcode(cbuf,0x0F);
 2061     emit_opcode(cbuf,0xB1);
 2062     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2063   %}
 2064 
 2065   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2066     int res_encoding = $res$$reg;
 2067 
 2068     // MOV  res,0
 2069     emit_opcode( cbuf, 0xB8 + res_encoding);
 2070     emit_d32( cbuf, 0 );
 2071     // JNE,s  fail
 2072     emit_opcode(cbuf,0x75);
 2073     emit_d8(cbuf, 5 );
 2074     // MOV  res,1
 2075     emit_opcode( cbuf, 0xB8 + res_encoding);
 2076     emit_d32( cbuf, 1 );
 2077     // fail:
 2078   %}
 2079 
 2080   enc_class set_instruction_start( ) %{
 2081     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2082   %}
 2083 
 2084   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2085     int reg_encoding = $ereg$$reg;
 2086     int base  = $mem$$base;
 2087     int index = $mem$$index;
 2088     int scale = $mem$$scale;
 2089     int displace = $mem$$disp;
 2090     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2091     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2092   %}
 2093 
 2094   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2095     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2096     int base  = $mem$$base;
 2097     int index = $mem$$index;
 2098     int scale = $mem$$scale;
 2099     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2100     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2101     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2102   %}
 2103 
 2104   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2105     int r1, r2;
 2106     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2107     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2108     emit_opcode(cbuf,0x0F);
 2109     emit_opcode(cbuf,$tertiary);
 2110     emit_rm(cbuf, 0x3, r1, r2);
 2111     emit_d8(cbuf,$cnt$$constant);
 2112     emit_d8(cbuf,$primary);
 2113     emit_rm(cbuf, 0x3, $secondary, r1);
 2114     emit_d8(cbuf,$cnt$$constant);
 2115   %}
 2116 
 2117   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2118     emit_opcode( cbuf, 0x8B ); // Move
 2119     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2120     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2121       emit_d8(cbuf,$primary);
 2122       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2123       emit_d8(cbuf,$cnt$$constant-32);
 2124     }
 2125     emit_d8(cbuf,$primary);
 2126     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2127     emit_d8(cbuf,31);
 2128   %}
 2129 
 2130   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2131     int r1, r2;
 2132     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2133     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2134 
 2135     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2136     emit_rm(cbuf, 0x3, r1, r2);
 2137     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2138       emit_opcode(cbuf,$primary);
 2139       emit_rm(cbuf, 0x3, $secondary, r1);
 2140       emit_d8(cbuf,$cnt$$constant-32);
 2141     }
 2142     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2143     emit_rm(cbuf, 0x3, r2, r2);
 2144   %}
 2145 
 2146   // Clone of RegMem but accepts an extra parameter to access each
 2147   // half of a double in memory; it never needs relocation info.
 2148   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2149     emit_opcode(cbuf,$opcode$$constant);
 2150     int reg_encoding = $rm_reg$$reg;
 2151     int base     = $mem$$base;
 2152     int index    = $mem$$index;
 2153     int scale    = $mem$$scale;
 2154     int displace = $mem$$disp + $disp_for_half$$constant;
 2155     relocInfo::relocType disp_reloc = relocInfo::none;
 2156     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2157   %}
 2158 
 2159   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2160   //
 2161   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2162   // and it never needs relocation information.
 2163   // Frequently used to move data between FPU's Stack Top and memory.
 2164   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2165     int rm_byte_opcode = $rm_opcode$$constant;
 2166     int base     = $mem$$base;
 2167     int index    = $mem$$index;
 2168     int scale    = $mem$$scale;
 2169     int displace = $mem$$disp;
 2170     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2171     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2172   %}
 2173 
 2174   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2175     int rm_byte_opcode = $rm_opcode$$constant;
 2176     int base     = $mem$$base;
 2177     int index    = $mem$$index;
 2178     int scale    = $mem$$scale;
 2179     int displace = $mem$$disp;
 2180     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2181     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2182   %}
 2183 
 2184   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2185     int reg_encoding = $dst$$reg;
 2186     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2187     int index        = 0x04;            // 0x04 indicates no index
 2188     int scale        = 0x00;            // 0x00 indicates no scale
 2189     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2190     relocInfo::relocType disp_reloc = relocInfo::none;
 2191     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2192   %}
 2193 
 2194   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2195     // Compare dst,src
 2196     emit_opcode(cbuf,0x3B);
 2197     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2198     // jmp dst < src around move
 2199     emit_opcode(cbuf,0x7C);
 2200     emit_d8(cbuf,2);
 2201     // move dst,src
 2202     emit_opcode(cbuf,0x8B);
 2203     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2204   %}
 2205 
 2206   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2207     // Compare dst,src
 2208     emit_opcode(cbuf,0x3B);
 2209     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2210     // jmp dst > src around move
 2211     emit_opcode(cbuf,0x7F);
 2212     emit_d8(cbuf,2);
 2213     // move dst,src
 2214     emit_opcode(cbuf,0x8B);
 2215     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2216   %}
 2217 
 2218   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2219     // If src is FPR1, we can just FST to store it.
 2220     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2221     int reg_encoding = 0x2; // Just store
 2222     int base  = $mem$$base;
 2223     int index = $mem$$index;
 2224     int scale = $mem$$scale;
 2225     int displace = $mem$$disp;
 2226     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2227     if( $src$$reg != FPR1L_enc ) {
 2228       reg_encoding = 0x3;  // Store & pop
 2229       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2230       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2231     }
 2232     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2233     emit_opcode(cbuf,$primary);
 2234     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2235   %}
 2236 
 2237   enc_class neg_reg(rRegI dst) %{
 2238     // NEG $dst
 2239     emit_opcode(cbuf,0xF7);
 2240     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2241   %}
 2242 
 2243   enc_class setLT_reg(eCXRegI dst) %{
 2244     // SETLT $dst
 2245     emit_opcode(cbuf,0x0F);
 2246     emit_opcode(cbuf,0x9C);
 2247     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2248   %}
 2249 
 2250   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2251     int tmpReg = $tmp$$reg;
 2252 
 2253     // SUB $p,$q
 2254     emit_opcode(cbuf,0x2B);
 2255     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2256     // SBB $tmp,$tmp
 2257     emit_opcode(cbuf,0x1B);
 2258     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2259     // AND $tmp,$y
 2260     emit_opcode(cbuf,0x23);
 2261     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2262     // ADD $p,$tmp
 2263     emit_opcode(cbuf,0x03);
 2264     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2265   %}
 2266 
 2267   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2268     // TEST shift,32
 2269     emit_opcode(cbuf,0xF7);
 2270     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2271     emit_d32(cbuf,0x20);
 2272     // JEQ,s small
 2273     emit_opcode(cbuf, 0x74);
 2274     emit_d8(cbuf, 0x04);
 2275     // MOV    $dst.hi,$dst.lo
 2276     emit_opcode( cbuf, 0x8B );
 2277     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2278     // CLR    $dst.lo
 2279     emit_opcode(cbuf, 0x33);
 2280     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2281 // small:
 2282     // SHLD   $dst.hi,$dst.lo,$shift
 2283     emit_opcode(cbuf,0x0F);
 2284     emit_opcode(cbuf,0xA5);
 2285     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2286     // SHL    $dst.lo,$shift"
 2287     emit_opcode(cbuf,0xD3);
 2288     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2289   %}
 2290 
 2291   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2292     // TEST shift,32
 2293     emit_opcode(cbuf,0xF7);
 2294     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2295     emit_d32(cbuf,0x20);
 2296     // JEQ,s small
 2297     emit_opcode(cbuf, 0x74);
 2298     emit_d8(cbuf, 0x04);
 2299     // MOV    $dst.lo,$dst.hi
 2300     emit_opcode( cbuf, 0x8B );
 2301     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2302     // CLR    $dst.hi
 2303     emit_opcode(cbuf, 0x33);
 2304     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2305 // small:
 2306     // SHRD   $dst.lo,$dst.hi,$shift
 2307     emit_opcode(cbuf,0x0F);
 2308     emit_opcode(cbuf,0xAD);
 2309     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
 2310     // SHR    $dst.hi,$shift"
 2311     emit_opcode(cbuf,0xD3);
 2312     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2313   %}
 2314 
 2315   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2316     // TEST shift,32
 2317     emit_opcode(cbuf,0xF7);
 2318     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2319     emit_d32(cbuf,0x20);
 2320     // JEQ,s small
 2321     emit_opcode(cbuf, 0x74);
 2322     emit_d8(cbuf, 0x05);
 2323     // MOV    $dst.lo,$dst.hi
 2324     emit_opcode( cbuf, 0x8B );
 2325     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2326     // SAR    $dst.hi,31
 2327     emit_opcode(cbuf, 0xC1);
 2328     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2329     emit_d8(cbuf, 0x1F );
 2330 // small:
 2331     // SHRD   $dst.lo,$dst.hi,$shift
 2332     emit_opcode(cbuf,0x0F);
 2333     emit_opcode(cbuf,0xAD);
 2334     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
 2335     // SAR    $dst.hi,$shift"
 2336     emit_opcode(cbuf,0xD3);
 2337     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2338   %}
 2339 
 2340 
 2341   // ----------------- Encodings for floating point unit -----------------
 2342   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2343   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2344     $$$emit8$primary;
 2345     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2346   %}
 2347 
 2348   // Pop argument in FPR0 with FSTP ST(0)
 2349   enc_class PopFPU() %{
 2350     emit_opcode( cbuf, 0xDD );
 2351     emit_d8( cbuf, 0xD8 );
 2352   %}
 2353 
 2354   // !!!!! equivalent to Pop_Reg_F
 2355   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2356     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2357     emit_d8( cbuf, 0xD8+$dst$$reg );
 2358   %}
 2359 
 2360   enc_class Push_Reg_DPR( regDPR dst ) %{
 2361     emit_opcode( cbuf, 0xD9 );
 2362     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2363   %}
 2364 
 2365   enc_class strictfp_bias1( regDPR dst ) %{
 2366     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2367     emit_opcode( cbuf, 0x2D );
 2368     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2369     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2370     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2371   %}
 2372 
 2373   enc_class strictfp_bias2( regDPR dst ) %{
 2374     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2375     emit_opcode( cbuf, 0x2D );
 2376     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2377     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2378     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2379   %}
 2380 
 2381   // Special case for moving an integer register to a stack slot.
 2382   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2383     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2384   %}
 2385 
 2386   // Special case for moving a register to a stack slot.
 2387   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2388     // Opcode already emitted
 2389     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2390     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2391     emit_d32(cbuf, $dst$$disp);   // Displacement
 2392   %}
 2393 
 2394   // Push the integer in stackSlot 'src' onto FP-stack
 2395   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2396     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2397   %}
 2398 
 2399   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2400   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2401     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2402   %}
 2403 
 2404   // Same as Pop_Mem_F except for opcode
 2405   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2406   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2407     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2408   %}
 2409 
 2410   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2411     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2412     emit_d8( cbuf, 0xD8+$dst$$reg );
 2413   %}
 2414 
 2415   enc_class Push_Reg_FPR( regFPR dst ) %{
 2416     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2417     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2418   %}
 2419 
 2420   // Push FPU's float to a stack-slot, and pop FPU-stack
 2421   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2422     int pop = 0x02;
 2423     if ($src$$reg != FPR1L_enc) {
 2424       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2425       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2426       pop = 0x03;
 2427     }
 2428     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2429   %}
 2430 
 2431   // Push FPU's double to a stack-slot, and pop FPU-stack
 2432   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2433     int pop = 0x02;
 2434     if ($src$$reg != FPR1L_enc) {
 2435       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2436       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2437       pop = 0x03;
 2438     }
 2439     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2440   %}
 2441 
 2442   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2443   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2444     int pop = 0xD0 - 1; // -1 since we skip FLD
 2445     if ($src$$reg != FPR1L_enc) {
 2446       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2447       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2448       pop = 0xD8;
 2449     }
 2450     emit_opcode( cbuf, 0xDD );
 2451     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2452   %}
 2453 
 2454 
 2455   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2456     // load dst in FPR0
 2457     emit_opcode( cbuf, 0xD9 );
 2458     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2459     if ($src$$reg != FPR1L_enc) {
 2460       // fincstp
 2461       emit_opcode (cbuf, 0xD9);
 2462       emit_opcode (cbuf, 0xF7);
 2463       // swap src with FPR1:
 2464       // FXCH FPR1 with src
 2465       emit_opcode(cbuf, 0xD9);
 2466       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2467       // fdecstp
 2468       emit_opcode (cbuf, 0xD9);
 2469       emit_opcode (cbuf, 0xF6);
 2470     }
 2471   %}
 2472 
 2473   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2474     MacroAssembler _masm(&cbuf);
 2475     __ subptr(rsp, 8);
 2476     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2477     __ fld_d(Address(rsp, 0));
 2478     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2479     __ fld_d(Address(rsp, 0));
 2480   %}
 2481 
 2482   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2483     MacroAssembler _masm(&cbuf);
 2484     __ subptr(rsp, 4);
 2485     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2486     __ fld_s(Address(rsp, 0));
 2487     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2488     __ fld_s(Address(rsp, 0));
 2489   %}
 2490 
 2491   enc_class Push_ResultD(regD dst) %{
 2492     MacroAssembler _masm(&cbuf);
 2493     __ fstp_d(Address(rsp, 0));
 2494     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2495     __ addptr(rsp, 8);
 2496   %}
 2497 
 2498   enc_class Push_ResultF(regF dst, immI d8) %{
 2499     MacroAssembler _masm(&cbuf);
 2500     __ fstp_s(Address(rsp, 0));
 2501     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2502     __ addptr(rsp, $d8$$constant);
 2503   %}
 2504 
 2505   enc_class Push_SrcD(regD src) %{
 2506     MacroAssembler _masm(&cbuf);
 2507     __ subptr(rsp, 8);
 2508     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2509     __ fld_d(Address(rsp, 0));
 2510   %}
 2511 
 2512   enc_class push_stack_temp_qword() %{
 2513     MacroAssembler _masm(&cbuf);
 2514     __ subptr(rsp, 8);
 2515   %}
 2516 
 2517   enc_class pop_stack_temp_qword() %{
 2518     MacroAssembler _masm(&cbuf);
 2519     __ addptr(rsp, 8);
 2520   %}
 2521 
 2522   enc_class push_xmm_to_fpr1(regD src) %{
 2523     MacroAssembler _masm(&cbuf);
 2524     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2525     __ fld_d(Address(rsp, 0));
 2526   %}
 2527 
 2528   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2529     if ($src$$reg != FPR1L_enc) {
 2530       // fincstp
 2531       emit_opcode (cbuf, 0xD9);
 2532       emit_opcode (cbuf, 0xF7);
 2533       // FXCH FPR1 with src
 2534       emit_opcode(cbuf, 0xD9);
 2535       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2536       // fdecstp
 2537       emit_opcode (cbuf, 0xD9);
 2538       emit_opcode (cbuf, 0xF6);
 2539     }
 2540     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2541     // // FSTP   FPR$dst$$reg
 2542     // emit_opcode( cbuf, 0xDD );
 2543     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2544   %}
 2545 
 2546   enc_class fnstsw_sahf_skip_parity() %{
 2547     // fnstsw ax
 2548     emit_opcode( cbuf, 0xDF );
 2549     emit_opcode( cbuf, 0xE0 );
 2550     // sahf
 2551     emit_opcode( cbuf, 0x9E );
 2552     // jnp  ::skip
 2553     emit_opcode( cbuf, 0x7B );
 2554     emit_opcode( cbuf, 0x05 );
 2555   %}
 2556 
 2557   enc_class emitModDPR() %{
 2558     // fprem must be iterative
 2559     // :: loop
 2560     // fprem
 2561     emit_opcode( cbuf, 0xD9 );
 2562     emit_opcode( cbuf, 0xF8 );
 2563     // wait
 2564     emit_opcode( cbuf, 0x9b );
 2565     // fnstsw ax
 2566     emit_opcode( cbuf, 0xDF );
 2567     emit_opcode( cbuf, 0xE0 );
 2568     // sahf
 2569     emit_opcode( cbuf, 0x9E );
 2570     // jp  ::loop
 2571     emit_opcode( cbuf, 0x0F );
 2572     emit_opcode( cbuf, 0x8A );
 2573     emit_opcode( cbuf, 0xF4 );
 2574     emit_opcode( cbuf, 0xFF );
 2575     emit_opcode( cbuf, 0xFF );
 2576     emit_opcode( cbuf, 0xFF );
 2577   %}
 2578 
 2579   enc_class fpu_flags() %{
 2580     // fnstsw_ax
 2581     emit_opcode( cbuf, 0xDF);
 2582     emit_opcode( cbuf, 0xE0);
 2583     // test ax,0x0400
 2584     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2585     emit_opcode( cbuf, 0xA9 );
 2586     emit_d16   ( cbuf, 0x0400 );
 2587     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2588     // // test rax,0x0400
 2589     // emit_opcode( cbuf, 0xA9 );
 2590     // emit_d32   ( cbuf, 0x00000400 );
 2591     //
 2592     // jz exit (no unordered comparison)
 2593     emit_opcode( cbuf, 0x74 );
 2594     emit_d8    ( cbuf, 0x02 );
 2595     // mov ah,1 - treat as LT case (set carry flag)
 2596     emit_opcode( cbuf, 0xB4 );
 2597     emit_d8    ( cbuf, 0x01 );
 2598     // sahf
 2599     emit_opcode( cbuf, 0x9E);
 2600   %}
 2601 
 2602   enc_class cmpF_P6_fixup() %{
 2603     // Fixup the integer flags in case comparison involved a NaN
 2604     //
 2605     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2606     emit_opcode( cbuf, 0x7B );
 2607     emit_d8    ( cbuf, 0x03 );
 2608     // MOV AH,1 - treat as LT case (set carry flag)
 2609     emit_opcode( cbuf, 0xB4 );
 2610     emit_d8    ( cbuf, 0x01 );
 2611     // SAHF
 2612     emit_opcode( cbuf, 0x9E);
 2613     // NOP     // target for branch to avoid branch to branch
 2614     emit_opcode( cbuf, 0x90);
 2615   %}
 2616 
 2617 //     fnstsw_ax();
 2618 //     sahf();
 2619 //     movl(dst, nan_result);
 2620 //     jcc(Assembler::parity, exit);
 2621 //     movl(dst, less_result);
 2622 //     jcc(Assembler::below, exit);
 2623 //     movl(dst, equal_result);
 2624 //     jcc(Assembler::equal, exit);
 2625 //     movl(dst, greater_result);
 2626 
 2627 // less_result     =  1;
 2628 // greater_result  = -1;
 2629 // equal_result    = 0;
 2630 // nan_result      = -1;
 2631 
 2632   enc_class CmpF_Result(rRegI dst) %{
 2633     // fnstsw_ax();
 2634     emit_opcode( cbuf, 0xDF);
 2635     emit_opcode( cbuf, 0xE0);
 2636     // sahf
 2637     emit_opcode( cbuf, 0x9E);
 2638     // movl(dst, nan_result);
 2639     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2640     emit_d32( cbuf, -1 );
 2641     // jcc(Assembler::parity, exit);
 2642     emit_opcode( cbuf, 0x7A );
 2643     emit_d8    ( cbuf, 0x13 );
 2644     // movl(dst, less_result);
 2645     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2646     emit_d32( cbuf, -1 );
 2647     // jcc(Assembler::below, exit);
 2648     emit_opcode( cbuf, 0x72 );
 2649     emit_d8    ( cbuf, 0x0C );
 2650     // movl(dst, equal_result);
 2651     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2652     emit_d32( cbuf, 0 );
 2653     // jcc(Assembler::equal, exit);
 2654     emit_opcode( cbuf, 0x74 );
 2655     emit_d8    ( cbuf, 0x05 );
 2656     // movl(dst, greater_result);
 2657     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2658     emit_d32( cbuf, 1 );
 2659   %}
 2660 
 2661 
 2662   // Compare the longs and set flags
 2663   // BROKEN!  Do Not use as-is
 2664   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2665     // CMP    $src1.hi,$src2.hi
 2666     emit_opcode( cbuf, 0x3B );
 2667     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2668     // JNE,s  done
 2669     emit_opcode(cbuf,0x75);
 2670     emit_d8(cbuf, 2 );
 2671     // CMP    $src1.lo,$src2.lo
 2672     emit_opcode( cbuf, 0x3B );
 2673     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2674 // done:
 2675   %}
 2676 
 2677   enc_class convert_int_long( regL dst, rRegI src ) %{
 2678     // mov $dst.lo,$src
 2679     int dst_encoding = $dst$$reg;
 2680     int src_encoding = $src$$reg;
 2681     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2682     // mov $dst.hi,$src
 2683     encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2684     // sar $dst.hi,31
 2685     emit_opcode( cbuf, 0xC1 );
 2686     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2687     emit_d8(cbuf, 0x1F );
 2688   %}
 2689 
 2690   enc_class convert_long_double( eRegL src ) %{
 2691     // push $src.hi
 2692     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2693     // push $src.lo
 2694     emit_opcode(cbuf, 0x50+$src$$reg  );
 2695     // fild 64-bits at [SP]
 2696     emit_opcode(cbuf,0xdf);
 2697     emit_d8(cbuf, 0x6C);
 2698     emit_d8(cbuf, 0x24);
 2699     emit_d8(cbuf, 0x00);
 2700     // pop stack
 2701     emit_opcode(cbuf, 0x83); // add  SP, #8
 2702     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2703     emit_d8(cbuf, 0x8);
 2704   %}
 2705 
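  // Multiply src1 by the value already sitting in EAX (the low half of src2,
  // typically a constant loaded by the matched instruction) and keep only the
  // high-order bits of the 64-bit product.  Roughly, in illustrative C
  // (with 32 <= cnt <= 63; eax_value is a hypothetical name):
  //   dst = (int)(((long long)src1 * eax_value) >> cnt);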
 2706   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2707     // IMUL   EDX:EAX,$src1
 2708     emit_opcode( cbuf, 0xF7 );
 2709     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2710     // SAR    EDX,$cnt-32
 2711     int shift_count = ((int)$cnt$$constant) - 32;
 2712     if (shift_count > 0) {
 2713       emit_opcode(cbuf, 0xC1);
 2714       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2715       emit_d8(cbuf, shift_count);
 2716     }
 2717   %}
 2718 
  // This version does not include the trailing ADD SP,8
 2720   enc_class convert_long_double2( eRegL src ) %{
 2721     // push $src.hi
 2722     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2723     // push $src.lo
 2724     emit_opcode(cbuf, 0x50+$src$$reg  );
 2725     // fild 64-bits at [SP]
 2726     emit_opcode(cbuf,0xdf);
 2727     emit_d8(cbuf, 0x6C);
 2728     emit_d8(cbuf, 0x24);
 2729     emit_d8(cbuf, 0x00);
 2730   %}
 2731 
 2732   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2733     // Basic idea: long = (long)int * (long)int
 2734     // IMUL EDX:EAX, src
 2735     emit_opcode( cbuf, 0xF7 );
 2736     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2737   %}
 2738 
 2739   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2740     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2741     // MUL EDX:EAX, src
 2742     emit_opcode( cbuf, 0xF7 );
 2743     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2744   %}
 2745 
 2746   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2747     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2748     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
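    // Illustrative C form of the same decomposition (x_lo/x_hi/y_lo/y_hi are
    // hypothetical uint32_t halves, not operands of this encoding):
    //   uint64_t p  = (uint64_t)x_lo * y_lo;
    //   uint32_t hi = (uint32_t)(p >> 32) + x_hi * y_lo + x_lo * y_hi;
    //   uint64_t r  = ((uint64_t)hi << 32) | (uint32_t)p;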
 2749     // MOV    $tmp,$src.lo
 2750     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2751     // IMUL   $tmp,EDX
 2752     emit_opcode( cbuf, 0x0F );
 2753     emit_opcode( cbuf, 0xAF );
 2754     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2755     // MOV    EDX,$src.hi
 2756     encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2757     // IMUL   EDX,EAX
 2758     emit_opcode( cbuf, 0x0F );
 2759     emit_opcode( cbuf, 0xAF );
 2760     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2761     // ADD    $tmp,EDX
 2762     emit_opcode( cbuf, 0x03 );
 2763     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2764     // MUL   EDX:EAX,$src.lo
 2765     emit_opcode( cbuf, 0xF7 );
 2766     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
 2768     emit_opcode( cbuf, 0x03 );
 2769     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2770   %}
 2771 
 2772   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2773     // Basic idea: lo(result) = lo(src * y_lo)
 2774     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2775     // IMUL   $tmp,EDX,$src
 2776     emit_opcode( cbuf, 0x6B );
 2777     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2778     emit_d8( cbuf, (int)$src$$constant );
 2779     // MOV    EDX,$src
 2780     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2781     emit_d32( cbuf, (int)$src$$constant );
 2782     // MUL   EDX:EAX,EDX
 2783     emit_opcode( cbuf, 0xF7 );
 2784     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
 2786     emit_opcode( cbuf, 0x03 );
 2787     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2788   %}
 2789 
 2790   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2791     // PUSH src1.hi
 2792     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2793     // PUSH src1.lo
 2794     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2795     // PUSH src2.hi
 2796     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2797     // PUSH src2.lo
 2798     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2799     // CALL directly to the runtime
 2800     MacroAssembler _masm(&cbuf);
 2801     cbuf.set_insts_mark();
 2802     emit_opcode(cbuf,0xE8);       // Call into runtime
 2803     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2804     __ post_call_nop();
 2805     // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #16 (pop the four pushed words)
 2807     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2808     emit_d8(cbuf, 4*4);
 2809   %}
 2810 
 2811   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2812     // PUSH src1.hi
 2813     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2814     // PUSH src1.lo
 2815     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2816     // PUSH src2.hi
 2817     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2818     // PUSH src2.lo
 2819     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2820     // CALL directly to the runtime
 2821     MacroAssembler _masm(&cbuf);
 2822     cbuf.set_insts_mark();
 2823     emit_opcode(cbuf,0xE8);       // Call into runtime
 2824     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2825     __ post_call_nop();
 2826     // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #16 (pop the four pushed words)
 2828     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2829     emit_d8(cbuf, 4*4);
 2830   %}
 2831 
 2832   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2833     // MOV   $tmp,$src.lo
 2834     emit_opcode(cbuf, 0x8B);
 2835     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2836     // OR    $tmp,$src.hi
 2837     emit_opcode(cbuf, 0x0B);
 2838     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2839   %}
 2840 
 2841   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2842     // CMP    $src1.lo,$src2.lo
 2843     emit_opcode( cbuf, 0x3B );
 2844     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2845     // JNE,s  skip
 2846     emit_cc(cbuf, 0x70, 0x5);
 2847     emit_d8(cbuf,2);
 2848     // CMP    $src1.hi,$src2.hi
 2849     emit_opcode( cbuf, 0x3B );
 2850     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2851   %}
 2852 
 2853   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2854     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2855     emit_opcode( cbuf, 0x3B );
 2856     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2857     // MOV    $tmp,$src1.hi
 2858     emit_opcode( cbuf, 0x8B );
 2859     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
 2860     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2861     emit_opcode( cbuf, 0x1B );
 2862     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2863   %}
 2864 
 2865   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2866     // XOR    $tmp,$tmp
 2867     emit_opcode(cbuf,0x33);  // XOR
 2868     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2869     // CMP    $tmp,$src.lo
 2870     emit_opcode( cbuf, 0x3B );
 2871     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2872     // SBB    $tmp,$src.hi
 2873     emit_opcode( cbuf, 0x1B );
 2874     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2875   %}
 2876 
 2877  // Sniff, sniff... smells like Gnu Superoptimizer
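  // Correctness note: NEG lo sets the carry flag iff the original low word was
  // non-zero, so the SBB hi,0 subtracts exactly the borrow that 64-bit negation
  // needs:  -(hi:lo) == (-hi - (lo != 0)) : (-lo).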
 2878   enc_class neg_long( eRegL dst ) %{
 2879     emit_opcode(cbuf,0xF7);    // NEG hi
 2880     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2881     emit_opcode(cbuf,0xF7);    // NEG lo
 2882     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2883     emit_opcode(cbuf,0x83);    // SBB hi,0
 2884     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2885     emit_d8    (cbuf,0 );
 2886   %}
 2887 
 2888   enc_class enc_pop_rdx() %{
 2889     emit_opcode(cbuf,0x5A);
 2890   %}
 2891 
 2892   enc_class enc_rethrow() %{
 2893     MacroAssembler _masm(&cbuf);
 2894     cbuf.set_insts_mark();
 2895     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2896     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2897                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2898     __ post_call_nop();
 2899   %}
 2900 
 2901 
  // Convert a double to an int.  Java semantics require special handling
  // of the corner cases.  So we set the rounding mode to 'zero' (truncate),
  // store the double down as an int, and reset the rounding mode to
  // 'nearest'.  If the hardware stores the sentinel value 0x80000000
  // (NaN or out-of-range input), a slow-path runtime call fixes up the
  // correct result.
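  // The Java (int) cast semantics being implemented, in illustrative C form:
  //   if (d != d)                    r = 0;        // NaN
  //   else if (d >= (double)INT_MAX) r = INT_MAX;
  //   else if (d <= (double)INT_MIN) r = INT_MIN;
  //   else                           r = (int)d;   // truncate toward zero
  // FIST stores the sentinel 0x80000000 when the truncated value is not
  // representable (NaN or out of range), which is why the encoding below
  // compares against it and falls into the slow path.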
 2907   enc_class DPR2I_encoding( regDPR src ) %{
 2908     // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value would
    // throw an exception (but normal values get converted at full speed).
 2911     // However, I2C adapters and other float-stack manglers leave pending
 2912     // invalid-op exceptions hanging.  We would have to clear them before
 2913     // enabling them and that is more expensive than just testing for the
 2914     // invalid value Intel stores down in the corner cases.
 2915     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2916     emit_opcode(cbuf,0x2D);
 2917     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2918     // Allocate a word
 2919     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2920     emit_opcode(cbuf,0xEC);
 2921     emit_d8(cbuf,0x04);
 2922     // Encoding assumes a double has been pushed into FPR0.
 2923     // Store down the double as an int, popping the FPU stack
 2924     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2925     emit_opcode(cbuf,0x1C);
 2926     emit_d8(cbuf,0x24);
 2927     // Restore the rounding mode; mask the exception
 2928     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2929     emit_opcode(cbuf,0x2D);
 2930     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2931         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2932         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2933 
 2934     // Load the converted int; adjust CPU stack
 2935     emit_opcode(cbuf,0x58);       // POP EAX
 2936     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2937     emit_d32   (cbuf,0x80000000); //         0x80000000
 2938     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2939     emit_d8    (cbuf,0x07);       // Size of slow_call
 2940     // Push src onto stack slow-path
 2941     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2942     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2943     // CALL directly to the runtime
 2944     MacroAssembler _masm(&cbuf);
 2945     cbuf.set_insts_mark();
 2946     emit_opcode(cbuf,0xE8);       // Call into runtime
 2947     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2948     __ post_call_nop();
 2949     // Carry on here...
 2950   %}
 2951 
 2952   enc_class DPR2L_encoding( regDPR src ) %{
 2953     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2954     emit_opcode(cbuf,0x2D);
 2955     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes) for the 64-bit result
 2957     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2958     emit_opcode(cbuf,0xEC);
 2959     emit_d8(cbuf,0x08);
 2960     // Encoding assumes a double has been pushed into FPR0.
 2961     // Store down the double as a long, popping the FPU stack
 2962     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2963     emit_opcode(cbuf,0x3C);
 2964     emit_d8(cbuf,0x24);
 2965     // Restore the rounding mode; mask the exception
 2966     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2967     emit_opcode(cbuf,0x2D);
 2968     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2969         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2970         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2971 
    // Load the converted long; adjust CPU stack
 2973     emit_opcode(cbuf,0x58);       // POP EAX
 2974     emit_opcode(cbuf,0x5A);       // POP EDX
 2975     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2976     emit_d8    (cbuf,0xFA);       // rdx
 2977     emit_d32   (cbuf,0x80000000); //         0x80000000
 2978     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2979     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2980     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2981     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2982     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2983     emit_d8    (cbuf,0x07);       // Size of slow_call
 2984     // Push src onto stack slow-path
 2985     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2986     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2987     // CALL directly to the runtime
 2988     MacroAssembler _masm(&cbuf);
 2989     cbuf.set_insts_mark();
 2990     emit_opcode(cbuf,0xE8);       // Call into runtime
 2991     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2992     __ post_call_nop();
 2993     // Carry on here...
 2994   %}
 2995 
 2996   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2997     // Operand was loaded from memory into fp ST (stack top)
 2998     // FMUL   ST,$src  /* D8 C8+i */
 2999     emit_opcode(cbuf, 0xD8);
 3000     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 3001   %}
 3002 
 3003   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
 3005     emit_opcode(cbuf, 0xD8);
 3006     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3007     //could use FADDP  src2,fpST  /* DE C0+i */
 3008   %}
 3009 
 3010   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3011     // FADDP  src2,ST  /* DE C0+i */
 3012     emit_opcode(cbuf, 0xDE);
 3013     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3014   %}
 3015 
 3016   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 3017     // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
 3025   %}
 3026 
 3027   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3028     // Operand was loaded from memory into fp ST (stack top)
 3029     // FADD   ST,$src  /* D8 C0+i */
 3030     emit_opcode(cbuf, 0xD8);
 3031     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3032 
    // FMUL  ST,src2  /* D8 C8+i */
 3034     emit_opcode(cbuf, 0xD8);
 3035     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3036   %}
 3037 
 3038 
 3039   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3040     // Operand was loaded from memory into fp ST (stack top)
 3041     // FADD   ST,$src  /* D8 C0+i */
 3042     emit_opcode(cbuf, 0xD8);
 3043     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3044 
 3045     // FMULP  src2,ST  /* DE C8+i */
 3046     emit_opcode(cbuf, 0xDE);
 3047     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3048   %}
 3049 
 3050   // Atomically load the volatile long
 3051   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3052     emit_opcode(cbuf,0xDF);
 3053     int rm_byte_opcode = 0x05;
 3054     int base     = $mem$$base;
 3055     int index    = $mem$$index;
 3056     int scale    = $mem$$scale;
 3057     int displace = $mem$$disp;
 3058     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3059     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3060     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3061   %}
 3062 
 3063   // Volatile Store Long.  Must be atomic, so move it into
 3064   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3065   // target address before the store (for null-ptr checks)
 3066   // so the memory operand is used twice in the encoding.
 3067   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3068     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3069     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3070     emit_opcode(cbuf,0xDF);
 3071     int rm_byte_opcode = 0x07;
 3072     int base     = $mem$$base;
 3073     int index    = $mem$$index;
 3074     int scale    = $mem$$scale;
 3075     int displace = $mem$$disp;
 3076     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3077     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3078   %}
 3079 
 3080 %}
 3081 
 3082 
 3083 //----------FRAME--------------------------------------------------------------
 3084 // Definition of frame structure and management information.
 3085 //
 3086 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3087 //                             |   (to get allocators register number
 3088 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3089 //  r   CALLER     |        |
 3090 //  o     |        +--------+      pad to even-align allocators stack-slot
 3091 //  w     V        |  pad0  |        numbers; owned by CALLER
 3092 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3093 //  h     ^        |   in   |  5
 3094 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3095 //  |     |        |        |  3
 3096 //  |     |        +--------+
 3097 //  V     |        | old out|      Empty on Intel, window on Sparc
 3098 //        |    old |preserve|      Must be even aligned.
 3099 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3100 //        |        |   in   |  3   area for Intel ret address
 3101 //     Owned by    |preserve|      Empty on Sparc.
 3102 //       SELF      +--------+
 3103 //        |        |  pad2  |  2   pad to align old SP
 3104 //        |        +--------+  1
 3105 //        |        | locks  |  0
 3106 //        |        +--------+----> OptoReg::stack0(), even aligned
 3107 //        |        |  pad1  | 11   pad to align new SP
 3108 //        |        +--------+
 3109 //        |        |        | 10
 3110 //        |        | spills |  9   spills
 3111 //        V        |        |  8   (pad0 slot for callee)
 3112 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3113 //        ^        |  out   |  7
 3114 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3115 //     Owned by    +--------+
 3116 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3117 //        |    new |preserve|      Must be even-aligned.
 3118 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3119 //        |        |        |
 3120 //
 3121 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3122 //         known from SELF's arguments and the Java calling convention.
 3123 //         Region 6-7 is determined per call site.
 3124 // Note 2: If the calling convention leaves holes in the incoming argument
 3125 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3126 //         are owned by the CALLEE.  Holes should not be necessary in the
 3127 //         incoming area, as the Java calling convention is completely under
 3128 //         the control of the AD file.  Doubles can be sorted and packed to
 3129 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3130 //         varargs C calling conventions.
 3131 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3132 //         even aligned with pad0 as needed.
 3133 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3134 //         region 6-11 is even aligned; it may be padded out more so that
 3135 //         the region from SP to FP meets the minimum stack alignment.
 3136 
 3137 frame %{
 3138   // These three registers define part of the calling convention
 3139   // between compiled code and the interpreter.
 3140   inline_cache_reg(EAX);                // Inline Cache Register
 3141 
 3142   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3143   cisc_spilling_operand_name(indOffset32);
 3144 
 3145   // Number of stack slots consumed by locking an object
 3146   sync_stack_slots(1);
 3147 
 3148   // Compiled code's Frame Pointer
 3149   frame_pointer(ESP);
 3150   // Interpreter stores its frame pointer in a register which is
 3151   // stored to the stack by I2CAdaptors.
 3152   // I2CAdaptors convert from interpreted java to compiled java.
 3153   interpreter_frame_pointer(EBP);
 3154 
 3155   // Stack alignment requirement
 3156   // Alignment size in bytes (128-bit -> 16 bytes)
 3157   stack_alignment(StackAlignmentInBytes);
 3158 
 3159   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3160   // for calls to C.  Supports the var-args backing area for register parms.
 3161   varargs_C_out_slots_killed(0);
 3162 
 3163   // The after-PROLOG location of the return address.  Location of
 3164   // return address specifies a type (REG or STACK) and a number
 3165   // representing the register number (i.e. - use a register name) or
 3166   // stack slot.
 3167   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3168   // Otherwise, it is above the locks and verification slot and alignment word
 3169   return_addr(STACK - 1 +
 3170               align_up((Compile::current()->in_preserve_stack_slots() +
 3171                         Compile::current()->fixed_slots()),
 3172                        stack_alignment_in_slots()));
 3173 
 3174   // Location of C & interpreter return values
 3175   c_return_value %{
 3176     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3177     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3178     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3179 
 3180     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3181     // that C functions return float and double results in XMM0.
 3182     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3183       return OptoRegPair(XMM0b_num,XMM0_num);
 3184     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3185       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3186 
 3187     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3188   %}
 3189 
 3190   // Location of return values
 3191   return_value %{
 3192     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3193     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3194     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3195     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3196       return OptoRegPair(XMM0b_num,XMM0_num);
 3197     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3198       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3199     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3200   %}
 3201 
 3202 %}
 3203 
 3204 //----------ATTRIBUTES---------------------------------------------------------
 3205 //----------Operand Attributes-------------------------------------------------
 3206 op_attrib op_cost(0);        // Required cost attribute
 3207 
 3208 //----------Instruction Attributes---------------------------------------------
 3209 ins_attrib ins_cost(100);       // Required cost attribute
 3210 ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
 3214 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3215                                 // specifies the alignment that some part of the instruction (not
 3216                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3217                                 // function must be provided for the instruction
 3218 
 3219 //----------OPERANDS-----------------------------------------------------------
 3220 // Operand definitions must precede instruction definitions for correct parsing
 3221 // in the ADLC because operands constitute user defined types which are used in
 3222 // instruction definitions.
 3223 
 3224 //----------Simple Operands----------------------------------------------------
 3225 // Immediate Operands
 3226 // Integer Immediate
 3227 operand immI() %{
 3228   match(ConI);
 3229 
 3230   op_cost(10);
 3231   format %{ %}
 3232   interface(CONST_INTER);
 3233 %}
 3234 
 3235 // Constant for test vs zero
 3236 operand immI_0() %{
 3237   predicate(n->get_int() == 0);
 3238   match(ConI);
 3239 
 3240   op_cost(0);
 3241   format %{ %}
 3242   interface(CONST_INTER);
 3243 %}
 3244 
 3245 // Constant for increment
 3246 operand immI_1() %{
 3247   predicate(n->get_int() == 1);
 3248   match(ConI);
 3249 
 3250   op_cost(0);
 3251   format %{ %}
 3252   interface(CONST_INTER);
 3253 %}
 3254 
 3255 // Constant for decrement
 3256 operand immI_M1() %{
 3257   predicate(n->get_int() == -1);
 3258   match(ConI);
 3259 
 3260   op_cost(0);
 3261   format %{ %}
 3262   interface(CONST_INTER);
 3263 %}
 3264 
 3265 // Valid scale values for addressing modes
 3266 operand immI2() %{
 3267   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3268   match(ConI);
 3269 
 3270   format %{ %}
 3271   interface(CONST_INTER);
 3272 %}
 3273 
 3274 operand immI8() %{
 3275   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3276   match(ConI);
 3277 
 3278   op_cost(5);
 3279   format %{ %}
 3280   interface(CONST_INTER);
 3281 %}
 3282 
 3283 operand immU8() %{
 3284   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3285   match(ConI);
 3286 
 3287   op_cost(5);
 3288   format %{ %}
 3289   interface(CONST_INTER);
 3290 %}
 3291 
 3292 operand immI16() %{
 3293   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3294   match(ConI);
 3295 
 3296   op_cost(10);
 3297   format %{ %}
 3298   interface(CONST_INTER);
 3299 %}
 3300 
 3301 // Int Immediate non-negative
 3302 operand immU31()
 3303 %{
 3304   predicate(n->get_int() >= 0);
 3305   match(ConI);
 3306 
 3307   op_cost(0);
 3308   format %{ %}
 3309   interface(CONST_INTER);
 3310 %}
 3311 
 3312 // Constant for long shifts
 3313 operand immI_32() %{
 3314   predicate( n->get_int() == 32 );
 3315   match(ConI);
 3316 
 3317   op_cost(0);
 3318   format %{ %}
 3319   interface(CONST_INTER);
 3320 %}
 3321 
 3322 operand immI_1_31() %{
 3323   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3324   match(ConI);
 3325 
 3326   op_cost(0);
 3327   format %{ %}
 3328   interface(CONST_INTER);
 3329 %}
 3330 
 3331 operand immI_32_63() %{
 3332   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3333   match(ConI);
 3334   op_cost(0);
 3335 
 3336   format %{ %}
 3337   interface(CONST_INTER);
 3338 %}
 3339 
 3340 operand immI_2() %{
 3341   predicate( n->get_int() == 2 );
 3342   match(ConI);
 3343 
 3344   op_cost(0);
 3345   format %{ %}
 3346   interface(CONST_INTER);
 3347 %}
 3348 
 3349 operand immI_3() %{
 3350   predicate( n->get_int() == 3 );
 3351   match(ConI);
 3352 
 3353   op_cost(0);
 3354   format %{ %}
 3355   interface(CONST_INTER);
 3356 %}
 3357 
 3358 operand immI_4()
 3359 %{
 3360   predicate(n->get_int() == 4);
 3361   match(ConI);
 3362 
 3363   op_cost(0);
 3364   format %{ %}
 3365   interface(CONST_INTER);
 3366 %}
 3367 
 3368 operand immI_8()
 3369 %{
 3370   predicate(n->get_int() == 8);
 3371   match(ConI);
 3372 
 3373   op_cost(0);
 3374   format %{ %}
 3375   interface(CONST_INTER);
 3376 %}
 3377 
 3378 // Pointer Immediate
 3379 operand immP() %{
 3380   match(ConP);
 3381 
 3382   op_cost(10);
 3383   format %{ %}
 3384   interface(CONST_INTER);
 3385 %}
 3386 
 3387 // Null Pointer Immediate
 3388 operand immP0() %{
 3389   predicate( n->get_ptr() == 0 );
 3390   match(ConP);
 3391   op_cost(0);
 3392 
 3393   format %{ %}
 3394   interface(CONST_INTER);
 3395 %}
 3396 
 3397 // Long Immediate
 3398 operand immL() %{
 3399   match(ConL);
 3400 
 3401   op_cost(20);
 3402   format %{ %}
 3403   interface(CONST_INTER);
 3404 %}
 3405 
 3406 // Long Immediate zero
 3407 operand immL0() %{
 3408   predicate( n->get_long() == 0L );
 3409   match(ConL);
 3410   op_cost(0);
 3411 
 3412   format %{ %}
 3413   interface(CONST_INTER);
 3414 %}
 3415 
// Long Immediate minus one
 3417 operand immL_M1() %{
 3418   predicate( n->get_long() == -1L );
 3419   match(ConL);
 3420   op_cost(0);
 3421 
 3422   format %{ %}
 3423   interface(CONST_INTER);
 3424 %}
 3425 
 3426 // Long immediate from 0 to 127.
 3427 // Used for a shorter form of long mul by 10.
 3428 operand immL_127() %{
 3429   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3430   match(ConL);
 3431   op_cost(0);
 3432 
 3433   format %{ %}
 3434   interface(CONST_INTER);
 3435 %}
 3436 
 3437 // Long Immediate: low 32-bit mask
 3438 operand immL_32bits() %{
 3439   predicate(n->get_long() == 0xFFFFFFFFL);
 3440   match(ConL);
 3441   op_cost(0);
 3442 
 3443   format %{ %}
 3444   interface(CONST_INTER);
 3445 %}
 3446 
// Long Immediate: fits in 32-bit signed range
 3448 operand immL32() %{
 3449   predicate(n->get_long() == (int)(n->get_long()));
 3450   match(ConL);
 3451   op_cost(20);
 3452 
 3453   format %{ %}
 3454   interface(CONST_INTER);
 3455 %}
 3456 
// Double Immediate zero
 3458 operand immDPR0() %{
 3459   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3460   // bug that generates code such that NaNs compare equal to 0.0
 3461   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3462   match(ConD);
 3463 
 3464   op_cost(5);
 3465   format %{ %}
 3466   interface(CONST_INTER);
 3467 %}
 3468 
 3469 // Double Immediate one
 3470 operand immDPR1() %{
 3471   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3472   match(ConD);
 3473 
 3474   op_cost(5);
 3475   format %{ %}
 3476   interface(CONST_INTER);
 3477 %}
 3478 
 3479 // Double Immediate
 3480 operand immDPR() %{
 3481   predicate(UseSSE<=1);
 3482   match(ConD);
 3483 
 3484   op_cost(5);
 3485   format %{ %}
 3486   interface(CONST_INTER);
 3487 %}
 3488 
 3489 operand immD() %{
 3490   predicate(UseSSE>=2);
 3491   match(ConD);
 3492 
 3493   op_cost(5);
 3494   format %{ %}
 3495   interface(CONST_INTER);
 3496 %}
 3497 
 3498 // Double Immediate zero
 3499 operand immD0() %{
 3500   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3501   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3502   // compare equal to -0.0.
 3503   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3504   match(ConD);
 3505 
 3506   format %{ %}
 3507   interface(CONST_INTER);
 3508 %}
 3509 
 3510 // Float Immediate zero
 3511 operand immFPR0() %{
 3512   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3513   match(ConF);
 3514 
 3515   op_cost(5);
 3516   format %{ %}
 3517   interface(CONST_INTER);
 3518 %}
 3519 
 3520 // Float Immediate one
 3521 operand immFPR1() %{
 3522   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3523   match(ConF);
 3524 
 3525   op_cost(5);
 3526   format %{ %}
 3527   interface(CONST_INTER);
 3528 %}
 3529 
 3530 // Float Immediate
 3531 operand immFPR() %{
 3532   predicate( UseSSE == 0 );
 3533   match(ConF);
 3534 
 3535   op_cost(5);
 3536   format %{ %}
 3537   interface(CONST_INTER);
 3538 %}
 3539 
 3540 // Float Immediate
 3541 operand immF() %{
 3542   predicate(UseSSE >= 1);
 3543   match(ConF);
 3544 
 3545   op_cost(5);
 3546   format %{ %}
 3547   interface(CONST_INTER);
 3548 %}
 3549 
 3550 // Float Immediate zero.  Zero and not -0.0
 3551 operand immF0() %{
 3552   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3553   match(ConF);
 3554 
 3555   op_cost(5);
 3556   format %{ %}
 3557   interface(CONST_INTER);
 3558 %}
 3559 
 3560 // Immediates for special shifts (sign extend)
 3561 
// Constants for sign-extension shift counts
 3563 operand immI_16() %{
 3564   predicate( n->get_int() == 16 );
 3565   match(ConI);
 3566 
 3567   format %{ %}
 3568   interface(CONST_INTER);
 3569 %}
 3570 
 3571 operand immI_24() %{
 3572   predicate( n->get_int() == 24 );
 3573   match(ConI);
 3574 
 3575   format %{ %}
 3576   interface(CONST_INTER);
 3577 %}
 3578 
 3579 // Constant for byte-wide masking
 3580 operand immI_255() %{
 3581   predicate( n->get_int() == 255 );
 3582   match(ConI);
 3583 
 3584   format %{ %}
 3585   interface(CONST_INTER);
 3586 %}
 3587 
 3588 // Constant for short-wide masking
 3589 operand immI_65535() %{
 3590   predicate(n->get_int() == 65535);
 3591   match(ConI);
 3592 
 3593   format %{ %}
 3594   interface(CONST_INTER);
 3595 %}
 3596 
 3597 operand kReg()
 3598 %{
 3599   constraint(ALLOC_IN_RC(vectmask_reg));
 3600   match(RegVectMask);
 3601   format %{%}
 3602   interface(REG_INTER);
 3603 %}
 3604 
 3605 operand kReg_K1()
 3606 %{
 3607   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3608   match(RegVectMask);
 3609   format %{%}
 3610   interface(REG_INTER);
 3611 %}
 3612 
 3613 operand kReg_K2()
 3614 %{
 3615   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3616   match(RegVectMask);
 3617   format %{%}
 3618   interface(REG_INTER);
 3619 %}
 3620 
 3621 // Special Registers
 3622 operand kReg_K3()
 3623 %{
 3624   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3625   match(RegVectMask);
 3626   format %{%}
 3627   interface(REG_INTER);
 3628 %}
 3629 
 3630 operand kReg_K4()
 3631 %{
 3632   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3633   match(RegVectMask);
 3634   format %{%}
 3635   interface(REG_INTER);
 3636 %}
 3637 
 3638 operand kReg_K5()
 3639 %{
 3640   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3641   match(RegVectMask);
 3642   format %{%}
 3643   interface(REG_INTER);
 3644 %}
 3645 
 3646 operand kReg_K6()
 3647 %{
 3648   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3649   match(RegVectMask);
 3650   format %{%}
 3651   interface(REG_INTER);
 3652 %}
 3653 
 3654 // Special Registers
 3655 operand kReg_K7()
 3656 %{
 3657   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3658   match(RegVectMask);
 3659   format %{%}
 3660   interface(REG_INTER);
 3661 %}
 3662 
 3663 // Register Operands
 3664 // Integer Register
 3665 operand rRegI() %{
 3666   constraint(ALLOC_IN_RC(int_reg));
 3667   match(RegI);
 3668   match(xRegI);
 3669   match(eAXRegI);
 3670   match(eBXRegI);
 3671   match(eCXRegI);
 3672   match(eDXRegI);
 3673   match(eDIRegI);
 3674   match(eSIRegI);
 3675 
 3676   format %{ %}
 3677   interface(REG_INTER);
 3678 %}
 3679 
 3680 // Subset of Integer Register
 3681 operand xRegI(rRegI reg) %{
 3682   constraint(ALLOC_IN_RC(int_x_reg));
 3683   match(reg);
 3684   match(eAXRegI);
 3685   match(eBXRegI);
 3686   match(eCXRegI);
 3687   match(eDXRegI);
 3688 
 3689   format %{ %}
 3690   interface(REG_INTER);
 3691 %}
 3692 
 3693 // Special Registers
 3694 operand eAXRegI(xRegI reg) %{
 3695   constraint(ALLOC_IN_RC(eax_reg));
 3696   match(reg);
 3697   match(rRegI);
 3698 
 3699   format %{ "EAX" %}
 3700   interface(REG_INTER);
 3701 %}
 3702 
 3703 // Special Registers
 3704 operand eBXRegI(xRegI reg) %{
 3705   constraint(ALLOC_IN_RC(ebx_reg));
 3706   match(reg);
 3707   match(rRegI);
 3708 
 3709   format %{ "EBX" %}
 3710   interface(REG_INTER);
 3711 %}
 3712 
 3713 operand eCXRegI(xRegI reg) %{
 3714   constraint(ALLOC_IN_RC(ecx_reg));
 3715   match(reg);
 3716   match(rRegI);
 3717 
 3718   format %{ "ECX" %}
 3719   interface(REG_INTER);
 3720 %}
 3721 
 3722 operand eDXRegI(xRegI reg) %{
 3723   constraint(ALLOC_IN_RC(edx_reg));
 3724   match(reg);
 3725   match(rRegI);
 3726 
 3727   format %{ "EDX" %}
 3728   interface(REG_INTER);
 3729 %}
 3730 
 3731 operand eDIRegI(xRegI reg) %{
 3732   constraint(ALLOC_IN_RC(edi_reg));
 3733   match(reg);
 3734   match(rRegI);
 3735 
 3736   format %{ "EDI" %}
 3737   interface(REG_INTER);
 3738 %}
 3739 
 3740 operand naxRegI() %{
 3741   constraint(ALLOC_IN_RC(nax_reg));
 3742   match(RegI);
 3743   match(eCXRegI);
 3744   match(eDXRegI);
 3745   match(eSIRegI);
 3746   match(eDIRegI);
 3747 
 3748   format %{ %}
 3749   interface(REG_INTER);
 3750 %}
 3751 
 3752 operand nadxRegI() %{
 3753   constraint(ALLOC_IN_RC(nadx_reg));
 3754   match(RegI);
 3755   match(eBXRegI);
 3756   match(eCXRegI);
 3757   match(eSIRegI);
 3758   match(eDIRegI);
 3759 
 3760   format %{ %}
 3761   interface(REG_INTER);
 3762 %}
 3763 
 3764 operand ncxRegI() %{
 3765   constraint(ALLOC_IN_RC(ncx_reg));
 3766   match(RegI);
 3767   match(eAXRegI);
 3768   match(eDXRegI);
 3769   match(eSIRegI);
 3770   match(eDIRegI);
 3771 
 3772   format %{ %}
 3773   interface(REG_INTER);
 3774 %}
 3775 
 3776 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3777 // //
 3778 operand eSIRegI(xRegI reg) %{
 3779    constraint(ALLOC_IN_RC(esi_reg));
 3780    match(reg);
 3781    match(rRegI);
 3782 
 3783    format %{ "ESI" %}
 3784    interface(REG_INTER);
 3785 %}
 3786 
 3787 // Pointer Register
 3788 operand anyRegP() %{
 3789   constraint(ALLOC_IN_RC(any_reg));
 3790   match(RegP);
 3791   match(eAXRegP);
 3792   match(eBXRegP);
 3793   match(eCXRegP);
 3794   match(eDIRegP);
 3795   match(eRegP);
 3796 
 3797   format %{ %}
 3798   interface(REG_INTER);
 3799 %}
 3800 
 3801 operand eRegP() %{
 3802   constraint(ALLOC_IN_RC(int_reg));
 3803   match(RegP);
 3804   match(eAXRegP);
 3805   match(eBXRegP);
 3806   match(eCXRegP);
 3807   match(eDIRegP);
 3808 
 3809   format %{ %}
 3810   interface(REG_INTER);
 3811 %}
 3812 
 3813 operand rRegP() %{
 3814   constraint(ALLOC_IN_RC(int_reg));
 3815   match(RegP);
 3816   match(eAXRegP);
 3817   match(eBXRegP);
 3818   match(eCXRegP);
 3819   match(eDIRegP);
 3820 
 3821   format %{ %}
 3822   interface(REG_INTER);
 3823 %}
 3824 
 3825 // On windows95, EBP is not safe to use for implicit null tests.
 3826 operand eRegP_no_EBP() %{
 3827   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3828   match(RegP);
 3829   match(eAXRegP);
 3830   match(eBXRegP);
 3831   match(eCXRegP);
 3832   match(eDIRegP);
 3833 
 3834   op_cost(100);
 3835   format %{ %}
 3836   interface(REG_INTER);
 3837 %}
 3838 
 3839 operand naxRegP() %{
 3840   constraint(ALLOC_IN_RC(nax_reg));
 3841   match(RegP);
 3842   match(eBXRegP);
 3843   match(eDXRegP);
 3844   match(eCXRegP);
 3845   match(eSIRegP);
 3846   match(eDIRegP);
 3847 
 3848   format %{ %}
 3849   interface(REG_INTER);
 3850 %}
 3851 
 3852 operand nabxRegP() %{
 3853   constraint(ALLOC_IN_RC(nabx_reg));
 3854   match(RegP);
 3855   match(eCXRegP);
 3856   match(eDXRegP);
 3857   match(eSIRegP);
 3858   match(eDIRegP);
 3859 
 3860   format %{ %}
 3861   interface(REG_INTER);
 3862 %}
 3863 
 3864 operand pRegP() %{
 3865   constraint(ALLOC_IN_RC(p_reg));
 3866   match(RegP);
 3867   match(eBXRegP);
 3868   match(eDXRegP);
 3869   match(eSIRegP);
 3870   match(eDIRegP);
 3871 
 3872   format %{ %}
 3873   interface(REG_INTER);
 3874 %}
 3875 
 3876 // Special Registers
 3877 // Return a pointer value
 3878 operand eAXRegP(eRegP reg) %{
 3879   constraint(ALLOC_IN_RC(eax_reg));
 3880   match(reg);
 3881   format %{ "EAX" %}
 3882   interface(REG_INTER);
 3883 %}
 3884 
 3885 // Used in AtomicAdd
 3886 operand eBXRegP(eRegP reg) %{
 3887   constraint(ALLOC_IN_RC(ebx_reg));
 3888   match(reg);
 3889   format %{ "EBX" %}
 3890   interface(REG_INTER);
 3891 %}
 3892 
 3893 // Tail-call (interprocedural jump) to interpreter
 3894 operand eCXRegP(eRegP reg) %{
 3895   constraint(ALLOC_IN_RC(ecx_reg));
 3896   match(reg);
 3897   format %{ "ECX" %}
 3898   interface(REG_INTER);
 3899 %}
 3900 
 3901 operand eDXRegP(eRegP reg) %{
 3902   constraint(ALLOC_IN_RC(edx_reg));
 3903   match(reg);
 3904   format %{ "EDX" %}
 3905   interface(REG_INTER);
 3906 %}
 3907 
 3908 operand eSIRegP(eRegP reg) %{
 3909   constraint(ALLOC_IN_RC(esi_reg));
 3910   match(reg);
 3911   format %{ "ESI" %}
 3912   interface(REG_INTER);
 3913 %}
 3914 
 3915 // Used in rep stosw
 3916 operand eDIRegP(eRegP reg) %{
 3917   constraint(ALLOC_IN_RC(edi_reg));
 3918   match(reg);
 3919   format %{ "EDI" %}
 3920   interface(REG_INTER);
 3921 %}
 3922 
 3923 operand eRegL() %{
 3924   constraint(ALLOC_IN_RC(long_reg));
 3925   match(RegL);
 3926   match(eADXRegL);
 3927 
 3928   format %{ %}
 3929   interface(REG_INTER);
 3930 %}
 3931 
 3932 operand eADXRegL( eRegL reg ) %{
 3933   constraint(ALLOC_IN_RC(eadx_reg));
 3934   match(reg);
 3935 
 3936   format %{ "EDX:EAX" %}
 3937   interface(REG_INTER);
 3938 %}
 3939 
 3940 operand eBCXRegL( eRegL reg ) %{
 3941   constraint(ALLOC_IN_RC(ebcx_reg));
 3942   match(reg);
 3943 
 3944   format %{ "EBX:ECX" %}
 3945   interface(REG_INTER);
 3946 %}
 3947 
 3948 operand eBDPRegL( eRegL reg ) %{
 3949   constraint(ALLOC_IN_RC(ebpd_reg));
 3950   match(reg);
 3951 
 3952   format %{ "EBP:EDI" %}
 3953   interface(REG_INTER);
 3954 %}
 3955 // Special case for integer high multiply
 3956 operand eADXRegL_low_only() %{
 3957   constraint(ALLOC_IN_RC(eadx_reg));
 3958   match(RegL);
 3959 
 3960   format %{ "EAX" %}
 3961   interface(REG_INTER);
 3962 %}
 3963 
 3964 // Flags register, used as output of compare instructions
 3965 operand rFlagsReg() %{
 3966   constraint(ALLOC_IN_RC(int_flags));
 3967   match(RegFlags);
 3968 
 3969   format %{ "EFLAGS" %}
 3970   interface(REG_INTER);
 3971 %}
 3972 
 3973 // Flags register, used as output of compare instructions
 3974 operand eFlagsReg() %{
 3975   constraint(ALLOC_IN_RC(int_flags));
 3976   match(RegFlags);
 3977 
 3978   format %{ "EFLAGS" %}
 3979   interface(REG_INTER);
 3980 %}
 3981 
 3982 // Flags register, used as output of FLOATING POINT compare instructions
 3983 operand eFlagsRegU() %{
 3984   constraint(ALLOC_IN_RC(int_flags));
 3985   match(RegFlags);
 3986 
 3987   format %{ "EFLAGS_U" %}
 3988   interface(REG_INTER);
 3989 %}
 3990 
 3991 operand eFlagsRegUCF() %{
 3992   constraint(ALLOC_IN_RC(int_flags));
 3993   match(RegFlags);
 3994   predicate(false);
 3995 
 3996   format %{ "EFLAGS_U_CF" %}
 3997   interface(REG_INTER);
 3998 %}
 3999 
 4000 // Condition Code Register used by long compare
 4001 operand flagsReg_long_LTGE() %{
 4002   constraint(ALLOC_IN_RC(int_flags));
 4003   match(RegFlags);
 4004   format %{ "FLAGS_LTGE" %}
 4005   interface(REG_INTER);
 4006 %}
 4007 operand flagsReg_long_EQNE() %{
 4008   constraint(ALLOC_IN_RC(int_flags));
 4009   match(RegFlags);
 4010   format %{ "FLAGS_EQNE" %}
 4011   interface(REG_INTER);
 4012 %}
 4013 operand flagsReg_long_LEGT() %{
 4014   constraint(ALLOC_IN_RC(int_flags));
 4015   match(RegFlags);
 4016   format %{ "FLAGS_LEGT" %}
 4017   interface(REG_INTER);
 4018 %}
 4019 
 4020 // Condition Code Register used by unsigned long compare
 4021 operand flagsReg_ulong_LTGE() %{
 4022   constraint(ALLOC_IN_RC(int_flags));
 4023   match(RegFlags);
 4024   format %{ "FLAGS_U_LTGE" %}
 4025   interface(REG_INTER);
 4026 %}
 4027 operand flagsReg_ulong_EQNE() %{
 4028   constraint(ALLOC_IN_RC(int_flags));
 4029   match(RegFlags);
 4030   format %{ "FLAGS_U_EQNE" %}
 4031   interface(REG_INTER);
 4032 %}
 4033 operand flagsReg_ulong_LEGT() %{
 4034   constraint(ALLOC_IN_RC(int_flags));
 4035   match(RegFlags);
 4036   format %{ "FLAGS_U_LEGT" %}
 4037   interface(REG_INTER);
 4038 %}
 4039 
// Double register operands (x87)
 4041 operand regDPR() %{
 4042   predicate( UseSSE < 2 );
 4043   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4044   match(RegD);
 4045   match(regDPR1);
 4046   match(regDPR2);
 4047   format %{ %}
 4048   interface(REG_INTER);
 4049 %}
 4050 
 4051 operand regDPR1(regDPR reg) %{
 4052   predicate( UseSSE < 2 );
 4053   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4054   match(reg);
 4055   format %{ "FPR1" %}
 4056   interface(REG_INTER);
 4057 %}
 4058 
 4059 operand regDPR2(regDPR reg) %{
 4060   predicate( UseSSE < 2 );
 4061   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4062   match(reg);
 4063   format %{ "FPR2" %}
 4064   interface(REG_INTER);
 4065 %}
 4066 
 4067 operand regnotDPR1(regDPR reg) %{
 4068   predicate( UseSSE < 2 );
 4069   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4070   match(reg);
 4071   format %{ %}
 4072   interface(REG_INTER);
 4073 %}
 4074 
 4075 // Float register operands
 4076 operand regFPR() %{
 4077   predicate( UseSSE < 2 );
 4078   constraint(ALLOC_IN_RC(fp_flt_reg));
 4079   match(RegF);
 4080   match(regFPR1);
 4081   format %{ %}
 4082   interface(REG_INTER);
 4083 %}
 4084 
 4085 // Float register operands
 4086 operand regFPR1(regFPR reg) %{
 4087   predicate( UseSSE < 2 );
 4088   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4089   match(reg);
 4090   format %{ "FPR1" %}
 4091   interface(REG_INTER);
 4092 %}
 4093 
 4094 // XMM Float register operands
 4095 operand regF() %{
 4096   predicate( UseSSE>=1 );
 4097   constraint(ALLOC_IN_RC(float_reg_legacy));
 4098   match(RegF);
 4099   format %{ %}
 4100   interface(REG_INTER);
 4101 %}
 4102 
 4103 operand legRegF() %{
 4104   predicate( UseSSE>=1 );
 4105   constraint(ALLOC_IN_RC(float_reg_legacy));
 4106   match(RegF);
 4107   format %{ %}
 4108   interface(REG_INTER);
 4109 %}
 4110 
 4111 // Float register operands
 4112 operand vlRegF() %{
 4113    constraint(ALLOC_IN_RC(float_reg_vl));
 4114    match(RegF);
 4115 
 4116    format %{ %}
 4117    interface(REG_INTER);
 4118 %}
 4119 
 4120 // XMM Double register operands
 4121 operand regD() %{
 4122   predicate( UseSSE>=2 );
 4123   constraint(ALLOC_IN_RC(double_reg_legacy));
 4124   match(RegD);
 4125   format %{ %}
 4126   interface(REG_INTER);
 4127 %}
 4128 
 4129 // Double register operands
 4130 operand legRegD() %{
 4131   predicate( UseSSE>=2 );
 4132   constraint(ALLOC_IN_RC(double_reg_legacy));
 4133   match(RegD);
 4134   format %{ %}
 4135   interface(REG_INTER);
 4136 %}
 4137 
 4138 operand vlRegD() %{
 4139    constraint(ALLOC_IN_RC(double_reg_vl));
 4140    match(RegD);
 4141 
 4142    format %{ %}
 4143    interface(REG_INTER);
 4144 %}
 4145 
 4146 //----------Memory Operands----------------------------------------------------
 4147 // Direct Memory Operand
 4148 operand direct(immP addr) %{
 4149   match(addr);
 4150 
 4151   format %{ "[$addr]" %}
 4152   interface(MEMORY_INTER) %{
 4153     base(0xFFFFFFFF);
 4154     index(0x4);
 4155     scale(0x0);
 4156     disp($addr);
 4157   %}
 4158 %}
 4159 
 4160 // Indirect Memory Operand
 4161 operand indirect(eRegP reg) %{
 4162   constraint(ALLOC_IN_RC(int_reg));
 4163   match(reg);
 4164 
 4165   format %{ "[$reg]" %}
 4166   interface(MEMORY_INTER) %{
 4167     base($reg);
 4168     index(0x4);
 4169     scale(0x0);
 4170     disp(0x0);
 4171   %}
 4172 %}
 4173 
 4174 // Indirect Memory Plus Short Offset Operand
 4175 operand indOffset8(eRegP reg, immI8 off) %{
 4176   match(AddP reg off);
 4177 
 4178   format %{ "[$reg + $off]" %}
 4179   interface(MEMORY_INTER) %{
 4180     base($reg);
 4181     index(0x4);
 4182     scale(0x0);
 4183     disp($off);
 4184   %}
 4185 %}
 4186 
 4187 // Indirect Memory Plus Long Offset Operand
 4188 operand indOffset32(eRegP reg, immI off) %{
 4189   match(AddP reg off);
 4190 
 4191   format %{ "[$reg + $off]" %}
 4192   interface(MEMORY_INTER) %{
 4193     base($reg);
 4194     index(0x4);
 4195     scale(0x0);
 4196     disp($off);
 4197   %}
 4198 %}
 4199 
 4200 // Indirect Memory Plus Long Offset Operand
 4201 operand indOffset32X(rRegI reg, immP off) %{
 4202   match(AddP off reg);
 4203 
 4204   format %{ "[$reg + $off]" %}
 4205   interface(MEMORY_INTER) %{
 4206     base($reg);
 4207     index(0x4);
 4208     scale(0x0);
 4209     disp($off);
 4210   %}
 4211 %}
 4212 
 4213 // Indirect Memory Plus Index Register Plus Offset Operand
 4214 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4215   match(AddP (AddP reg ireg) off);
 4216 
 4217   op_cost(10);
 4218   format %{"[$reg + $off + $ireg]" %}
 4219   interface(MEMORY_INTER) %{
 4220     base($reg);
 4221     index($ireg);
 4222     scale(0x0);
 4223     disp($off);
 4224   %}
 4225 %}
 4226 
 4227 // Indirect Memory Plus Index Register Plus Offset Operand
 4228 operand indIndex(eRegP reg, rRegI ireg) %{
 4229   match(AddP reg ireg);
 4230 
 4231   op_cost(10);
 4232   format %{"[$reg + $ireg]" %}
 4233   interface(MEMORY_INTER) %{
 4234     base($reg);
 4235     index($ireg);
 4236     scale(0x0);
 4237     disp(0x0);
 4238   %}
 4239 %}
 4240 
 4241 // // -------------------------------------------------------------------------
 4242 // // 486 architecture doesn't support "scale * index + offset" with out a base
 4243 // // -------------------------------------------------------------------------
 4244 // // Scaled Memory Operands
 4245 // // Indirect Memory Times Scale Plus Offset Operand
 4246 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4247 //   match(AddP off (LShiftI ireg scale));
 4248 //
 4249 //   op_cost(10);
 4250 //   format %{"[$off + $ireg << $scale]" %}
 4251 //   interface(MEMORY_INTER) %{
 4252 //     base(0x4);
 4253 //     index($ireg);
 4254 //     scale($scale);
 4255 //     disp($off);
 4256 //   %}
 4257 // %}
 4258 
 4259 // Indirect Memory Times Scale Plus Index Register
 4260 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4261   match(AddP reg (LShiftI ireg scale));
 4262 
 4263   op_cost(10);
 4264   format %{"[$reg + $ireg << $scale]" %}
 4265   interface(MEMORY_INTER) %{
 4266     base($reg);
 4267     index($ireg);
 4268     scale($scale);
 4269     disp(0x0);
 4270   %}
 4271 %}
 4272 
 4273 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4274 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4275   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4276 
 4277   op_cost(10);
 4278   format %{"[$reg + $off + $ireg << $scale]" %}
 4279   interface(MEMORY_INTER) %{
 4280     base($reg);
 4281     index($ireg);
 4282     scale($scale);
 4283     disp($off);
 4284   %}
 4285 %}
 4286 
 4287 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
 4289 // the first word of the long.  If the load-long destination overlaps with
 4290 // registers used in the addressing expression, the 2nd half will be loaded
 4291 // from a clobbered address.  Fix this by requiring that load-long use
 4292 // address registers that do not overlap with the load-long target.
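// For example (illustrative only): with the address in EAX and EDX:EAX as the
// long destination, loading the low word into EAX would clobber the base
// register before the high word at [EAX+4] could be read.  Forcing the
// address into ESI below sidesteps the overlap.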
 4293 
 4294 // load-long support
 4295 operand load_long_RegP() %{
 4296   constraint(ALLOC_IN_RC(esi_reg));
 4297   match(RegP);
 4298   match(eSIRegP);
 4299   op_cost(100);
 4300   format %{  %}
 4301   interface(REG_INTER);
 4302 %}
 4303 
 4304 // Indirect Memory Operand Long
 4305 operand load_long_indirect(load_long_RegP reg) %{
 4306   constraint(ALLOC_IN_RC(esi_reg));
 4307   match(reg);
 4308 
 4309   format %{ "[$reg]" %}
 4310   interface(MEMORY_INTER) %{
 4311     base($reg);
 4312     index(0x4);
 4313     scale(0x0);
 4314     disp(0x0);
 4315   %}
 4316 %}
 4317 
 4318 // Indirect Memory Plus Long Offset Operand
 4319 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4320   match(AddP reg off);
 4321 
 4322   format %{ "[$reg + $off]" %}
 4323   interface(MEMORY_INTER) %{
 4324     base($reg);
 4325     index(0x4);
 4326     scale(0x0);
 4327     disp($off);
 4328   %}
 4329 %}
 4330 
 4331 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4332 
 4333 
 4334 //----------Special Memory Operands--------------------------------------------
 4335 // Stack Slot Operand - This operand is used for loading and storing temporary
 4336 //                      values on the stack where a match requires a value to
 4337 //                      flow through memory.
 4338 operand stackSlotP(sRegP reg) %{
 4339   constraint(ALLOC_IN_RC(stack_slots));
 4340   // No match rule because this operand is only generated in matching
 4341   format %{ "[$reg]" %}
 4342   interface(MEMORY_INTER) %{
 4343     base(0x4);   // ESP
 4344     index(0x4);  // No Index
 4345     scale(0x0);  // No Scale
 4346     disp($reg);  // Stack Offset
 4347   %}
 4348 %}
 4349 
 4350 operand stackSlotI(sRegI reg) %{
 4351   constraint(ALLOC_IN_RC(stack_slots));
 4352   // No match rule because this operand is only generated in matching
 4353   format %{ "[$reg]" %}
 4354   interface(MEMORY_INTER) %{
 4355     base(0x4);   // ESP
 4356     index(0x4);  // No Index
 4357     scale(0x0);  // No Scale
 4358     disp($reg);  // Stack Offset
 4359   %}
 4360 %}
 4361 
 4362 operand stackSlotF(sRegF reg) %{
 4363   constraint(ALLOC_IN_RC(stack_slots));
 4364   // No match rule because this operand is only generated in matching
 4365   format %{ "[$reg]" %}
 4366   interface(MEMORY_INTER) %{
 4367     base(0x4);   // ESP
 4368     index(0x4);  // No Index
 4369     scale(0x0);  // No Scale
 4370     disp($reg);  // Stack Offset
 4371   %}
 4372 %}
 4373 
 4374 operand stackSlotD(sRegD reg) %{
 4375   constraint(ALLOC_IN_RC(stack_slots));
 4376   // No match rule because this operand is only generated in matching
 4377   format %{ "[$reg]" %}
 4378   interface(MEMORY_INTER) %{
 4379     base(0x4);   // ESP
 4380     index(0x4);  // No Index
 4381     scale(0x0);  // No Scale
 4382     disp($reg);  // Stack Offset
 4383   %}
 4384 %}
 4385 
 4386 operand stackSlotL(sRegL reg) %{
 4387   constraint(ALLOC_IN_RC(stack_slots));
 4388   // No match rule because this operand is only generated in matching
 4389   format %{ "[$reg]" %}
 4390   interface(MEMORY_INTER) %{
 4391     base(0x4);   // ESP
 4392     index(0x4);  // No Index
 4393     scale(0x0);  // No Scale
 4394     disp($reg);  // Stack Offset
 4395   %}
 4396 %}
 4397 
 4398 //----------Conditional Branch Operands----------------------------------------
 4399 // Comparison Op  - This is the operation of the comparison, and is limited to
 4400 //                  the following set of codes:
 4401 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4402 //
 4403 // Other attributes of the comparison, such as unsignedness, are specified
 4404 // by the comparison instruction that sets a condition code flags register.
 4405 // That result is represented by a flags operand whose subtype is appropriate
 4406 // to the unsignedness (etc.) of the comparison.
 4407 //
 4408 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4409 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4410 // by matching a specific subtype of Bool operand below, such as cmpOpU.
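// The numeric values in cmpOp/cmpOpU below are x86 condition-code nibbles: a
// short conditional jump is encoded as 0x70+cc and a near one as 0x0F 0x80+cc,
// so equal(0x4, "e") corresponds to JE (0x74 / 0x0F 0x84).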
 4411 
 4412 // Comparison Code
 4413 operand cmpOp() %{
 4414   match(Bool);
 4415 
 4416   format %{ "" %}
 4417   interface(COND_INTER) %{
 4418     equal(0x4, "e");
 4419     not_equal(0x5, "ne");
 4420     less(0xC, "l");
 4421     greater_equal(0xD, "ge");
 4422     less_equal(0xE, "le");
 4423     greater(0xF, "g");
 4424     overflow(0x0, "o");
 4425     no_overflow(0x1, "no");
 4426   %}
 4427 %}
 4428 
 4429 // Comparison Code, unsigned compare.  Used by FP also, with
 4430 // C2 (unordered) turned into GT or LT already.  The other bits
 4431 // C0 and C3 are turned into Carry & Zero flags.
 4432 operand cmpOpU() %{
 4433   match(Bool);
 4434 
 4435   format %{ "" %}
 4436   interface(COND_INTER) %{
 4437     equal(0x4, "e");
 4438     not_equal(0x5, "ne");
 4439     less(0x2, "b");
 4440     greater_equal(0x3, "nb");
 4441     less_equal(0x6, "be");
 4442     greater(0x7, "nbe");
 4443     overflow(0x0, "o");
 4444     no_overflow(0x1, "no");
 4445   %}
 4446 %}
 4447 
 4448 // Floating comparisons that don't require any fixup for the unordered case
 4449 operand cmpOpUCF() %{
 4450   match(Bool);
 4451   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4452             n->as_Bool()->_test._test == BoolTest::ge ||
 4453             n->as_Bool()->_test._test == BoolTest::le ||
 4454             n->as_Bool()->_test._test == BoolTest::gt);
 4455   format %{ "" %}
 4456   interface(COND_INTER) %{
 4457     equal(0x4, "e");
 4458     not_equal(0x5, "ne");
 4459     less(0x2, "b");
 4460     greater_equal(0x3, "nb");
 4461     less_equal(0x6, "be");
 4462     greater(0x7, "nbe");
 4463     overflow(0x0, "o");
 4464     no_overflow(0x1, "no");
 4465   %}
 4466 %}
 4467 
 4468 
 4469 // Floating comparisons that can be fixed up with extra conditional jumps
 4470 operand cmpOpUCF2() %{
 4471   match(Bool);
 4472   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4473             n->as_Bool()->_test._test == BoolTest::eq);
 4474   format %{ "" %}
 4475   interface(COND_INTER) %{
 4476     equal(0x4, "e");
 4477     not_equal(0x5, "ne");
 4478     less(0x2, "b");
 4479     greater_equal(0x3, "nb");
 4480     less_equal(0x6, "be");
 4481     greater(0x7, "nbe");
 4482     overflow(0x0, "o");
 4483     no_overflow(0x1, "no");
 4484   %}
 4485 %}
 4486 
 4487 // Comparison Code for FP conditional move
 4488 operand cmpOp_fcmov() %{
 4489   match(Bool);
 4490 
 4491   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4492             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4493   format %{ "" %}
 4494   interface(COND_INTER) %{
 4495     equal        (0x0C8);
 4496     not_equal    (0x1C8);
 4497     less         (0x0C0);
 4498     greater_equal(0x1C0);
 4499     less_equal   (0x0D0);
 4500     greater      (0x1D0);
 4501     overflow(0x0, "o"); // not really supported by the instruction
 4502     no_overflow(0x1, "no"); // not really supported by the instruction
 4503   %}
 4504 %}
 4505 
 4506 // Comparison Code used in long compares
 4507 operand cmpOp_commute() %{
 4508   match(Bool);
 4509 
 4510   format %{ "" %}
 4511   interface(COND_INTER) %{
 4512     equal(0x4, "e");
 4513     not_equal(0x5, "ne");
 4514     less(0xF, "g");
 4515     greater_equal(0xE, "le");
 4516     less_equal(0xD, "ge");
 4517     greater(0xC, "l");
 4518     overflow(0x0, "o");
 4519     no_overflow(0x1, "no");
 4520   %}
 4521 %}
 4522 
 4523 // Comparison Code used in unsigned long compares
 4524 operand cmpOpU_commute() %{
 4525   match(Bool);
 4526 
 4527   format %{ "" %}
 4528   interface(COND_INTER) %{
 4529     equal(0x4, "e");
 4530     not_equal(0x5, "ne");
 4531     less(0x7, "nbe");
 4532     greater_equal(0x6, "be");
 4533     less_equal(0x3, "nb");
 4534     greater(0x2, "b");
 4535     overflow(0x0, "o");
 4536     no_overflow(0x1, "no");
 4537   %}
 4538 %}
 4539 
 4540 //----------OPERAND CLASSES----------------------------------------------------
 4541 // Operand Classes are groups of operands that are used to simplify
 4542 // instruction definitions by not requiring the AD writer to specify separate
 4543 // instructions for every form of operand when the instruction accepts
 4544 // multiple operand types with the same basic encoding and format.  The classic
 4545 // case of this is memory operands.
 4546 
 4547 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4548                indIndex, indIndexScale, indIndexScaleOffset);
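
      // For example, a single rule written against the "memory" opclass, such as
      // the loadI instruction defined later in this file,
      //
      //   instruct loadI(rRegI dst, memory mem) %{
      //     match(Set dst (LoadI mem));
      //     ...
      //   %}
      //
      // covers every addressing form listed above (register indirect, base+offset,
      // base+index*scale+offset, ...) instead of needing one instruct per mode.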
 4549 
 4550 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4551 // This means some kind of offset is always required and you cannot use
 4552 // an oop as the offset (as is done when accessing static globals).
 4553 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4554                     indIndex, indIndexScale, indIndexScaleOffset);
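
      // Concretely, the encodings of long rules address the two 32-bit halves
      // separately with a +4 displacement on the high word, e.g. (from loadL below):
      //
      //   Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp,     relocInfo::none);
      //   Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
      //
      // which is why the displacement must be a plain offset and never an oop.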
 4555 
 4556 
 4557 //----------PIPELINE-----------------------------------------------------------
 4558 // Rules which define the behavior of the target architecture's pipeline.
 4559 pipeline %{
 4560 
 4561 //----------ATTRIBUTES---------------------------------------------------------
 4562 attributes %{
 4563   variable_size_instructions;        // Variable-sized instructions
 4564   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4565   instruction_unit_size = 1;         // An instruction is 1 byte long
 4566   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4567   instruction_fetch_units = 1;       // of 16 bytes
 4568 
 4569   // List of nop instructions
 4570   nops( MachNop );
 4571 %}
 4572 
 4573 //----------RESOURCES----------------------------------------------------------
 4574 // Resources are the functional units available to the machine
 4575 
 4576 // Generic P2/P3 pipeline
 4577 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4578 // 3 instructions decoded per cycle.
 4579 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4580 // 2 ALU op, only ALU0 handles mul/div instructions.
 4581 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4582            MS0, MS1, MEM = MS0 | MS1,
 4583            BR, FPU,
 4584            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4585 
 4586 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4587 // Pipeline Description specifies the stages in the machine's pipeline
 4588 
 4589 // Generic P2/P3 pipeline
 4590 pipe_desc(S0, S1, S2, S3, S4, S5);
 4591 
 4592 //----------PIPELINE CLASSES---------------------------------------------------
 4593 // Pipeline Classes describe the stages in which input and output are
 4594 // referenced by the hardware pipeline.
 4595 
 4596 // Naming convention: ialu or fpu
 4597 // Then: _reg
 4598 // Then: _reg if there is a 2nd register
 4599 // Then: _long if it's a pair of instructions implementing a long
 4600 // Then: _fat if it requires the big decoder
 4601 //   Or: _mem if it requires the big decoder and a memory unit.
 4602 
 4603 // Integer ALU reg operation
 4604 pipe_class ialu_reg(rRegI dst) %{
 4605     single_instruction;
 4606     dst    : S4(write);
 4607     dst    : S3(read);
 4608     DECODE : S0;        // any decoder
 4609     ALU    : S3;        // any alu
 4610 %}
 4611 
 4612 // Long ALU reg operation
 4613 pipe_class ialu_reg_long(eRegL dst) %{
 4614     instruction_count(2);
 4615     dst    : S4(write);
 4616     dst    : S3(read);
 4617     DECODE : S0(2);     // any 2 decoders
 4618     ALU    : S3(2);     // both alus
 4619 %}
 4620 
 4621 // Integer ALU reg operation using big decoder
 4622 pipe_class ialu_reg_fat(rRegI dst) %{
 4623     single_instruction;
 4624     dst    : S4(write);
 4625     dst    : S3(read);
 4626     D0     : S0;        // big decoder only
 4627     ALU    : S3;        // any alu
 4628 %}
 4629 
 4630 // Long ALU reg operation using big decoder
 4631 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4632     instruction_count(2);
 4633     dst    : S4(write);
 4634     dst    : S3(read);
 4635     D0     : S0(2);     // big decoder only; twice
 4636     ALU    : S3(2);     // any 2 alus
 4637 %}
 4638 
 4639 // Integer ALU reg-reg operation
 4640 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4641     single_instruction;
 4642     dst    : S4(write);
 4643     src    : S3(read);
 4644     DECODE : S0;        // any decoder
 4645     ALU    : S3;        // any alu
 4646 %}
 4647 
 4648 // Long ALU reg-reg operation
 4649 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4650     instruction_count(2);
 4651     dst    : S4(write);
 4652     src    : S3(read);
 4653     DECODE : S0(2);     // any 2 decoders
 4654     ALU    : S3(2);     // both alus
 4655 %}
 4656 
 4657 // Integer ALU reg-reg operation
 4658 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4659     single_instruction;
 4660     dst    : S4(write);
 4661     src    : S3(read);
 4662     D0     : S0;        // big decoder only
 4663     ALU    : S3;        // any alu
 4664 %}
 4665 
 4666 // Long ALU reg-reg operation
 4667 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4668     instruction_count(2);
 4669     dst    : S4(write);
 4670     src    : S3(read);
 4671     D0     : S0(2);     // big decoder only; twice
 4672     ALU    : S3(2);     // both alus
 4673 %}
 4674 
 4675 // Integer ALU reg-mem operation
 4676 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4677     single_instruction;
 4678     dst    : S5(write);
 4679     mem    : S3(read);
 4680     D0     : S0;        // big decoder only
 4681     ALU    : S4;        // any alu
 4682     MEM    : S3;        // any mem
 4683 %}
 4684 
 4685 // Long ALU reg-mem operation
 4686 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4687     instruction_count(2);
 4688     dst    : S5(write);
 4689     mem    : S3(read);
 4690     D0     : S0(2);     // big decoder only; twice
 4691     ALU    : S4(2);     // any 2 alus
 4692     MEM    : S3(2);     // both mems
 4693 %}
 4694 
 4695 // Integer mem operation (prefetch)
 4696 pipe_class ialu_mem(memory mem)
 4697 %{
 4698     single_instruction;
 4699     mem    : S3(read);
 4700     D0     : S0;        // big decoder only
 4701     MEM    : S3;        // any mem
 4702 %}
 4703 
 4704 // Integer Store to Memory
 4705 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4706     single_instruction;
 4707     mem    : S3(read);
 4708     src    : S5(read);
 4709     D0     : S0;        // big decoder only
 4710     ALU    : S4;        // any alu
 4711     MEM    : S3;
 4712 %}
 4713 
 4714 // Long Store to Memory
 4715 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4716     instruction_count(2);
 4717     mem    : S3(read);
 4718     src    : S5(read);
 4719     D0     : S0(2);     // big decoder only; twice
 4720     ALU    : S4(2);     // any 2 alus
 4721     MEM    : S3(2);     // Both mems
 4722 %}
 4723 
 4724 // Integer Store to Memory
 4725 pipe_class ialu_mem_imm(memory mem) %{
 4726     single_instruction;
 4727     mem    : S3(read);
 4728     D0     : S0;        // big decoder only
 4729     ALU    : S4;        // any alu
 4730     MEM    : S3;
 4731 %}
 4732 
 4733 // Integer ALU0 reg-reg operation
 4734 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4735     single_instruction;
 4736     dst    : S4(write);
 4737     src    : S3(read);
 4738     D0     : S0;        // Big decoder only
 4739     ALU0   : S3;        // only alu0
 4740 %}
 4741 
 4742 // Integer ALU0 reg-mem operation
 4743 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4744     single_instruction;
 4745     dst    : S5(write);
 4746     mem    : S3(read);
 4747     D0     : S0;        // big decoder only
 4748     ALU0   : S4;        // ALU0 only
 4749     MEM    : S3;        // any mem
 4750 %}
 4751 
 4752 // Integer ALU reg-reg operation
 4753 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4754     single_instruction;
 4755     cr     : S4(write);
 4756     src1   : S3(read);
 4757     src2   : S3(read);
 4758     DECODE : S0;        // any decoder
 4759     ALU    : S3;        // any alu
 4760 %}
 4761 
 4762 // Integer ALU reg-imm operation
 4763 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4764     single_instruction;
 4765     cr     : S4(write);
 4766     src1   : S3(read);
 4767     DECODE : S0;        // any decoder
 4768     ALU    : S3;        // any alu
 4769 %}
 4770 
 4771 // Integer ALU reg-mem operation
 4772 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4773     single_instruction;
 4774     cr     : S4(write);
 4775     src1   : S3(read);
 4776     src2   : S3(read);
 4777     D0     : S0;        // big decoder only
 4778     ALU    : S4;        // any alu
 4779     MEM    : S3;
 4780 %}
 4781 
 4782 // Conditional move reg-reg
 4783 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4784     instruction_count(4);
 4785     y      : S4(read);
 4786     q      : S3(read);
 4787     p      : S3(read);
 4788     DECODE : S0(4);     // any decoder
 4789 %}
 4790 
 4791 // Conditional move reg-reg
 4792 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4793     single_instruction;
 4794     dst    : S4(write);
 4795     src    : S3(read);
 4796     cr     : S3(read);
 4797     DECODE : S0;        // any decoder
 4798 %}
 4799 
 4800 // Conditional move reg-mem
 4801 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4802     single_instruction;
 4803     dst    : S4(write);
 4804     src    : S3(read);
 4805     cr     : S3(read);
 4806     DECODE : S0;        // any decoder
 4807     MEM    : S3;
 4808 %}
 4809 
 4810 // Conditional move reg-reg long
 4811 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4812     single_instruction;
 4813     dst    : S4(write);
 4814     src    : S3(read);
 4815     cr     : S3(read);
 4816     DECODE : S0(2);     // any 2 decoders
 4817 %}
 4818 
 4819 // Conditional move double reg-reg
 4820 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4821     single_instruction;
 4822     dst    : S4(write);
 4823     src    : S3(read);
 4824     cr     : S3(read);
 4825     DECODE : S0;        // any decoder
 4826 %}
 4827 
 4828 // Float reg-reg operation
 4829 pipe_class fpu_reg(regDPR dst) %{
 4830     instruction_count(2);
 4831     dst    : S3(read);
 4832     DECODE : S0(2);     // any 2 decoders
 4833     FPU    : S3;
 4834 %}
 4835 
 4836 // Float reg-reg operation
 4837 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4838     instruction_count(2);
 4839     dst    : S4(write);
 4840     src    : S3(read);
 4841     DECODE : S0(2);     // any 2 decoders
 4842     FPU    : S3;
 4843 %}
 4844 
 4845 // Float reg-reg operation
 4846 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4847     instruction_count(3);
 4848     dst    : S4(write);
 4849     src1   : S3(read);
 4850     src2   : S3(read);
 4851     DECODE : S0(3);     // any 3 decoders
 4852     FPU    : S3(2);
 4853 %}
 4854 
 4855 // Float reg-reg operation
 4856 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4857     instruction_count(4);
 4858     dst    : S4(write);
 4859     src1   : S3(read);
 4860     src2   : S3(read);
 4861     src3   : S3(read);
 4862     DECODE : S0(4);     // any 4 decode slots
 4863     FPU    : S3(2);
 4864 %}
 4865 
 4866 // Float reg-reg operation
 4867 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4868     instruction_count(4);
 4869     dst    : S4(write);
 4870     src1   : S3(read);
 4871     src2   : S3(read);
 4872     src3   : S3(read);
 4873     DECODE : S1(3);     // any 3 decoders
 4874     D0     : S0;        // Big decoder only
 4875     FPU    : S3(2);
 4876     MEM    : S3;
 4877 %}
 4878 
 4879 // Float reg-mem operation
 4880 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4881     instruction_count(2);
 4882     dst    : S5(write);
 4883     mem    : S3(read);
 4884     D0     : S0;        // big decoder only
 4885     DECODE : S1;        // any decoder for FPU POP
 4886     FPU    : S4;
 4887     MEM    : S3;        // any mem
 4888 %}
 4889 
 4890 // Float reg-mem operation
 4891 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4892     instruction_count(3);
 4893     dst    : S5(write);
 4894     src1   : S3(read);
 4895     mem    : S3(read);
 4896     D0     : S0;        // big decoder only
 4897     DECODE : S1(2);     // any decoder for FPU POP
 4898     FPU    : S4;
 4899     MEM    : S3;        // any mem
 4900 %}
 4901 
 4902 // Float mem-reg operation
 4903 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4904     instruction_count(2);
 4905     src    : S5(read);
 4906     mem    : S3(read);
 4907     DECODE : S0;        // any decoder for FPU PUSH
 4908     D0     : S1;        // big decoder only
 4909     FPU    : S4;
 4910     MEM    : S3;        // any mem
 4911 %}
 4912 
 4913 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4914     instruction_count(3);
 4915     src1   : S3(read);
 4916     src2   : S3(read);
 4917     mem    : S3(read);
 4918     DECODE : S0(2);     // any decoder for FPU PUSH
 4919     D0     : S1;        // big decoder only
 4920     FPU    : S4;
 4921     MEM    : S3;        // any mem
 4922 %}
 4923 
 4924 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4925     instruction_count(3);
 4926     src1   : S3(read);
 4927     src2   : S3(read);
 4928     mem    : S4(read);
 4929     DECODE : S0;        // any decoder for FPU PUSH
 4930     D0     : S0(2);     // big decoder only
 4931     FPU    : S4;
 4932     MEM    : S3(2);     // any mem
 4933 %}
 4934 
 4935 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4936     instruction_count(2);
 4937     src1   : S3(read);
 4938     dst    : S4(read);
 4939     D0     : S0(2);     // big decoder only
 4940     MEM    : S3(2);     // any mem
 4941 %}
 4942 
 4943 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4944     instruction_count(3);
 4945     src1   : S3(read);
 4946     src2   : S3(read);
 4947     dst    : S4(read);
 4948     D0     : S0(3);     // big decoder only
 4949     FPU    : S4;
 4950     MEM    : S3(3);     // any mem
 4951 %}
 4952 
 4953 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4954     instruction_count(3);
 4955     src1   : S4(read);
 4956     mem    : S4(read);
 4957     DECODE : S0;        // any decoder for FPU PUSH
 4958     D0     : S0(2);     // big decoder only
 4959     FPU    : S4;
 4960     MEM    : S3(2);     // any mem
 4961 %}
 4962 
 4963 // Float load constant
 4964 pipe_class fpu_reg_con(regDPR dst) %{
 4965     instruction_count(2);
 4966     dst    : S5(write);
 4967     D0     : S0;        // big decoder only for the load
 4968     DECODE : S1;        // any decoder for FPU POP
 4969     FPU    : S4;
 4970     MEM    : S3;        // any mem
 4971 %}
 4972 
 4973 // Float load constant
 4974 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4975     instruction_count(3);
 4976     dst    : S5(write);
 4977     src    : S3(read);
 4978     D0     : S0;        // big decoder only for the load
 4979     DECODE : S1(2);     // any decoder for FPU POP
 4980     FPU    : S4;
 4981     MEM    : S3;        // any mem
 4982 %}
 4983 
 4984 // Unconditional branch
 4985 pipe_class pipe_jmp( label labl ) %{
 4986     single_instruction;
 4987     BR   : S3;
 4988 %}
 4989 
 4990 // Conditional branch
 4991 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4992     single_instruction;
 4993     cr    : S1(read);
 4994     BR    : S3;
 4995 %}
 4996 
 4997 // Allocation idiom
 4998 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4999     instruction_count(1); force_serialization;
 5000     fixed_latency(6);
 5001     heap_ptr : S3(read);
 5002     DECODE   : S0(3);
 5003     D0       : S2;
 5004     MEM      : S3;
 5005     ALU      : S3(2);
 5006     dst      : S5(write);
 5007     BR       : S5;
 5008 %}
 5009 
 5010 // Generic big/slow expanded idiom
 5011 pipe_class pipe_slow(  ) %{
 5012     instruction_count(10); multiple_bundles; force_serialization;
 5013     fixed_latency(100);
 5014     D0  : S0(2);
 5015     MEM : S3(2);
 5016 %}
 5017 
 5018 // The real do-nothing guy
 5019 pipe_class empty( ) %{
 5020     instruction_count(0);
 5021 %}
 5022 
 5023 // Define the class for the Nop node
 5024 define %{
 5025    MachNop = empty;
 5026 %}
 5027 
 5028 %}
 5029 
 5030 //----------INSTRUCTIONS-------------------------------------------------------
 5031 //
 5032 // match      -- States which machine-independent subtree may be replaced
 5033 //               by this instruction.
 5034 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5035 //               selection to identify a minimum cost tree of machine
 5036 //               instructions that matches a tree of machine-independent
 5037 //               instructions.
 5038 // format     -- A string providing the disassembly for this instruction.
 5039 //               The value of an instruction's operand may be inserted
 5040 //               by referring to it with a '$' prefix.
 5041 // opcode     -- Up to three instruction opcodes may be provided.  These are referred
 5042 //               to within an encode class as $primary, $secondary, and $tertiary
 5043 //               respectively.  The primary opcode is commonly used to
 5044 //               indicate the type of machine instruction, while secondary
 5045 //               and tertiary are often used for prefix options or addressing
 5046 //               modes.
 5047 // ins_encode -- A list of encode classes with parameters. The encode class
 5048 //               name must have been defined in an 'enc_class' specification
 5049 //               in the encode section of the architecture description.
 5050 
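      // Taken together, a simple load rule such as loadP (defined below) uses each
      // of these attributes:
      //
      //   instruct loadP(eRegP dst, memory mem) %{
      //     match(Set dst (LoadP mem));           // subtree this rule can replace
      //     ins_cost(125);                        // cost estimate for selection
      //     format %{ "MOV    $dst,$mem" %}       // disassembly string
      //     opcode(0x8B);                         // $primary opcode
      //     ins_encode( OpcP, RegMem(dst,mem) );  // encode classes from the encode section
      //     ins_pipe( ialu_reg_mem );             // pipeline class defined above
      //   %}
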
 5051 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5052 // Load Float
 5053 instruct MoveF2LEG(legRegF dst, regF src) %{
 5054   match(Set dst src);
 5055   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5056   ins_encode %{
 5057     ShouldNotReachHere();
 5058   %}
 5059   ins_pipe( fpu_reg_reg );
 5060 %}
 5061 
 5062 // Load Float
 5063 instruct MoveLEG2F(regF dst, legRegF src) %{
 5064   match(Set dst src);
 5065   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5066   ins_encode %{
 5067     ShouldNotReachHere();
 5068   %}
 5069   ins_pipe( fpu_reg_reg );
 5070 %}
 5071 
 5072 // Load Float
 5073 instruct MoveF2VL(vlRegF dst, regF src) %{
 5074   match(Set dst src);
 5075   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5076   ins_encode %{
 5077     ShouldNotReachHere();
 5078   %}
 5079   ins_pipe( fpu_reg_reg );
 5080 %}
 5081 
 5082 // Load Float
 5083 instruct MoveVL2F(regF dst, vlRegF src) %{
 5084   match(Set dst src);
 5085   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5086   ins_encode %{
 5087     ShouldNotReachHere();
 5088   %}
 5089   ins_pipe( fpu_reg_reg );
 5090 %}
 5091 
 5092 
 5093 
 5094 // Load Double
 5095 instruct MoveD2LEG(legRegD dst, regD src) %{
 5096   match(Set dst src);
 5097   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5098   ins_encode %{
 5099     ShouldNotReachHere();
 5100   %}
 5101   ins_pipe( fpu_reg_reg );
 5102 %}
 5103 
 5104 // Load Double
 5105 instruct MoveLEG2D(regD dst, legRegD src) %{
 5106   match(Set dst src);
 5107   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5108   ins_encode %{
 5109     ShouldNotReachHere();
 5110   %}
 5111   ins_pipe( fpu_reg_reg );
 5112 %}
 5113 
 5114 // Load Double
 5115 instruct MoveD2VL(vlRegD dst, regD src) %{
 5116   match(Set dst src);
 5117   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5118   ins_encode %{
 5119     ShouldNotReachHere();
 5120   %}
 5121   ins_pipe( fpu_reg_reg );
 5122 %}
 5123 
 5124 // Load Double
 5125 instruct MoveVL2D(regD dst, vlRegD src) %{
 5126   match(Set dst src);
 5127   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5128   ins_encode %{
 5129     ShouldNotReachHere();
 5130   %}
 5131   ins_pipe( fpu_reg_reg );
 5132 %}
 5133 
 5134 //----------BSWAP-Instruction--------------------------------------------------
 5135 instruct bytes_reverse_int(rRegI dst) %{
 5136   match(Set dst (ReverseBytesI dst));
 5137 
 5138   format %{ "BSWAP  $dst" %}
 5139   opcode(0x0F, 0xC8);
 5140   ins_encode( OpcP, OpcSReg(dst) );
 5141   ins_pipe( ialu_reg );
 5142 %}
 5143 
 5144 instruct bytes_reverse_long(eRegL dst) %{
 5145   match(Set dst (ReverseBytesL dst));
 5146 
 5147   format %{ "BSWAP  $dst.lo\n\t"
 5148             "BSWAP  $dst.hi\n\t"
 5149             "XCHG   $dst.lo $dst.hi" %}
 5150 
 5151   ins_cost(125);
 5152   ins_encode( bswap_long_bytes(dst) );
 5153   ins_pipe( ialu_reg_reg);
 5154 %}
 5155 
 5156 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5157   match(Set dst (ReverseBytesUS dst));
 5158   effect(KILL cr);
 5159 
 5160   format %{ "BSWAP  $dst\n\t"
 5161             "SHR    $dst,16\n\t" %}
 5162   ins_encode %{
 5163     __ bswapl($dst$$Register);
 5164     __ shrl($dst$$Register, 16);
 5165   %}
 5166   ins_pipe( ialu_reg );
 5167 %}
 5168 
 5169 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5170   match(Set dst (ReverseBytesS dst));
 5171   effect(KILL cr);
 5172 
 5173   format %{ "BSWAP  $dst\n\t"
 5174             "SAR    $dst,16\n\t" %}
 5175   ins_encode %{
 5176     __ bswapl($dst$$Register);
 5177     __ sarl($dst$$Register, 16);
 5178   %}
 5179   ins_pipe( ialu_reg );
 5180 %}
 5181 
 5182 
 5183 //---------- Zeros Count Instructions ------------------------------------------
 5184 
 5185 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5186   predicate(UseCountLeadingZerosInstruction);
 5187   match(Set dst (CountLeadingZerosI src));
 5188   effect(KILL cr);
 5189 
 5190   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5191   ins_encode %{
 5192     __ lzcntl($dst$$Register, $src$$Register);
 5193   %}
 5194   ins_pipe(ialu_reg);
 5195 %}
 5196 
 5197 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5198   predicate(!UseCountLeadingZerosInstruction);
 5199   match(Set dst (CountLeadingZerosI src));
 5200   effect(KILL cr);
 5201 
 5202   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5203             "JNZ    skip\n\t"
 5204             "MOV    $dst, -1\n"
 5205       "skip:\n\t"
 5206             "NEG    $dst\n\t"
 5207             "ADD    $dst, 31" %}
 5208   ins_encode %{
 5209     Register Rdst = $dst$$Register;
 5210     Register Rsrc = $src$$Register;
 5211     Label skip;
 5212     __ bsrl(Rdst, Rsrc);
 5213     __ jccb(Assembler::notZero, skip);
 5214     __ movl(Rdst, -1);
 5215     __ bind(skip);
 5216     __ negl(Rdst);
 5217     __ addl(Rdst, BitsPerInt - 1);
 5218   %}
 5219   ins_pipe(ialu_reg);
 5220 %}
 5221 
 5222 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5223   predicate(UseCountLeadingZerosInstruction);
 5224   match(Set dst (CountLeadingZerosL src));
 5225   effect(TEMP dst, KILL cr);
 5226 
 5227   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5228             "JNC    done\n\t"
 5229             "LZCNT  $dst, $src.lo\n\t"
 5230             "ADD    $dst, 32\n"
 5231       "done:" %}
 5232   ins_encode %{
 5233     Register Rdst = $dst$$Register;
 5234     Register Rsrc = $src$$Register;
 5235     Label done;
 5236     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5237     __ jccb(Assembler::carryClear, done);
 5238     __ lzcntl(Rdst, Rsrc);
 5239     __ addl(Rdst, BitsPerInt);
 5240     __ bind(done);
 5241   %}
 5242   ins_pipe(ialu_reg);
 5243 %}
 5244 
 5245 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5246   predicate(!UseCountLeadingZerosInstruction);
 5247   match(Set dst (CountLeadingZerosL src));
 5248   effect(TEMP dst, KILL cr);
 5249 
 5250   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5251             "JZ     msw_is_zero\n\t"
 5252             "ADD    $dst, 32\n\t"
 5253             "JMP    not_zero\n"
 5254       "msw_is_zero:\n\t"
 5255             "BSR    $dst, $src.lo\n\t"
 5256             "JNZ    not_zero\n\t"
 5257             "MOV    $dst, -1\n"
 5258       "not_zero:\n\t"
 5259             "NEG    $dst\n\t"
 5260             "ADD    $dst, 63\n" %}
 5261   ins_encode %{
 5262     Register Rdst = $dst$$Register;
 5263     Register Rsrc = $src$$Register;
 5264     Label msw_is_zero;
 5265     Label not_zero;
 5266     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5267     __ jccb(Assembler::zero, msw_is_zero);
 5268     __ addl(Rdst, BitsPerInt);
 5269     __ jmpb(not_zero);
 5270     __ bind(msw_is_zero);
 5271     __ bsrl(Rdst, Rsrc);
 5272     __ jccb(Assembler::notZero, not_zero);
 5273     __ movl(Rdst, -1);
 5274     __ bind(not_zero);
 5275     __ negl(Rdst);
 5276     __ addl(Rdst, BitsPerLong - 1);
 5277   %}
 5278   ins_pipe(ialu_reg);
 5279 %}
 5280 
 5281 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5282   predicate(UseCountTrailingZerosInstruction);
 5283   match(Set dst (CountTrailingZerosI src));
 5284   effect(KILL cr);
 5285 
 5286   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5287   ins_encode %{
 5288     __ tzcntl($dst$$Register, $src$$Register);
 5289   %}
 5290   ins_pipe(ialu_reg);
 5291 %}
 5292 
 5293 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5294   predicate(!UseCountTrailingZerosInstruction);
 5295   match(Set dst (CountTrailingZerosI src));
 5296   effect(KILL cr);
 5297 
 5298   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5299             "JNZ    done\n\t"
 5300             "MOV    $dst, 32\n"
 5301       "done:" %}
 5302   ins_encode %{
 5303     Register Rdst = $dst$$Register;
 5304     Label done;
 5305     __ bsfl(Rdst, $src$$Register);
 5306     __ jccb(Assembler::notZero, done);
 5307     __ movl(Rdst, BitsPerInt);
 5308     __ bind(done);
 5309   %}
 5310   ins_pipe(ialu_reg);
 5311 %}
 5312 
 5313 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5314   predicate(UseCountTrailingZerosInstruction);
 5315   match(Set dst (CountTrailingZerosL src));
 5316   effect(TEMP dst, KILL cr);
 5317 
 5318   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5319             "JNC    done\n\t"
 5320             "TZCNT  $dst, $src.hi\n\t"
 5321             "ADD    $dst, 32\n"
 5322       "done:" %}
 5323   ins_encode %{
 5324     Register Rdst = $dst$$Register;
 5325     Register Rsrc = $src$$Register;
 5326     Label done;
 5327     __ tzcntl(Rdst, Rsrc);
 5328     __ jccb(Assembler::carryClear, done);
 5329     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5330     __ addl(Rdst, BitsPerInt);
 5331     __ bind(done);
 5332   %}
 5333   ins_pipe(ialu_reg);
 5334 %}
 5335 
 5336 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5337   predicate(!UseCountTrailingZerosInstruction);
 5338   match(Set dst (CountTrailingZerosL src));
 5339   effect(TEMP dst, KILL cr);
 5340 
 5341   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5342             "JNZ    done\n\t"
 5343             "BSF    $dst, $src.hi\n\t"
 5344             "JNZ    msw_not_zero\n\t"
 5345             "MOV    $dst, 32\n"
 5346       "msw_not_zero:\n\t"
 5347             "ADD    $dst, 32\n"
 5348       "done:" %}
 5349   ins_encode %{
 5350     Register Rdst = $dst$$Register;
 5351     Register Rsrc = $src$$Register;
 5352     Label msw_not_zero;
 5353     Label done;
 5354     __ bsfl(Rdst, Rsrc);
 5355     __ jccb(Assembler::notZero, done);
 5356     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5357     __ jccb(Assembler::notZero, msw_not_zero);
 5358     __ movl(Rdst, BitsPerInt);
 5359     __ bind(msw_not_zero);
 5360     __ addl(Rdst, BitsPerInt);
 5361     __ bind(done);
 5362   %}
 5363   ins_pipe(ialu_reg);
 5364 %}
 5365 
 5366 
 5367 //---------- Population Count Instructions -------------------------------------
 5368 
 5369 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5370   predicate(UsePopCountInstruction);
 5371   match(Set dst (PopCountI src));
 5372   effect(KILL cr);
 5373 
 5374   format %{ "POPCNT $dst, $src" %}
 5375   ins_encode %{
 5376     __ popcntl($dst$$Register, $src$$Register);
 5377   %}
 5378   ins_pipe(ialu_reg);
 5379 %}
 5380 
 5381 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5382   predicate(UsePopCountInstruction);
 5383   match(Set dst (PopCountI (LoadI mem)));
 5384   effect(KILL cr);
 5385 
 5386   format %{ "POPCNT $dst, $mem" %}
 5387   ins_encode %{
 5388     __ popcntl($dst$$Register, $mem$$Address);
 5389   %}
 5390   ins_pipe(ialu_reg);
 5391 %}
 5392 
 5393 // Note: Long.bitCount(long) returns an int.
 5394 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5395   predicate(UsePopCountInstruction);
 5396   match(Set dst (PopCountL src));
 5397   effect(KILL cr, TEMP tmp, TEMP dst);
 5398 
 5399   format %{ "POPCNT $dst, $src.lo\n\t"
 5400             "POPCNT $tmp, $src.hi\n\t"
 5401             "ADD    $dst, $tmp" %}
 5402   ins_encode %{
 5403     __ popcntl($dst$$Register, $src$$Register);
 5404     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5405     __ addl($dst$$Register, $tmp$$Register);
 5406   %}
 5407   ins_pipe(ialu_reg);
 5408 %}
 5409 
 5410 // Note: Long.bitCount(long) returns an int.
 5411 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5412   predicate(UsePopCountInstruction);
 5413   match(Set dst (PopCountL (LoadL mem)));
 5414   effect(KILL cr, TEMP tmp, TEMP dst);
 5415 
 5416   format %{ "POPCNT $dst, $mem\n\t"
 5417             "POPCNT $tmp, $mem+4\n\t"
 5418             "ADD    $dst, $tmp" %}
 5419   ins_encode %{
 5420     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5421     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5422     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5423     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5424     __ addl($dst$$Register, $tmp$$Register);
 5425   %}
 5426   ins_pipe(ialu_reg);
 5427 %}
 5428 
 5429 
 5430 //----------Load/Store/Move Instructions---------------------------------------
 5431 //----------Load Instructions--------------------------------------------------
 5432 // Load Byte (8bit signed)
 5433 instruct loadB(xRegI dst, memory mem) %{
 5434   match(Set dst (LoadB mem));
 5435 
 5436   ins_cost(125);
 5437   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5438 
 5439   ins_encode %{
 5440     __ movsbl($dst$$Register, $mem$$Address);
 5441   %}
 5442 
 5443   ins_pipe(ialu_reg_mem);
 5444 %}
 5445 
 5446 // Load Byte (8bit signed) into Long Register
 5447 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5448   match(Set dst (ConvI2L (LoadB mem)));
 5449   effect(KILL cr);
 5450 
 5451   ins_cost(375);
 5452   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5453             "MOV    $dst.hi,$dst.lo\n\t"
 5454             "SAR    $dst.hi,7" %}
 5455 
 5456   ins_encode %{
 5457     __ movsbl($dst$$Register, $mem$$Address);
 5458     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5459     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSBs are already sign-extended, so shifting by 7 fills dst.hi with the sign.
 5460   %}
 5461 
 5462   ins_pipe(ialu_reg_mem);
 5463 %}
 5464 
 5465 // Load Unsigned Byte (8bit UNsigned)
 5466 instruct loadUB(xRegI dst, memory mem) %{
 5467   match(Set dst (LoadUB mem));
 5468 
 5469   ins_cost(125);
 5470   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5471 
 5472   ins_encode %{
 5473     __ movzbl($dst$$Register, $mem$$Address);
 5474   %}
 5475 
 5476   ins_pipe(ialu_reg_mem);
 5477 %}
 5478 
 5479 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5480 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5481   match(Set dst (ConvI2L (LoadUB mem)));
 5482   effect(KILL cr);
 5483 
 5484   ins_cost(250);
 5485   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5486             "XOR    $dst.hi,$dst.hi" %}
 5487 
 5488   ins_encode %{
 5489     Register Rdst = $dst$$Register;
 5490     __ movzbl(Rdst, $mem$$Address);
 5491     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5492   %}
 5493 
 5494   ins_pipe(ialu_reg_mem);
 5495 %}
 5496 
 5497 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5498 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5499   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5500   effect(KILL cr);
 5501 
 5502   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5503             "XOR    $dst.hi,$dst.hi\n\t"
 5504             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5505   ins_encode %{
 5506     Register Rdst = $dst$$Register;
 5507     __ movzbl(Rdst, $mem$$Address);
 5508     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5509     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5510   %}
 5511   ins_pipe(ialu_reg_mem);
 5512 %}
 5513 
 5514 // Load Short (16bit signed)
 5515 instruct loadS(rRegI dst, memory mem) %{
 5516   match(Set dst (LoadS mem));
 5517 
 5518   ins_cost(125);
 5519   format %{ "MOVSX  $dst,$mem\t# short" %}
 5520 
 5521   ins_encode %{
 5522     __ movswl($dst$$Register, $mem$$Address);
 5523   %}
 5524 
 5525   ins_pipe(ialu_reg_mem);
 5526 %}
 5527 
 5528 // Load Short (16 bit signed) to Byte (8 bit signed)
 5529 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5530   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5531 
 5532   ins_cost(125);
 5533   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5534   ins_encode %{
 5535     __ movsbl($dst$$Register, $mem$$Address);
 5536   %}
 5537   ins_pipe(ialu_reg_mem);
 5538 %}
 5539 
 5540 // Load Short (16bit signed) into Long Register
 5541 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5542   match(Set dst (ConvI2L (LoadS mem)));
 5543   effect(KILL cr);
 5544 
 5545   ins_cost(375);
 5546   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5547             "MOV    $dst.hi,$dst.lo\n\t"
 5548             "SAR    $dst.hi,15" %}
 5549 
 5550   ins_encode %{
 5551     __ movswl($dst$$Register, $mem$$Address);
 5552     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5553     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSBs are already sign-extended, so shifting by 15 fills dst.hi with the sign.
 5554   %}
 5555 
 5556   ins_pipe(ialu_reg_mem);
 5557 %}
 5558 
 5559 // Load Unsigned Short/Char (16bit unsigned)
 5560 instruct loadUS(rRegI dst, memory mem) %{
 5561   match(Set dst (LoadUS mem));
 5562 
 5563   ins_cost(125);
 5564   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5565 
 5566   ins_encode %{
 5567     __ movzwl($dst$$Register, $mem$$Address);
 5568   %}
 5569 
 5570   ins_pipe(ialu_reg_mem);
 5571 %}
 5572 
 5573 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5574 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5575   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5576 
 5577   ins_cost(125);
 5578   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5579   ins_encode %{
 5580     __ movsbl($dst$$Register, $mem$$Address);
 5581   %}
 5582   ins_pipe(ialu_reg_mem);
 5583 %}
 5584 
 5585 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5586 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5587   match(Set dst (ConvI2L (LoadUS mem)));
 5588   effect(KILL cr);
 5589 
 5590   ins_cost(250);
 5591   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5592             "XOR    $dst.hi,$dst.hi" %}
 5593 
 5594   ins_encode %{
 5595     __ movzwl($dst$$Register, $mem$$Address);
 5596     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5597   %}
 5598 
 5599   ins_pipe(ialu_reg_mem);
 5600 %}
 5601 
 5602 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5603 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5604   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5605   effect(KILL cr);
 5606 
 5607   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5608             "XOR    $dst.hi,$dst.hi" %}
 5609   ins_encode %{
 5610     Register Rdst = $dst$$Register;
 5611     __ movzbl(Rdst, $mem$$Address);
 5612     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5613   %}
 5614   ins_pipe(ialu_reg_mem);
 5615 %}
 5616 
 5617 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5618 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5619   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5620   effect(KILL cr);
 5621 
 5622   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5623             "XOR    $dst.hi,$dst.hi\n\t"
 5624             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5625   ins_encode %{
 5626     Register Rdst = $dst$$Register;
 5627     __ movzwl(Rdst, $mem$$Address);
 5628     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5629     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5630   %}
 5631   ins_pipe(ialu_reg_mem);
 5632 %}
 5633 
 5634 // Load Integer
 5635 instruct loadI(rRegI dst, memory mem) %{
 5636   match(Set dst (LoadI mem));
 5637 
 5638   ins_cost(125);
 5639   format %{ "MOV    $dst,$mem\t# int" %}
 5640 
 5641   ins_encode %{
 5642     __ movl($dst$$Register, $mem$$Address);
 5643   %}
 5644 
 5645   ins_pipe(ialu_reg_mem);
 5646 %}
 5647 
 5648 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5649 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5650   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5651 
 5652   ins_cost(125);
 5653   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5654   ins_encode %{
 5655     __ movsbl($dst$$Register, $mem$$Address);
 5656   %}
 5657   ins_pipe(ialu_reg_mem);
 5658 %}
 5659 
 5660 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5661 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5662   match(Set dst (AndI (LoadI mem) mask));
 5663 
 5664   ins_cost(125);
 5665   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5666   ins_encode %{
 5667     __ movzbl($dst$$Register, $mem$$Address);
 5668   %}
 5669   ins_pipe(ialu_reg_mem);
 5670 %}
 5671 
 5672 // Load Integer (32 bit signed) to Short (16 bit signed)
 5673 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5674   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5675 
 5676   ins_cost(125);
 5677   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5678   ins_encode %{
 5679     __ movswl($dst$$Register, $mem$$Address);
 5680   %}
 5681   ins_pipe(ialu_reg_mem);
 5682 %}
 5683 
 5684 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5685 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5686   match(Set dst (AndI (LoadI mem) mask));
 5687 
 5688   ins_cost(125);
 5689   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5690   ins_encode %{
 5691     __ movzwl($dst$$Register, $mem$$Address);
 5692   %}
 5693   ins_pipe(ialu_reg_mem);
 5694 %}
 5695 
 5696 // Load Integer into Long Register
 5697 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5698   match(Set dst (ConvI2L (LoadI mem)));
 5699   effect(KILL cr);
 5700 
 5701   ins_cost(375);
 5702   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5703             "MOV    $dst.hi,$dst.lo\n\t"
 5704             "SAR    $dst.hi,31" %}
 5705 
 5706   ins_encode %{
 5707     __ movl($dst$$Register, $mem$$Address);
 5708     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5709     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5710   %}
 5711 
 5712   ins_pipe(ialu_reg_mem);
 5713 %}
 5714 
 5715 // Load Integer with mask 0xFF into Long Register
 5716 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5717   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5718   effect(KILL cr);
 5719 
 5720   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5721             "XOR    $dst.hi,$dst.hi" %}
 5722   ins_encode %{
 5723     Register Rdst = $dst$$Register;
 5724     __ movzbl(Rdst, $mem$$Address);
 5725     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5726   %}
 5727   ins_pipe(ialu_reg_mem);
 5728 %}
 5729 
 5730 // Load Integer with mask 0xFFFF into Long Register
 5731 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5732   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5733   effect(KILL cr);
 5734 
 5735   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5736             "XOR    $dst.hi,$dst.hi" %}
 5737   ins_encode %{
 5738     Register Rdst = $dst$$Register;
 5739     __ movzwl(Rdst, $mem$$Address);
 5740     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5741   %}
 5742   ins_pipe(ialu_reg_mem);
 5743 %}
 5744 
 5745 // Load Integer with 31-bit mask into Long Register
 5746 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5747   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5748   effect(KILL cr);
 5749 
 5750   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5751             "XOR    $dst.hi,$dst.hi\n\t"
 5752             "AND    $dst.lo,$mask" %}
 5753   ins_encode %{
 5754     Register Rdst = $dst$$Register;
 5755     __ movl(Rdst, $mem$$Address);
 5756     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5757     __ andl(Rdst, $mask$$constant);
 5758   %}
 5759   ins_pipe(ialu_reg_mem);
 5760 %}
 5761 
 5762 // Load Unsigned Integer into Long Register
 5763 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5764   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5765   effect(KILL cr);
 5766 
 5767   ins_cost(250);
 5768   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5769             "XOR    $dst.hi,$dst.hi" %}
 5770 
 5771   ins_encode %{
 5772     __ movl($dst$$Register, $mem$$Address);
 5773     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5774   %}
 5775 
 5776   ins_pipe(ialu_reg_mem);
 5777 %}
 5778 
 5779 // Load Long.  Cannot clobber address while loading, so restrict address
 5780 // register to ESI
 5781 instruct loadL(eRegL dst, load_long_memory mem) %{
 5782   predicate(!((LoadLNode*)n)->require_atomic_access());
 5783   match(Set dst (LoadL mem));
 5784 
 5785   ins_cost(250);
 5786   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5787             "MOV    $dst.hi,$mem+4" %}
 5788 
 5789   ins_encode %{
 5790     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5791     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5792     __ movl($dst$$Register, Amemlo);
 5793     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5794   %}
 5795 
 5796   ins_pipe(ialu_reg_long_mem);
 5797 %}
 5798 
 5799 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5800 // then store it down to the stack and reload on the int
 5801 // side.
 5802 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5803   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5804   match(Set dst (LoadL mem));
 5805 
 5806   ins_cost(200);
 5807   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5808             "FISTp  $dst" %}
 5809   ins_encode(enc_loadL_volatile(mem,dst));
 5810   ins_pipe( fpu_reg_mem );
 5811 %}
 5812 
 5813 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5814   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5815   match(Set dst (LoadL mem));
 5816   effect(TEMP tmp);
 5817   ins_cost(180);
 5818   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5819             "MOVSD  $dst,$tmp" %}
 5820   ins_encode %{
 5821     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5822     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5823   %}
 5824   ins_pipe( pipe_slow );
 5825 %}
 5826 
 5827 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5828   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5829   match(Set dst (LoadL mem));
 5830   effect(TEMP tmp);
 5831   ins_cost(160);
 5832   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5833             "MOVD   $dst.lo,$tmp\n\t"
 5834             "PSRLQ  $tmp,32\n\t"
 5835             "MOVD   $dst.hi,$tmp" %}
 5836   ins_encode %{
 5837     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5838     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5839     __ psrlq($tmp$$XMMRegister, 32);
 5840     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5841   %}
 5842   ins_pipe( pipe_slow );
 5843 %}
 5844 
 5845 // Load Range
 5846 instruct loadRange(rRegI dst, memory mem) %{
 5847   match(Set dst (LoadRange mem));
 5848 
 5849   ins_cost(125);
 5850   format %{ "MOV    $dst,$mem" %}
 5851   opcode(0x8B);
 5852   ins_encode( OpcP, RegMem(dst,mem));
 5853   ins_pipe( ialu_reg_mem );
 5854 %}
 5855 
 5856 
 5857 // Load Pointer
 5858 instruct loadP(eRegP dst, memory mem) %{
 5859   match(Set dst (LoadP mem));
 5860 
 5861   ins_cost(125);
 5862   format %{ "MOV    $dst,$mem" %}
 5863   opcode(0x8B);
 5864   ins_encode( OpcP, RegMem(dst,mem));
 5865   ins_pipe( ialu_reg_mem );
 5866 %}
 5867 
 5868 // Load Klass Pointer
 5869 instruct loadKlass(eRegP dst, memory mem) %{
 5870   match(Set dst (LoadKlass mem));
 5871 
 5872   ins_cost(125);
 5873   format %{ "MOV    $dst,$mem" %}
 5874   opcode(0x8B);
 5875   ins_encode( OpcP, RegMem(dst,mem));
 5876   ins_pipe( ialu_reg_mem );
 5877 %}
 5878 
 5879 // Load Double
 5880 instruct loadDPR(regDPR dst, memory mem) %{
 5881   predicate(UseSSE<=1);
 5882   match(Set dst (LoadD mem));
 5883 
 5884   ins_cost(150);
 5885   format %{ "FLD_D  ST,$mem\n\t"
 5886             "FSTP   $dst" %}
 5887   opcode(0xDD);               /* DD /0 */
 5888   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5889               Pop_Reg_DPR(dst) );
 5890   ins_pipe( fpu_reg_mem );
 5891 %}
 5892 
 5893 // Load Double to XMM
 5894 instruct loadD(regD dst, memory mem) %{
 5895   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5896   match(Set dst (LoadD mem));
 5897   ins_cost(145);
 5898   format %{ "MOVSD  $dst,$mem" %}
 5899   ins_encode %{
 5900     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5901   %}
 5902   ins_pipe( pipe_slow );
 5903 %}
 5904 
 5905 instruct loadD_partial(regD dst, memory mem) %{
 5906   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5907   match(Set dst (LoadD mem));
 5908   ins_cost(145);
 5909   format %{ "MOVLPD $dst,$mem" %}
 5910   ins_encode %{
 5911     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5912   %}
 5913   ins_pipe( pipe_slow );
 5914 %}
 5915 
 5916 // Load to XMM register (single-precision floating point)
 5917 // MOVSS instruction
 5918 instruct loadF(regF dst, memory mem) %{
 5919   predicate(UseSSE>=1);
 5920   match(Set dst (LoadF mem));
 5921   ins_cost(145);
 5922   format %{ "MOVSS  $dst,$mem" %}
 5923   ins_encode %{
 5924     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5925   %}
 5926   ins_pipe( pipe_slow );
 5927 %}
 5928 
 5929 // Load Float
 5930 instruct loadFPR(regFPR dst, memory mem) %{
 5931   predicate(UseSSE==0);
 5932   match(Set dst (LoadF mem));
 5933 
 5934   ins_cost(150);
 5935   format %{ "FLD_S  ST,$mem\n\t"
 5936             "FSTP   $dst" %}
 5937   opcode(0xD9);               /* D9 /0 */
 5938   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5939               Pop_Reg_FPR(dst) );
 5940   ins_pipe( fpu_reg_mem );
 5941 %}
 5942 
 5943 // Load Effective Address
 5944 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5945   match(Set dst mem);
 5946 
 5947   ins_cost(110);
 5948   format %{ "LEA    $dst,$mem" %}
 5949   opcode(0x8D);
 5950   ins_encode( OpcP, RegMem(dst,mem));
 5951   ins_pipe( ialu_reg_reg_fat );
 5952 %}
 5953 
 5954 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5955   match(Set dst mem);
 5956 
 5957   ins_cost(110);
 5958   format %{ "LEA    $dst,$mem" %}
 5959   opcode(0x8D);
 5960   ins_encode( OpcP, RegMem(dst,mem));
 5961   ins_pipe( ialu_reg_reg_fat );
 5962 %}
 5963 
 5964 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5965   match(Set dst mem);
 5966 
 5967   ins_cost(110);
 5968   format %{ "LEA    $dst,$mem" %}
 5969   opcode(0x8D);
 5970   ins_encode( OpcP, RegMem(dst,mem));
 5971   ins_pipe( ialu_reg_reg_fat );
 5972 %}
 5973 
 5974 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5975   match(Set dst mem);
 5976 
 5977   ins_cost(110);
 5978   format %{ "LEA    $dst,$mem" %}
 5979   opcode(0x8D);
 5980   ins_encode( OpcP, RegMem(dst,mem));
 5981   ins_pipe( ialu_reg_reg_fat );
 5982 %}
 5983 
 5984 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5985   match(Set dst mem);
 5986 
 5987   ins_cost(110);
 5988   format %{ "LEA    $dst,$mem" %}
 5989   opcode(0x8D);
 5990   ins_encode( OpcP, RegMem(dst,mem));
 5991   ins_pipe( ialu_reg_reg_fat );
 5992 %}
 5993 
 5994 // Load Constant
 5995 instruct loadConI(rRegI dst, immI src) %{
 5996   match(Set dst src);
 5997 
 5998   format %{ "MOV    $dst,$src" %}
 5999   ins_encode( LdImmI(dst, src) );
 6000   ins_pipe( ialu_reg_fat );
 6001 %}
 6002 
 6003 // Load Constant zero
 6004 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 6005   match(Set dst src);
 6006   effect(KILL cr);
 6007 
 6008   ins_cost(50);
 6009   format %{ "XOR    $dst,$dst" %}
 6010   opcode(0x33);  /* XOR r,r */
 6011   ins_encode( OpcP, RegReg( dst, dst ) );
 6012   ins_pipe( ialu_reg );
 6013 %}
 6014 
 6015 instruct loadConP(eRegP dst, immP src) %{
 6016   match(Set dst src);
 6017 
 6018   format %{ "MOV    $dst,$src" %}
 6019   opcode(0xB8);  /* + rd */
 6020   ins_encode( LdImmP(dst, src) );
 6021   ins_pipe( ialu_reg_fat );
 6022 %}
 6023 
 6024 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 6025   match(Set dst src);
 6026   effect(KILL cr);
 6027   ins_cost(200);
 6028   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6029             "MOV    $dst.hi,$src.hi" %}
 6030   opcode(0xB8);
 6031   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6032   ins_pipe( ialu_reg_long_fat );
 6033 %}
 6034 
 6035 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6036   match(Set dst src);
 6037   effect(KILL cr);
 6038   ins_cost(150);
 6039   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6040             "XOR    $dst.hi,$dst.hi" %}
 6041   opcode(0x33,0x33);
 6042   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6043   ins_pipe( ialu_reg_long );
 6044 %}
 6045 
 6046 // The instruction usage is guarded by predicate in operand immFPR().
 6047 instruct loadConFPR(regFPR dst, immFPR con) %{
 6048   match(Set dst con);
 6049   ins_cost(125);
 6050   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6051             "FSTP   $dst" %}
 6052   ins_encode %{
 6053     __ fld_s($constantaddress($con));
 6054     __ fstp_d($dst$$reg);
 6055   %}
 6056   ins_pipe(fpu_reg_con);
 6057 %}
 6058 
 6059 // The instruction usage is guarded by predicate in operand immFPR0().
 6060 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6061   match(Set dst con);
 6062   ins_cost(125);
 6063   format %{ "FLDZ   ST\n\t"
 6064             "FSTP   $dst" %}
 6065   ins_encode %{
 6066     __ fldz();
 6067     __ fstp_d($dst$$reg);
 6068   %}
 6069   ins_pipe(fpu_reg_con);
 6070 %}
 6071 
 6072 // The instruction usage is guarded by predicate in operand immFPR1().
 6073 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6074   match(Set dst con);
 6075   ins_cost(125);
 6076   format %{ "FLD1   ST\n\t"
 6077             "FSTP   $dst" %}
 6078   ins_encode %{
 6079     __ fld1();
 6080     __ fstp_d($dst$$reg);
 6081   %}
 6082   ins_pipe(fpu_reg_con);
 6083 %}
 6084 
 6085 // The instruction usage is guarded by predicate in operand immF().
 6086 instruct loadConF(regF dst, immF con) %{
 6087   match(Set dst con);
 6088   ins_cost(125);
 6089   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6090   ins_encode %{
 6091     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6092   %}
 6093   ins_pipe(pipe_slow);
 6094 %}
 6095 
 6096 // The instruction usage is guarded by predicate in operand immF0().
 6097 instruct loadConF0(regF dst, immF0 src) %{
 6098   match(Set dst src);
 6099   ins_cost(100);
 6100   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6101   ins_encode %{
 6102     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6103   %}
 6104   ins_pipe(pipe_slow);
 6105 %}
 6106 
 6107 // The instruction usage is guarded by predicate in operand immDPR().
 6108 instruct loadConDPR(regDPR dst, immDPR con) %{
 6109   match(Set dst con);
 6110   ins_cost(125);
 6111 
 6112   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6113             "FSTP   $dst" %}
 6114   ins_encode %{
 6115     __ fld_d($constantaddress($con));
 6116     __ fstp_d($dst$$reg);
 6117   %}
 6118   ins_pipe(fpu_reg_con);
 6119 %}
 6120 
 6121 // The instruction usage is guarded by predicate in operand immDPR0().
 6122 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6123   match(Set dst con);
 6124   ins_cost(125);
 6125 
 6126   format %{ "FLDZ   ST\n\t"
 6127             "FSTP   $dst" %}
 6128   ins_encode %{
 6129     __ fldz();
 6130     __ fstp_d($dst$$reg);
 6131   %}
 6132   ins_pipe(fpu_reg_con);
 6133 %}
 6134 
 6135 // The instruction usage is guarded by predicate in operand immDPR1().
 6136 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6137   match(Set dst con);
 6138   ins_cost(125);
 6139 
 6140   format %{ "FLD1   ST\n\t"
 6141             "FSTP   $dst" %}
 6142   ins_encode %{
 6143     __ fld1();
 6144     __ fstp_d($dst$$reg);
 6145   %}
 6146   ins_pipe(fpu_reg_con);
 6147 %}
 6148 
 6149 // The instruction usage is guarded by predicate in operand immD().
 6150 instruct loadConD(regD dst, immD con) %{
 6151   match(Set dst con);
 6152   ins_cost(125);
 6153   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6154   ins_encode %{
 6155     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6156   %}
 6157   ins_pipe(pipe_slow);
 6158 %}
 6159 
 6160 // The instruction usage is guarded by predicate in operand immD0().
 6161 instruct loadConD0(regD dst, immD0 src) %{
 6162   match(Set dst src);
 6163   ins_cost(100);
 6164   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6165   ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 6167   %}
 6168   ins_pipe( pipe_slow );
 6169 %}
 6170 
 6171 // Load Stack Slot
 6172 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6173   match(Set dst src);
 6174   ins_cost(125);
 6175 
 6176   format %{ "MOV    $dst,$src" %}
 6177   opcode(0x8B);
 6178   ins_encode( OpcP, RegMem(dst,src));
 6179   ins_pipe( ialu_reg_mem );
 6180 %}
 6181 
 6182 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6183   match(Set dst src);
 6184 
 6185   ins_cost(200);
 6186   format %{ "MOV    $dst,$src.lo\n\t"
 6187             "MOV    $dst+4,$src.hi" %}
 6188   opcode(0x8B, 0x8B);
 6189   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6190   ins_pipe( ialu_mem_long_reg );
 6191 %}
 6192 
 6193 // Load Stack Slot
 6194 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6195   match(Set dst src);
 6196   ins_cost(125);
 6197 
 6198   format %{ "MOV    $dst,$src" %}
 6199   opcode(0x8B);
 6200   ins_encode( OpcP, RegMem(dst,src));
 6201   ins_pipe( ialu_reg_mem );
 6202 %}
 6203 
 6204 // Load Stack Slot
 6205 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6206   match(Set dst src);
 6207   ins_cost(125);
 6208 
 6209   format %{ "FLD_S  $src\n\t"
 6210             "FSTP   $dst" %}
 6211   opcode(0xD9);               /* D9 /0, FLD m32real */
 6212   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6213               Pop_Reg_FPR(dst) );
 6214   ins_pipe( fpu_reg_mem );
 6215 %}
 6216 
 6217 // Load Stack Slot
 6218 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6219   match(Set dst src);
 6220   ins_cost(125);
 6221 
 6222   format %{ "FLD_D  $src\n\t"
 6223             "FSTP   $dst" %}
 6224   opcode(0xDD);               /* DD /0, FLD m64real */
 6225   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6226               Pop_Reg_DPR(dst) );
 6227   ins_pipe( fpu_reg_mem );
 6228 %}
 6229 
 6230 // Prefetch instructions for allocation.
 6231 // Must be safe to execute with invalid address (cannot fault).
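// The PREFETCHNTA/T0/T2 and PREFETCHW instructions are hints that never fault, even
// on unmapped addresses, which satisfies that requirement.  AllocatePrefetchInstr
// selects the flavor: 0 = PREFETCHNTA, 1 = PREFETCHT0, 2 = PREFETCHT2, 3 = PREFETCHW.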
 6232 
 6233 instruct prefetchAlloc0( memory mem ) %{
 6234   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6235   match(PrefetchAllocation mem);
 6236   ins_cost(0);
 6237   size(0);
 6238   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6239   ins_encode();
 6240   ins_pipe(empty);
 6241 %}
 6242 
 6243 instruct prefetchAlloc( memory mem ) %{
 6244   predicate(AllocatePrefetchInstr==3);
 6245   match( PrefetchAllocation mem );
 6246   ins_cost(100);
 6247 
 6248   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6249   ins_encode %{
 6250     __ prefetchw($mem$$Address);
 6251   %}
 6252   ins_pipe(ialu_mem);
 6253 %}
 6254 
 6255 instruct prefetchAllocNTA( memory mem ) %{
 6256   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6257   match(PrefetchAllocation mem);
 6258   ins_cost(100);
 6259 
 6260   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6261   ins_encode %{
 6262     __ prefetchnta($mem$$Address);
 6263   %}
 6264   ins_pipe(ialu_mem);
 6265 %}
 6266 
 6267 instruct prefetchAllocT0( memory mem ) %{
 6268   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6269   match(PrefetchAllocation mem);
 6270   ins_cost(100);
 6271 
 6272   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6273   ins_encode %{
 6274     __ prefetcht0($mem$$Address);
 6275   %}
 6276   ins_pipe(ialu_mem);
 6277 %}
 6278 
 6279 instruct prefetchAllocT2( memory mem ) %{
 6280   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6281   match(PrefetchAllocation mem);
 6282   ins_cost(100);
 6283 
 6284   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6285   ins_encode %{
 6286     __ prefetcht2($mem$$Address);
 6287   %}
 6288   ins_pipe(ialu_mem);
 6289 %}
 6290 
 6291 //----------Store Instructions-------------------------------------------------
 6292 
 6293 // Store Byte
 6294 instruct storeB(memory mem, xRegI src) %{
 6295   match(Set mem (StoreB mem src));
 6296 
 6297   ins_cost(125);
 6298   format %{ "MOV8   $mem,$src" %}
 6299   opcode(0x88);
 6300   ins_encode( OpcP, RegMem( src, mem ) );
 6301   ins_pipe( ialu_mem_reg );
 6302 %}
 6303 
 6304 // Store Char/Short
 6305 instruct storeC(memory mem, rRegI src) %{
 6306   match(Set mem (StoreC mem src));
 6307 
 6308   ins_cost(125);
 6309   format %{ "MOV16  $mem,$src" %}
 6310   opcode(0x89, 0x66);
 6311   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6312   ins_pipe( ialu_mem_reg );
 6313 %}
 6314 
 6315 // Store Integer
 6316 instruct storeI(memory mem, rRegI src) %{
 6317   match(Set mem (StoreI mem src));
 6318 
 6319   ins_cost(125);
 6320   format %{ "MOV    $mem,$src" %}
 6321   opcode(0x89);
 6322   ins_encode( OpcP, RegMem( src, mem ) );
 6323   ins_pipe( ialu_mem_reg );
 6324 %}
 6325 
 6326 // Store Long
 6327 instruct storeL(long_memory mem, eRegL src) %{
 6328   predicate(!((StoreLNode*)n)->require_atomic_access());
 6329   match(Set mem (StoreL mem src));
 6330 
 6331   ins_cost(200);
 6332   format %{ "MOV    $mem,$src.lo\n\t"
 6333             "MOV    $mem+4,$src.hi" %}
 6334   opcode(0x89, 0x89);
 6335   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6336   ins_pipe( ialu_mem_long_reg );
 6337 %}
 6338 
 6339 // Store Long to Integer
 6340 instruct storeL2I(memory mem, eRegL src) %{
 6341   match(Set mem (StoreI mem (ConvL2I src)));
 6342 
 6343   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6344   ins_encode %{
 6345     __ movl($mem$$Address, $src$$Register);
 6346   %}
 6347   ins_pipe(ialu_mem_reg);
 6348 %}
 6349 
 6350 // Volatile Store Long.  Must be atomic, so move it into
 6351 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6352 // target address before the store (for null-ptr checks)
 6353 // so the memory operand is used twice in the encoding.
 6354 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6355   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6356   match(Set mem (StoreL mem src));
 6357   effect( KILL cr );
 6358   ins_cost(400);
 6359   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6360             "FILD   $src\n\t"
 6361             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6362   opcode(0x3B);
 6363   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6364   ins_pipe( fpu_reg_mem );
 6365 %}
 6366 
 6367 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6368   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6369   match(Set mem (StoreL mem src));
 6370   effect( TEMP tmp, KILL cr );
 6371   ins_cost(380);
 6372   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6373             "MOVSD  $tmp,$src\n\t"
 6374             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6375   ins_encode %{
 6376     __ cmpl(rax, $mem$$Address);
 6377     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6378     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6379   %}
 6380   ins_pipe( pipe_slow );
 6381 %}
 6382 
 6383 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6384   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6385   match(Set mem (StoreL mem src));
 6386   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6387   ins_cost(360);
 6388   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6389             "MOVD   $tmp,$src.lo\n\t"
 6390             "MOVD   $tmp2,$src.hi\n\t"
 6391             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6392             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6393   ins_encode %{
 6394     __ cmpl(rax, $mem$$Address);
 6395     __ movdl($tmp$$XMMRegister, $src$$Register);
 6396     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6397     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6398     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6399   %}
 6400   ins_pipe( pipe_slow );
 6401 %}
 6402 
 6403 // Store Pointer; for storing unknown oops and raw pointers
 6404 instruct storeP(memory mem, anyRegP src) %{
 6405   match(Set mem (StoreP mem src));
 6406 
 6407   ins_cost(125);
 6408   format %{ "MOV    $mem,$src" %}
 6409   opcode(0x89);
 6410   ins_encode( OpcP, RegMem( src, mem ) );
 6411   ins_pipe( ialu_mem_reg );
 6412 %}
 6413 
 6414 // Store Integer Immediate
 6415 instruct storeImmI(memory mem, immI src) %{
 6416   match(Set mem (StoreI mem src));
 6417 
 6418   ins_cost(150);
 6419   format %{ "MOV    $mem,$src" %}
 6420   opcode(0xC7);               /* C7 /0 */
 6421   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6422   ins_pipe( ialu_mem_imm );
 6423 %}
 6424 
 6425 // Store Short/Char Immediate
 6426 instruct storeImmI16(memory mem, immI16 src) %{
 6427   predicate(UseStoreImmI16);
 6428   match(Set mem (StoreC mem src));
 6429 
 6430   ins_cost(150);
 6431   format %{ "MOV16  $mem,$src" %}
 6432   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6433   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6434   ins_pipe( ialu_mem_imm );
 6435 %}
 6436 
 6437 // Store Pointer Immediate; null pointers or constant oops that do not
 6438 // need card-mark barriers.
 6439 instruct storeImmP(memory mem, immP src) %{
 6440   match(Set mem (StoreP mem src));
 6441 
 6442   ins_cost(150);
 6443   format %{ "MOV    $mem,$src" %}
 6444   opcode(0xC7);               /* C7 /0 */
 6445   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6446   ins_pipe( ialu_mem_imm );
 6447 %}
 6448 
 6449 // Store Byte Immediate
 6450 instruct storeImmB(memory mem, immI8 src) %{
 6451   match(Set mem (StoreB mem src));
 6452 
 6453   ins_cost(150);
 6454   format %{ "MOV8   $mem,$src" %}
 6455   opcode(0xC6);               /* C6 /0 */
 6456   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6457   ins_pipe( ialu_mem_imm );
 6458 %}
 6459 
 6460 // Store CMS card-mark Immediate
 6461 instruct storeImmCM(memory mem, immI8 src) %{
 6462   match(Set mem (StoreCM mem src));
 6463 
 6464   ins_cost(150);
 6465   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6466   opcode(0xC6);               /* C6 /0 */
 6467   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6468   ins_pipe( ialu_mem_imm );
 6469 %}
 6470 
 6471 // Store Double
 6472 instruct storeDPR( memory mem, regDPR1 src) %{
 6473   predicate(UseSSE<=1);
 6474   match(Set mem (StoreD mem src));
 6475 
 6476   ins_cost(100);
 6477   format %{ "FST_D  $mem,$src" %}
 6478   opcode(0xDD);       /* DD /2 */
 6479   ins_encode( enc_FPR_store(mem,src) );
 6480   ins_pipe( fpu_mem_reg );
 6481 %}
 6482 
 6483 // Store double does rounding on x86
 6484 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6485   predicate(UseSSE<=1);
 6486   match(Set mem (StoreD mem (RoundDouble src)));
 6487 
 6488   ins_cost(100);
 6489   format %{ "FST_D  $mem,$src\t# round" %}
 6490   opcode(0xDD);       /* DD /2 */
 6491   ins_encode( enc_FPR_store(mem,src) );
 6492   ins_pipe( fpu_mem_reg );
 6493 %}
 6494 
 6495 // Store XMM register to memory (double-precision floating points)
 6496 // MOVSD instruction
 6497 instruct storeD(memory mem, regD src) %{
 6498   predicate(UseSSE>=2);
 6499   match(Set mem (StoreD mem src));
 6500   ins_cost(95);
 6501   format %{ "MOVSD  $mem,$src" %}
 6502   ins_encode %{
 6503     __ movdbl($mem$$Address, $src$$XMMRegister);
 6504   %}
 6505   ins_pipe( pipe_slow );
 6506 %}
 6507 
 6508 // Store XMM register to memory (single-precision floating point)
 6509 // MOVSS instruction
 6510 instruct storeF(memory mem, regF src) %{
 6511   predicate(UseSSE>=1);
 6512   match(Set mem (StoreF mem src));
 6513   ins_cost(95);
 6514   format %{ "MOVSS  $mem,$src" %}
 6515   ins_encode %{
 6516     __ movflt($mem$$Address, $src$$XMMRegister);
 6517   %}
 6518   ins_pipe( pipe_slow );
 6519 %}
 6520 
 6521 
 6522 // Store Float
 6523 instruct storeFPR( memory mem, regFPR1 src) %{
 6524   predicate(UseSSE==0);
 6525   match(Set mem (StoreF mem src));
 6526 
 6527   ins_cost(100);
 6528   format %{ "FST_S  $mem,$src" %}
 6529   opcode(0xD9);       /* D9 /2 */
 6530   ins_encode( enc_FPR_store(mem,src) );
 6531   ins_pipe( fpu_mem_reg );
 6532 %}
 6533 
 6534 // Store Float does rounding on x86
 6535 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6536   predicate(UseSSE==0);
 6537   match(Set mem (StoreF mem (RoundFloat src)));
 6538 
 6539   ins_cost(100);
 6540   format %{ "FST_S  $mem,$src\t# round" %}
 6541   opcode(0xD9);       /* D9 /2 */
 6542   ins_encode( enc_FPR_store(mem,src) );
 6543   ins_pipe( fpu_mem_reg );
 6544 %}
 6545 
// Store Float from a double register; the store performs the double-to-float rounding
 6547 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6548   predicate(UseSSE<=1);
 6549   match(Set mem (StoreF mem (ConvD2F src)));
 6550 
 6551   ins_cost(100);
 6552   format %{ "FST_S  $mem,$src\t# D-round" %}
 6553   opcode(0xD9);       /* D9 /2 */
 6554   ins_encode( enc_FPR_store(mem,src) );
 6555   ins_pipe( fpu_mem_reg );
 6556 %}
 6557 
// Store immediate Float value (faster than storing from an FPU register)
 6559 // The instruction usage is guarded by predicate in operand immFPR().
 6560 instruct storeFPR_imm( memory mem, immFPR src) %{
 6561   match(Set mem (StoreF mem src));
 6562 
 6563   ins_cost(50);
 6564   format %{ "MOV    $mem,$src\t# store float" %}
 6565   opcode(0xC7);               /* C7 /0 */
 6566   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6567   ins_pipe( ialu_mem_imm );
 6568 %}
 6569 
// Store immediate Float value (faster than storing from an XMM register)
 6571 // The instruction usage is guarded by predicate in operand immF().
 6572 instruct storeF_imm( memory mem, immF src) %{
 6573   match(Set mem (StoreF mem src));
 6574 
 6575   ins_cost(50);
 6576   format %{ "MOV    $mem,$src\t# store float" %}
 6577   opcode(0xC7);               /* C7 /0 */
 6578   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6579   ins_pipe( ialu_mem_imm );
 6580 %}
 6581 
 6582 // Store Integer to stack slot
 6583 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6584   match(Set dst src);
 6585 
 6586   ins_cost(100);
 6587   format %{ "MOV    $dst,$src" %}
 6588   opcode(0x89);
 6589   ins_encode( OpcPRegSS( dst, src ) );
 6590   ins_pipe( ialu_mem_reg );
 6591 %}
 6592 
// Store Pointer to stack slot
 6594 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6595   match(Set dst src);
 6596 
 6597   ins_cost(100);
 6598   format %{ "MOV    $dst,$src" %}
 6599   opcode(0x89);
 6600   ins_encode( OpcPRegSS( dst, src ) );
 6601   ins_pipe( ialu_mem_reg );
 6602 %}
 6603 
 6604 // Store Long to stack slot
 6605 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6606   match(Set dst src);
 6607 
 6608   ins_cost(200);
 6609   format %{ "MOV    $dst,$src.lo\n\t"
 6610             "MOV    $dst+4,$src.hi" %}
 6611   opcode(0x89, 0x89);
 6612   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6613   ins_pipe( ialu_mem_long_reg );
 6614 %}
 6615 
 6616 //----------MemBar Instructions-----------------------------------------------
 6617 // Memory barrier flavors
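// On x86 the hardware memory model (TSO) already orders load-load, load-store and
// store-store, so acquire, release and storestore barriers need no instruction and
// get empty encodings; only the store-load (volatile) barrier below emits code.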
 6618 
 6619 instruct membar_acquire() %{
 6620   match(MemBarAcquire);
 6621   match(LoadFence);
 6622   ins_cost(400);
 6623 
 6624   size(0);
 6625   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6626   ins_encode();
 6627   ins_pipe(empty);
 6628 %}
 6629 
 6630 instruct membar_acquire_lock() %{
 6631   match(MemBarAcquireLock);
 6632   ins_cost(0);
 6633 
 6634   size(0);
 6635   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6636   ins_encode( );
 6637   ins_pipe(empty);
 6638 %}
 6639 
 6640 instruct membar_release() %{
 6641   match(MemBarRelease);
 6642   match(StoreFence);
 6643   ins_cost(400);
 6644 
 6645   size(0);
 6646   format %{ "MEMBAR-release ! (empty encoding)" %}
 6647   ins_encode( );
 6648   ins_pipe(empty);
 6649 %}
 6650 
 6651 instruct membar_release_lock() %{
 6652   match(MemBarReleaseLock);
 6653   ins_cost(0);
 6654 
 6655   size(0);
 6656   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6657   ins_encode( );
 6658   ins_pipe(empty);
 6659 %}
 6660 
 6661 instruct membar_volatile(eFlagsReg cr) %{
 6662   match(MemBarVolatile);
 6663   effect(KILL cr);
 6664   ins_cost(400);
 6665 
 6666   format %{
 6667     $$template
 6668     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6669   %}
 6670   ins_encode %{
 6671     __ membar(Assembler::StoreLoad);
 6672   %}
 6673   ins_pipe(pipe_slow);
 6674 %}
 6675 
 6676 instruct unnecessary_membar_volatile() %{
 6677   match(MemBarVolatile);
 6678   predicate(Matcher::post_store_load_barrier(n));
 6679   ins_cost(0);
 6680 
 6681   size(0);
 6682   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6683   ins_encode( );
 6684   ins_pipe(empty);
 6685 %}
 6686 
 6687 instruct membar_storestore() %{
 6688   match(MemBarStoreStore);
 6689   match(StoreStoreFence);
 6690   ins_cost(0);
 6691 
 6692   size(0);
 6693   format %{ "MEMBAR-storestore (empty encoding)" %}
 6694   ins_encode( );
 6695   ins_pipe(empty);
 6696 %}
 6697 
 6698 //----------Move Instructions--------------------------------------------------
 6699 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6700   match(Set dst (CastX2P src));
 6701   format %{ "# X2P  $dst, $src" %}
 6702   ins_encode( /*empty encoding*/ );
 6703   ins_cost(0);
 6704   ins_pipe(empty);
 6705 %}
 6706 
 6707 instruct castP2X(rRegI dst, eRegP src ) %{
 6708   match(Set dst (CastP2X src));
 6709   ins_cost(50);
 6710   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6711   ins_encode( enc_Copy( dst, src) );
 6712   ins_pipe( ialu_reg_reg );
 6713 %}
 6714 
 6715 //----------Conditional Move---------------------------------------------------
 6716 // Conditional move
 6717 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6718   predicate(!VM_Version::supports_cmov() );
 6719   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6720   ins_cost(200);
 6721   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6722             "MOV    $dst,$src\n"
 6723       "skip:" %}
 6724   ins_encode %{
 6725     Label Lskip;
 6726     // Invert sense of branch from sense of CMOV
 6727     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6728     __ movl($dst$$Register, $src$$Register);
 6729     __ bind(Lskip);
 6730   %}
 6731   ins_pipe( pipe_cmov_reg );
 6732 %}
 6733 
 6734 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6735   predicate(!VM_Version::supports_cmov() );
 6736   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6737   ins_cost(200);
 6738   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6739             "MOV    $dst,$src\n"
 6740       "skip:" %}
 6741   ins_encode %{
 6742     Label Lskip;
 6743     // Invert sense of branch from sense of CMOV
 6744     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6745     __ movl($dst$$Register, $src$$Register);
 6746     __ bind(Lskip);
 6747   %}
 6748   ins_pipe( pipe_cmov_reg );
 6749 %}
 6750 
 6751 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6752   predicate(VM_Version::supports_cmov() );
 6753   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6754   ins_cost(200);
 6755   format %{ "CMOV$cop $dst,$src" %}
 6756   opcode(0x0F,0x40);
 6757   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6758   ins_pipe( pipe_cmov_reg );
 6759 %}
 6760 
 6761 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6762   predicate(VM_Version::supports_cmov() );
 6763   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6764   ins_cost(200);
 6765   format %{ "CMOV$cop $dst,$src" %}
 6766   opcode(0x0F,0x40);
 6767   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6768   ins_pipe( pipe_cmov_reg );
 6769 %}
 6770 
 6771 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6772   predicate(VM_Version::supports_cmov() );
 6773   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6774   ins_cost(200);
 6775   expand %{
 6776     cmovI_regU(cop, cr, dst, src);
 6777   %}
 6778 %}
 6779 
 6780 // Conditional move
 6781 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6782   predicate(VM_Version::supports_cmov() );
 6783   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6784   ins_cost(250);
 6785   format %{ "CMOV$cop $dst,$src" %}
 6786   opcode(0x0F,0x40);
 6787   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6788   ins_pipe( pipe_cmov_mem );
 6789 %}
 6790 
 6791 // Conditional move
 6792 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6793   predicate(VM_Version::supports_cmov() );
 6794   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6795   ins_cost(250);
 6796   format %{ "CMOV$cop $dst,$src" %}
 6797   opcode(0x0F,0x40);
 6798   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6799   ins_pipe( pipe_cmov_mem );
 6800 %}
 6801 
 6802 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6803   predicate(VM_Version::supports_cmov() );
 6804   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6805   ins_cost(250);
 6806   expand %{
 6807     cmovI_memU(cop, cr, dst, src);
 6808   %}
 6809 %}
 6810 
 6811 // Conditional move
 6812 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6813   predicate(VM_Version::supports_cmov() );
 6814   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6815   ins_cost(200);
 6816   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6817   opcode(0x0F,0x40);
 6818   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6819   ins_pipe( pipe_cmov_reg );
 6820 %}
 6821 
 6822 // Conditional move (non-P6 version)
// Note:  a CMoveP is generated for stubs and native wrappers
 6824 //        regardless of whether we are on a P6, so we
 6825 //        emulate a cmov here
 6826 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6827   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6828   ins_cost(300);
 6829   format %{ "Jn$cop   skip\n\t"
 6830           "MOV    $dst,$src\t# pointer\n"
 6831       "skip:" %}
 6832   opcode(0x8b);
 6833   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6834   ins_pipe( pipe_cmov_reg );
 6835 %}
 6836 
 6837 // Conditional move
 6838 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6839   predicate(VM_Version::supports_cmov() );
 6840   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6841   ins_cost(200);
 6842   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6843   opcode(0x0F,0x40);
 6844   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6845   ins_pipe( pipe_cmov_reg );
 6846 %}
 6847 
 6848 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6849   predicate(VM_Version::supports_cmov() );
 6850   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6851   ins_cost(200);
 6852   expand %{
 6853     cmovP_regU(cop, cr, dst, src);
 6854   %}
 6855 %}
 6856 
 6857 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6858 // correctly meets the two pointer arguments; one is an incoming
 6859 // register but the other is a memory operand.  ALSO appears to
 6860 // be buggy with implicit null checks.
 6861 //
 6862 //// Conditional move
 6863 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6864 //  predicate(VM_Version::supports_cmov() );
 6865 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6866 //  ins_cost(250);
 6867 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6868 //  opcode(0x0F,0x40);
 6869 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6870 //  ins_pipe( pipe_cmov_mem );
 6871 //%}
 6872 //
 6873 //// Conditional move
 6874 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6875 //  predicate(VM_Version::supports_cmov() );
 6876 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6877 //  ins_cost(250);
 6878 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6879 //  opcode(0x0F,0x40);
 6880 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6881 //  ins_pipe( pipe_cmov_mem );
 6882 //%}
 6883 
 6884 // Conditional move
 6885 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6886   predicate(UseSSE<=1);
 6887   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6888   ins_cost(200);
 6889   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6890   opcode(0xDA);
 6891   ins_encode( enc_cmov_dpr(cop,src) );
 6892   ins_pipe( pipe_cmovDPR_reg );
 6893 %}
 6894 
 6895 // Conditional move
 6896 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6897   predicate(UseSSE==0);
 6898   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6899   ins_cost(200);
 6900   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6901   opcode(0xDA);
 6902   ins_encode( enc_cmov_dpr(cop,src) );
 6903   ins_pipe( pipe_cmovDPR_reg );
 6904 %}
 6905 
 6906 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6907 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6908   predicate(UseSSE<=1);
 6909   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6910   ins_cost(200);
 6911   format %{ "Jn$cop   skip\n\t"
 6912             "MOV    $dst,$src\t# double\n"
 6913       "skip:" %}
 6914   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6915   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6916   ins_pipe( pipe_cmovDPR_reg );
 6917 %}
 6918 
 6919 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6920 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6921   predicate(UseSSE==0);
 6922   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6923   ins_cost(200);
 6924   format %{ "Jn$cop    skip\n\t"
 6925             "MOV    $dst,$src\t# float\n"
 6926       "skip:" %}
 6927   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6928   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6929   ins_pipe( pipe_cmovDPR_reg );
 6930 %}
 6931 
// There is no CMOV for XMM registers with SSE/SSE2, so emulate it with a short branch
 6933 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6934   predicate (UseSSE>=1);
 6935   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6936   ins_cost(200);
 6937   format %{ "Jn$cop   skip\n\t"
 6938             "MOVSS  $dst,$src\t# float\n"
 6939       "skip:" %}
 6940   ins_encode %{
 6941     Label skip;
 6942     // Invert sense of branch from sense of CMOV
 6943     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6944     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6945     __ bind(skip);
 6946   %}
 6947   ins_pipe( pipe_slow );
 6948 %}
 6949 
// There is no CMOV for XMM registers with SSE/SSE2, so emulate it with a short branch
 6951 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6952   predicate (UseSSE>=2);
 6953   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6954   ins_cost(200);
 6955   format %{ "Jn$cop   skip\n\t"
 6956             "MOVSD  $dst,$src\t# float\n"
 6957       "skip:" %}
 6958   ins_encode %{
 6959     Label skip;
 6960     // Invert sense of branch from sense of CMOV
 6961     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6962     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6963     __ bind(skip);
 6964   %}
 6965   ins_pipe( pipe_slow );
 6966 %}
 6967 
 6968 // unsigned version
 6969 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6970   predicate (UseSSE>=1);
 6971   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6972   ins_cost(200);
 6973   format %{ "Jn$cop   skip\n\t"
 6974             "MOVSS  $dst,$src\t# float\n"
 6975       "skip:" %}
 6976   ins_encode %{
 6977     Label skip;
 6978     // Invert sense of branch from sense of CMOV
 6979     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6980     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6981     __ bind(skip);
 6982   %}
 6983   ins_pipe( pipe_slow );
 6984 %}
 6985 
 6986 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6987   predicate (UseSSE>=1);
 6988   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6989   ins_cost(200);
 6990   expand %{
 6991     fcmovF_regU(cop, cr, dst, src);
 6992   %}
 6993 %}
 6994 
 6995 // unsigned version
 6996 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6997   predicate (UseSSE>=2);
 6998   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6999   ins_cost(200);
 7000   format %{ "Jn$cop   skip\n\t"
 7001             "MOVSD  $dst,$src\t# float\n"
 7002       "skip:" %}
 7003   ins_encode %{
 7004     Label skip;
 7005     // Invert sense of branch from sense of CMOV
 7006     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 7007     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7008     __ bind(skip);
 7009   %}
 7010   ins_pipe( pipe_slow );
 7011 %}
 7012 
 7013 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 7014   predicate (UseSSE>=2);
 7015   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7016   ins_cost(200);
 7017   expand %{
 7018     fcmovD_regU(cop, cr, dst, src);
 7019   %}
 7020 %}
 7021 
 7022 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 7023   predicate(VM_Version::supports_cmov() );
 7024   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7025   ins_cost(200);
 7026   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7027             "CMOV$cop $dst.hi,$src.hi" %}
 7028   opcode(0x0F,0x40);
 7029   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7030   ins_pipe( pipe_cmov_reg_long );
 7031 %}
 7032 
 7033 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7034   predicate(VM_Version::supports_cmov() );
 7035   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7036   ins_cost(200);
 7037   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7038             "CMOV$cop $dst.hi,$src.hi" %}
 7039   opcode(0x0F,0x40);
 7040   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7041   ins_pipe( pipe_cmov_reg_long );
 7042 %}
 7043 
 7044 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7045   predicate(VM_Version::supports_cmov() );
 7046   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7047   ins_cost(200);
 7048   expand %{
 7049     cmovL_regU(cop, cr, dst, src);
 7050   %}
 7051 %}
 7052 
 7053 //----------Arithmetic Instructions--------------------------------------------
 7054 //----------Addition Instructions----------------------------------------------
 7055 
 7056 // Integer Addition Instructions
 7057 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7058   match(Set dst (AddI dst src));
 7059   effect(KILL cr);
 7060 
 7061   size(2);
 7062   format %{ "ADD    $dst,$src" %}
 7063   opcode(0x03);
 7064   ins_encode( OpcP, RegReg( dst, src) );
 7065   ins_pipe( ialu_reg_reg );
 7066 %}
 7067 
 7068 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7069   match(Set dst (AddI dst src));
 7070   effect(KILL cr);
 7071 
 7072   format %{ "ADD    $dst,$src" %}
 7073   opcode(0x81, 0x00); /* /0 id */
 7074   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7075   ins_pipe( ialu_reg );
 7076 %}
 7077 
 7078 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7079   predicate(UseIncDec);
 7080   match(Set dst (AddI dst src));
 7081   effect(KILL cr);
 7082 
 7083   size(1);
 7084   format %{ "INC    $dst" %}
  opcode(0x40); /* INC r32 is encoded as 0x40 + reg */
 7086   ins_encode( Opc_plus( primary, dst ) );
 7087   ins_pipe( ialu_reg );
 7088 %}
 7089 
 7090 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7091   match(Set dst (AddI src0 src1));
 7092   ins_cost(110);
 7093 
 7094   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7095   opcode(0x8D); /* 0x8D /r */
 7096   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7097   ins_pipe( ialu_reg_reg );
 7098 %}
 7099 
 7100 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7101   match(Set dst (AddP src0 src1));
 7102   ins_cost(110);
 7103 
 7104   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7105   opcode(0x8D); /* 0x8D /r */
 7106   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7107   ins_pipe( ialu_reg_reg );
 7108 %}
 7109 
 7110 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7111   predicate(UseIncDec);
 7112   match(Set dst (AddI dst src));
 7113   effect(KILL cr);
 7114 
 7115   size(1);
 7116   format %{ "DEC    $dst" %}
  opcode(0x48); /* DEC r32 is encoded as 0x48 + reg */
 7118   ins_encode( Opc_plus( primary, dst ) );
 7119   ins_pipe( ialu_reg );
 7120 %}
 7121 
 7122 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7123   match(Set dst (AddP dst src));
 7124   effect(KILL cr);
 7125 
 7126   size(2);
 7127   format %{ "ADD    $dst,$src" %}
 7128   opcode(0x03);
 7129   ins_encode( OpcP, RegReg( dst, src) );
 7130   ins_pipe( ialu_reg_reg );
 7131 %}
 7132 
 7133 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7134   match(Set dst (AddP dst src));
 7135   effect(KILL cr);
 7136 
 7137   format %{ "ADD    $dst,$src" %}
 7138   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7139   // ins_encode( RegImm( dst, src) );
 7140   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7141   ins_pipe( ialu_reg );
 7142 %}
 7143 
 7144 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7145   match(Set dst (AddI dst (LoadI src)));
 7146   effect(KILL cr);
 7147 
 7148   ins_cost(150);
 7149   format %{ "ADD    $dst,$src" %}
 7150   opcode(0x03);
 7151   ins_encode( OpcP, RegMem( dst, src) );
 7152   ins_pipe( ialu_reg_mem );
 7153 %}
 7154 
 7155 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7156   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7157   effect(KILL cr);
 7158 
 7159   ins_cost(150);
 7160   format %{ "ADD    $dst,$src" %}
 7161   opcode(0x01);  /* Opcode 01 /r */
 7162   ins_encode( OpcP, RegMem( src, dst ) );
 7163   ins_pipe( ialu_mem_reg );
 7164 %}
 7165 
 7166 // Add Memory with Immediate
 7167 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7168   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7169   effect(KILL cr);
 7170 
 7171   ins_cost(125);
 7172   format %{ "ADD    $dst,$src" %}
 7173   opcode(0x81);               /* Opcode 81 /0 id */
 7174   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7175   ins_pipe( ialu_mem_imm );
 7176 %}
 7177 
 7178 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7179   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7180   effect(KILL cr);
 7181 
 7182   ins_cost(125);
 7183   format %{ "INC    $dst" %}
 7184   opcode(0xFF);               /* Opcode FF /0 */
 7185   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7186   ins_pipe( ialu_mem_imm );
 7187 %}
 7188 
 7189 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7190   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7191   effect(KILL cr);
 7192 
 7193   ins_cost(125);
 7194   format %{ "DEC    $dst" %}
 7195   opcode(0xFF);               /* Opcode FF /1 */
 7196   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7197   ins_pipe( ialu_mem_imm );
 7198 %}
 7199 
 7200 
 7201 instruct checkCastPP( eRegP dst ) %{
 7202   match(Set dst (CheckCastPP dst));
 7203 
 7204   size(0);
 7205   format %{ "#checkcastPP of $dst" %}
 7206   ins_encode( /*empty encoding*/ );
 7207   ins_pipe( empty );
 7208 %}
 7209 
 7210 instruct castPP( eRegP dst ) %{
 7211   match(Set dst (CastPP dst));
 7212   format %{ "#castPP of $dst" %}
 7213   ins_encode( /*empty encoding*/ );
 7214   ins_pipe( empty );
 7215 %}
 7216 
 7217 instruct castII( rRegI dst ) %{
 7218   match(Set dst (CastII dst));
 7219   format %{ "#castII of $dst" %}
 7220   ins_encode( /*empty encoding*/ );
 7221   ins_cost(0);
 7222   ins_pipe( empty );
 7223 %}
 7224 
 7225 instruct castLL( eRegL dst ) %{
 7226   match(Set dst (CastLL dst));
 7227   format %{ "#castLL of $dst" %}
 7228   ins_encode( /*empty encoding*/ );
 7229   ins_cost(0);
 7230   ins_pipe( empty );
 7231 %}
 7232 
 7233 instruct castFF( regF dst ) %{
 7234   predicate(UseSSE >= 1);
 7235   match(Set dst (CastFF dst));
 7236   format %{ "#castFF of $dst" %}
 7237   ins_encode( /*empty encoding*/ );
 7238   ins_cost(0);
 7239   ins_pipe( empty );
 7240 %}
 7241 
 7242 instruct castDD( regD dst ) %{
 7243   predicate(UseSSE >= 2);
 7244   match(Set dst (CastDD dst));
 7245   format %{ "#castDD of $dst" %}
 7246   ins_encode( /*empty encoding*/ );
 7247   ins_cost(0);
 7248   ins_pipe( empty );
 7249 %}
 7250 
 7251 instruct castFF_PR( regFPR dst ) %{
 7252   predicate(UseSSE < 1);
 7253   match(Set dst (CastFF dst));
 7254   format %{ "#castFF of $dst" %}
 7255   ins_encode( /*empty encoding*/ );
 7256   ins_cost(0);
 7257   ins_pipe( empty );
 7258 %}
 7259 
 7260 instruct castDD_PR( regDPR dst ) %{
 7261   predicate(UseSSE < 2);
 7262   match(Set dst (CastDD dst));
 7263   format %{ "#castDD of $dst" %}
 7264   ins_encode( /*empty encoding*/ );
 7265   ins_cost(0);
 7266   ins_pipe( empty );
 7267 %}
 7268 
 7269 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7270 
 7271 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7272   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7273   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7274   effect(KILL cr, KILL oldval);
 7275   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7276             "MOV    $res,0\n\t"
 7277             "JNE,s  fail\n\t"
 7278             "MOV    $res,1\n"
 7279           "fail:" %}
 7280   ins_encode( enc_cmpxchg8(mem_ptr),
 7281               enc_flags_ne_to_boolean(res) );
 7282   ins_pipe( pipe_cmpxchg );
 7283 %}
 7284 
 7285 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7286   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7287   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7288   effect(KILL cr, KILL oldval);
 7289   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7290             "MOV    $res,0\n\t"
 7291             "JNE,s  fail\n\t"
 7292             "MOV    $res,1\n"
 7293           "fail:" %}
 7294   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7295   ins_pipe( pipe_cmpxchg );
 7296 %}
 7297 
 7298 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7299   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7300   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7301   effect(KILL cr, KILL oldval);
 7302   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7303             "MOV    $res,0\n\t"
 7304             "JNE,s  fail\n\t"
 7305             "MOV    $res,1\n"
 7306           "fail:" %}
 7307   ins_encode( enc_cmpxchgb(mem_ptr),
 7308               enc_flags_ne_to_boolean(res) );
 7309   ins_pipe( pipe_cmpxchg );
 7310 %}
 7311 
 7312 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7313   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7314   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7315   effect(KILL cr, KILL oldval);
 7316   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7317             "MOV    $res,0\n\t"
 7318             "JNE,s  fail\n\t"
 7319             "MOV    $res,1\n"
 7320           "fail:" %}
 7321   ins_encode( enc_cmpxchgw(mem_ptr),
 7322               enc_flags_ne_to_boolean(res) );
 7323   ins_pipe( pipe_cmpxchg );
 7324 %}
 7325 
 7326 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7327   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7328   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7329   effect(KILL cr, KILL oldval);
 7330   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7331             "MOV    $res,0\n\t"
 7332             "JNE,s  fail\n\t"
 7333             "MOV    $res,1\n"
 7334           "fail:" %}
 7335   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7336   ins_pipe( pipe_cmpxchg );
 7337 %}
 7338 
 7339 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7340   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7341   effect(KILL cr);
 7342   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7343   ins_encode( enc_cmpxchg8(mem_ptr) );
 7344   ins_pipe( pipe_cmpxchg );
 7345 %}
 7346 
 7347 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7348   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7349   effect(KILL cr);
 7350   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7351   ins_encode( enc_cmpxchg(mem_ptr) );
 7352   ins_pipe( pipe_cmpxchg );
 7353 %}
 7354 
 7355 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7356   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7357   effect(KILL cr);
 7358   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7359   ins_encode( enc_cmpxchgb(mem_ptr) );
 7360   ins_pipe( pipe_cmpxchg );
 7361 %}
 7362 
 7363 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7364   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7365   effect(KILL cr);
 7366   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7367   ins_encode( enc_cmpxchgw(mem_ptr) );
 7368   ins_pipe( pipe_cmpxchg );
 7369 %}
 7370 
 7371 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7372   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7373   effect(KILL cr);
 7374   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7375   ins_encode( enc_cmpxchg(mem_ptr) );
 7376   ins_pipe( pipe_cmpxchg );
 7377 %}
 7378 
 7379 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7380   predicate(n->as_LoadStore()->result_not_used());
 7381   match(Set dummy (GetAndAddB mem add));
 7382   effect(KILL cr);
 7383   format %{ "ADDB  [$mem],$add" %}
 7384   ins_encode %{
 7385     __ lock();
 7386     __ addb($mem$$Address, $add$$constant);
 7387   %}
 7388   ins_pipe( pipe_cmpxchg );
 7389 %}
 7390 
 7391 // Important to match to xRegI: only 8-bit regs.
 7392 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7393   match(Set newval (GetAndAddB mem newval));
 7394   effect(KILL cr);
 7395   format %{ "XADDB  [$mem],$newval" %}
 7396   ins_encode %{
 7397     __ lock();
 7398     __ xaddb($mem$$Address, $newval$$Register);
 7399   %}
 7400   ins_pipe( pipe_cmpxchg );
 7401 %}
 7402 
 7403 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7404   predicate(n->as_LoadStore()->result_not_used());
 7405   match(Set dummy (GetAndAddS mem add));
 7406   effect(KILL cr);
 7407   format %{ "ADDS  [$mem],$add" %}
 7408   ins_encode %{
 7409     __ lock();
 7410     __ addw($mem$$Address, $add$$constant);
 7411   %}
 7412   ins_pipe( pipe_cmpxchg );
 7413 %}
 7414 
 7415 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7416   match(Set newval (GetAndAddS mem newval));
 7417   effect(KILL cr);
 7418   format %{ "XADDS  [$mem],$newval" %}
 7419   ins_encode %{
 7420     __ lock();
 7421     __ xaddw($mem$$Address, $newval$$Register);
 7422   %}
 7423   ins_pipe( pipe_cmpxchg );
 7424 %}
 7425 
 7426 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7427   predicate(n->as_LoadStore()->result_not_used());
 7428   match(Set dummy (GetAndAddI mem add));
 7429   effect(KILL cr);
 7430   format %{ "ADDL  [$mem],$add" %}
 7431   ins_encode %{
 7432     __ lock();
 7433     __ addl($mem$$Address, $add$$constant);
 7434   %}
 7435   ins_pipe( pipe_cmpxchg );
 7436 %}
 7437 
 7438 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7439   match(Set newval (GetAndAddI mem newval));
 7440   effect(KILL cr);
 7441   format %{ "XADDL  [$mem],$newval" %}
 7442   ins_encode %{
 7443     __ lock();
 7444     __ xaddl($mem$$Address, $newval$$Register);
 7445   %}
 7446   ins_pipe( pipe_cmpxchg );
 7447 %}
 7448 
 7449 // Important to match to xRegI: only 8-bit regs.
 7450 instruct xchgB( memory mem, xRegI newval) %{
 7451   match(Set newval (GetAndSetB mem newval));
 7452   format %{ "XCHGB  $newval,[$mem]" %}
 7453   ins_encode %{
 7454     __ xchgb($newval$$Register, $mem$$Address);
 7455   %}
 7456   ins_pipe( pipe_cmpxchg );
 7457 %}
 7458 
 7459 instruct xchgS( memory mem, rRegI newval) %{
 7460   match(Set newval (GetAndSetS mem newval));
 7461   format %{ "XCHGW  $newval,[$mem]" %}
 7462   ins_encode %{
 7463     __ xchgw($newval$$Register, $mem$$Address);
 7464   %}
 7465   ins_pipe( pipe_cmpxchg );
 7466 %}
 7467 
 7468 instruct xchgI( memory mem, rRegI newval) %{
 7469   match(Set newval (GetAndSetI mem newval));
 7470   format %{ "XCHGL  $newval,[$mem]" %}
 7471   ins_encode %{
 7472     __ xchgl($newval$$Register, $mem$$Address);
 7473   %}
 7474   ins_pipe( pipe_cmpxchg );
 7475 %}
 7476 
 7477 instruct xchgP( memory mem, pRegP newval) %{
 7478   match(Set newval (GetAndSetP mem newval));
 7479   format %{ "XCHGL  $newval,[$mem]" %}
 7480   ins_encode %{
 7481     __ xchgl($newval$$Register, $mem$$Address);
 7482   %}
 7483   ins_pipe( pipe_cmpxchg );
 7484 %}
 7485 
 7486 //----------Subtraction Instructions-------------------------------------------
 7487 
 7488 // Integer Subtraction Instructions
 7489 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7490   match(Set dst (SubI dst src));
 7491   effect(KILL cr);
 7492 
 7493   size(2);
 7494   format %{ "SUB    $dst,$src" %}
 7495   opcode(0x2B);
 7496   ins_encode( OpcP, RegReg( dst, src) );
 7497   ins_pipe( ialu_reg_reg );
 7498 %}
 7499 
 7500 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7501   match(Set dst (SubI dst src));
 7502   effect(KILL cr);
 7503 
 7504   format %{ "SUB    $dst,$src" %}
 7505   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7506   // ins_encode( RegImm( dst, src) );
 7507   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7508   ins_pipe( ialu_reg );
 7509 %}
 7510 
 7511 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7512   match(Set dst (SubI dst (LoadI src)));
 7513   effect(KILL cr);
 7514 
 7515   ins_cost(150);
 7516   format %{ "SUB    $dst,$src" %}
 7517   opcode(0x2B);
 7518   ins_encode( OpcP, RegMem( dst, src) );
 7519   ins_pipe( ialu_reg_mem );
 7520 %}
 7521 
 7522 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7523   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7524   effect(KILL cr);
 7525 
 7526   ins_cost(150);
 7527   format %{ "SUB    $dst,$src" %}
 7528   opcode(0x29);  /* Opcode 29 /r */
 7529   ins_encode( OpcP, RegMem( src, dst ) );
 7530   ins_pipe( ialu_mem_reg );
 7531 %}
 7532 
 7533 // Subtract from a pointer
 7534 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7535   match(Set dst (AddP dst (SubI zero src)));
 7536   effect(KILL cr);
 7537 
 7538   size(2);
 7539   format %{ "SUB    $dst,$src" %}
 7540   opcode(0x2B);
 7541   ins_encode( OpcP, RegReg( dst, src) );
 7542   ins_pipe( ialu_reg_reg );
 7543 %}
 7544 
 7545 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7546   match(Set dst (SubI zero dst));
 7547   effect(KILL cr);
 7548 
 7549   size(2);
 7550   format %{ "NEG    $dst" %}
 7551   opcode(0xF7,0x03);  // Opcode F7 /3
 7552   ins_encode( OpcP, RegOpc( dst ) );
 7553   ins_pipe( ialu_reg );
 7554 %}
 7555 
 7556 //----------Multiplication/Division Instructions-------------------------------
 7557 // Integer Multiplication Instructions
 7558 // Multiply Register
 7559 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7560   match(Set dst (MulI dst src));
 7561   effect(KILL cr);
 7562 
 7563   size(3);
 7564   ins_cost(300);
 7565   format %{ "IMUL   $dst,$src" %}
 7566   opcode(0xAF, 0x0F);
 7567   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7568   ins_pipe( ialu_reg_reg_alu0 );
 7569 %}
 7570 
 7571 // Multiply 32-bit Immediate
 7572 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7573   match(Set dst (MulI src imm));
 7574   effect(KILL cr);
 7575 
 7576   ins_cost(300);
 7577   format %{ "IMUL   $dst,$src,$imm" %}
 7578   opcode(0x69);  /* 69 /r id */
 7579   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7580   ins_pipe( ialu_reg_reg_alu0 );
 7581 %}
 7582 
 7583 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7584   match(Set dst src);
 7585   effect(KILL cr);
 7586 
 7587   // Note that this is artificially increased to make it more expensive than loadConL
 7588   ins_cost(250);
 7589   format %{ "MOV    EAX,$src\t// low word only" %}
 7590   opcode(0xB8);
 7591   ins_encode( LdImmL_Lo(dst, src) );
 7592   ins_pipe( ialu_reg_fat );
 7593 %}
 7594 
 7595 // Multiply by 32-bit Immediate, taking the shifted high order results
 7596 //  (special case for shift by 32)
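// The widening 32x32->64 IMUL leaves the high half of the product in EDX; taking
// the result from EDX implements the shift by 32, and a further SAR EDX,$cnt-32
// handles the larger shift counts up to 63 (second form below).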
 7597 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7598   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7599   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7600              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7601              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7602   effect(USE src1, KILL cr);
 7603 
 7604   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7605   ins_cost(0*100 + 1*400 - 150);
 7606   format %{ "IMUL   EDX:EAX,$src1" %}
 7607   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7608   ins_pipe( pipe_slow );
 7609 %}
 7610 
 7611 // Multiply by 32-bit Immediate, taking the shifted high order results
 7612 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7613   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7614   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7615              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7616              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7617   effect(USE src1, KILL cr);
 7618 
 7619   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7620   ins_cost(1*100 + 1*400 - 150);
 7621   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7622             "SAR    EDX,$cnt-32" %}
 7623   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7624   ins_pipe( pipe_slow );
 7625 %}
 7626 
 7627 // Multiply Memory 32-bit Immediate
 7628 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7629   match(Set dst (MulI (LoadI src) imm));
 7630   effect(KILL cr);
 7631 
 7632   ins_cost(300);
 7633   format %{ "IMUL   $dst,$src,$imm" %}
 7634   opcode(0x69);  /* 69 /r id */
 7635   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7636   ins_pipe( ialu_reg_mem_alu0 );
 7637 %}
 7638 
 7639 // Multiply Memory
 7640 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7641   match(Set dst (MulI dst (LoadI src)));
 7642   effect(KILL cr);
 7643 
 7644   ins_cost(350);
 7645   format %{ "IMUL   $dst,$src" %}
 7646   opcode(0xAF, 0x0F);
 7647   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7648   ins_pipe( ialu_reg_mem_alu0 );
 7649 %}
 7650 
 7651 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7652 %{
 7653   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7654   effect(KILL cr, KILL src2);
 7655 
 7656   expand %{ mulI_eReg(dst, src1, cr);
 7657            mulI_eReg(src2, src3, cr);
 7658            addI_eReg(dst, src2, cr); %}
 7659 %}
 7660 
 7661 // Multiply Register Int to Long
 7662 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7663   // Basic Idea: long = (long)int * (long)int
 7664   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7665   effect(DEF dst, USE src, USE src1, KILL flags);
 7666 
 7667   ins_cost(300);
 7668   format %{ "IMUL   $dst,$src1" %}
 7669 
 7670   ins_encode( long_int_multiply( dst, src1 ) );
 7671   ins_pipe( ialu_reg_reg_alu0 );
 7672 %}
 7673 
 7674 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7675   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7676   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7677   effect(KILL flags);
 7678 
 7679   ins_cost(300);
 7680   format %{ "MUL    $dst,$src1" %}
 7681 
 7682   ins_encode( long_uint_multiply(dst, src1) );
 7683   ins_pipe( ialu_reg_reg_alu0 );
 7684 %}
 7685 
 7686 // Multiply Register Long
 7687 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7688   match(Set dst (MulL dst src));
 7689   effect(KILL cr, TEMP tmp);
 7690   ins_cost(4*100+3*400);
 7691 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7692 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7693   format %{ "MOV    $tmp,$src.lo\n\t"
 7694             "IMUL   $tmp,EDX\n\t"
 7695             "MOV    EDX,$src.hi\n\t"
 7696             "IMUL   EDX,EAX\n\t"
 7697             "ADD    $tmp,EDX\n\t"
 7698             "MUL    EDX:EAX,$src.lo\n\t"
 7699             "ADD    EDX,$tmp" %}
 7700   ins_encode( long_multiply( dst, src, tmp ) );
 7701   ins_pipe( pipe_slow );
 7702 %}
 7703 
 7704 // Multiply Register Long where the left operand's high 32 bits are zero
 7705 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7706   predicate(is_operand_hi32_zero(n->in(1)));
 7707   match(Set dst (MulL dst src));
 7708   effect(KILL cr, TEMP tmp);
 7709   ins_cost(2*100+2*400);
 7710 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7711 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7712   format %{ "MOV    $tmp,$src.hi\n\t"
 7713             "IMUL   $tmp,EAX\n\t"
 7714             "MUL    EDX:EAX,$src.lo\n\t"
 7715             "ADD    EDX,$tmp" %}
 7716   ins_encode %{
 7717     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7718     __ imull($tmp$$Register, rax);
 7719     __ mull($src$$Register);
 7720     __ addl(rdx, $tmp$$Register);
 7721   %}
 7722   ins_pipe( pipe_slow );
 7723 %}
 7724 
 7725 // Multiply Register Long where the right operand's high 32 bits are zero
 7726 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7727   predicate(is_operand_hi32_zero(n->in(2)));
 7728   match(Set dst (MulL dst src));
 7729   effect(KILL cr, TEMP tmp);
 7730   ins_cost(2*100+2*400);
 7731 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7732 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7733   format %{ "MOV    $tmp,$src.lo\n\t"
 7734             "IMUL   $tmp,EDX\n\t"
 7735             "MUL    EDX:EAX,$src.lo\n\t"
 7736             "ADD    EDX,$tmp" %}
 7737   ins_encode %{
 7738     __ movl($tmp$$Register, $src$$Register);
 7739     __ imull($tmp$$Register, rdx);
 7740     __ mull($src$$Register);
 7741     __ addl(rdx, $tmp$$Register);
 7742   %}
 7743   ins_pipe( pipe_slow );
 7744 %}
 7745 
 7746 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7747 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7748   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7749   match(Set dst (MulL dst src));
 7750   effect(KILL cr);
 7751   ins_cost(1*400);
 7752 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7753 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7754   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7755   ins_encode %{
 7756     __ mull($src$$Register);
 7757   %}
 7758   ins_pipe( pipe_slow );
 7759 %}
 7760 
 7761 // Multiply Register Long by small constant
 7762 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7763   match(Set dst (MulL dst src));
 7764   effect(KILL cr, TEMP tmp);
 7765   ins_cost(2*100+2*400);
 7766   size(12);
 7767 // Basic idea: lo(result) = lo(src * EAX)
 7768 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7769   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7770             "MOV    EDX,$src\n\t"
 7771             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7772             "ADD    EDX,$tmp" %}
 7773   ins_encode( long_multiply_con( dst, src, tmp ) );
 7774   ins_pipe( pipe_slow );
 7775 %}
 7776 
 7777 // Integer DIV with Register
 7778 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7779   match(Set rax (DivI rax div));
 7780   effect(KILL rdx, KILL cr);
 7781   size(26);
 7782   ins_cost(30*100+10*100);
 7783   format %{ "CMP    EAX,0x80000000\n\t"
 7784             "JNE,s  normal\n\t"
 7785             "XOR    EDX,EDX\n\t"
 7786             "CMP    ECX,-1\n\t"
 7787             "JE,s   done\n"
 7788     "normal: CDQ\n\t"
 7789             "IDIV   $div\n\t"
 7790     "done:"        %}
 7791   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7792   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7793   ins_pipe( ialu_reg_reg_alu0 );
 7794 %}
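
// The CMP/JNE/XOR/CMP/JE prologue above guards the one case IDIV cannot
// handle: min_jint / -1 raises a hardware divide error, so the guard skips
// the IDIV and leaves EAX = min_jint (the Java-defined quotient) and EDX = 0.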
 7795 
 7796 // Divide Register Long
 7797 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7798   match(Set dst (DivL src1 src2));
 7799   effect(CALL);
 7800   ins_cost(10000);
 7801   format %{ "PUSH   $src1.hi\n\t"
 7802             "PUSH   $src1.lo\n\t"
 7803             "PUSH   $src2.hi\n\t"
 7804             "PUSH   $src2.lo\n\t"
 7805             "CALL   SharedRuntime::ldiv\n\t"
 7806             "ADD    ESP,16" %}
 7807   ins_encode( long_div(src1,src2) );
 7808   ins_pipe( pipe_slow );
 7809 %}
 7810 
 7811 // Integer DIVMOD with Register, both quotient and mod results
 7812 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7813   match(DivModI rax div);
 7814   effect(KILL cr);
 7815   size(26);
 7816   ins_cost(30*100+10*100);
 7817   format %{ "CMP    EAX,0x80000000\n\t"
 7818             "JNE,s  normal\n\t"
 7819             "XOR    EDX,EDX\n\t"
 7820             "CMP    ECX,-1\n\t"
 7821             "JE,s   done\n"
 7822     "normal: CDQ\n\t"
 7823             "IDIV   $div\n\t"
 7824     "done:"        %}
 7825   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7826   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7827   ins_pipe( pipe_slow );
 7828 %}
 7829 
 7830 // Integer MOD with Register
 7831 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7832   match(Set rdx (ModI rax div));
 7833   effect(KILL rax, KILL cr);
 7834 
 7835   size(26);
 7836   ins_cost(300);
 7837   format %{ "CDQ\n\t"
 7838             "IDIV   $div" %}
 7839   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7840   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7841   ins_pipe( ialu_reg_reg_alu0 );
 7842 %}
 7843 
 7844 // Remainder Register Long
 7845 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7846   match(Set dst (ModL src1 src2));
 7847   effect(CALL);
 7848   ins_cost(10000);
 7849   format %{ "PUSH   $src1.hi\n\t"
 7850             "PUSH   $src1.lo\n\t"
 7851             "PUSH   $src2.hi\n\t"
 7852             "PUSH   $src2.lo\n\t"
 7853             "CALL   SharedRuntime::lrem\n\t"
 7854             "ADD    ESP,16" %}
 7855   ins_encode( long_mod(src1,src2) );
 7856   ins_pipe( pipe_slow );
 7857 %}
 7858 
 7859 // Divide Register Long (no special case since divisor != -1)
 7860 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7861   match(Set dst (DivL dst imm));
 7862   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7863   ins_cost(1000);
 7864   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7865             "XOR    $tmp2,$tmp2\n\t"
 7866             "CMP    $tmp,EDX\n\t"
 7867             "JA,s   fast\n\t"
 7868             "MOV    $tmp2,EAX\n\t"
 7869             "MOV    EAX,EDX\n\t"
 7870             "MOV    EDX,0\n\t"
 7871             "JLE,s  pos\n\t"
 7872             "LNEG   EAX : $tmp2\n\t"
 7873             "DIV    $tmp # unsigned division\n\t"
 7874             "XCHG   EAX,$tmp2\n\t"
 7875             "DIV    $tmp\n\t"
 7876             "LNEG   $tmp2 : EAX\n\t"
 7877             "JMP,s  done\n"
 7878     "pos:\n\t"
 7879             "DIV    $tmp\n\t"
 7880             "XCHG   EAX,$tmp2\n"
 7881     "fast:\n\t"
 7882             "DIV    $tmp\n"
 7883     "done:\n\t"
 7884             "MOV    EDX,$tmp2\n\t"
 7885             "NEG    EDX:EAX # if $imm < 0" %}
 7886   ins_encode %{
 7887     int con = (int)$imm$$constant;
 7888     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7889     int pcon = (con > 0) ? con : -con;
 7890     Label Lfast, Lpos, Ldone;
 7891 
 7892     __ movl($tmp$$Register, pcon);
 7893     __ xorl($tmp2$$Register,$tmp2$$Register);
 7894     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7895     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7896 
 7897     __ movl($tmp2$$Register, $dst$$Register); // save
 7898     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7899     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7900     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7901 
 7902     // Negative dividend.
 7903     // convert value to positive to use unsigned division
 7904     __ lneg($dst$$Register, $tmp2$$Register);
 7905     __ divl($tmp$$Register);
 7906     __ xchgl($dst$$Register, $tmp2$$Register);
 7907     __ divl($tmp$$Register);
 7908     // revert result back to negative
 7909     __ lneg($tmp2$$Register, $dst$$Register);
 7910     __ jmpb(Ldone);
 7911 
 7912     __ bind(Lpos);
 7913     __ divl($tmp$$Register); // Use unsigned division
 7914     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through for final divide, tmp2 has 32 bit hi result
 7916 
 7917     __ bind(Lfast);
 7918     // fast path: src is positive
 7919     __ divl($tmp$$Register); // Use unsigned division
 7920 
 7921     __ bind(Ldone);
 7922     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7923     if (con < 0) {
 7924       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7925     }
 7926   %}
 7927   ins_pipe( pipe_slow );
 7928 %}
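
// Sketch of the two-DIV schoolbook step used above, for an unsigned 64-bit
// dividend hi:lo and a 32-bit divisor d: first divide hi by d (with EDX = 0)
// to get the high quotient word and a remainder r, then divide r:lo by d to
// get the low quotient word.  The fast path applies when d > hi, where the
// high quotient word is simply 0.  Signs are handled by negating a negative
// dividend up front, dividing unsigned, negating the quotient back, and
// negating once more at the end if $imm is negative.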
 7929 
// Remainder Register Long (remainder fits into 32 bits)
 7931 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7932   match(Set dst (ModL dst imm));
 7933   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7934   ins_cost(1000);
 7935   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7936             "CMP    $tmp,EDX\n\t"
 7937             "JA,s   fast\n\t"
 7938             "MOV    $tmp2,EAX\n\t"
 7939             "MOV    EAX,EDX\n\t"
 7940             "MOV    EDX,0\n\t"
 7941             "JLE,s  pos\n\t"
 7942             "LNEG   EAX : $tmp2\n\t"
 7943             "DIV    $tmp # unsigned division\n\t"
 7944             "MOV    EAX,$tmp2\n\t"
 7945             "DIV    $tmp\n\t"
 7946             "NEG    EDX\n\t"
 7947             "JMP,s  done\n"
 7948     "pos:\n\t"
 7949             "DIV    $tmp\n\t"
 7950             "MOV    EAX,$tmp2\n"
 7951     "fast:\n\t"
 7952             "DIV    $tmp\n"
 7953     "done:\n\t"
 7954             "MOV    EAX,EDX\n\t"
 7955             "SAR    EDX,31\n\t" %}
 7956   ins_encode %{
 7957     int con = (int)$imm$$constant;
 7958     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7959     int pcon = (con > 0) ? con : -con;
 7960     Label  Lfast, Lpos, Ldone;
 7961 
 7962     __ movl($tmp$$Register, pcon);
 7963     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7964     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7965 
 7966     __ movl($tmp2$$Register, $dst$$Register); // save
 7967     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7968     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7969     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7970 
 7971     // Negative dividend.
 7972     // convert value to positive to use unsigned division
 7973     __ lneg($dst$$Register, $tmp2$$Register);
 7974     __ divl($tmp$$Register);
 7975     __ movl($dst$$Register, $tmp2$$Register);
 7976     __ divl($tmp$$Register);
 7977     // revert remainder back to negative
 7978     __ negl(HIGH_FROM_LOW($dst$$Register));
 7979     __ jmpb(Ldone);
 7980 
 7981     __ bind(Lpos);
 7982     __ divl($tmp$$Register);
 7983     __ movl($dst$$Register, $tmp2$$Register);
 7984 
 7985     __ bind(Lfast);
 7986     // fast path: src is positive
 7987     __ divl($tmp$$Register);
 7988 
 7989     __ bind(Ldone);
 7990     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7991     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 7992 
 7993   %}
 7994   ins_pipe( pipe_slow );
 7995 %}
 7996 
 7997 // Integer Shift Instructions
 7998 // Shift Left by one
 7999 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8000   match(Set dst (LShiftI dst shift));
 8001   effect(KILL cr);
 8002 
 8003   size(2);
 8004   format %{ "SHL    $dst,$shift" %}
 8005   opcode(0xD1, 0x4);  /* D1 /4 */
 8006   ins_encode( OpcP, RegOpc( dst ) );
 8007   ins_pipe( ialu_reg );
 8008 %}
 8009 
 8010 // Shift Left by 8-bit immediate
 8011 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8012   match(Set dst (LShiftI dst shift));
 8013   effect(KILL cr);
 8014 
 8015   size(3);
 8016   format %{ "SHL    $dst,$shift" %}
 8017   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8018   ins_encode( RegOpcImm( dst, shift) );
 8019   ins_pipe( ialu_reg );
 8020 %}
 8021 
 8022 // Shift Left by variable
 8023 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8024   match(Set dst (LShiftI dst shift));
 8025   effect(KILL cr);
 8026 
 8027   size(2);
 8028   format %{ "SHL    $dst,$shift" %}
 8029   opcode(0xD3, 0x4);  /* D3 /4 */
 8030   ins_encode( OpcP, RegOpc( dst ) );
 8031   ins_pipe( ialu_reg_reg );
 8032 %}
 8033 
 8034 // Arithmetic shift right by one
 8035 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8036   match(Set dst (RShiftI dst shift));
 8037   effect(KILL cr);
 8038 
 8039   size(2);
 8040   format %{ "SAR    $dst,$shift" %}
 8041   opcode(0xD1, 0x7);  /* D1 /7 */
 8042   ins_encode( OpcP, RegOpc( dst ) );
 8043   ins_pipe( ialu_reg );
 8044 %}
 8045 
// Arithmetic shift right by one (memory operand)
 8047 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8048   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8049   effect(KILL cr);
 8050   format %{ "SAR    $dst,$shift" %}
 8051   opcode(0xD1, 0x7);  /* D1 /7 */
 8052   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8053   ins_pipe( ialu_mem_imm );
 8054 %}
 8055 
 8056 // Arithmetic Shift Right by 8-bit immediate
 8057 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8058   match(Set dst (RShiftI dst shift));
 8059   effect(KILL cr);
 8060 
 8061   size(3);
 8062   format %{ "SAR    $dst,$shift" %}
 8063   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8064   ins_encode( RegOpcImm( dst, shift ) );
 8065   ins_pipe( ialu_mem_imm );
 8066 %}
 8067 
 8068 // Arithmetic Shift Right by 8-bit immediate
 8069 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8070   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8071   effect(KILL cr);
 8072 
 8073   format %{ "SAR    $dst,$shift" %}
 8074   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8075   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8076   ins_pipe( ialu_mem_imm );
 8077 %}
 8078 
 8079 // Arithmetic Shift Right by variable
 8080 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8081   match(Set dst (RShiftI dst shift));
 8082   effect(KILL cr);
 8083 
 8084   size(2);
 8085   format %{ "SAR    $dst,$shift" %}
 8086   opcode(0xD3, 0x7);  /* D3 /7 */
 8087   ins_encode( OpcP, RegOpc( dst ) );
 8088   ins_pipe( ialu_reg_reg );
 8089 %}
 8090 
 8091 // Logical shift right by one
 8092 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8093   match(Set dst (URShiftI dst shift));
 8094   effect(KILL cr);
 8095 
 8096   size(2);
 8097   format %{ "SHR    $dst,$shift" %}
 8098   opcode(0xD1, 0x5);  /* D1 /5 */
 8099   ins_encode( OpcP, RegOpc( dst ) );
 8100   ins_pipe( ialu_reg );
 8101 %}
 8102 
 8103 // Logical Shift Right by 8-bit immediate
 8104 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8105   match(Set dst (URShiftI dst shift));
 8106   effect(KILL cr);
 8107 
 8108   size(3);
 8109   format %{ "SHR    $dst,$shift" %}
 8110   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8111   ins_encode( RegOpcImm( dst, shift) );
 8112   ins_pipe( ialu_reg );
 8113 %}
 8114 
 8115 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8117 // This idiom is used by the compiler for the i2b bytecode.
 8118 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8119   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8120 
 8121   size(3);
 8122   format %{ "MOVSX  $dst,$src :8" %}
 8123   ins_encode %{
 8124     __ movsbl($dst$$Register, $src$$Register);
 8125   %}
 8126   ins_pipe(ialu_reg_reg);
 8127 %}
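
// Example: for src = 0x123456F0, (src << 24) is 0xF0000000 and the arithmetic
// >> 24 gives 0xFFFFFFF0 (-16), i.e. the sign-extended low byte; MOVSX
// produces the same result in a single instruction.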
 8128 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
 8131 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8132   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8133 
 8134   size(3);
 8135   format %{ "MOVSX  $dst,$src :16" %}
 8136   ins_encode %{
 8137     __ movswl($dst$$Register, $src$$Register);
 8138   %}
 8139   ins_pipe(ialu_reg_reg);
 8140 %}
 8141 
 8142 
 8143 // Logical Shift Right by variable
 8144 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8145   match(Set dst (URShiftI dst shift));
 8146   effect(KILL cr);
 8147 
 8148   size(2);
 8149   format %{ "SHR    $dst,$shift" %}
 8150   opcode(0xD3, 0x5);  /* D3 /5 */
 8151   ins_encode( OpcP, RegOpc( dst ) );
 8152   ins_pipe( ialu_reg_reg );
 8153 %}
 8154 
 8155 
 8156 //----------Logical Instructions-----------------------------------------------
 8157 //----------Integer Logical Instructions---------------------------------------
 8158 // And Instructions
 8159 // And Register with Register
 8160 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8161   match(Set dst (AndI dst src));
 8162   effect(KILL cr);
 8163 
 8164   size(2);
 8165   format %{ "AND    $dst,$src" %}
 8166   opcode(0x23);
 8167   ins_encode( OpcP, RegReg( dst, src) );
 8168   ins_pipe( ialu_reg_reg );
 8169 %}
 8170 
 8171 // And Register with Immediate
 8172 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8173   match(Set dst (AndI dst src));
 8174   effect(KILL cr);
 8175 
 8176   format %{ "AND    $dst,$src" %}
 8177   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8178   // ins_encode( RegImm( dst, src) );
 8179   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8180   ins_pipe( ialu_reg );
 8181 %}
 8182 
 8183 // And Register with Memory
 8184 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8185   match(Set dst (AndI dst (LoadI src)));
 8186   effect(KILL cr);
 8187 
 8188   ins_cost(150);
 8189   format %{ "AND    $dst,$src" %}
 8190   opcode(0x23);
 8191   ins_encode( OpcP, RegMem( dst, src) );
 8192   ins_pipe( ialu_reg_mem );
 8193 %}
 8194 
 8195 // And Memory with Register
 8196 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8197   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8198   effect(KILL cr);
 8199 
 8200   ins_cost(150);
 8201   format %{ "AND    $dst,$src" %}
 8202   opcode(0x21);  /* Opcode 21 /r */
 8203   ins_encode( OpcP, RegMem( src, dst ) );
 8204   ins_pipe( ialu_mem_reg );
 8205 %}
 8206 
 8207 // And Memory with Immediate
 8208 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8209   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8210   effect(KILL cr);
 8211 
 8212   ins_cost(125);
 8213   format %{ "AND    $dst,$src" %}
 8214   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8215   // ins_encode( MemImm( dst, src) );
 8216   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8217   ins_pipe( ialu_mem_imm );
 8218 %}
 8219 
 8220 // BMI1 instructions
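// The BMI1 patterns below match the canonical bit-trick shapes:
//   ANDN   dst = ~src1 & src2            -- (AndI (XorI src1 -1) src2)
//   BLSI   dst = src & -src              -- isolate lowest set bit
//   BLSMSK dst = src ^ (src - 1)         -- mask up to and including lowest set bit
//   BLSR   dst = src & (src - 1)         -- clear lowest set bit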
 8221 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8222   match(Set dst (AndI (XorI src1 minus_1) src2));
 8223   predicate(UseBMI1Instructions);
 8224   effect(KILL cr);
 8225 
 8226   format %{ "ANDNL  $dst, $src1, $src2" %}
 8227 
 8228   ins_encode %{
 8229     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8230   %}
 8231   ins_pipe(ialu_reg);
 8232 %}
 8233 
 8234 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8235   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8236   predicate(UseBMI1Instructions);
 8237   effect(KILL cr);
 8238 
 8239   ins_cost(125);
 8240   format %{ "ANDNL  $dst, $src1, $src2" %}
 8241 
 8242   ins_encode %{
 8243     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8244   %}
 8245   ins_pipe(ialu_reg_mem);
 8246 %}
 8247 
 8248 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8249   match(Set dst (AndI (SubI imm_zero src) src));
 8250   predicate(UseBMI1Instructions);
 8251   effect(KILL cr);
 8252 
 8253   format %{ "BLSIL  $dst, $src" %}
 8254 
 8255   ins_encode %{
 8256     __ blsil($dst$$Register, $src$$Register);
 8257   %}
 8258   ins_pipe(ialu_reg);
 8259 %}
 8260 
 8261 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8262   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8263   predicate(UseBMI1Instructions);
 8264   effect(KILL cr);
 8265 
 8266   ins_cost(125);
 8267   format %{ "BLSIL  $dst, $src" %}
 8268 
 8269   ins_encode %{
 8270     __ blsil($dst$$Register, $src$$Address);
 8271   %}
 8272   ins_pipe(ialu_reg_mem);
 8273 %}
 8274 
 8275 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8276 %{
 8277   match(Set dst (XorI (AddI src minus_1) src));
 8278   predicate(UseBMI1Instructions);
 8279   effect(KILL cr);
 8280 
 8281   format %{ "BLSMSKL $dst, $src" %}
 8282 
 8283   ins_encode %{
 8284     __ blsmskl($dst$$Register, $src$$Register);
 8285   %}
 8286 
 8287   ins_pipe(ialu_reg);
 8288 %}
 8289 
 8290 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8291 %{
 8292   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8293   predicate(UseBMI1Instructions);
 8294   effect(KILL cr);
 8295 
 8296   ins_cost(125);
 8297   format %{ "BLSMSKL $dst, $src" %}
 8298 
 8299   ins_encode %{
 8300     __ blsmskl($dst$$Register, $src$$Address);
 8301   %}
 8302 
 8303   ins_pipe(ialu_reg_mem);
 8304 %}
 8305 
 8306 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8307 %{
 8308   match(Set dst (AndI (AddI src minus_1) src) );
 8309   predicate(UseBMI1Instructions);
 8310   effect(KILL cr);
 8311 
 8312   format %{ "BLSRL  $dst, $src" %}
 8313 
 8314   ins_encode %{
 8315     __ blsrl($dst$$Register, $src$$Register);
 8316   %}
 8317 
 8318   ins_pipe(ialu_reg);
 8319 %}
 8320 
 8321 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8322 %{
 8323   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8324   predicate(UseBMI1Instructions);
 8325   effect(KILL cr);
 8326 
 8327   ins_cost(125);
 8328   format %{ "BLSRL  $dst, $src" %}
 8329 
 8330   ins_encode %{
 8331     __ blsrl($dst$$Register, $src$$Address);
 8332   %}
 8333 
 8334   ins_pipe(ialu_reg_mem);
 8335 %}
 8336 
 8337 // Or Instructions
 8338 // Or Register with Register
 8339 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8340   match(Set dst (OrI dst src));
 8341   effect(KILL cr);
 8342 
 8343   size(2);
 8344   format %{ "OR     $dst,$src" %}
 8345   opcode(0x0B);
 8346   ins_encode( OpcP, RegReg( dst, src) );
 8347   ins_pipe( ialu_reg_reg );
 8348 %}
 8349 
 8350 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8351   match(Set dst (OrI dst (CastP2X src)));
 8352   effect(KILL cr);
 8353 
 8354   size(2);
 8355   format %{ "OR     $dst,$src" %}
 8356   opcode(0x0B);
 8357   ins_encode( OpcP, RegReg( dst, src) );
 8358   ins_pipe( ialu_reg_reg );
 8359 %}
 8360 
 8361 
 8362 // Or Register with Immediate
 8363 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8364   match(Set dst (OrI dst src));
 8365   effect(KILL cr);
 8366 
 8367   format %{ "OR     $dst,$src" %}
 8368   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8369   // ins_encode( RegImm( dst, src) );
 8370   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8371   ins_pipe( ialu_reg );
 8372 %}
 8373 
 8374 // Or Register with Memory
 8375 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8376   match(Set dst (OrI dst (LoadI src)));
 8377   effect(KILL cr);
 8378 
 8379   ins_cost(150);
 8380   format %{ "OR     $dst,$src" %}
 8381   opcode(0x0B);
 8382   ins_encode( OpcP, RegMem( dst, src) );
 8383   ins_pipe( ialu_reg_mem );
 8384 %}
 8385 
 8386 // Or Memory with Register
 8387 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8388   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8389   effect(KILL cr);
 8390 
 8391   ins_cost(150);
 8392   format %{ "OR     $dst,$src" %}
 8393   opcode(0x09);  /* Opcode 09 /r */
 8394   ins_encode( OpcP, RegMem( src, dst ) );
 8395   ins_pipe( ialu_mem_reg );
 8396 %}
 8397 
 8398 // Or Memory with Immediate
 8399 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8400   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8401   effect(KILL cr);
 8402 
 8403   ins_cost(125);
 8404   format %{ "OR     $dst,$src" %}
 8405   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8406   // ins_encode( MemImm( dst, src) );
 8407   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8408   ins_pipe( ialu_mem_imm );
 8409 %}
 8410 
 8411 // ROL/ROR
 8412 // ROL expand
 8413 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8414   effect(USE_DEF dst, USE shift, KILL cr);
 8415 
 8416   format %{ "ROL    $dst, $shift" %}
 8417   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8418   ins_encode( OpcP, RegOpc( dst ));
 8419   ins_pipe( ialu_reg );
 8420 %}
 8421 
 8422 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8423   effect(USE_DEF dst, USE shift, KILL cr);
 8424 
 8425   format %{ "ROL    $dst, $shift" %}
 8426   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
 8427   ins_encode( RegOpcImm(dst, shift) );
 8428   ins_pipe(ialu_reg);
 8429 %}
 8430 
 8431 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8432   effect(USE_DEF dst, USE shift, KILL cr);
 8433 
 8434   format %{ "ROL    $dst, $shift" %}
 8435   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8436   ins_encode(OpcP, RegOpc(dst));
 8437   ins_pipe( ialu_reg_reg );
 8438 %}
 8439 // end of ROL expand
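
// The rotate patterns below rely on rol(x, s) == (x << s) | (x >>> (32 - s)).
// Since 32-bit shift and rotate counts are taken mod 32 by the hardware (and
// by Java's shift semantics), (0 - s) and (32 - s) name the same count, which
// is why both the SubI-zero and SubI-32 variable forms match.  The immediate
// forms additionally require (lshift + rshift) & 0x1f == 0, enforced by their
// predicates.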
 8440 
 8441 // ROL 32bit by one once
 8442 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8443   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8444 
 8445   expand %{
 8446     rolI_eReg_imm1(dst, lshift, cr);
 8447   %}
 8448 %}
 8449 
 8450 // ROL 32bit var by imm8 once
 8451 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8452   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8453   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8454 
 8455   expand %{
 8456     rolI_eReg_imm8(dst, lshift, cr);
 8457   %}
 8458 %}
 8459 
 8460 // ROL 32bit var by var once
 8461 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8462   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8463 
 8464   expand %{
 8465     rolI_eReg_CL(dst, shift, cr);
 8466   %}
 8467 %}
 8468 
 8469 // ROL 32bit var by var once
 8470 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8471   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8472 
 8473   expand %{
 8474     rolI_eReg_CL(dst, shift, cr);
 8475   %}
 8476 %}
 8477 
 8478 // ROR expand
 8479 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8480   effect(USE_DEF dst, USE shift, KILL cr);
 8481 
 8482   format %{ "ROR    $dst, $shift" %}
 8483   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8484   ins_encode( OpcP, RegOpc( dst ) );
 8485   ins_pipe( ialu_reg );
 8486 %}
 8487 
 8488 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8489   effect (USE_DEF dst, USE shift, KILL cr);
 8490 
 8491   format %{ "ROR    $dst, $shift" %}
 8492   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
 8493   ins_encode( RegOpcImm(dst, shift) );
 8494   ins_pipe( ialu_reg );
 8495 %}
 8496 
 8497 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8498   effect(USE_DEF dst, USE shift, KILL cr);
 8499 
 8500   format %{ "ROR    $dst, $shift" %}
 8501   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8502   ins_encode(OpcP, RegOpc(dst));
 8503   ins_pipe( ialu_reg_reg );
 8504 %}
 8505 // end of ROR expand
 8506 
// ROR 32bit by one once
 8508 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8509   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8510 
 8511   expand %{
 8512     rorI_eReg_imm1(dst, rshift, cr);
 8513   %}
 8514 %}
 8515 
 8516 // ROR 32bit by immI8 once
 8517 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8518   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8519   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8520 
 8521   expand %{
 8522     rorI_eReg_imm8(dst, rshift, cr);
 8523   %}
 8524 %}
 8525 
 8526 // ROR 32bit var by var once
 8527 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8528   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8529 
 8530   expand %{
 8531     rorI_eReg_CL(dst, shift, cr);
 8532   %}
 8533 %}
 8534 
 8535 // ROR 32bit var by var once
 8536 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8537   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8538 
 8539   expand %{
 8540     rorI_eReg_CL(dst, shift, cr);
 8541   %}
 8542 %}
 8543 
 8544 // Xor Instructions
 8545 // Xor Register with Register
 8546 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8547   match(Set dst (XorI dst src));
 8548   effect(KILL cr);
 8549 
 8550   size(2);
 8551   format %{ "XOR    $dst,$src" %}
 8552   opcode(0x33);
 8553   ins_encode( OpcP, RegReg( dst, src) );
 8554   ins_pipe( ialu_reg_reg );
 8555 %}
 8556 
 8557 // Xor Register with Immediate -1
 8558 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8559   match(Set dst (XorI dst imm));
 8560 
 8561   size(2);
 8562   format %{ "NOT    $dst" %}
 8563   ins_encode %{
 8564      __ notl($dst$$Register);
 8565   %}
 8566   ins_pipe( ialu_reg );
 8567 %}
 8568 
 8569 // Xor Register with Immediate
 8570 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8571   match(Set dst (XorI dst src));
 8572   effect(KILL cr);
 8573 
 8574   format %{ "XOR    $dst,$src" %}
 8575   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8576   // ins_encode( RegImm( dst, src) );
 8577   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8578   ins_pipe( ialu_reg );
 8579 %}
 8580 
 8581 // Xor Register with Memory
 8582 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8583   match(Set dst (XorI dst (LoadI src)));
 8584   effect(KILL cr);
 8585 
 8586   ins_cost(150);
 8587   format %{ "XOR    $dst,$src" %}
 8588   opcode(0x33);
 8589   ins_encode( OpcP, RegMem(dst, src) );
 8590   ins_pipe( ialu_reg_mem );
 8591 %}
 8592 
 8593 // Xor Memory with Register
 8594 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8595   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8596   effect(KILL cr);
 8597 
 8598   ins_cost(150);
 8599   format %{ "XOR    $dst,$src" %}
 8600   opcode(0x31);  /* Opcode 31 /r */
 8601   ins_encode( OpcP, RegMem( src, dst ) );
 8602   ins_pipe( ialu_mem_reg );
 8603 %}
 8604 
 8605 // Xor Memory with Immediate
 8606 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8607   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8608   effect(KILL cr);
 8609 
 8610   ins_cost(125);
 8611   format %{ "XOR    $dst,$src" %}
 8612   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8613   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8614   ins_pipe( ialu_mem_imm );
 8615 %}
 8616 
 8617 //----------Convert Int to Boolean---------------------------------------------
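// Conv2B maps src == 0 to 0 and any non-zero src to 1.  The trick used below:
// after copying src into dst, NEG dst sets CF exactly when src != 0, and
// ADC dst,src then computes (-src) + src + CF = CF, i.e. 0 or 1.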
 8618 
 8619 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8620   effect( DEF dst, USE src );
 8621   format %{ "MOV    $dst,$src" %}
 8622   ins_encode( enc_Copy( dst, src) );
 8623   ins_pipe( ialu_reg_reg );
 8624 %}
 8625 
 8626 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8627   effect( USE_DEF dst, USE src, KILL cr );
 8628 
 8629   size(4);
 8630   format %{ "NEG    $dst\n\t"
 8631             "ADC    $dst,$src" %}
 8632   ins_encode( neg_reg(dst),
 8633               OpcRegReg(0x13,dst,src) );
 8634   ins_pipe( ialu_reg_reg_long );
 8635 %}
 8636 
 8637 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8638   match(Set dst (Conv2B src));
 8639 
 8640   expand %{
 8641     movI_nocopy(dst,src);
 8642     ci2b(dst,src,cr);
 8643   %}
 8644 %}
 8645 
 8646 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8647   effect( DEF dst, USE src );
 8648   format %{ "MOV    $dst,$src" %}
 8649   ins_encode( enc_Copy( dst, src) );
 8650   ins_pipe( ialu_reg_reg );
 8651 %}
 8652 
 8653 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8654   effect( USE_DEF dst, USE src, KILL cr );
 8655   format %{ "NEG    $dst\n\t"
 8656             "ADC    $dst,$src" %}
 8657   ins_encode( neg_reg(dst),
 8658               OpcRegReg(0x13,dst,src) );
 8659   ins_pipe( ialu_reg_reg_long );
 8660 %}
 8661 
 8662 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8663   match(Set dst (Conv2B src));
 8664 
 8665   expand %{
 8666     movP_nocopy(dst,src);
 8667     cp2b(dst,src,cr);
 8668   %}
 8669 %}
 8670 
 8671 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8672   match(Set dst (CmpLTMask p q));
 8673   effect(KILL cr);
 8674   ins_cost(400);
 8675 
  // SETlt can only use low byte of EAX, EBX, ECX, or EDX as destination
 8677   format %{ "XOR    $dst,$dst\n\t"
 8678             "CMP    $p,$q\n\t"
 8679             "SETlt  $dst\n\t"
 8680             "NEG    $dst" %}
 8681   ins_encode %{
 8682     Register Rp = $p$$Register;
 8683     Register Rq = $q$$Register;
 8684     Register Rd = $dst$$Register;
 8685     Label done;
 8686     __ xorl(Rd, Rd);
 8687     __ cmpl(Rp, Rq);
 8688     __ setb(Assembler::less, Rd);
 8689     __ negl(Rd);
 8690   %}
 8691 
 8692   ins_pipe(pipe_slow);
 8693 %}
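
// cmpLTMask builds an all-ones/all-zeros mask: after CMP, SETlt writes 1 to
// the low byte when p < q and NEG turns that into -1; the leading XOR is
// needed because SETcc writes only 8 bits.  cmpLTMask0 below gets the same
// mask for (dst < 0) from a single SAR dst,31.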
 8694 
 8695 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8696   match(Set dst (CmpLTMask dst zero));
 8697   effect(DEF dst, KILL cr);
 8698   ins_cost(100);
 8699 
 8700   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8701   ins_encode %{
 8702   __ sarl($dst$$Register, 31);
 8703   %}
 8704   ins_pipe(ialu_reg);
 8705 %}
 8706 
 8707 /* better to save a register than avoid a branch */
 8708 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8709   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8710   effect(KILL cr);
 8711   ins_cost(400);
 8712   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8713             "JGE    done\n\t"
 8714             "ADD    $p,$y\n"
 8715             "done:  " %}
 8716   ins_encode %{
 8717     Register Rp = $p$$Register;
 8718     Register Rq = $q$$Register;
 8719     Register Ry = $y$$Register;
 8720     Label done;
 8721     __ subl(Rp, Rq);
 8722     __ jccb(Assembler::greaterEqual, done);
 8723     __ addl(Rp, Ry);
 8724     __ bind(done);
 8725   %}
 8726 
 8727   ins_pipe(pipe_cmplt);
 8728 %}
 8729 
 8730 /* better to save a register than avoid a branch */
 8731 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8732   match(Set y (AndI (CmpLTMask p q) y));
 8733   effect(KILL cr);
 8734 
 8735   ins_cost(300);
 8736 
 8737   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8738             "JLT      done\n\t"
 8739             "XORL     $y, $y\n"
 8740             "done:  " %}
 8741   ins_encode %{
 8742     Register Rp = $p$$Register;
 8743     Register Rq = $q$$Register;
 8744     Register Ry = $y$$Register;
 8745     Label done;
 8746     __ cmpl(Rp, Rq);
 8747     __ jccb(Assembler::less, done);
 8748     __ xorl(Ry, Ry);
 8749     __ bind(done);
 8750   %}
 8751 
 8752   ins_pipe(pipe_cmplt);
 8753 %}
 8754 
 8755 /* If I enable this, I encourage spilling in the inner loop of compress.
 8756 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8757   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8758 */
 8759 //----------Overflow Math Instructions-----------------------------------------
 8760 
 8761 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8762 %{
 8763   match(Set cr (OverflowAddI op1 op2));
 8764   effect(DEF cr, USE_KILL op1, USE op2);
 8765 
 8766   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8767 
 8768   ins_encode %{
 8769     __ addl($op1$$Register, $op2$$Register);
 8770   %}
 8771   ins_pipe(ialu_reg_reg);
 8772 %}
 8773 
 8774 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8775 %{
 8776   match(Set cr (OverflowAddI op1 op2));
 8777   effect(DEF cr, USE_KILL op1, USE op2);
 8778 
 8779   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8780 
 8781   ins_encode %{
 8782     __ addl($op1$$Register, $op2$$constant);
 8783   %}
 8784   ins_pipe(ialu_reg_reg);
 8785 %}
 8786 
 8787 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8788 %{
 8789   match(Set cr (OverflowSubI op1 op2));
 8790 
 8791   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8792   ins_encode %{
 8793     __ cmpl($op1$$Register, $op2$$Register);
 8794   %}
 8795   ins_pipe(ialu_reg_reg);
 8796 %}
 8797 
 8798 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8799 %{
 8800   match(Set cr (OverflowSubI op1 op2));
 8801 
 8802   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8803   ins_encode %{
 8804     __ cmpl($op1$$Register, $op2$$constant);
 8805   %}
 8806   ins_pipe(ialu_reg_reg);
 8807 %}
 8808 
 8809 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8810 %{
 8811   match(Set cr (OverflowSubI zero op2));
 8812   effect(DEF cr, USE_KILL op2);
 8813 
 8814   format %{ "NEG    $op2\t# overflow check int" %}
 8815   ins_encode %{
 8816     __ negl($op2$$Register);
 8817   %}
 8818   ins_pipe(ialu_reg_reg);
 8819 %}
 8820 
 8821 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8822 %{
 8823   match(Set cr (OverflowMulI op1 op2));
 8824   effect(DEF cr, USE_KILL op1, USE op2);
 8825 
 8826   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8827   ins_encode %{
 8828     __ imull($op1$$Register, $op2$$Register);
 8829   %}
 8830   ins_pipe(ialu_reg_reg_alu0);
 8831 %}
 8832 
 8833 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8834 %{
 8835   match(Set cr (OverflowMulI op1 op2));
 8836   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8837 
 8838   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8839   ins_encode %{
 8840     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8841   %}
 8842   ins_pipe(ialu_reg_reg_alu0);
 8843 %}
 8844 
 8845 // Integer Absolute Instructions
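// The branch-free form below uses the sign mask m = src >> 31 (0 for a
// non-negative src, -1 for a negative one): (src ^ m) - m leaves a
// non-negative src unchanged and yields ~src + 1 == -src otherwise.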
 8846 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8847 %{
 8848   match(Set dst (AbsI src));
 8849   effect(TEMP dst, TEMP tmp, KILL cr);
 8850   format %{ "movl $tmp, $src\n\t"
 8851             "sarl $tmp, 31\n\t"
 8852             "movl $dst, $src\n\t"
 8853             "xorl $dst, $tmp\n\t"
 8854             "subl $dst, $tmp\n"
 8855           %}
 8856   ins_encode %{
 8857     __ movl($tmp$$Register, $src$$Register);
 8858     __ sarl($tmp$$Register, 31);
 8859     __ movl($dst$$Register, $src$$Register);
 8860     __ xorl($dst$$Register, $tmp$$Register);
 8861     __ subl($dst$$Register, $tmp$$Register);
 8862   %}
 8863 
 8864   ins_pipe(ialu_reg_reg);
 8865 %}
 8866 
 8867 //----------Long Instructions------------------------------------------------
 8868 // Add Long Register with Register
 8869 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8870   match(Set dst (AddL dst src));
 8871   effect(KILL cr);
 8872   ins_cost(200);
 8873   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8874             "ADC    $dst.hi,$src.hi" %}
 8875   opcode(0x03, 0x13);
 8876   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8877   ins_pipe( ialu_reg_reg_long );
 8878 %}
 8879 
 8880 // Add Long Register with Immediate
 8881 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8882   match(Set dst (AddL dst src));
 8883   effect(KILL cr);
 8884   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8885             "ADC    $dst.hi,$src.hi" %}
 8886   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8887   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8888   ins_pipe( ialu_reg_long );
 8889 %}
 8890 
 8891 // Add Long Register with Memory
 8892 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8893   match(Set dst (AddL dst (LoadL mem)));
 8894   effect(KILL cr);
 8895   ins_cost(125);
 8896   format %{ "ADD    $dst.lo,$mem\n\t"
 8897             "ADC    $dst.hi,$mem+4" %}
 8898   opcode(0x03, 0x13);
 8899   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8900   ins_pipe( ialu_reg_long_mem );
 8901 %}
 8902 
 8903 // Subtract Long Register with Register.
 8904 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8905   match(Set dst (SubL dst src));
 8906   effect(KILL cr);
 8907   ins_cost(200);
 8908   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8909             "SBB    $dst.hi,$src.hi" %}
 8910   opcode(0x2B, 0x1B);
 8911   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8912   ins_pipe( ialu_reg_reg_long );
 8913 %}
 8914 
 8915 // Subtract Long Register with Immediate
 8916 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8917   match(Set dst (SubL dst src));
 8918   effect(KILL cr);
 8919   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8920             "SBB    $dst.hi,$src.hi" %}
 8921   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8922   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8923   ins_pipe( ialu_reg_long );
 8924 %}
 8925 
 8926 // Subtract Long Register with Memory
 8927 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8928   match(Set dst (SubL dst (LoadL mem)));
 8929   effect(KILL cr);
 8930   ins_cost(125);
 8931   format %{ "SUB    $dst.lo,$mem\n\t"
 8932             "SBB    $dst.hi,$mem+4" %}
 8933   opcode(0x2B, 0x1B);
 8934   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8935   ins_pipe( ialu_reg_long_mem );
 8936 %}
 8937 
 8938 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8939   match(Set dst (SubL zero dst));
 8940   effect(KILL cr);
 8941   ins_cost(300);
 8942   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8943   ins_encode( neg_long(dst) );
 8944   ins_pipe( ialu_reg_reg_long );
 8945 %}
 8946 
 8947 // And Long Register with Register
 8948 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8949   match(Set dst (AndL dst src));
 8950   effect(KILL cr);
 8951   format %{ "AND    $dst.lo,$src.lo\n\t"
 8952             "AND    $dst.hi,$src.hi" %}
 8953   opcode(0x23,0x23);
 8954   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8955   ins_pipe( ialu_reg_reg_long );
 8956 %}
 8957 
 8958 // And Long Register with Immediate
 8959 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8960   match(Set dst (AndL dst src));
 8961   effect(KILL cr);
 8962   format %{ "AND    $dst.lo,$src.lo\n\t"
 8963             "AND    $dst.hi,$src.hi" %}
 8964   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8965   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8966   ins_pipe( ialu_reg_long );
 8967 %}
 8968 
 8969 // And Long Register with Memory
 8970 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8971   match(Set dst (AndL dst (LoadL mem)));
 8972   effect(KILL cr);
 8973   ins_cost(125);
 8974   format %{ "AND    $dst.lo,$mem\n\t"
 8975             "AND    $dst.hi,$mem+4" %}
 8976   opcode(0x23, 0x23);
 8977   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8978   ins_pipe( ialu_reg_long_mem );
 8979 %}
 8980 
 8981 // BMI1 instructions
 8982 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8983   match(Set dst (AndL (XorL src1 minus_1) src2));
 8984   predicate(UseBMI1Instructions);
 8985   effect(KILL cr, TEMP dst);
 8986 
 8987   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8988             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 8989          %}
 8990 
 8991   ins_encode %{
 8992     Register Rdst = $dst$$Register;
 8993     Register Rsrc1 = $src1$$Register;
 8994     Register Rsrc2 = $src2$$Register;
 8995     __ andnl(Rdst, Rsrc1, Rsrc2);
 8996     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 8997   %}
 8998   ins_pipe(ialu_reg_reg_long);
 8999 %}
 9000 
 9001 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 9002   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9003   predicate(UseBMI1Instructions);
 9004   effect(KILL cr, TEMP dst);
 9005 
 9006   ins_cost(125);
 9007   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9008             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9009          %}
 9010 
 9011   ins_encode %{
 9012     Register Rdst = $dst$$Register;
 9013     Register Rsrc1 = $src1$$Register;
 9014     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9015 
 9016     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9017     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9018   %}
 9019   ins_pipe(ialu_reg_mem);
 9020 %}
 9021 
 9022 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9023   match(Set dst (AndL (SubL imm_zero src) src));
 9024   predicate(UseBMI1Instructions);
 9025   effect(KILL cr, TEMP dst);
 9026 
 9027   format %{ "MOVL   $dst.hi, 0\n\t"
 9028             "BLSIL  $dst.lo, $src.lo\n\t"
 9029             "JNZ    done\n\t"
 9030             "BLSIL  $dst.hi, $src.hi\n"
 9031             "done:"
 9032          %}
 9033 
 9034   ins_encode %{
 9035     Label done;
 9036     Register Rdst = $dst$$Register;
 9037     Register Rsrc = $src$$Register;
 9038     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9039     __ blsil(Rdst, Rsrc);
 9040     __ jccb(Assembler::notZero, done);
 9041     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9042     __ bind(done);
 9043   %}
 9044   ins_pipe(ialu_reg);
 9045 %}
 9046 
 9047 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9048   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9049   predicate(UseBMI1Instructions);
 9050   effect(KILL cr, TEMP dst);
 9051 
 9052   ins_cost(125);
 9053   format %{ "MOVL   $dst.hi, 0\n\t"
 9054             "BLSIL  $dst.lo, $src\n\t"
 9055             "JNZ    done\n\t"
 9056             "BLSIL  $dst.hi, $src+4\n"
 9057             "done:"
 9058          %}
 9059 
 9060   ins_encode %{
 9061     Label done;
 9062     Register Rdst = $dst$$Register;
 9063     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9064 
 9065     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9066     __ blsil(Rdst, $src$$Address);
 9067     __ jccb(Assembler::notZero, done);
 9068     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9069     __ bind(done);
 9070   %}
 9071   ins_pipe(ialu_reg_mem);
 9072 %}
 9073 
 9074 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9075 %{
 9076   match(Set dst (XorL (AddL src minus_1) src));
 9077   predicate(UseBMI1Instructions);
 9078   effect(KILL cr, TEMP dst);
 9079 
 9080   format %{ "MOVL    $dst.hi, 0\n\t"
 9081             "BLSMSKL $dst.lo, $src.lo\n\t"
 9082             "JNC     done\n\t"
 9083             "BLSMSKL $dst.hi, $src.hi\n"
 9084             "done:"
 9085          %}
 9086 
 9087   ins_encode %{
 9088     Label done;
 9089     Register Rdst = $dst$$Register;
 9090     Register Rsrc = $src$$Register;
 9091     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9092     __ blsmskl(Rdst, Rsrc);
 9093     __ jccb(Assembler::carryClear, done);
 9094     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9095     __ bind(done);
 9096   %}
 9097 
 9098   ins_pipe(ialu_reg);
 9099 %}
 9100 
 9101 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9102 %{
 9103   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9104   predicate(UseBMI1Instructions);
 9105   effect(KILL cr, TEMP dst);
 9106 
 9107   ins_cost(125);
 9108   format %{ "MOVL    $dst.hi, 0\n\t"
 9109             "BLSMSKL $dst.lo, $src\n\t"
 9110             "JNC     done\n\t"
 9111             "BLSMSKL $dst.hi, $src+4\n"
 9112             "done:"
 9113          %}
 9114 
 9115   ins_encode %{
 9116     Label done;
 9117     Register Rdst = $dst$$Register;
 9118     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9119 
 9120     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9121     __ blsmskl(Rdst, $src$$Address);
 9122     __ jccb(Assembler::carryClear, done);
 9123     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9124     __ bind(done);
 9125   %}
 9126 
 9127   ins_pipe(ialu_reg_mem);
 9128 %}
 9129 
 9130 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9131 %{
 9132   match(Set dst (AndL (AddL src minus_1) src) );
 9133   predicate(UseBMI1Instructions);
 9134   effect(KILL cr, TEMP dst);
 9135 
 9136   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9137             "BLSRL  $dst.lo, $src.lo\n\t"
 9138             "JNC    done\n\t"
 9139             "BLSRL  $dst.hi, $src.hi\n"
 9140             "done:"
 9141   %}
 9142 
 9143   ins_encode %{
 9144     Label done;
 9145     Register Rdst = $dst$$Register;
 9146     Register Rsrc = $src$$Register;
 9147     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9148     __ blsrl(Rdst, Rsrc);
 9149     __ jccb(Assembler::carryClear, done);
 9150     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9151     __ bind(done);
 9152   %}
 9153 
 9154   ins_pipe(ialu_reg);
 9155 %}
 9156 
 9157 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9158 %{
 9159   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9160   predicate(UseBMI1Instructions);
 9161   effect(KILL cr, TEMP dst);
 9162 
 9163   ins_cost(125);
 9164   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9165             "BLSRL  $dst.lo, $src\n\t"
 9166             "JNC    done\n\t"
 9167             "BLSRL  $dst.hi, $src+4\n"
 9168             "done:"
 9169   %}
 9170 
 9171   ins_encode %{
 9172     Label done;
 9173     Register Rdst = $dst$$Register;
 9174     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9175     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9176     __ blsrl(Rdst, $src$$Address);
 9177     __ jccb(Assembler::carryClear, done);
 9178     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9179     __ bind(done);
 9180   %}
 9181 
 9182   ins_pipe(ialu_reg_mem);
 9183 %}
 9184 
 9185 // Or Long Register with Register
 9186 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9187   match(Set dst (OrL dst src));
 9188   effect(KILL cr);
 9189   format %{ "OR     $dst.lo,$src.lo\n\t"
 9190             "OR     $dst.hi,$src.hi" %}
 9191   opcode(0x0B,0x0B);
 9192   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9193   ins_pipe( ialu_reg_reg_long );
 9194 %}
 9195 
 9196 // Or Long Register with Immediate
 9197 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9198   match(Set dst (OrL dst src));
 9199   effect(KILL cr);
 9200   format %{ "OR     $dst.lo,$src.lo\n\t"
 9201             "OR     $dst.hi,$src.hi" %}
 9202   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9203   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9204   ins_pipe( ialu_reg_long );
 9205 %}
 9206 
 9207 // Or Long Register with Memory
 9208 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9209   match(Set dst (OrL dst (LoadL mem)));
 9210   effect(KILL cr);
 9211   ins_cost(125);
 9212   format %{ "OR     $dst.lo,$mem\n\t"
 9213             "OR     $dst.hi,$mem+4" %}
 9214   opcode(0x0B,0x0B);
 9215   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9216   ins_pipe( ialu_reg_long_mem );
 9217 %}
 9218 
 9219 // Xor Long Register with Register
 9220 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9221   match(Set dst (XorL dst src));
 9222   effect(KILL cr);
 9223   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9224             "XOR    $dst.hi,$src.hi" %}
 9225   opcode(0x33,0x33);
 9226   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9227   ins_pipe( ialu_reg_reg_long );
 9228 %}
 9229 
 9230 // Xor Long Register with Immediate -1
 9231 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9232   match(Set dst (XorL dst imm));
 9233   format %{ "NOT    $dst.lo\n\t"
 9234             "NOT    $dst.hi" %}
 9235   ins_encode %{
 9236      __ notl($dst$$Register);
 9237      __ notl(HIGH_FROM_LOW($dst$$Register));
 9238   %}
 9239   ins_pipe( ialu_reg_long );
 9240 %}
 9241 
 9242 // Xor Long Register with Immediate
 9243 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9244   match(Set dst (XorL dst src));
 9245   effect(KILL cr);
 9246   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9247             "XOR    $dst.hi,$src.hi" %}
 9248   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9249   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9250   ins_pipe( ialu_reg_long );
 9251 %}
 9252 
 9253 // Xor Long Register with Memory
 9254 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9255   match(Set dst (XorL dst (LoadL mem)));
 9256   effect(KILL cr);
 9257   ins_cost(125);
 9258   format %{ "XOR    $dst.lo,$mem\n\t"
 9259             "XOR    $dst.hi,$mem+4" %}
 9260   opcode(0x33,0x33);
 9261   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9262   ins_pipe( ialu_reg_long_mem );
 9263 %}
 9264 
 9265 // Shift Left Long by 1
 9266 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9267   predicate(UseNewLongLShift);
 9268   match(Set dst (LShiftL dst cnt));
 9269   effect(KILL cr);
 9270   ins_cost(100);
 9271   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9272             "ADC    $dst.hi,$dst.hi" %}
 9273   ins_encode %{
 9274     __ addl($dst$$Register,$dst$$Register);
 9275     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9276   %}
 9277   ins_pipe( ialu_reg_long );
 9278 %}
 9279 
 9280 // Shift Left Long by 2
 9281 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9282   predicate(UseNewLongLShift);
 9283   match(Set dst (LShiftL dst cnt));
 9284   effect(KILL cr);
 9285   ins_cost(100);
 9286   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9287             "ADC    $dst.hi,$dst.hi\n\t"
 9288             "ADD    $dst.lo,$dst.lo\n\t"
 9289             "ADC    $dst.hi,$dst.hi" %}
 9290   ins_encode %{
 9291     __ addl($dst$$Register,$dst$$Register);
 9292     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9293     __ addl($dst$$Register,$dst$$Register);
 9294     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9295   %}
 9296   ins_pipe( ialu_reg_long );
 9297 %}
 9298 
 9299 // Shift Left Long by 3
 9300 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9301   predicate(UseNewLongLShift);
 9302   match(Set dst (LShiftL dst cnt));
 9303   effect(KILL cr);
 9304   ins_cost(100);
 9305   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9306             "ADC    $dst.hi,$dst.hi\n\t"
 9307             "ADD    $dst.lo,$dst.lo\n\t"
 9308             "ADC    $dst.hi,$dst.hi\n\t"
 9309             "ADD    $dst.lo,$dst.lo\n\t"
 9310             "ADC    $dst.hi,$dst.hi" %}
 9311   ins_encode %{
 9312     __ addl($dst$$Register,$dst$$Register);
 9313     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9314     __ addl($dst$$Register,$dst$$Register);
 9315     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9316     __ addl($dst$$Register,$dst$$Register);
 9317     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9318   %}
 9319   ins_pipe( ialu_reg_long );
 9320 %}
 9321 
 9322 // Shift Left Long by 1-31
 9323 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9324   match(Set dst (LShiftL dst cnt));
 9325   effect(KILL cr);
 9326   ins_cost(200);
 9327   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9328             "SHL    $dst.lo,$cnt" %}
 9329   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9330   ins_encode( move_long_small_shift(dst,cnt) );
 9331   ins_pipe( ialu_reg_long );
 9332 %}
 9333 
 9334 // Shift Left Long by 32-63
 9335 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9336   match(Set dst (LShiftL dst cnt));
 9337   effect(KILL cr);
 9338   ins_cost(300);
 9339   format %{ "MOV    $dst.hi,$dst.lo\n"
 9340           "\tSHL    $dst.hi,$cnt-32\n"
 9341           "\tXOR    $dst.lo,$dst.lo" %}
 9342   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9343   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9344   ins_pipe( ialu_reg_long );
 9345 %}
 9346 
 9347 // Shift Left Long by variable
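// For variable counts the hardware masks CL to 5 bits for the 32-bit
// SHLD/SHL (and SHRD/SHR/SAR) forms, so counts of 32..63 need the explicit
// TEST/JEQ prologue used here and in the right-shift variants below: move one
// half into the other and clear (or sign-fill) the vacated half, then let the
// SHLD/SHRD pair handle the remaining count mod 32.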
 9348 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9349   match(Set dst (LShiftL dst shift));
 9350   effect(KILL cr);
 9351   ins_cost(500+200);
 9352   size(17);
 9353   format %{ "TEST   $shift,32\n\t"
 9354             "JEQ,s  small\n\t"
 9355             "MOV    $dst.hi,$dst.lo\n\t"
 9356             "XOR    $dst.lo,$dst.lo\n"
 9357     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9358             "SHL    $dst.lo,$shift" %}
 9359   ins_encode( shift_left_long( dst, shift ) );
 9360   ins_pipe( pipe_slow );
 9361 %}
 9362 
 9363 // Shift Right Long by 1-31
 9364 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9365   match(Set dst (URShiftL dst cnt));
 9366   effect(KILL cr);
 9367   ins_cost(200);
 9368   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9369             "SHR    $dst.hi,$cnt" %}
 9370   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9371   ins_encode( move_long_small_shift(dst,cnt) );
 9372   ins_pipe( ialu_reg_long );
 9373 %}
 9374 
 9375 // Shift Right Long by 32-63
 9376 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9377   match(Set dst (URShiftL dst cnt));
 9378   effect(KILL cr);
 9379   ins_cost(300);
 9380   format %{ "MOV    $dst.lo,$dst.hi\n"
 9381           "\tSHR    $dst.lo,$cnt-32\n"
 9382           "\tXOR    $dst.hi,$dst.hi" %}
 9383   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9384   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9385   ins_pipe( ialu_reg_long );
 9386 %}
 9387 
 9388 // Shift Right Long by variable
 9389 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9390   match(Set dst (URShiftL dst shift));
 9391   effect(KILL cr);
 9392   ins_cost(600);
 9393   size(17);
 9394   format %{ "TEST   $shift,32\n\t"
 9395             "JEQ,s  small\n\t"
 9396             "MOV    $dst.lo,$dst.hi\n\t"
 9397             "XOR    $dst.hi,$dst.hi\n"
 9398     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9399             "SHR    $dst.hi,$shift" %}
 9400   ins_encode( shift_right_long( dst, shift ) );
 9401   ins_pipe( pipe_slow );
 9402 %}
 9403 
 9404 // Shift Right Long by 1-31
 9405 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9406   match(Set dst (RShiftL dst cnt));
 9407   effect(KILL cr);
 9408   ins_cost(200);
 9409   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9410             "SAR    $dst.hi,$cnt" %}
 9411   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9412   ins_encode( move_long_small_shift(dst,cnt) );
 9413   ins_pipe( ialu_reg_long );
 9414 %}
 9415 
 9416 // Shift Right Long by 32-63
 9417 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9418   match(Set dst (RShiftL dst cnt));
 9419   effect(KILL cr);
 9420   ins_cost(300);
 9421   format %{ "MOV    $dst.lo,$dst.hi\n"
 9422           "\tSAR    $dst.lo,$cnt-32\n"
 9423           "\tSAR    $dst.hi,31" %}
 9424   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9425   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9426   ins_pipe( ialu_reg_long );
 9427 %}
 9428 
 9429 // Shift Right arithmetic Long by variable
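      // Same bit-5 test as the variable shifts above, but the count>=32 case
      // copies the high word into the low word and then fills the high word
      // with copies of the sign bit via 'SAR $dst.hi,31', so the result stays
      // sign-extended.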
 9430 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9431   match(Set dst (RShiftL dst shift));
 9432   effect(KILL cr);
 9433   ins_cost(600);
 9434   size(18);
 9435   format %{ "TEST   $shift,32\n\t"
 9436             "JEQ,s  small\n\t"
 9437             "MOV    $dst.lo,$dst.hi\n\t"
 9438             "SAR    $dst.hi,31\n"
 9439     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9440             "SAR    $dst.hi,$shift" %}
 9441   ins_encode( shift_right_arith_long( dst, shift ) );
 9442   ins_pipe( pipe_slow );
 9443 %}
 9444 
 9445 
 9446 //----------Double Instructions------------------------------------------------
 9447 // Double Math
 9448 
 9449 // Compare & branch
 9450 
 9451 // P6 version of float compare, sets condition codes in EFLAGS
 9452 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9453   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9454   match(Set cr (CmpD src1 src2));
 9455   effect(KILL rax);
 9456   ins_cost(150);
 9457   format %{ "FLD    $src1\n\t"
 9458             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9459             "JNP    exit\n\t"
 9460             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9461             "SAHF\n"
 9462      "exit:\tNOP               // avoid branch to branch" %}
 9463   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9464   ins_encode( Push_Reg_DPR(src1),
 9465               OpcP, RegOpc(src2),
 9466               cmpF_P6_fixup );
 9467   ins_pipe( pipe_slow );
 9468 %}
 9469 
 9470 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9471   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9472   match(Set cr (CmpD src1 src2));
 9473   ins_cost(150);
 9474   format %{ "FLD    $src1\n\t"
 9475             "FUCOMIP ST,$src2  // P6 instruction" %}
 9476   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9477   ins_encode( Push_Reg_DPR(src1),
 9478               OpcP, RegOpc(src2));
 9479   ins_pipe( pipe_slow );
 9480 %}
 9481 
 9482 // Compare & branch
 9483 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9484   predicate(UseSSE<=1);
 9485   match(Set cr (CmpD src1 src2));
 9486   effect(KILL rax);
 9487   ins_cost(200);
 9488   format %{ "FLD    $src1\n\t"
 9489             "FCOMp  $src2\n\t"
 9490             "FNSTSW AX\n\t"
 9491             "TEST   AX,0x400\n\t"
 9492             "JZ,s   flags\n\t"
 9493             "MOV    AH,1\t# unordered treat as LT\n"
 9494     "flags:\tSAHF" %}
 9495   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9496   ins_encode( Push_Reg_DPR(src1),
 9497               OpcP, RegOpc(src2),
 9498               fpu_flags);
 9499   ins_pipe( pipe_slow );
 9500 %}
 9501 
 9502 // Compare vs zero into -1,0,1
 9503 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9504   predicate(UseSSE<=1);
 9505   match(Set dst (CmpD3 src1 zero));
 9506   effect(KILL cr, KILL rax);
 9507   ins_cost(280);
 9508   format %{ "FTSTD  $dst,$src1" %}
 9509   opcode(0xE4, 0xD9);
 9510   ins_encode( Push_Reg_DPR(src1),
 9511               OpcS, OpcP, PopFPU,
 9512               CmpF_Result(dst));
 9513   ins_pipe( pipe_slow );
 9514 %}
 9515 
 9516 // Compare into -1,0,1
 9517 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9518   predicate(UseSSE<=1);
 9519   match(Set dst (CmpD3 src1 src2));
 9520   effect(KILL cr, KILL rax);
 9521   ins_cost(300);
 9522   format %{ "FCMPD  $dst,$src1,$src2" %}
 9523   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9524   ins_encode( Push_Reg_DPR(src1),
 9525               OpcP, RegOpc(src2),
 9526               CmpF_Result(dst));
 9527   ins_pipe( pipe_slow );
 9528 %}
 9529 
 9530 // float compare and set condition codes in EFLAGS by XMM regs
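      // UCOMISD reports an unordered (NaN) operand by setting ZF, PF and CF.
      // The fixup sequence below keeps CF but masks ZF and PF out of EFLAGS,
      // so an unordered result reads as "below", matching the
      // "unordered treat as LT" convention of the x87 compares above.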
 9531 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9532   predicate(UseSSE>=2);
 9533   match(Set cr (CmpD src1 src2));
 9534   ins_cost(145);
 9535   format %{ "UCOMISD $src1,$src2\n\t"
 9536             "JNP,s   exit\n\t"
 9537             "PUSHF\t# saw NaN, set CF\n\t"
 9538             "AND     [rsp], #0xffffff2b\n\t"
 9539             "POPF\n"
 9540     "exit:" %}
 9541   ins_encode %{
 9542     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9543     emit_cmpfp_fixup(_masm);
 9544   %}
 9545   ins_pipe( pipe_slow );
 9546 %}
 9547 
 9548 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9549   predicate(UseSSE>=2);
 9550   match(Set cr (CmpD src1 src2));
 9551   ins_cost(100);
 9552   format %{ "UCOMISD $src1,$src2" %}
 9553   ins_encode %{
 9554     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9555   %}
 9556   ins_pipe( pipe_slow );
 9557 %}
 9558 
 9559 // float compare and set condition codes in EFLAGS by XMM regs
 9560 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9561   predicate(UseSSE>=2);
 9562   match(Set cr (CmpD src1 (LoadD src2)));
 9563   ins_cost(145);
 9564   format %{ "UCOMISD $src1,$src2\n\t"
 9565             "JNP,s   exit\n\t"
 9566             "PUSHF\t# saw NaN, set CF\n\t"
 9567             "AND     [rsp], #0xffffff2b\n\t"
 9568             "POPF\n"
 9569     "exit:" %}
 9570   ins_encode %{
 9571     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9572     emit_cmpfp_fixup(_masm);
 9573   %}
 9574   ins_pipe( pipe_slow );
 9575 %}
 9576 
 9577 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9578   predicate(UseSSE>=2);
 9579   match(Set cr (CmpD src1 (LoadD src2)));
 9580   ins_cost(100);
 9581   format %{ "UCOMISD $src1,$src2" %}
 9582   ins_encode %{
 9583     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9584   %}
 9585   ins_pipe( pipe_slow );
 9586 %}
 9587 
 9588 // Compare into -1,0,1 in XMM
 9589 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9590   predicate(UseSSE>=2);
 9591   match(Set dst (CmpD3 src1 src2));
 9592   effect(KILL cr);
 9593   ins_cost(255);
 9594   format %{ "UCOMISD $src1, $src2\n\t"
 9595             "MOV     $dst, #-1\n\t"
 9596             "JP,s    done\n\t"
 9597             "JB,s    done\n\t"
 9598             "SETNE   $dst\n\t"
 9599             "MOVZB   $dst, $dst\n"
 9600     "done:" %}
 9601   ins_encode %{
 9602     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9603     emit_cmpfp3(_masm, $dst$$Register);
 9604   %}
 9605   ins_pipe( pipe_slow );
 9606 %}
 9607 
 9608 // Compare into -1,0,1 in XMM and memory
 9609 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9610   predicate(UseSSE>=2);
 9611   match(Set dst (CmpD3 src1 (LoadD src2)));
 9612   effect(KILL cr);
 9613   ins_cost(275);
 9614   format %{ "UCOMISD $src1, $src2\n\t"
 9615             "MOV     $dst, #-1\n\t"
 9616             "JP,s    done\n\t"
 9617             "JB,s    done\n\t"
 9618             "SETNE   $dst\n\t"
 9619             "MOVZB   $dst, $dst\n"
 9620     "done:" %}
 9621   ins_encode %{
 9622     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9623     emit_cmpfp3(_masm, $dst$$Register);
 9624   %}
 9625   ins_pipe( pipe_slow );
 9626 %}
 9627 
 9628 
 9629 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9630   predicate (UseSSE <=1);
 9631   match(Set dst (SubD dst src));
 9632 
 9633   format %{ "FLD    $src\n\t"
 9634             "DSUBp  $dst,ST" %}
 9635   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9636   ins_cost(150);
 9637   ins_encode( Push_Reg_DPR(src),
 9638               OpcP, RegOpc(dst) );
 9639   ins_pipe( fpu_reg_reg );
 9640 %}
 9641 
 9642 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9643   predicate (UseSSE <=1);
 9644   match(Set dst (RoundDouble (SubD src1 src2)));
 9645   ins_cost(250);
 9646 
 9647   format %{ "FLD    $src2\n\t"
 9648             "DSUB   ST,$src1\n\t"
 9649             "FSTP_D $dst\t# D-round" %}
 9650   opcode(0xD8, 0x5);
 9651   ins_encode( Push_Reg_DPR(src2),
 9652               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9653   ins_pipe( fpu_mem_reg_reg );
 9654 %}
 9655 
 9656 
 9657 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9658   predicate (UseSSE <=1);
 9659   match(Set dst (SubD dst (LoadD src)));
 9660   ins_cost(150);
 9661 
 9662   format %{ "FLD    $src\n\t"
 9663             "DSUBp  $dst,ST" %}
9664   opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9665   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9666               OpcP, RegOpc(dst) );
 9667   ins_pipe( fpu_reg_mem );
 9668 %}
 9669 
 9670 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9671   predicate (UseSSE<=1);
 9672   match(Set dst (AbsD src));
 9673   ins_cost(100);
 9674   format %{ "FABS" %}
 9675   opcode(0xE1, 0xD9);
 9676   ins_encode( OpcS, OpcP );
 9677   ins_pipe( fpu_reg_reg );
 9678 %}
 9679 
 9680 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9681   predicate(UseSSE<=1);
 9682   match(Set dst (NegD src));
 9683   ins_cost(100);
 9684   format %{ "FCHS" %}
 9685   opcode(0xE0, 0xD9);
 9686   ins_encode( OpcS, OpcP );
 9687   ins_pipe( fpu_reg_reg );
 9688 %}
 9689 
 9690 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9691   predicate(UseSSE<=1);
 9692   match(Set dst (AddD dst src));
 9693   format %{ "FLD    $src\n\t"
 9694             "DADD   $dst,ST" %}
 9695   size(4);
 9696   ins_cost(150);
 9697   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9698   ins_encode( Push_Reg_DPR(src),
 9699               OpcP, RegOpc(dst) );
 9700   ins_pipe( fpu_reg_reg );
 9701 %}
 9702 
 9703 
 9704 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9705   predicate(UseSSE<=1);
 9706   match(Set dst (RoundDouble (AddD src1 src2)));
 9707   ins_cost(250);
 9708 
 9709   format %{ "FLD    $src2\n\t"
 9710             "DADD   ST,$src1\n\t"
 9711             "FSTP_D $dst\t# D-round" %}
 9712   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9713   ins_encode( Push_Reg_DPR(src2),
 9714               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9715   ins_pipe( fpu_mem_reg_reg );
 9716 %}
 9717 
 9718 
 9719 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9720   predicate(UseSSE<=1);
 9721   match(Set dst (AddD dst (LoadD src)));
 9722   ins_cost(150);
 9723 
 9724   format %{ "FLD    $src\n\t"
 9725             "DADDp  $dst,ST" %}
 9726   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9727   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9728               OpcP, RegOpc(dst) );
 9729   ins_pipe( fpu_reg_mem );
 9730 %}
 9731 
 9732 // add-to-memory
 9733 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9734   predicate(UseSSE<=1);
 9735   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9736   ins_cost(150);
 9737 
 9738   format %{ "FLD_D  $dst\n\t"
 9739             "DADD   ST,$src\n\t"
 9740             "FST_D  $dst" %}
 9741   opcode(0xDD, 0x0);
 9742   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9743               Opcode(0xD8), RegOpc(src),
 9744               set_instruction_start,
 9745               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9746   ins_pipe( fpu_reg_mem );
 9747 %}
 9748 
 9749 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9750   predicate(UseSSE<=1);
 9751   match(Set dst (AddD dst con));
 9752   ins_cost(125);
 9753   format %{ "FLD1\n\t"
 9754             "DADDp  $dst,ST" %}
 9755   ins_encode %{
 9756     __ fld1();
 9757     __ faddp($dst$$reg);
 9758   %}
 9759   ins_pipe(fpu_reg);
 9760 %}
 9761 
 9762 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9763   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9764   match(Set dst (AddD dst con));
 9765   ins_cost(200);
 9766   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9767             "DADDp  $dst,ST" %}
 9768   ins_encode %{
 9769     __ fld_d($constantaddress($con));
 9770     __ faddp($dst$$reg);
 9771   %}
 9772   ins_pipe(fpu_reg_mem);
 9773 %}
 9774 
 9775 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9776   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9777   match(Set dst (RoundDouble (AddD src con)));
 9778   ins_cost(200);
 9779   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9780             "DADD   ST,$src\n\t"
 9781             "FSTP_D $dst\t# D-round" %}
 9782   ins_encode %{
 9783     __ fld_d($constantaddress($con));
 9784     __ fadd($src$$reg);
 9785     __ fstp_d(Address(rsp, $dst$$disp));
 9786   %}
 9787   ins_pipe(fpu_mem_reg_con);
 9788 %}
 9789 
 9790 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9791   predicate(UseSSE<=1);
 9792   match(Set dst (MulD dst src));
 9793   format %{ "FLD    $src\n\t"
 9794             "DMULp  $dst,ST" %}
 9795   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9796   ins_cost(150);
 9797   ins_encode( Push_Reg_DPR(src),
 9798               OpcP, RegOpc(dst) );
 9799   ins_pipe( fpu_reg_reg );
 9800 %}
 9801 
 9802 // Strict FP instruction biases argument before multiply then
 9803 // biases result to avoid double rounding of subnormals.
 9804 //
 9805 // scale arg1 by multiplying arg1 by 2^(-15360)
 9806 // load arg2
 9807 // multiply scaled arg1 by arg2
 9808 // rescale product by 2^(15360)
 9809 //
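      // Roughly why this works: 15360 is the difference between the x87
      // extended exponent bias (16383) and the double bias (1023).  Scaling
      // arg1 down by 2^(-15360) makes any result that would be a double
      // subnormal denormalize, and therefore round, at the same bit position
      // inside the 80-bit register, so the rescale by 2^(+15360) and the
      // eventual store back to a double do not round it a second time.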
 9810 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9811   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9812   match(Set dst (MulD dst src));
 9813   ins_cost(1);   // Select this instruction for all FP double multiplies
 9814 
 9815   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9816             "DMULp  $dst,ST\n\t"
 9817             "FLD    $src\n\t"
 9818             "DMULp  $dst,ST\n\t"
 9819             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9820             "DMULp  $dst,ST\n\t" %}
 9821   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9822   ins_encode( strictfp_bias1(dst),
 9823               Push_Reg_DPR(src),
 9824               OpcP, RegOpc(dst),
 9825               strictfp_bias2(dst) );
 9826   ins_pipe( fpu_reg_reg );
 9827 %}
 9828 
 9829 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9830   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9831   match(Set dst (MulD dst con));
 9832   ins_cost(200);
 9833   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9834             "DMULp  $dst,ST" %}
 9835   ins_encode %{
 9836     __ fld_d($constantaddress($con));
 9837     __ fmulp($dst$$reg);
 9838   %}
 9839   ins_pipe(fpu_reg_mem);
 9840 %}
 9841 
 9842 
 9843 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9844   predicate( UseSSE<=1 );
 9845   match(Set dst (MulD dst (LoadD src)));
 9846   ins_cost(200);
 9847   format %{ "FLD_D  $src\n\t"
 9848             "DMULp  $dst,ST" %}
 9849   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9850   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9851               OpcP, RegOpc(dst) );
 9852   ins_pipe( fpu_reg_mem );
 9853 %}
 9854 
 9855 //
 9856 // Cisc-alternate to reg-reg multiply
 9857 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9858   predicate( UseSSE<=1 );
 9859   match(Set dst (MulD src (LoadD mem)));
 9860   ins_cost(250);
 9861   format %{ "FLD_D  $mem\n\t"
 9862             "DMUL   ST,$src\n\t"
 9863             "FSTP_D $dst" %}
 9864   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9865   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9866               OpcReg_FPR(src),
 9867               Pop_Reg_DPR(dst) );
 9868   ins_pipe( fpu_reg_reg_mem );
 9869 %}
 9870 
 9871 
 9872 // MACRO3 -- addDPR a mulDPR
 9873 // This instruction is a '2-address' instruction in that the result goes
 9874 // back to src2.  This eliminates a move from the macro; possibly the
 9875 // register allocator will have to add it back (and maybe not).
 9876 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9877   predicate( UseSSE<=1 );
 9878   match(Set src2 (AddD (MulD src0 src1) src2));
 9879   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9880             "DMUL   ST,$src1\n\t"
 9881             "DADDp  $src2,ST" %}
 9882   ins_cost(250);
 9883   opcode(0xDD); /* LoadD DD /0 */
 9884   ins_encode( Push_Reg_FPR(src0),
 9885               FMul_ST_reg(src1),
 9886               FAddP_reg_ST(src2) );
 9887   ins_pipe( fpu_reg_reg_reg );
 9888 %}
 9889 
 9890 
 9891 // MACRO3 -- subDPR a mulDPR
 9892 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9893   predicate( UseSSE<=1 );
 9894   match(Set src2 (SubD (MulD src0 src1) src2));
 9895   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9896             "DMUL   ST,$src1\n\t"
 9897             "DSUBRp $src2,ST" %}
 9898   ins_cost(250);
 9899   ins_encode( Push_Reg_FPR(src0),
 9900               FMul_ST_reg(src1),
 9901               Opcode(0xDE), Opc_plus(0xE0,src2));
 9902   ins_pipe( fpu_reg_reg_reg );
 9903 %}
 9904 
 9905 
 9906 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9907   predicate( UseSSE<=1 );
 9908   match(Set dst (DivD dst src));
 9909 
 9910   format %{ "FLD    $src\n\t"
 9911             "FDIVp  $dst,ST" %}
 9912   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9913   ins_cost(150);
 9914   ins_encode( Push_Reg_DPR(src),
 9915               OpcP, RegOpc(dst) );
 9916   ins_pipe( fpu_reg_reg );
 9917 %}
 9918 
 9919 // Strict FP instruction biases argument before division then
 9920 // biases result, to avoid double rounding of subnormals.
 9921 //
 9922 // scale dividend by multiplying dividend by 2^(-15360)
 9923 // load divisor
 9924 // divide scaled dividend by divisor
 9925 // rescale quotient by 2^(15360)
 9926 //
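      // (See strictfp_mulDPR_reg above for why the 2^(+/-15360) biasing
      // avoids the double rounding.)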
 9927 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9928   predicate( UseSSE<=1 && Compile::current()->has_method() );
9929   match(Set dst (DivD dst src));
9930   ins_cost(1);   // Select this instruction for all FP double divides
 9932 
 9933   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9934             "DMULp  $dst,ST\n\t"
 9935             "FLD    $src\n\t"
 9936             "FDIVp  $dst,ST\n\t"
 9937             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9938             "DMULp  $dst,ST\n\t" %}
 9939   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9940   ins_encode( strictfp_bias1(dst),
 9941               Push_Reg_DPR(src),
 9942               OpcP, RegOpc(dst),
 9943               strictfp_bias2(dst) );
 9944   ins_pipe( fpu_reg_reg );
 9945 %}
 9946 
 9947 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9948   predicate(UseSSE<=1);
 9949   match(Set dst (ModD dst src));
 9950   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9951 
 9952   format %{ "DMOD   $dst,$src" %}
 9953   ins_cost(250);
 9954   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9955               emitModDPR(),
 9956               Push_Result_Mod_DPR(src),
 9957               Pop_Reg_DPR(dst));
 9958   ins_pipe( pipe_slow );
 9959 %}
 9960 
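      // SSE2 has no remainder instruction, so the operands are spilled to the
      // stack and the classic x87 FPREM loop computes the result.  FPREM
      // produces a partial remainder and leaves C2 set while the reduction is
      // incomplete; FNSTSW/SAHF copy C2 into PF, so the 'JP loop' below
      // repeats FPREM until it finishes.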
 9961 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9962   predicate(UseSSE>=2);
 9963   match(Set dst (ModD src0 src1));
 9964   effect(KILL rax, KILL cr);
 9965 
 9966   format %{ "SUB    ESP,8\t # DMOD\n"
 9967           "\tMOVSD  [ESP+0],$src1\n"
 9968           "\tFLD_D  [ESP+0]\n"
 9969           "\tMOVSD  [ESP+0],$src0\n"
 9970           "\tFLD_D  [ESP+0]\n"
 9971      "loop:\tFPREM\n"
 9972           "\tFWAIT\n"
 9973           "\tFNSTSW AX\n"
 9974           "\tSAHF\n"
 9975           "\tJP     loop\n"
 9976           "\tFSTP_D [ESP+0]\n"
 9977           "\tMOVSD  $dst,[ESP+0]\n"
 9978           "\tADD    ESP,8\n"
 9979           "\tFSTP   ST0\t # Restore FPU Stack"
 9980     %}
 9981   ins_cost(250);
 9982   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9983   ins_pipe( pipe_slow );
 9984 %}
 9985 
 9986 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9987   predicate (UseSSE<=1);
 9988   match(Set dst(AtanD dst src));
9989   format %{ "DATAN  $dst,$src" %}
 9990   opcode(0xD9, 0xF3);
 9991   ins_encode( Push_Reg_DPR(src),
 9992               OpcP, OpcS, RegOpc(dst) );
 9993   ins_pipe( pipe_slow );
 9994 %}
 9995 
 9996 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
 9997   predicate (UseSSE>=2);
 9998   match(Set dst(AtanD dst src));
 9999   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10000   format %{ "DATAN  $dst,$src" %}
10001   opcode(0xD9, 0xF3);
10002   ins_encode( Push_SrcD(src),
10003               OpcP, OpcS, Push_ResultD(dst) );
10004   ins_pipe( pipe_slow );
10005 %}
10006 
10007 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10008   predicate (UseSSE<=1);
10009   match(Set dst (SqrtD src));
10010   format %{ "DSQRT  $dst,$src" %}
10011   opcode(0xFA, 0xD9);
10012   ins_encode( Push_Reg_DPR(src),
10013               OpcS, OpcP, Pop_Reg_DPR(dst) );
10014   ins_pipe( pipe_slow );
10015 %}
10016 
10017 //-------------Float Instructions-------------------------------
10018 // Float Math
10019 
10020 // Code for float compare:
10021 //     fcompp();
10022 //     fwait(); fnstsw_ax();
10023 //     sahf();
10024 //     movl(dst, unordered_result);
10025 //     jcc(Assembler::parity, exit);
10026 //     movl(dst, less_result);
10027 //     jcc(Assembler::below, exit);
10028 //     movl(dst, equal_result);
10029 //     jcc(Assembler::equal, exit);
10030 //     movl(dst, greater_result);
10031 //   exit:
10032 
10033 // P6 version of float compare, sets condition codes in EFLAGS
10034 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10035   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10036   match(Set cr (CmpF src1 src2));
10037   effect(KILL rax);
10038   ins_cost(150);
10039   format %{ "FLD    $src1\n\t"
10040             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10041             "JNP    exit\n\t"
10042             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10043             "SAHF\n"
10044      "exit:\tNOP               // avoid branch to branch" %}
10045   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10046   ins_encode( Push_Reg_DPR(src1),
10047               OpcP, RegOpc(src2),
10048               cmpF_P6_fixup );
10049   ins_pipe( pipe_slow );
10050 %}
10051 
10052 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10053   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10054   match(Set cr (CmpF src1 src2));
10055   ins_cost(100);
10056   format %{ "FLD    $src1\n\t"
10057             "FUCOMIP ST,$src2  // P6 instruction" %}
10058   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10059   ins_encode( Push_Reg_DPR(src1),
10060               OpcP, RegOpc(src2));
10061   ins_pipe( pipe_slow );
10062 %}
10063 
10064 
10065 // Compare & branch
10066 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10067   predicate(UseSSE == 0);
10068   match(Set cr (CmpF src1 src2));
10069   effect(KILL rax);
10070   ins_cost(200);
10071   format %{ "FLD    $src1\n\t"
10072             "FCOMp  $src2\n\t"
10073             "FNSTSW AX\n\t"
10074             "TEST   AX,0x400\n\t"
10075             "JZ,s   flags\n\t"
10076             "MOV    AH,1\t# unordered treat as LT\n"
10077     "flags:\tSAHF" %}
10078   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10079   ins_encode( Push_Reg_DPR(src1),
10080               OpcP, RegOpc(src2),
10081               fpu_flags);
10082   ins_pipe( pipe_slow );
10083 %}
10084 
10085 // Compare vs zero into -1,0,1
10086 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10087   predicate(UseSSE == 0);
10088   match(Set dst (CmpF3 src1 zero));
10089   effect(KILL cr, KILL rax);
10090   ins_cost(280);
10091   format %{ "FTSTF  $dst,$src1" %}
10092   opcode(0xE4, 0xD9);
10093   ins_encode( Push_Reg_DPR(src1),
10094               OpcS, OpcP, PopFPU,
10095               CmpF_Result(dst));
10096   ins_pipe( pipe_slow );
10097 %}
10098 
10099 // Compare into -1,0,1
10100 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10101   predicate(UseSSE == 0);
10102   match(Set dst (CmpF3 src1 src2));
10103   effect(KILL cr, KILL rax);
10104   ins_cost(300);
10105   format %{ "FCMPF  $dst,$src1,$src2" %}
10106   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10107   ins_encode( Push_Reg_DPR(src1),
10108               OpcP, RegOpc(src2),
10109               CmpF_Result(dst));
10110   ins_pipe( pipe_slow );
10111 %}
10112 
10113 // float compare and set condition codes in EFLAGS by XMM regs
10114 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10115   predicate(UseSSE>=1);
10116   match(Set cr (CmpF src1 src2));
10117   ins_cost(145);
10118   format %{ "UCOMISS $src1,$src2\n\t"
10119             "JNP,s   exit\n\t"
10120             "PUSHF\t# saw NaN, set CF\n\t"
10121             "AND     [rsp], #0xffffff2b\n\t"
10122             "POPF\n"
10123     "exit:" %}
10124   ins_encode %{
10125     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10126     emit_cmpfp_fixup(_masm);
10127   %}
10128   ins_pipe( pipe_slow );
10129 %}
10130 
10131 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10132   predicate(UseSSE>=1);
10133   match(Set cr (CmpF src1 src2));
10134   ins_cost(100);
10135   format %{ "UCOMISS $src1,$src2" %}
10136   ins_encode %{
10137     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10138   %}
10139   ins_pipe( pipe_slow );
10140 %}
10141 
10142 // float compare and set condition codes in EFLAGS by XMM regs
10143 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10144   predicate(UseSSE>=1);
10145   match(Set cr (CmpF src1 (LoadF src2)));
10146   ins_cost(165);
10147   format %{ "UCOMISS $src1,$src2\n\t"
10148             "JNP,s   exit\n\t"
10149             "PUSHF\t# saw NaN, set CF\n\t"
10150             "AND     [rsp], #0xffffff2b\n\t"
10151             "POPF\n"
10152     "exit:" %}
10153   ins_encode %{
10154     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10155     emit_cmpfp_fixup(_masm);
10156   %}
10157   ins_pipe( pipe_slow );
10158 %}
10159 
10160 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10161   predicate(UseSSE>=1);
10162   match(Set cr (CmpF src1 (LoadF src2)));
10163   ins_cost(100);
10164   format %{ "UCOMISS $src1,$src2" %}
10165   ins_encode %{
10166     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10167   %}
10168   ins_pipe( pipe_slow );
10169 %}
10170 
10171 // Compare into -1,0,1 in XMM
10172 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10173   predicate(UseSSE>=1);
10174   match(Set dst (CmpF3 src1 src2));
10175   effect(KILL cr);
10176   ins_cost(255);
10177   format %{ "UCOMISS $src1, $src2\n\t"
10178             "MOV     $dst, #-1\n\t"
10179             "JP,s    done\n\t"
10180             "JB,s    done\n\t"
10181             "SETNE   $dst\n\t"
10182             "MOVZB   $dst, $dst\n"
10183     "done:" %}
10184   ins_encode %{
10185     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10186     emit_cmpfp3(_masm, $dst$$Register);
10187   %}
10188   ins_pipe( pipe_slow );
10189 %}
10190 
10191 // Compare into -1,0,1 in XMM and memory
10192 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10193   predicate(UseSSE>=1);
10194   match(Set dst (CmpF3 src1 (LoadF src2)));
10195   effect(KILL cr);
10196   ins_cost(275);
10197   format %{ "UCOMISS $src1, $src2\n\t"
10198             "MOV     $dst, #-1\n\t"
10199             "JP,s    done\n\t"
10200             "JB,s    done\n\t"
10201             "SETNE   $dst\n\t"
10202             "MOVZB   $dst, $dst\n"
10203     "done:" %}
10204   ins_encode %{
10205     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10206     emit_cmpfp3(_masm, $dst$$Register);
10207   %}
10208   ins_pipe( pipe_slow );
10209 %}
10210 
10211 // Spill to obtain 24-bit precision
10212 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10213   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10214   match(Set dst (SubF src1 src2));
10215 
10216   format %{ "FSUB   $dst,$src1 - $src2" %}
10217   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10218   ins_encode( Push_Reg_FPR(src1),
10219               OpcReg_FPR(src2),
10220               Pop_Mem_FPR(dst) );
10221   ins_pipe( fpu_mem_reg_reg );
10222 %}
10223 //
10224 // This instruction does not round to 24-bits
10225 instruct subFPR_reg(regFPR dst, regFPR src) %{
10226   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10227   match(Set dst (SubF dst src));
10228 
10229   format %{ "FSUB   $dst,$src" %}
10230   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10231   ins_encode( Push_Reg_FPR(src),
10232               OpcP, RegOpc(dst) );
10233   ins_pipe( fpu_reg_reg );
10234 %}
10235 
10236 // Spill to obtain 24-bit precision
10237 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10238   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10239   match(Set dst (AddF src1 src2));
10240 
10241   format %{ "FADD   $dst,$src1,$src2" %}
10242   opcode(0xD8, 0x0); /* D8 C0+i */
10243   ins_encode( Push_Reg_FPR(src2),
10244               OpcReg_FPR(src1),
10245               Pop_Mem_FPR(dst) );
10246   ins_pipe( fpu_mem_reg_reg );
10247 %}
10248 //
10249 // This instruction does not round to 24-bits
10250 instruct addFPR_reg(regFPR dst, regFPR src) %{
10251   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10252   match(Set dst (AddF dst src));
10253 
10254   format %{ "FLD    $src\n\t"
10255             "FADDp  $dst,ST" %}
10256   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10257   ins_encode( Push_Reg_FPR(src),
10258               OpcP, RegOpc(dst) );
10259   ins_pipe( fpu_reg_reg );
10260 %}
10261 
10262 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10263   predicate(UseSSE==0);
10264   match(Set dst (AbsF src));
10265   ins_cost(100);
10266   format %{ "FABS" %}
10267   opcode(0xE1, 0xD9);
10268   ins_encode( OpcS, OpcP );
10269   ins_pipe( fpu_reg_reg );
10270 %}
10271 
10272 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10273   predicate(UseSSE==0);
10274   match(Set dst (NegF src));
10275   ins_cost(100);
10276   format %{ "FCHS" %}
10277   opcode(0xE0, 0xD9);
10278   ins_encode( OpcS, OpcP );
10279   ins_pipe( fpu_reg_reg );
10280 %}
10281 
10282 // Cisc-alternate to addFPR_reg
10283 // Spill to obtain 24-bit precision
10284 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10285   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10286   match(Set dst (AddF src1 (LoadF src2)));
10287 
10288   format %{ "FLD    $src2\n\t"
10289             "FADD   ST,$src1\n\t"
10290             "FSTP_S $dst" %}
10291   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10292   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10293               OpcReg_FPR(src1),
10294               Pop_Mem_FPR(dst) );
10295   ins_pipe( fpu_mem_reg_mem );
10296 %}
10297 //
10298 // Cisc-alternate to addFPR_reg
10299 // This instruction does not round to 24-bits
10300 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10301   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10302   match(Set dst (AddF dst (LoadF src)));
10303 
10304   format %{ "FADD   $dst,$src" %}
10305   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10306   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10307               OpcP, RegOpc(dst) );
10308   ins_pipe( fpu_reg_mem );
10309 %}
10310 
10311 // // Following two instructions for _222_mpegaudio
10312 // Spill to obtain 24-bit precision
10313 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10314   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10315   match(Set dst (AddF src1 src2));
10316 
10317   format %{ "FADD   $dst,$src1,$src2" %}
10318   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10319   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10320               OpcReg_FPR(src2),
10321               Pop_Mem_FPR(dst) );
10322   ins_pipe( fpu_mem_reg_mem );
10323 %}
10324 
10325 // Cisc-spill variant
10326 // Spill to obtain 24-bit precision
10327 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10328   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10329   match(Set dst (AddF src1 (LoadF src2)));
10330 
10331   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10332   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10333   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10334               set_instruction_start,
10335               OpcP, RMopc_Mem(secondary,src1),
10336               Pop_Mem_FPR(dst) );
10337   ins_pipe( fpu_mem_mem_mem );
10338 %}
10339 
10340 // Spill to obtain 24-bit precision
10341 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10342   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10343   match(Set dst (AddF src1 src2));
10344 
10345   format %{ "FADD   $dst,$src1,$src2" %}
10346   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10347   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10348               set_instruction_start,
10349               OpcP, RMopc_Mem(secondary,src1),
10350               Pop_Mem_FPR(dst) );
10351   ins_pipe( fpu_mem_mem_mem );
10352 %}
10353 
10354 
10355 // Spill to obtain 24-bit precision
10356 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10357   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10358   match(Set dst (AddF src con));
10359   format %{ "FLD    $src\n\t"
10360             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10361             "FSTP_S $dst"  %}
10362   ins_encode %{
10363     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10364     __ fadd_s($constantaddress($con));
10365     __ fstp_s(Address(rsp, $dst$$disp));
10366   %}
10367   ins_pipe(fpu_mem_reg_con);
10368 %}
10369 //
10370 // This instruction does not round to 24-bits
10371 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10372   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10373   match(Set dst (AddF src con));
10374   format %{ "FLD    $src\n\t"
10375             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10376             "FSTP   $dst"  %}
10377   ins_encode %{
10378     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10379     __ fadd_s($constantaddress($con));
10380     __ fstp_d($dst$$reg);
10381   %}
10382   ins_pipe(fpu_reg_reg_con);
10383 %}
10384 
10385 // Spill to obtain 24-bit precision
10386 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10387   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10388   match(Set dst (MulF src1 src2));
10389 
10390   format %{ "FLD    $src1\n\t"
10391             "FMUL   $src2\n\t"
10392             "FSTP_S $dst"  %}
10393   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10394   ins_encode( Push_Reg_FPR(src1),
10395               OpcReg_FPR(src2),
10396               Pop_Mem_FPR(dst) );
10397   ins_pipe( fpu_mem_reg_reg );
10398 %}
10399 //
10400 // This instruction does not round to 24-bits
10401 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10402   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10403   match(Set dst (MulF src1 src2));
10404 
10405   format %{ "FLD    $src1\n\t"
10406             "FMUL   $src2\n\t"
10407             "FSTP_S $dst"  %}
10408   opcode(0xD8, 0x1); /* D8 C8+i */
10409   ins_encode( Push_Reg_FPR(src2),
10410               OpcReg_FPR(src1),
10411               Pop_Reg_FPR(dst) );
10412   ins_pipe( fpu_reg_reg_reg );
10413 %}
10414 
10415 
10416 // Spill to obtain 24-bit precision
10417 // Cisc-alternate to reg-reg multiply
10418 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10419   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10420   match(Set dst (MulF src1 (LoadF src2)));
10421 
10422   format %{ "FLD_S  $src2\n\t"
10423             "FMUL   $src1\n\t"
10424             "FSTP_S $dst"  %}
10425   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10426   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10427               OpcReg_FPR(src1),
10428               Pop_Mem_FPR(dst) );
10429   ins_pipe( fpu_mem_reg_mem );
10430 %}
10431 //
10432 // This instruction does not round to 24-bits
10433 // Cisc-alternate to reg-reg multiply
10434 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10435   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10436   match(Set dst (MulF src1 (LoadF src2)));
10437 
10438   format %{ "FMUL   $dst,$src1,$src2" %}
10439   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10440   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10441               OpcReg_FPR(src1),
10442               Pop_Reg_FPR(dst) );
10443   ins_pipe( fpu_reg_reg_mem );
10444 %}
10445 
10446 // Spill to obtain 24-bit precision
10447 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10448   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10449   match(Set dst (MulF src1 src2));
10450 
10451   format %{ "FMUL   $dst,$src1,$src2" %}
10452   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10453   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10454               set_instruction_start,
10455               OpcP, RMopc_Mem(secondary,src1),
10456               Pop_Mem_FPR(dst) );
10457   ins_pipe( fpu_mem_mem_mem );
10458 %}
10459 
10460 // Spill to obtain 24-bit precision
10461 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10462   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10463   match(Set dst (MulF src con));
10464 
10465   format %{ "FLD    $src\n\t"
10466             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10467             "FSTP_S $dst"  %}
10468   ins_encode %{
10469     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10470     __ fmul_s($constantaddress($con));
10471     __ fstp_s(Address(rsp, $dst$$disp));
10472   %}
10473   ins_pipe(fpu_mem_reg_con);
10474 %}
10475 //
10476 // This instruction does not round to 24-bits
10477 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10478   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10479   match(Set dst (MulF src con));
10480 
10481   format %{ "FLD    $src\n\t"
10482             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10483             "FSTP   $dst"  %}
10484   ins_encode %{
10485     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10486     __ fmul_s($constantaddress($con));
10487     __ fstp_d($dst$$reg);
10488   %}
10489   ins_pipe(fpu_reg_reg_con);
10490 %}
10491 
10492 
10493 //
10494 // MACRO1 -- subsume unshared load into mulFPR
10495 // This instruction does not round to 24-bits
10496 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10497   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10498   match(Set dst (MulF (LoadF mem1) src));
10499 
10500   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10501             "FMUL   ST,$src\n\t"
10502             "FSTP   $dst" %}
10503   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10504   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10505               OpcReg_FPR(src),
10506               Pop_Reg_FPR(dst) );
10507   ins_pipe( fpu_reg_reg_mem );
10508 %}
10509 //
10510 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10511 // This instruction does not round to 24-bits
10512 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10513   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10514   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10515   ins_cost(95);
10516 
10517   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10518             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10519             "FADD   ST,$src2\n\t"
10520             "FSTP   $dst" %}
10521   opcode(0xD9); /* LoadF D9 /0 */
10522   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10523               FMul_ST_reg(src1),
10524               FAdd_ST_reg(src2),
10525               Pop_Reg_FPR(dst) );
10526   ins_pipe( fpu_reg_mem_reg_reg );
10527 %}
10528 
10529 // MACRO3 -- addFPR a mulFPR
10530 // This instruction does not round to 24-bits.  It is a '2-address'
10531 // instruction in that the result goes back to src2.  This eliminates
10532 // a move from the macro; possibly the register allocator will have
10533 // to add it back (and maybe not).
10534 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10535   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10536   match(Set src2 (AddF (MulF src0 src1) src2));
10537 
10538   format %{ "FLD    $src0     ===MACRO3===\n\t"
10539             "FMUL   ST,$src1\n\t"
10540             "FADDP  $src2,ST" %}
10541   opcode(0xD9); /* LoadF D9 /0 */
10542   ins_encode( Push_Reg_FPR(src0),
10543               FMul_ST_reg(src1),
10544               FAddP_reg_ST(src2) );
10545   ins_pipe( fpu_reg_reg_reg );
10546 %}
10547 
10548 // MACRO4 -- divFPR subFPR
10549 // This instruction does not round to 24-bits
10550 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10551   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10552   match(Set dst (DivF (SubF src2 src1) src3));
10553 
10554   format %{ "FLD    $src2   ===MACRO4===\n\t"
10555             "FSUB   ST,$src1\n\t"
10556             "FDIV   ST,$src3\n\t"
10557             "FSTP  $dst" %}
10558   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10559   ins_encode( Push_Reg_FPR(src2),
10560               subFPR_divFPR_encode(src1,src3),
10561               Pop_Reg_FPR(dst) );
10562   ins_pipe( fpu_reg_reg_reg_reg );
10563 %}
10564 
10565 // Spill to obtain 24-bit precision
10566 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10567   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10568   match(Set dst (DivF src1 src2));
10569 
10570   format %{ "FDIV   $dst,$src1,$src2" %}
10571   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10572   ins_encode( Push_Reg_FPR(src1),
10573               OpcReg_FPR(src2),
10574               Pop_Mem_FPR(dst) );
10575   ins_pipe( fpu_mem_reg_reg );
10576 %}
10577 //
10578 // This instruction does not round to 24-bits
10579 instruct divFPR_reg(regFPR dst, regFPR src) %{
10580   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10581   match(Set dst (DivF dst src));
10582 
10583   format %{ "FDIV   $dst,$src" %}
10584   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10585   ins_encode( Push_Reg_FPR(src),
10586               OpcP, RegOpc(dst) );
10587   ins_pipe( fpu_reg_reg );
10588 %}
10589 
10590 
10591 // Spill to obtain 24-bit precision
10592 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10593   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10594   match(Set dst (ModF src1 src2));
10595   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10596 
10597   format %{ "FMOD   $dst,$src1,$src2" %}
10598   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10599               emitModDPR(),
10600               Push_Result_Mod_DPR(src2),
10601               Pop_Mem_FPR(dst));
10602   ins_pipe( pipe_slow );
10603 %}
10604 //
10605 // This instruction does not round to 24-bits
10606 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10607   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10608   match(Set dst (ModF dst src));
10609   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10610 
10611   format %{ "FMOD   $dst,$src" %}
10612   ins_encode(Push_Reg_Mod_DPR(dst, src),
10613               emitModDPR(),
10614               Push_Result_Mod_DPR(src),
10615               Pop_Reg_FPR(dst));
10616   ins_pipe( pipe_slow );
10617 %}
10618 
10619 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10620   predicate(UseSSE>=1);
10621   match(Set dst (ModF src0 src1));
10622   effect(KILL rax, KILL cr);
10623   format %{ "SUB    ESP,4\t # FMOD\n"
10624           "\tMOVSS  [ESP+0],$src1\n"
10625           "\tFLD_S  [ESP+0]\n"
10626           "\tMOVSS  [ESP+0],$src0\n"
10627           "\tFLD_S  [ESP+0]\n"
10628      "loop:\tFPREM\n"
10629           "\tFWAIT\n"
10630           "\tFNSTSW AX\n"
10631           "\tSAHF\n"
10632           "\tJP     loop\n"
10633           "\tFSTP_S [ESP+0]\n"
10634           "\tMOVSS  $dst,[ESP+0]\n"
10635           "\tADD    ESP,4\n"
10636           "\tFSTP   ST0\t # Restore FPU Stack"
10637     %}
10638   ins_cost(250);
10639   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10640   ins_pipe( pipe_slow );
10641 %}
10642 
10643 
10644 //----------Arithmetic Conversion Instructions---------------------------------
10645 // The conversion operations are all alphabetically sorted.  Please keep it that way!
10646 
10647 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10648   predicate(UseSSE==0);
10649   match(Set dst (RoundFloat src));
10650   ins_cost(125);
10651   format %{ "FST_S  $dst,$src\t# F-round" %}
10652   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10653   ins_pipe( fpu_mem_reg );
10654 %}
10655 
10656 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10657   predicate(UseSSE<=1);
10658   match(Set dst (RoundDouble src));
10659   ins_cost(125);
10660   format %{ "FST_D  $dst,$src\t# D-round" %}
10661   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10662   ins_pipe( fpu_mem_reg );
10663 %}
10664 
10665 // Force rounding to 24-bit precision and 8-bit exponent
10666 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10667   predicate(UseSSE==0);
10668   match(Set dst (ConvD2F src));
10669   format %{ "FST_S  $dst,$src\t# F-round" %}
10670   expand %{
10671     roundFloat_mem_reg(dst,src);
10672   %}
10673 %}
10674 
10675 // Force rounding to 24-bit precision and 8-bit exponent
10676 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10677   predicate(UseSSE==1);
10678   match(Set dst (ConvD2F src));
10679   effect( KILL cr );
10680   format %{ "SUB    ESP,4\n\t"
10681             "FST_S  [ESP],$src\t# F-round\n\t"
10682             "MOVSS  $dst,[ESP]\n\t"
10683             "ADD    ESP,4" %}
10684   ins_encode %{
10685     __ subptr(rsp, 4);
10686     if ($src$$reg != FPR1L_enc) {
10687       __ fld_s($src$$reg-1);
10688       __ fstp_s(Address(rsp, 0));
10689     } else {
10690       __ fst_s(Address(rsp, 0));
10691     }
10692     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10693     __ addptr(rsp, 4);
10694   %}
10695   ins_pipe( pipe_slow );
10696 %}
10697 
10698 // Force rounding double precision to single precision
10699 instruct convD2F_reg(regF dst, regD src) %{
10700   predicate(UseSSE>=2);
10701   match(Set dst (ConvD2F src));
10702   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10703   ins_encode %{
10704     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10705   %}
10706   ins_pipe( pipe_slow );
10707 %}
10708 
10709 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10710   predicate(UseSSE==0);
10711   match(Set dst (ConvF2D src));
10712   format %{ "FST_S  $dst,$src\t# D-round" %}
10713   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10714   ins_pipe( fpu_reg_reg );
10715 %}
10716 
10717 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10718   predicate(UseSSE==1);
10719   match(Set dst (ConvF2D src));
10720   format %{ "FST_D  $dst,$src\t# D-round" %}
10721   expand %{
10722     roundDouble_mem_reg(dst,src);
10723   %}
10724 %}
10725 
10726 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10727   predicate(UseSSE==1);
10728   match(Set dst (ConvF2D src));
10729   effect( KILL cr );
10730   format %{ "SUB    ESP,4\n\t"
10731             "MOVSS  [ESP] $src\n\t"
10732             "FLD_S  [ESP]\n\t"
10733             "ADD    ESP,4\n\t"
10734             "FSTP   $dst\t# D-round" %}
10735   ins_encode %{
10736     __ subptr(rsp, 4);
10737     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10738     __ fld_s(Address(rsp, 0));
10739     __ addptr(rsp, 4);
10740     __ fstp_d($dst$$reg);
10741   %}
10742   ins_pipe( pipe_slow );
10743 %}
10744 
10745 instruct convF2D_reg(regD dst, regF src) %{
10746   predicate(UseSSE>=2);
10747   match(Set dst (ConvF2D src));
10748   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10749   ins_encode %{
10750     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10751   %}
10752   ins_pipe( pipe_slow );
10753 %}
10754 
10755 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10756 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10757   predicate(UseSSE<=1);
10758   match(Set dst (ConvD2I src));
10759   effect( KILL tmp, KILL cr );
10760   format %{ "FLD    $src\t# Convert double to int \n\t"
10761             "FLDCW  trunc mode\n\t"
10762             "SUB    ESP,4\n\t"
10763             "FISTp  [ESP + #0]\n\t"
10764             "FLDCW  std/24-bit mode\n\t"
10765             "POP    EAX\n\t"
10766             "CMP    EAX,0x80000000\n\t"
10767             "JNE,s  fast\n\t"
10768             "FLD_D  $src\n\t"
10769             "CALL   d2i_wrapper\n"
10770       "fast:" %}
10771   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10772   ins_pipe( pipe_slow );
10773 %}
10774 
10775 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10776 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10777   predicate(UseSSE>=2);
10778   match(Set dst (ConvD2I src));
10779   effect( KILL tmp, KILL cr );
10780   format %{ "CVTTSD2SI $dst, $src\n\t"
10781             "CMP    $dst,0x80000000\n\t"
10782             "JNE,s  fast\n\t"
10783             "SUB    ESP, 8\n\t"
10784             "MOVSD  [ESP], $src\n\t"
10785             "FLD_D  [ESP]\n\t"
10786             "ADD    ESP, 8\n\t"
10787             "CALL   d2i_wrapper\n"
10788       "fast:" %}
10789   ins_encode %{
10790     Label fast;
10791     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10792     __ cmpl($dst$$Register, 0x80000000);
10793     __ jccb(Assembler::notEqual, fast);
10794     __ subptr(rsp, 8);
10795     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10796     __ fld_d(Address(rsp, 0));
10797     __ addptr(rsp, 8);
10798     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10799     __ post_call_nop();
10800     __ bind(fast);
10801   %}
10802   ins_pipe( pipe_slow );
10803 %}
10804 
10805 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10806   predicate(UseSSE<=1);
10807   match(Set dst (ConvD2L src));
10808   effect( KILL cr );
10809   format %{ "FLD    $src\t# Convert double to long\n\t"
10810             "FLDCW  trunc mode\n\t"
10811             "SUB    ESP,8\n\t"
10812             "FISTp  [ESP + #0]\n\t"
10813             "FLDCW  std/24-bit mode\n\t"
10814             "POP    EAX\n\t"
10815             "POP    EDX\n\t"
10816             "CMP    EDX,0x80000000\n\t"
10817             "JNE,s  fast\n\t"
10818             "TEST   EAX,EAX\n\t"
10819             "JNE,s  fast\n\t"
10820             "FLD    $src\n\t"
10821             "CALL   d2l_wrapper\n"
10822       "fast:" %}
10823   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10824   ins_pipe( pipe_slow );
10825 %}
10826 
10827 // XMM lacks a float/double->long conversion, so use the old FPU stack.
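      // A 64-bit FISTp stores the 64-bit "integer indefinite" value
      // 0x8000000000000000 when the conversion overflows or the input is a
      // NaN, which is why both conversion paths compare EDX against 0x80000000
      // and EAX against zero before taking the d2l_wrapper slow path.  The
      // sentinel is also the legitimate encoding of Long.MIN_VALUE, so the
      // slow path reloads the original operand and lets the wrapper sort it out.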
10828 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10829   predicate (UseSSE>=2);
10830   match(Set dst (ConvD2L src));
10831   effect( KILL cr );
10832   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10833             "MOVSD  [ESP],$src\n\t"
10834             "FLD_D  [ESP]\n\t"
10835             "FLDCW  trunc mode\n\t"
10836             "FISTp  [ESP + #0]\n\t"
10837             "FLDCW  std/24-bit mode\n\t"
10838             "POP    EAX\n\t"
10839             "POP    EDX\n\t"
10840             "CMP    EDX,0x80000000\n\t"
10841             "JNE,s  fast\n\t"
10842             "TEST   EAX,EAX\n\t"
10843             "JNE,s  fast\n\t"
10844             "SUB    ESP,8\n\t"
10845             "MOVSD  [ESP],$src\n\t"
10846             "FLD_D  [ESP]\n\t"
10847             "ADD    ESP,8\n\t"
10848             "CALL   d2l_wrapper\n"
10849       "fast:" %}
10850   ins_encode %{
10851     Label fast;
10852     __ subptr(rsp, 8);
10853     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10854     __ fld_d(Address(rsp, 0));
10855     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10856     __ fistp_d(Address(rsp, 0));
10857     // Restore the rounding mode, mask the exception
10858     if (Compile::current()->in_24_bit_fp_mode()) {
10859       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10860     } else {
10861       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10862     }
10863     // Load the converted long, adjust CPU stack
10864     __ pop(rax);
10865     __ pop(rdx);
10866     __ cmpl(rdx, 0x80000000);
10867     __ jccb(Assembler::notEqual, fast);
10868     __ testl(rax, rax);
10869     __ jccb(Assembler::notEqual, fast);
10870     __ subptr(rsp, 8);
10871     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10872     __ fld_d(Address(rsp, 0));
10873     __ addptr(rsp, 8);
10874     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10875     __ post_call_nop();
10876     __ bind(fast);
10877   %}
10878   ins_pipe( pipe_slow );
10879 %}
10880 
10881 // Convert a double to an int.  Java semantics require we do complex
10882 // manglations in the corner cases.  So we set the rounding mode to
10883 // 'zero', store the darned double down as an int, and reset the
10884 // rounding mode to 'nearest'.  The hardware stores a flag value down
10885 // if we would overflow or converted a NAN; we check for this
10886 // and go the slow path if needed.
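      // The 32-bit FIST store leaves the "integer indefinite" value 0x80000000
      // behind when the conversion overflows or the input is a NaN.  Since
      // 0x80000000 is also the legitimate encoding of Integer.MIN_VALUE, the
      // compare below cannot tell the two apart, so the slow path reloads the
      // original operand and lets the d2i_wrapper stub produce the proper Java
      // result (0 for NaN, the clamped MIN_VALUE/MAX_VALUE on overflow).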
10887 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10888   predicate(UseSSE==0);
10889   match(Set dst (ConvF2I src));
10890   effect( KILL tmp, KILL cr );
10891   format %{ "FLD    $src\t# Convert float to int \n\t"
10892             "FLDCW  trunc mode\n\t"
10893             "SUB    ESP,4\n\t"
10894             "FISTp  [ESP + #0]\n\t"
10895             "FLDCW  std/24-bit mode\n\t"
10896             "POP    EAX\n\t"
10897             "CMP    EAX,0x80000000\n\t"
10898             "JNE,s  fast\n\t"
10899             "FLD    $src\n\t"
10900             "CALL   d2i_wrapper\n"
10901       "fast:" %}
10902   // DPR2I_encoding works for FPR2I
10903   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10904   ins_pipe( pipe_slow );
10905 %}
10906 
10907 // Convert a float in xmm to an int reg.
10908 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10909   predicate(UseSSE>=1);
10910   match(Set dst (ConvF2I src));
10911   effect( KILL tmp, KILL cr );
10912   format %{ "CVTTSS2SI $dst, $src\n\t"
10913             "CMP    $dst,0x80000000\n\t"
10914             "JNE,s  fast\n\t"
10915             "SUB    ESP, 4\n\t"
10916             "MOVSS  [ESP], $src\n\t"
10917             "FLD    [ESP]\n\t"
10918             "ADD    ESP, 4\n\t"
10919             "CALL   d2i_wrapper\n"
10920       "fast:" %}
10921   ins_encode %{
10922     Label fast;
10923     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10924     __ cmpl($dst$$Register, 0x80000000);
10925     __ jccb(Assembler::notEqual, fast);
10926     __ subptr(rsp, 4);
10927     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10928     __ fld_s(Address(rsp, 0));
10929     __ addptr(rsp, 4);
10930     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10931     __ post_call_nop();
10932     __ bind(fast);
10933   %}
10934   ins_pipe( pipe_slow );
10935 %}
10936 
10937 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10938   predicate(UseSSE==0);
10939   match(Set dst (ConvF2L src));
10940   effect( KILL cr );
10941   format %{ "FLD    $src\t# Convert float to long\n\t"
10942             "FLDCW  trunc mode\n\t"
10943             "SUB    ESP,8\n\t"
10944             "FISTp  [ESP + #0]\n\t"
10945             "FLDCW  std/24-bit mode\n\t"
10946             "POP    EAX\n\t"
10947             "POP    EDX\n\t"
10948             "CMP    EDX,0x80000000\n\t"
10949             "JNE,s  fast\n\t"
10950             "TEST   EAX,EAX\n\t"
10951             "JNE,s  fast\n\t"
10952             "FLD    $src\n\t"
10953             "CALL   d2l_wrapper\n"
10954       "fast:" %}
10955   // DPR2L_encoding works for FPR2L
10956   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10957   ins_pipe( pipe_slow );
10958 %}
10959 
10960 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10961 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10962   predicate (UseSSE>=1);
10963   match(Set dst (ConvF2L src));
10964   effect( KILL cr );
10965   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10966             "MOVSS  [ESP],$src\n\t"
10967             "FLD_S  [ESP]\n\t"
10968             "FLDCW  trunc mode\n\t"
10969             "FISTp  [ESP + #0]\n\t"
10970             "FLDCW  std/24-bit mode\n\t"
10971             "POP    EAX\n\t"
10972             "POP    EDX\n\t"
10973             "CMP    EDX,0x80000000\n\t"
10974             "JNE,s  fast\n\t"
10975             "TEST   EAX,EAX\n\t"
10976             "JNE,s  fast\n\t"
10977             "SUB    ESP,4\t# Convert float to long\n\t"
10978             "MOVSS  [ESP],$src\n\t"
10979             "FLD_S  [ESP]\n\t"
10980             "ADD    ESP,4\n\t"
10981             "CALL   d2l_wrapper\n"
10982       "fast:" %}
10983   ins_encode %{
10984     Label fast;
10985     __ subptr(rsp, 8);
10986     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10987     __ fld_s(Address(rsp, 0));
10988     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10989     __ fistp_d(Address(rsp, 0));
10990     // Restore the rounding mode, mask the exception
10991     if (Compile::current()->in_24_bit_fp_mode()) {
10992       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10993     } else {
10994       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10995     }
10996     // Load the converted long, adjust CPU stack
10997     __ pop(rax);
10998     __ pop(rdx);
10999     __ cmpl(rdx, 0x80000000);
11000     __ jccb(Assembler::notEqual, fast);
11001     __ testl(rax, rax);
11002     __ jccb(Assembler::notEqual, fast);
11003     __ subptr(rsp, 4);
11004     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11005     __ fld_s(Address(rsp, 0));
11006     __ addptr(rsp, 4);
11007     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11008     __ post_call_nop();
11009     __ bind(fast);
11010   %}
11011   ins_pipe( pipe_slow );
11012 %}
11013 
11014 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11015   predicate( UseSSE<=1 );
11016   match(Set dst (ConvI2D src));
11017   format %{ "FILD   $src\n\t"
11018             "FSTP   $dst" %}
11019   opcode(0xDB, 0x0);  /* DB /0 */
11020   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11021   ins_pipe( fpu_reg_mem );
11022 %}
11023 
11024 instruct convI2D_reg(regD dst, rRegI src) %{
11025   predicate( UseSSE>=2 && !UseXmmI2D );
11026   match(Set dst (ConvI2D src));
11027   format %{ "CVTSI2SD $dst,$src" %}
11028   ins_encode %{
11029     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11030   %}
11031   ins_pipe( pipe_slow );
11032 %}
11033 
11034 instruct convI2D_mem(regD dst, memory mem) %{
11035   predicate( UseSSE>=2 );
11036   match(Set dst (ConvI2D (LoadI mem)));
11037   format %{ "CVTSI2SD $dst,$mem" %}
11038   ins_encode %{
11039     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11040   %}
11041   ins_pipe( pipe_slow );
11042 %}
11043 
11044 instruct convXI2D_reg(regD dst, rRegI src)
11045 %{
11046   predicate( UseSSE>=2 && UseXmmI2D );
11047   match(Set dst (ConvI2D src));
11048 
11049   format %{ "MOVD  $dst,$src\n\t"
11050             "CVTDQ2PD $dst,$dst\t# i2d" %}
11051   ins_encode %{
11052     __ movdl($dst$$XMMRegister, $src$$Register);
11053     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11054   %}
11055   ins_pipe(pipe_slow); // XXX
11056 %}
11057 
11058 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11059   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11060   match(Set dst (ConvI2D (LoadI mem)));
11061   format %{ "FILD   $mem\n\t"
11062             "FSTP   $dst" %}
11063   opcode(0xDB);      /* DB /0 */
11064   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11065               Pop_Reg_DPR(dst));
11066   ins_pipe( fpu_reg_mem );
11067 %}
11068 
11069 // Convert a byte to a float; no rounding step needed.
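// (An 8-bit value is exactly representable in a float's 24-bit mantissa, so the result
//  is exact in either precision mode and needs no store-reload to round.)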
11070 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11071   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11072   match(Set dst (ConvI2F src));
11073   format %{ "FILD   $src\n\t"
11074             "FSTP   $dst" %}
11075 
11076   opcode(0xDB, 0x0);  /* DB /0 */
11077   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11078   ins_pipe( fpu_reg_mem );
11079 %}
11080 
11081 // In 24-bit mode, force exponent rounding by storing back out
11082 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11083   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11084   match(Set dst (ConvI2F src));
11085   ins_cost(200);
11086   format %{ "FILD   $src\n\t"
11087             "FSTP_S $dst" %}
11088   opcode(0xDB, 0x0);  /* DB /0 */
11089   ins_encode( Push_Mem_I(src),
11090               Pop_Mem_FPR(dst));
11091   ins_pipe( fpu_mem_mem );
11092 %}
11093 
11094 // In 24-bit mode, force exponent rounding by storing back out
11095 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11096   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11097   match(Set dst (ConvI2F (LoadI mem)));
11098   ins_cost(200);
11099   format %{ "FILD   $mem\n\t"
11100             "FSTP_S $dst" %}
11101   opcode(0xDB);  /* DB /0 */
11102   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11103               Pop_Mem_FPR(dst));
11104   ins_pipe( fpu_mem_mem );
11105 %}
11106 
// This instruction does not round to 24 bits
11108 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11109   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11110   match(Set dst (ConvI2F src));
11111   format %{ "FILD   $src\n\t"
11112             "FSTP   $dst" %}
11113   opcode(0xDB, 0x0);  /* DB /0 */
11114   ins_encode( Push_Mem_I(src),
11115               Pop_Reg_FPR(dst));
11116   ins_pipe( fpu_reg_mem );
11117 %}
11118 
// This instruction does not round to 24 bits
11120 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11121   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11122   match(Set dst (ConvI2F (LoadI mem)));
11123   format %{ "FILD   $mem\n\t"
11124             "FSTP   $dst" %}
11125   opcode(0xDB);      /* DB /0 */
11126   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11127               Pop_Reg_FPR(dst));
11128   ins_pipe( fpu_reg_mem );
11129 %}
11130 
11131 // Convert an int to a float in xmm; no rounding step needed.
11132 instruct convI2F_reg(regF dst, rRegI src) %{
11133   predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
11134   match(Set dst (ConvI2F src));
11135   format %{ "CVTSI2SS $dst, $src" %}
11136   ins_encode %{
11137     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11138   %}
11139   ins_pipe( pipe_slow );
11140 %}
11141 
instruct convXI2F_reg(regF dst, rRegI src)
11143 %{
11144   predicate( UseSSE>=2 && UseXmmI2F );
11145   match(Set dst (ConvI2F src));
11146 
11147   format %{ "MOVD  $dst,$src\n\t"
11148             "CVTDQ2PS $dst,$dst\t# i2f" %}
11149   ins_encode %{
11150     __ movdl($dst$$XMMRegister, $src$$Register);
11151     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11152   %}
11153   ins_pipe(pipe_slow); // XXX
11154 %}
11155 
11156 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11157   match(Set dst (ConvI2L src));
11158   effect(KILL cr);
11159   ins_cost(375);
11160   format %{ "MOV    $dst.lo,$src\n\t"
11161             "MOV    $dst.hi,$src\n\t"
11162             "SAR    $dst.hi,31" %}
11163   ins_encode(convert_int_long(dst,src));
11164   ins_pipe( ialu_reg_reg_long );
11165 %}
11166 
11167 // Zero-extend convert int to long
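// (immL_32bits matches only the constant 0xFFFFFFFF, so (AndL (ConvI2L x) mask) is a
//  pure zero extension and the high word can simply be cleared.)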
11168 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11169   match(Set dst (AndL (ConvI2L src) mask) );
11170   effect( KILL flags );
11171   ins_cost(250);
11172   format %{ "MOV    $dst.lo,$src\n\t"
11173             "XOR    $dst.hi,$dst.hi" %}
11174   opcode(0x33); // XOR
11175   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11176   ins_pipe( ialu_reg_reg_long );
11177 %}
11178 
11179 // Zero-extend long
11180 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11181   match(Set dst (AndL src mask) );
11182   effect( KILL flags );
11183   ins_cost(250);
11184   format %{ "MOV    $dst.lo,$src.lo\n\t"
11185             "XOR    $dst.hi,$dst.hi\n\t" %}
11186   opcode(0x33); // XOR
11187   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11188   ins_pipe( ialu_reg_reg_long );
11189 %}
11190 
11191 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11192   predicate (UseSSE<=1);
11193   match(Set dst (ConvL2D src));
11194   effect( KILL cr );
11195   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11196             "PUSH   $src.lo\n\t"
11197             "FILD   ST,[ESP + #0]\n\t"
11198             "ADD    ESP,8\n\t"
11199             "FSTP_D $dst\t# D-round" %}
11200   opcode(0xDF, 0x5);  /* DF /5 */
11201   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11202   ins_pipe( pipe_slow );
11203 %}
11204 
11205 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11206   predicate (UseSSE>=2);
11207   match(Set dst (ConvL2D src));
11208   effect( KILL cr );
11209   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11210             "PUSH   $src.lo\n\t"
11211             "FILD_D [ESP]\n\t"
11212             "FSTP_D [ESP]\n\t"
11213             "MOVSD  $dst,[ESP]\n\t"
11214             "ADD    ESP,8" %}
11215   opcode(0xDF, 0x5);  /* DF /5 */
11216   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11217   ins_pipe( pipe_slow );
11218 %}
11219 
11220 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11221   predicate (UseSSE>=1);
11222   match(Set dst (ConvL2F src));
11223   effect( KILL cr );
11224   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11225             "PUSH   $src.lo\n\t"
11226             "FILD_D [ESP]\n\t"
11227             "FSTP_S [ESP]\n\t"
11228             "MOVSS  $dst,[ESP]\n\t"
11229             "ADD    ESP,8" %}
11230   opcode(0xDF, 0x5);  /* DF /5 */
11231   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11232   ins_pipe( pipe_slow );
11233 %}
11234 
11235 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11236   match(Set dst (ConvL2F src));
11237   effect( KILL cr );
11238   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11239             "PUSH   $src.lo\n\t"
11240             "FILD   ST,[ESP + #0]\n\t"
11241             "ADD    ESP,8\n\t"
11242             "FSTP_S $dst\t# F-round" %}
11243   opcode(0xDF, 0x5);  /* DF /5 */
11244   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11245   ins_pipe( pipe_slow );
11246 %}
11247 
11248 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11249   match(Set dst (ConvL2I src));
11250   effect( DEF dst, USE src );
11251   format %{ "MOV    $dst,$src.lo" %}
11252   ins_encode(enc_CopyL_Lo(dst,src));
11253   ins_pipe( ialu_reg_reg );
11254 %}
11255 
11256 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11257   match(Set dst (MoveF2I src));
11258   effect( DEF dst, USE src );
11259   ins_cost(100);
11260   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11261   ins_encode %{
11262     __ movl($dst$$Register, Address(rsp, $src$$disp));
11263   %}
11264   ins_pipe( ialu_reg_mem );
11265 %}
11266 
11267 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11268   predicate(UseSSE==0);
11269   match(Set dst (MoveF2I src));
11270   effect( DEF dst, USE src );
11271 
11272   ins_cost(125);
11273   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11274   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11275   ins_pipe( fpu_mem_reg );
11276 %}
11277 
11278 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11279   predicate(UseSSE>=1);
11280   match(Set dst (MoveF2I src));
11281   effect( DEF dst, USE src );
11282 
11283   ins_cost(95);
11284   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11285   ins_encode %{
11286     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11287   %}
11288   ins_pipe( pipe_slow );
11289 %}
11290 
11291 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11292   predicate(UseSSE>=2);
11293   match(Set dst (MoveF2I src));
11294   effect( DEF dst, USE src );
11295   ins_cost(85);
11296   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11297   ins_encode %{
11298     __ movdl($dst$$Register, $src$$XMMRegister);
11299   %}
11300   ins_pipe( pipe_slow );
11301 %}
11302 
11303 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11304   match(Set dst (MoveI2F src));
11305   effect( DEF dst, USE src );
11306 
11307   ins_cost(100);
11308   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11309   ins_encode %{
11310     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11311   %}
11312   ins_pipe( ialu_mem_reg );
11313 %}
11314 
11315 
11316 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11317   predicate(UseSSE==0);
11318   match(Set dst (MoveI2F src));
11319   effect(DEF dst, USE src);
11320 
11321   ins_cost(125);
11322   format %{ "FLD_S  $src\n\t"
11323             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11324   opcode(0xD9);               /* D9 /0, FLD m32real */
11325   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11326               Pop_Reg_FPR(dst) );
11327   ins_pipe( fpu_reg_mem );
11328 %}
11329 
11330 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11331   predicate(UseSSE>=1);
11332   match(Set dst (MoveI2F src));
11333   effect( DEF dst, USE src );
11334 
11335   ins_cost(95);
11336   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11337   ins_encode %{
11338     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11339   %}
11340   ins_pipe( pipe_slow );
11341 %}
11342 
11343 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11344   predicate(UseSSE>=2);
11345   match(Set dst (MoveI2F src));
11346   effect( DEF dst, USE src );
11347 
11348   ins_cost(85);
11349   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11350   ins_encode %{
11351     __ movdl($dst$$XMMRegister, $src$$Register);
11352   %}
11353   ins_pipe( pipe_slow );
11354 %}
11355 
11356 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11357   match(Set dst (MoveD2L src));
11358   effect(DEF dst, USE src);
11359 
11360   ins_cost(250);
11361   format %{ "MOV    $dst.lo,$src\n\t"
11362             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11363   opcode(0x8B, 0x8B);
11364   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11365   ins_pipe( ialu_mem_long_reg );
11366 %}
11367 
11368 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11369   predicate(UseSSE<=1);
11370   match(Set dst (MoveD2L src));
11371   effect(DEF dst, USE src);
11372 
11373   ins_cost(125);
11374   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11375   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11376   ins_pipe( fpu_mem_reg );
11377 %}
11378 
11379 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11380   predicate(UseSSE>=2);
11381   match(Set dst (MoveD2L src));
11382   effect(DEF dst, USE src);
11383   ins_cost(95);
11384   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11385   ins_encode %{
11386     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11387   %}
11388   ins_pipe( pipe_slow );
11389 %}
11390 
11391 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11392   predicate(UseSSE>=2);
11393   match(Set dst (MoveD2L src));
11394   effect(DEF dst, USE src, TEMP tmp);
11395   ins_cost(85);
11396   format %{ "MOVD   $dst.lo,$src\n\t"
11397             "PSHUFLW $tmp,$src,0x4E\n\t"
11398             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11399   ins_encode %{
11400     __ movdl($dst$$Register, $src$$XMMRegister);
11401     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
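    // 0x4E selects words (2,3,0,1), swapping the two dwords of the low quadword so the
    // following movdl picks up the high 32 bits of the double.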
11402     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11403   %}
11404   ins_pipe( pipe_slow );
11405 %}
11406 
11407 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11408   match(Set dst (MoveL2D src));
11409   effect(DEF dst, USE src);
11410 
11411   ins_cost(200);
11412   format %{ "MOV    $dst,$src.lo\n\t"
11413             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11414   opcode(0x89, 0x89);
11415   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11416   ins_pipe( ialu_mem_long_reg );
11417 %}
11418 
11419 
11420 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11421   predicate(UseSSE<=1);
11422   match(Set dst (MoveL2D src));
11423   effect(DEF dst, USE src);
11424   ins_cost(125);
11425 
11426   format %{ "FLD_D  $src\n\t"
11427             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11428   opcode(0xDD);               /* DD /0, FLD m64real */
11429   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11430               Pop_Reg_DPR(dst) );
11431   ins_pipe( fpu_reg_mem );
11432 %}
11433 
11434 
11435 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11436   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11437   match(Set dst (MoveL2D src));
11438   effect(DEF dst, USE src);
11439 
11440   ins_cost(95);
11441   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11442   ins_encode %{
11443     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11444   %}
11445   ins_pipe( pipe_slow );
11446 %}
11447 
11448 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11449   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11450   match(Set dst (MoveL2D src));
11451   effect(DEF dst, USE src);
11452 
11453   ins_cost(95);
11454   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11455   ins_encode %{
11456     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11457   %}
11458   ins_pipe( pipe_slow );
11459 %}
11460 
11461 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11462   predicate(UseSSE>=2);
11463   match(Set dst (MoveL2D src));
11464   effect(TEMP dst, USE src, TEMP tmp);
11465   ins_cost(85);
11466   format %{ "MOVD   $dst,$src.lo\n\t"
11467             "MOVD   $tmp,$src.hi\n\t"
11468             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11469   ins_encode %{
11470     __ movdl($dst$$XMMRegister, $src$$Register);
11471     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
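    // punpckldq interleaves the low dwords: dst[31:0] keeps $src.lo and dst[63:32]
    // receives $src.hi, reassembling the 64-bit bit pattern of the long.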
11472     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11473   %}
11474   ins_pipe( pipe_slow );
11475 %}
11476 
11477 //----------------------------- CompressBits/ExpandBits ------------------------
11478 
11479 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11480   predicate(n->bottom_type()->isa_long());
11481   match(Set dst (CompressBits src mask));
11482   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11483   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11484   ins_encode %{
    Label exit, partial_result;
    // Extract the upper and lower 32 bits of the source into the destination register pair
    // in parallel, then merge the two results so that the upper-half bits are laid out
    // contiguously after the lower-half result.
11489     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11490     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
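    // Example (per 32-bit half): pext(src=0b10110110, mask=0b10101010) gathers the src
    // bits at the mask's set positions (1,3,5,7) and packs them LSB-first, giving 0b1101.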
11491     __ popcntl($rtmp$$Register, $mask$$Register);
11492     // Skip merging if bit count of lower mask register is equal to 32 (register size).
11493     __ cmpl($rtmp$$Register, 32);
11494     __ jccb(Assembler::equal, exit);
    // Due to the limited number of GPRs on a 32-bit target, use an XMM register as a spill slot.
11496     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11497     // Shift left the contents of upper destination register by true bit count of lower mask register
11498     // and merge with lower destination register.
11499     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11500     __ orl($dst$$Register, $rtmp$$Register);
11501     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11502     // Zero out upper destination register if true bit count of lower 32 bit mask is zero
11503     // since contents of upper destination have already been copied to lower destination
11504     // register.
11505     __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
11507     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11508     __ jmp(exit);
    __ bind(partial_result);
11510     // Perform right shift over upper destination register to move out bits already copied
11511     // to lower destination register.
11512     __ subl($rtmp$$Register, 32);
11513     __ negl($rtmp$$Register);
11514     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11515     __ bind(exit);
11516   %}
11517   ins_pipe( pipe_slow );
11518 %}
11519 
11520 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11521   predicate(n->bottom_type()->isa_long());
11522   match(Set dst (ExpandBits src mask));
11523   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11524   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11525   ins_encode %{
    // The deposit operation reads bits from the source register sequentially, starting at the
    // LSB, and lays them out in the destination register at the bit positions corresponding to
    // true bits in the mask register.  Thus the number of source bits consumed equals the
    // combined true bit count of the mask register pair.
11530     Label exit, mask_clipping;
11531     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11532     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
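    // Example (per 32-bit half): pdep(src=0b00000111, mask=0b10101010) deposits the three
    // low src bits into the mask's set positions (1,3,5), giving 0b00101010.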
11533     __ popcntl($rtmp$$Register, $mask$$Register);
    // If the true bit count of the lower mask register is 32, no bits of the lower
    // source register feed into the upper destination register.
11536     __ cmpl($rtmp$$Register, 32);
11537     __ jccb(Assembler::equal, exit);
    // Due to the limited number of GPRs on a 32-bit target, use an XMM register as a spill slot.
11539     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11540     // Shift right the contents of lower source register to remove already consumed bits.
11541     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11542     // Extract the bits from lower source register starting from LSB under the influence
11543     // of upper mask register.
11544     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11545     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11546     __ subl($rtmp$$Register, 32);
11547     __ negl($rtmp$$Register);
11548     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11549     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
11550     // Clear the set bits in upper mask register which have been used to extract the contents
11551     // from lower source register.
11552     __ bind(mask_clipping);
11553     __ blsrl($mask$$Register, $mask$$Register);
11554     __ decrementl($rtmp$$Register, 1);
11555     __ jccb(Assembler::greater, mask_clipping);
11556     // Starting from LSB extract the bits from upper source register under the influence of
11557     // remaining set bits in upper mask register.
11558     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11559     // Merge the partial results extracted from lower and upper source register bits.
11560     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11561     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11562     __ bind(exit);
11563   %}
11564   ins_pipe( pipe_slow );
11565 %}
11566 
11567 // =======================================================================
11568 // fast clearing of an array
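// The incoming count is in doublewords (8-byte units), which is why the listings below
// scale ECX by 3 to get a byte count for REP STOSB and by 1 to get a dword count for REP STOS.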
11569 // Small ClearArray non-AVX512.
11570 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11571   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11572   match(Set dummy (ClearArray cnt base));
11573   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11574 
11575   format %{ $$template
11576     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11577     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11578     $$emit$$"JG     LARGE\n\t"
11579     $$emit$$"SHL    ECX, 1\n\t"
11580     $$emit$$"DEC    ECX\n\t"
11581     $$emit$$"JS     DONE\t# Zero length\n\t"
11582     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11583     $$emit$$"DEC    ECX\n\t"
11584     $$emit$$"JGE    LOOP\n\t"
11585     $$emit$$"JMP    DONE\n\t"
11586     $$emit$$"# LARGE:\n\t"
11587     if (UseFastStosb) {
11588        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11589        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11590     } else if (UseXMMForObjInit) {
11591        $$emit$$"MOV     RDI,RAX\n\t"
11592        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11593        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11594        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11595        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11596        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11597        $$emit$$"ADD     0x40,RAX\n\t"
11598        $$emit$$"# L_zero_64_bytes:\n\t"
11599        $$emit$$"SUB     0x8,RCX\n\t"
11600        $$emit$$"JGE     L_loop\n\t"
11601        $$emit$$"ADD     0x4,RCX\n\t"
11602        $$emit$$"JL      L_tail\n\t"
11603        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11604        $$emit$$"ADD     0x20,RAX\n\t"
11605        $$emit$$"SUB     0x4,RCX\n\t"
11606        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11607        $$emit$$"ADD     0x4,RCX\n\t"
11608        $$emit$$"JLE     L_end\n\t"
11609        $$emit$$"DEC     RCX\n\t"
11610        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11611        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11612        $$emit$$"ADD     0x8,RAX\n\t"
11613        $$emit$$"DEC     RCX\n\t"
11614        $$emit$$"JGE     L_sloop\n\t"
11615        $$emit$$"# L_end:\n\t"
11616     } else {
11617        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11618        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11619     }
11620     $$emit$$"# DONE"
11621   %}
11622   ins_encode %{
11623     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11624                  $tmp$$XMMRegister, false, knoreg);
11625   %}
11626   ins_pipe( pipe_slow );
11627 %}
11628 
11629 // Small ClearArray AVX512 non-constant length.
11630 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11631   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11632   match(Set dummy (ClearArray cnt base));
11633   ins_cost(125);
11634   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11635 
11636   format %{ $$template
11637     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11638     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11639     $$emit$$"JG     LARGE\n\t"
11640     $$emit$$"SHL    ECX, 1\n\t"
11641     $$emit$$"DEC    ECX\n\t"
11642     $$emit$$"JS     DONE\t# Zero length\n\t"
11643     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11644     $$emit$$"DEC    ECX\n\t"
11645     $$emit$$"JGE    LOOP\n\t"
11646     $$emit$$"JMP    DONE\n\t"
11647     $$emit$$"# LARGE:\n\t"
11648     if (UseFastStosb) {
11649        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11650        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11651     } else if (UseXMMForObjInit) {
11652        $$emit$$"MOV     RDI,RAX\n\t"
11653        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11654        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11655        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11656        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11657        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11658        $$emit$$"ADD     0x40,RAX\n\t"
11659        $$emit$$"# L_zero_64_bytes:\n\t"
11660        $$emit$$"SUB     0x8,RCX\n\t"
11661        $$emit$$"JGE     L_loop\n\t"
11662        $$emit$$"ADD     0x4,RCX\n\t"
11663        $$emit$$"JL      L_tail\n\t"
11664        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11665        $$emit$$"ADD     0x20,RAX\n\t"
11666        $$emit$$"SUB     0x4,RCX\n\t"
11667        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11668        $$emit$$"ADD     0x4,RCX\n\t"
11669        $$emit$$"JLE     L_end\n\t"
11670        $$emit$$"DEC     RCX\n\t"
11671        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11672        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11673        $$emit$$"ADD     0x8,RAX\n\t"
11674        $$emit$$"DEC     RCX\n\t"
11675        $$emit$$"JGE     L_sloop\n\t"
11676        $$emit$$"# L_end:\n\t"
11677     } else {
11678        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11679        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11680     }
11681     $$emit$$"# DONE"
11682   %}
11683   ins_encode %{
11684     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11685                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11686   %}
11687   ins_pipe( pipe_slow );
11688 %}
11689 
11690 // Large ClearArray non-AVX512.
11691 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11692   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11693   match(Set dummy (ClearArray cnt base));
11694   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11695   format %{ $$template
11696     if (UseFastStosb) {
11697        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11698        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11699        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11700     } else if (UseXMMForObjInit) {
11701        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11702        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11703        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11704        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11705        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11706        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11707        $$emit$$"ADD     0x40,RAX\n\t"
11708        $$emit$$"# L_zero_64_bytes:\n\t"
11709        $$emit$$"SUB     0x8,RCX\n\t"
11710        $$emit$$"JGE     L_loop\n\t"
11711        $$emit$$"ADD     0x4,RCX\n\t"
11712        $$emit$$"JL      L_tail\n\t"
11713        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11714        $$emit$$"ADD     0x20,RAX\n\t"
11715        $$emit$$"SUB     0x4,RCX\n\t"
11716        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11717        $$emit$$"ADD     0x4,RCX\n\t"
11718        $$emit$$"JLE     L_end\n\t"
11719        $$emit$$"DEC     RCX\n\t"
11720        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11721        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11722        $$emit$$"ADD     0x8,RAX\n\t"
11723        $$emit$$"DEC     RCX\n\t"
11724        $$emit$$"JGE     L_sloop\n\t"
11725        $$emit$$"# L_end:\n\t"
11726     } else {
11727        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11728        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11729        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11730     }
11731     $$emit$$"# DONE"
11732   %}
11733   ins_encode %{
11734     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11735                  $tmp$$XMMRegister, true, knoreg);
11736   %}
11737   ins_pipe( pipe_slow );
11738 %}
11739 
11740 // Large ClearArray AVX512.
11741 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11742   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11743   match(Set dummy (ClearArray cnt base));
11744   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11745   format %{ $$template
11746     if (UseFastStosb) {
11747        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11748        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11749        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11750     } else if (UseXMMForObjInit) {
11751        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11752        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11753        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11754        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11755        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11756        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11757        $$emit$$"ADD     0x40,RAX\n\t"
11758        $$emit$$"# L_zero_64_bytes:\n\t"
11759        $$emit$$"SUB     0x8,RCX\n\t"
11760        $$emit$$"JGE     L_loop\n\t"
11761        $$emit$$"ADD     0x4,RCX\n\t"
11762        $$emit$$"JL      L_tail\n\t"
11763        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11764        $$emit$$"ADD     0x20,RAX\n\t"
11765        $$emit$$"SUB     0x4,RCX\n\t"
11766        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11767        $$emit$$"ADD     0x4,RCX\n\t"
11768        $$emit$$"JLE     L_end\n\t"
11769        $$emit$$"DEC     RCX\n\t"
11770        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11771        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11772        $$emit$$"ADD     0x8,RAX\n\t"
11773        $$emit$$"DEC     RCX\n\t"
11774        $$emit$$"JGE     L_sloop\n\t"
11775        $$emit$$"# L_end:\n\t"
11776     } else {
11777        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11778        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11779        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11780     }
11781     $$emit$$"# DONE"
11782   %}
11783   ins_encode %{
11784     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11785                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11786   %}
11787   ins_pipe( pipe_slow );
11788 %}
11789 
11790 // Small ClearArray AVX512 constant length.
11791 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11792 %{
11793   predicate(!((ClearArrayNode*)n)->is_large() &&
11794                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11795   match(Set dummy (ClearArray cnt base));
11796   ins_cost(100);
11797   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11798   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11799   ins_encode %{
11800    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11801   %}
11802   ins_pipe(pipe_slow);
11803 %}
11804 
11805 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11806                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11807   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11808   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11809   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11810 
11811   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11812   ins_encode %{
11813     __ string_compare($str1$$Register, $str2$$Register,
11814                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11815                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11816   %}
11817   ins_pipe( pipe_slow );
11818 %}
11819 
11820 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11821                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11822   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11823   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11824   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11825 
11826   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11827   ins_encode %{
11828     __ string_compare($str1$$Register, $str2$$Register,
11829                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11830                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11831   %}
11832   ins_pipe( pipe_slow );
11833 %}
11834 
11835 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11836                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11837   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11838   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11839   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11840 
11841   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11842   ins_encode %{
11843     __ string_compare($str1$$Register, $str2$$Register,
11844                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11845                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11846   %}
11847   ins_pipe( pipe_slow );
11848 %}
11849 
11850 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11851                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11852   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11853   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11854   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11855 
11856   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11857   ins_encode %{
11858     __ string_compare($str1$$Register, $str2$$Register,
11859                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11860                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11861   %}
11862   ins_pipe( pipe_slow );
11863 %}
11864 
11865 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11866                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11867   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11868   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11869   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11870 
11871   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11872   ins_encode %{
11873     __ string_compare($str1$$Register, $str2$$Register,
11874                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11875                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11876   %}
11877   ins_pipe( pipe_slow );
11878 %}
11879 
11880 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11881                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11882   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11883   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11884   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11885 
11886   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11887   ins_encode %{
11888     __ string_compare($str1$$Register, $str2$$Register,
11889                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11890                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11891   %}
11892   ins_pipe( pipe_slow );
11893 %}
11894 
11895 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11896                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11897   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11898   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11899   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11900 
11901   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11902   ins_encode %{
11903     __ string_compare($str2$$Register, $str1$$Register,
11904                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11905                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11906   %}
11907   ins_pipe( pipe_slow );
11908 %}
11909 
11910 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11911                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11912   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11913   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11914   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11915 
11916   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11917   ins_encode %{
11918     __ string_compare($str2$$Register, $str1$$Register,
11919                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11920                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11921   %}
11922   ins_pipe( pipe_slow );
11923 %}
11924 
11925 // fast string equals
11926 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11927                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11928   predicate(!VM_Version::supports_avx512vlbw());
11929   match(Set result (StrEquals (Binary str1 str2) cnt));
11930   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11931 
11932   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11933   ins_encode %{
11934     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11935                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11936                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11937   %}
11938 
11939   ins_pipe( pipe_slow );
11940 %}
11941 
11942 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11943                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11944   predicate(VM_Version::supports_avx512vlbw());
11945   match(Set result (StrEquals (Binary str1 str2) cnt));
11946   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11947 
11948   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11949   ins_encode %{
11950     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11951                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11952                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11953   %}
11954 
11955   ins_pipe( pipe_slow );
11956 %}
11957 
11958 
11959 // fast search of substring with known size.
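// The constant-size thresholds below (16 elements for LL, 8 for the char-oriented UU/UL
// cases) amount to one full 16-byte XMM/PCMPESTRI block once the pattern is in its working
// form, so string_indexofC8 can read it straight from memory; shorter patterns may overread
// across a page boundary and are staged through the stack instead (see string_indexof).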
11960 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11961                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11962   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11963   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11964   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11965 
11966   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11967   ins_encode %{
11968     int icnt2 = (int)$int_cnt2$$constant;
11969     if (icnt2 >= 16) {
11970       // IndexOf for constant substrings with size >= 16 elements
11971       // which don't need to be loaded through stack.
11972       __ string_indexofC8($str1$$Register, $str2$$Register,
11973                           $cnt1$$Register, $cnt2$$Register,
11974                           icnt2, $result$$Register,
11975                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11976     } else {
11977       // Small strings are loaded through stack if they cross page boundary.
11978       __ string_indexof($str1$$Register, $str2$$Register,
11979                         $cnt1$$Register, $cnt2$$Register,
11980                         icnt2, $result$$Register,
11981                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11982     }
11983   %}
11984   ins_pipe( pipe_slow );
11985 %}
11986 
11987 // fast search of substring with known size.
11988 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11989                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11990   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11991   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11992   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11993 
11994   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11995   ins_encode %{
11996     int icnt2 = (int)$int_cnt2$$constant;
11997     if (icnt2 >= 8) {
11998       // IndexOf for constant substrings with size >= 8 elements
11999       // which don't need to be loaded through stack.
12000       __ string_indexofC8($str1$$Register, $str2$$Register,
12001                           $cnt1$$Register, $cnt2$$Register,
12002                           icnt2, $result$$Register,
12003                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12004     } else {
12005       // Small strings are loaded through stack if they cross page boundary.
12006       __ string_indexof($str1$$Register, $str2$$Register,
12007                         $cnt1$$Register, $cnt2$$Register,
12008                         icnt2, $result$$Register,
12009                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12010     }
12011   %}
12012   ins_pipe( pipe_slow );
12013 %}
12014 
12015 // fast search of substring with known size.
12016 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12017                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12018   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12019   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12020   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12021 
12022   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12023   ins_encode %{
12024     int icnt2 = (int)$int_cnt2$$constant;
12025     if (icnt2 >= 8) {
12026       // IndexOf for constant substrings with size >= 8 elements
12027       // which don't need to be loaded through stack.
12028       __ string_indexofC8($str1$$Register, $str2$$Register,
12029                           $cnt1$$Register, $cnt2$$Register,
12030                           icnt2, $result$$Register,
12031                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12032     } else {
12033       // Small strings are loaded through stack if they cross page boundary.
12034       __ string_indexof($str1$$Register, $str2$$Register,
12035                         $cnt1$$Register, $cnt2$$Register,
12036                         icnt2, $result$$Register,
12037                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12038     }
12039   %}
12040   ins_pipe( pipe_slow );
12041 %}
12042 
12043 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12044                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12045   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12046   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12047   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12048 
12049   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12050   ins_encode %{
12051     __ string_indexof($str1$$Register, $str2$$Register,
12052                       $cnt1$$Register, $cnt2$$Register,
12053                       (-1), $result$$Register,
12054                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12055   %}
12056   ins_pipe( pipe_slow );
12057 %}
12058 
12059 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12060                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12061   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12062   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12063   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12064 
12065   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12066   ins_encode %{
12067     __ string_indexof($str1$$Register, $str2$$Register,
12068                       $cnt1$$Register, $cnt2$$Register,
12069                       (-1), $result$$Register,
12070                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12071   %}
12072   ins_pipe( pipe_slow );
12073 %}
12074 
12075 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12076                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12077   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12078   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12079   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12080 
12081   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12082   ins_encode %{
12083     __ string_indexof($str1$$Register, $str2$$Register,
12084                       $cnt1$$Register, $cnt2$$Register,
12085                       (-1), $result$$Register,
12086                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12087   %}
12088   ins_pipe( pipe_slow );
12089 %}
12090 
12091 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12092                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12093   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12094   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12095   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12096   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12097   ins_encode %{
12098     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12099                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12100   %}
12101   ins_pipe( pipe_slow );
12102 %}
12103 
12104 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12105                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12106   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12107   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12108   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12109   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12110   ins_encode %{
12111     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12112                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12113   %}
12114   ins_pipe( pipe_slow );
12115 %}
12116 
12117 
12118 // fast array equals
12119 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12120                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12121 %{
12122   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12123   match(Set result (AryEq ary1 ary2));
12124   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12125   //ins_cost(300);
12126 
12127   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12128   ins_encode %{
12129     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12130                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12131                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12132   %}
12133   ins_pipe( pipe_slow );
12134 %}
12135 
12136 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12137                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12138 %{
12139   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12140   match(Set result (AryEq ary1 ary2));
12141   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12142   //ins_cost(300);
12143 
12144   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12145   ins_encode %{
12146     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12147                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12148                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12149   %}
12150   ins_pipe( pipe_slow );
12151 %}
12152 
12153 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12154                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12155 %{
12156   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12157   match(Set result (AryEq ary1 ary2));
12158   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12159   //ins_cost(300);
12160 
12161   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12162   ins_encode %{
12163     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12164                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12165                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12166   %}
12167   ins_pipe( pipe_slow );
12168 %}
12169 
12170 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12171                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12172 %{
12173   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12174   match(Set result (AryEq ary1 ary2));
12175   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12176   //ins_cost(300);
12177 
12178   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12179   ins_encode %{
12180     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12181                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12182                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12183   %}
12184   ins_pipe( pipe_slow );
12185 %}
12186 
12187 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12188                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12189 %{
12190   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12191   match(Set result (CountPositives ary1 len));
12192   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12193 
12194   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12195   ins_encode %{
12196     __ count_positives($ary1$$Register, $len$$Register,
12197                        $result$$Register, $tmp3$$Register,
12198                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12199   %}
12200   ins_pipe( pipe_slow );
12201 %}
12202 
12203 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12204                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12205 %{
12206   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12207   match(Set result (CountPositives ary1 len));
12208   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12209 
12210   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12211   ins_encode %{
12212     __ count_positives($ary1$$Register, $len$$Register,
12213                        $result$$Register, $tmp3$$Register,
12214                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12215   %}
12216   ins_pipe( pipe_slow );
12217 %}
12218 
12219 
12220 // fast char[] to byte[] compression
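// Compression narrows each UTF-16 char to a Latin-1 byte and bails out as soon
// as it sees a char whose value does not fit in eight bits.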
12221 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12222                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12223   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12224   match(Set result (StrCompressedCopy src (Binary dst len)));
12225   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12226 
12227   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12228   ins_encode %{
12229     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12230                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12231                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12232                            knoreg, knoreg);
12233   %}
12234   ins_pipe( pipe_slow );
12235 %}
12236 
12237 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12238                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12239   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12240   match(Set result (StrCompressedCopy src (Binary dst len)));
12241   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12242 
12243   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12244   ins_encode %{
12245     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12246                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12247                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12248                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12249   %}
12250   ins_pipe( pipe_slow );
12251 %}
12252 
12253 // fast byte[] to char[] inflation
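// Inflation zero-extends each Latin-1 byte back to a UTF-16 char; it cannot
// fail, so the match only sets a dummy result.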
12254 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12255                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12256   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12257   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12258   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12259 
12260   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12261   ins_encode %{
12262     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12263                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12264   %}
12265   ins_pipe( pipe_slow );
12266 %}
12267 
12268 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12269                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12270   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12271   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12272   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12273 
12274   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12275   ins_encode %{
12276     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12277                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12278   %}
12279   ins_pipe( pipe_slow );
12280 %}
12281 
12282 // encode char[] to byte[] in ISO_8859_1
12283 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12284                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12285                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12286   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12287   match(Set result (EncodeISOArray src (Binary dst len)));
12288   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12289 
12290   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12291   ins_encode %{
12292     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12293                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12294                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12295   %}
12296   ins_pipe( pipe_slow );
12297 %}
12298 
12299 // encode char[] to byte[] in ASCII
12300 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12301                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12302                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12303   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12304   match(Set result (EncodeISOArray src (Binary dst len)));
12305   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12306 
12307   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12308   ins_encode %{
12309     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12310                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12311                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12312   %}
12313   ins_pipe( pipe_slow );
12314 %}
12315 
12316 //----------Control Flow Instructions------------------------------------------
12317 // Signed compare Instructions
12318 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12319   match(Set cr (CmpI op1 op2));
12320   effect( DEF cr, USE op1, USE op2 );
12321   format %{ "CMP    $op1,$op2" %}
12322   opcode(0x3B);  /* Opcode 3B /r */
12323   ins_encode( OpcP, RegReg( op1, op2) );
12324   ins_pipe( ialu_cr_reg_reg );
12325 %}
12326 
12327 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12328   match(Set cr (CmpI op1 op2));
12329   effect( DEF cr, USE op1 );
12330   format %{ "CMP    $op1,$op2" %}
12331   opcode(0x81,0x07);  /* Opcode 81 /7 */
12332   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12333   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12334   ins_pipe( ialu_cr_reg_imm );
12335 %}
12336 
12337 // Cisc-spilled version of cmpI_eReg
12338 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12339   match(Set cr (CmpI op1 (LoadI op2)));
12340 
12341   format %{ "CMP    $op1,$op2" %}
12342   ins_cost(500);
12343   opcode(0x3B);  /* Opcode 3B /r */
12344   ins_encode( OpcP, RegMem( op1, op2) );
12345   ins_pipe( ialu_cr_reg_mem );
12346 %}
12347 
12348 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12349   match(Set cr (CmpI src zero));
12350   effect( DEF cr, USE src );
12351 
12352   format %{ "TEST   $src,$src" %}
12353   opcode(0x85);
12354   ins_encode( OpcP, RegReg( src, src ) );
12355   ins_pipe( ialu_cr_reg_imm );
12356 %}
12357 
12358 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12359   match(Set cr (CmpI (AndI src con) zero));
12360 
12361   format %{ "TEST   $src,$con" %}
12362   opcode(0xF7,0x00);
12363   ins_encode( OpcP, RegOpc(src), Con32(con) );
12364   ins_pipe( ialu_cr_reg_imm );
12365 %}
12366 
12367 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12368   match(Set cr (CmpI (AndI src mem) zero));
12369 
12370   format %{ "TEST   $src,$mem" %}
12371   opcode(0x85);
12372   ins_encode( OpcP, RegMem( src, mem ) );
12373   ins_pipe( ialu_cr_reg_mem );
12374 %}
12375 
12376 // Unsigned compare Instructions; really, same as signed except they
12377 // produce an eFlagsRegU instead of eFlagsReg.
12378 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12379   match(Set cr (CmpU op1 op2));
12380 
12381   format %{ "CMPu   $op1,$op2" %}
12382   opcode(0x3B);  /* Opcode 3B /r */
12383   ins_encode( OpcP, RegReg( op1, op2) );
12384   ins_pipe( ialu_cr_reg_reg );
12385 %}
12386 
12387 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12388   match(Set cr (CmpU op1 op2));
12389 
12390   format %{ "CMPu   $op1,$op2" %}
12391   opcode(0x81,0x07);  /* Opcode 81 /7 */
12392   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12393   ins_pipe( ialu_cr_reg_imm );
12394 %}
12395 
// Cisc-spilled version of cmpU_eReg
12397 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12398   match(Set cr (CmpU op1 (LoadI op2)));
12399 
12400   format %{ "CMPu   $op1,$op2" %}
12401   ins_cost(500);
12402   opcode(0x3B);  /* Opcode 3B /r */
12403   ins_encode( OpcP, RegMem( op1, op2) );
12404   ins_pipe( ialu_cr_reg_mem );
12405 %}
12406 
12407 // // Cisc-spilled version of cmpU_eReg
12408 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12409 //  match(Set cr (CmpU (LoadI op1) op2));
12410 //
12411 //  format %{ "CMPu   $op1,$op2" %}
12412 //  ins_cost(500);
12413 //  opcode(0x39);  /* Opcode 39 /r */
12414 //  ins_encode( OpcP, RegMem( op1, op2) );
12415 //%}
12416 
12417 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12418   match(Set cr (CmpU src zero));
12419 
12420   format %{ "TESTu  $src,$src" %}
12421   opcode(0x85);
12422   ins_encode( OpcP, RegReg( src, src ) );
12423   ins_pipe( ialu_cr_reg_imm );
12424 %}
12425 
12426 // Unsigned pointer compare Instructions
12427 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12428   match(Set cr (CmpP op1 op2));
12429 
12430   format %{ "CMPu   $op1,$op2" %}
12431   opcode(0x3B);  /* Opcode 3B /r */
12432   ins_encode( OpcP, RegReg( op1, op2) );
12433   ins_pipe( ialu_cr_reg_reg );
12434 %}
12435 
12436 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12437   match(Set cr (CmpP op1 op2));
12438 
12439   format %{ "CMPu   $op1,$op2" %}
12440   opcode(0x81,0x07);  /* Opcode 81 /7 */
12441   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12442   ins_pipe( ialu_cr_reg_imm );
12443 %}
12444 
// Cisc-spilled version of cmpP_eReg
12446 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12447   match(Set cr (CmpP op1 (LoadP op2)));
12448 
12449   format %{ "CMPu   $op1,$op2" %}
12450   ins_cost(500);
12451   opcode(0x3B);  /* Opcode 3B /r */
12452   ins_encode( OpcP, RegMem( op1, op2) );
12453   ins_pipe( ialu_cr_reg_mem );
12454 %}
12455 
12456 // // Cisc-spilled version of cmpP_eReg
12457 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12458 //  match(Set cr (CmpP (LoadP op1) op2));
12459 //
12460 //  format %{ "CMPu   $op1,$op2" %}
12461 //  ins_cost(500);
12462 //  opcode(0x39);  /* Opcode 39 /r */
12463 //  ins_encode( OpcP, RegMem( op1, op2) );
12464 //%}
12465 
12466 // Compare raw pointer (used in out-of-heap check).
12467 // Only works because non-oop pointers must be raw pointers
12468 // and raw pointers have no anti-dependencies.
12469 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12470   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12471   match(Set cr (CmpP op1 (LoadP op2)));
12472 
12473   format %{ "CMPu   $op1,$op2" %}
12474   opcode(0x3B);  /* Opcode 3B /r */
12475   ins_encode( OpcP, RegMem( op1, op2) );
12476   ins_pipe( ialu_cr_reg_mem );
12477 %}
12478 
12479 //
12480 // This will generate a signed flags result. This should be ok
// since any compare against zero should be eq/ne.
12482 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12483   match(Set cr (CmpP src zero));
12484 
12485   format %{ "TEST   $src,$src" %}
12486   opcode(0x85);
12487   ins_encode( OpcP, RegReg( src, src ) );
12488   ins_pipe( ialu_cr_reg_imm );
12489 %}
12490 
12491 // Cisc-spilled version of testP_reg
12492 // This will generate a signed flags result. This should be ok
// since any compare against zero should be eq/ne.
12494 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12495   match(Set cr (CmpP (LoadP op) zero));
12496 
12497   format %{ "TEST   $op,0xFFFFFFFF" %}
12498   ins_cost(500);
12499   opcode(0xF7);               /* Opcode F7 /0 */
12500   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12501   ins_pipe( ialu_cr_reg_imm );
12502 %}
12503 
12504 // Yanked all unsigned pointer compare operations.
12505 // Pointer compares are done with CmpP which is already unsigned.
12506 
12507 //----------Max and Min--------------------------------------------------------
12508 // Min Instructions
12509 ////
12510 //   *** Min and Max using the conditional move are slower than the
12511 //   *** branch version on a Pentium III.
12512 // // Conditional move for min
12513 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12514 //  effect( USE_DEF op2, USE op1, USE cr );
12515 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12516 //  opcode(0x4C,0x0F);
12517 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12518 //  ins_pipe( pipe_cmov_reg );
12519 //%}
12520 //
12521 //// Min Register with Register (P6 version)
12522 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12523 //  predicate(VM_Version::supports_cmov() );
12524 //  match(Set op2 (MinI op1 op2));
12525 //  ins_cost(200);
12526 //  expand %{
12527 //    eFlagsReg cr;
12528 //    compI_eReg(cr,op1,op2);
12529 //    cmovI_reg_lt(op2,op1,cr);
12530 //  %}
12531 //%}
12532 
12533 // Min Register with Register (generic version)
12534 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12535   match(Set dst (MinI dst src));
12536   effect(KILL flags);
12537   ins_cost(300);
12538 
12539   format %{ "MIN    $dst,$src" %}
12540   opcode(0xCC);
12541   ins_encode( min_enc(dst,src) );
12542   ins_pipe( pipe_slow );
12543 %}
12544 
12545 // Max Register with Register
12546 //   *** Min and Max using the conditional move are slower than the
12547 //   *** branch version on a Pentium III.
12548 // // Conditional move for max
12549 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12550 //  effect( USE_DEF op2, USE op1, USE cr );
12551 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12552 //  opcode(0x4F,0x0F);
12553 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12554 //  ins_pipe( pipe_cmov_reg );
12555 //%}
12556 //
12557 // // Max Register with Register (P6 version)
12558 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12559 //  predicate(VM_Version::supports_cmov() );
12560 //  match(Set op2 (MaxI op1 op2));
12561 //  ins_cost(200);
12562 //  expand %{
12563 //    eFlagsReg cr;
12564 //    compI_eReg(cr,op1,op2);
12565 //    cmovI_reg_gt(op2,op1,cr);
12566 //  %}
12567 //%}
12568 
12569 // Max Register with Register (generic version)
12570 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12571   match(Set dst (MaxI dst src));
12572   effect(KILL flags);
12573   ins_cost(300);
12574 
12575   format %{ "MAX    $dst,$src" %}
12576   opcode(0xCC);
12577   ins_encode( max_enc(dst,src) );
12578   ins_pipe( pipe_slow );
12579 %}
12580 
12581 // ============================================================================
// Counted Loop limit node which represents the exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have a limit check for overflow.
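// A small worked example of the formula in the format string below, for a
// positive stride (init = 0, limit = 10, stride = 3):
//   exact limit = 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * 4 = 12
// i.e. the first induction value that fails the i < limit test.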
12585 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12586   match(Set limit (LoopLimit (Binary init limit) stride));
12587   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12588   ins_cost(300);
12589 
12590   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12591   ins_encode %{
12592     int strd = (int)$stride$$constant;
12593     assert(strd != 1 && strd != -1, "sanity");
12594     int m1 = (strd > 0) ? 1 : -1;
12595     // Convert limit to long (EAX:EDX)
12596     __ cdql();
12597     // Convert init to long (init:tmp)
12598     __ movl($tmp$$Register, $init$$Register);
12599     __ sarl($tmp$$Register, 31);
12600     // $limit - $init
12601     __ subl($limit$$Register, $init$$Register);
12602     __ sbbl($limit_hi$$Register, $tmp$$Register);
12603     // + ($stride - 1)
12604     if (strd > 0) {
12605       __ addl($limit$$Register, (strd - 1));
12606       __ adcl($limit_hi$$Register, 0);
12607       __ movl($tmp$$Register, strd);
12608     } else {
12609       __ addl($limit$$Register, (strd + 1));
12610       __ adcl($limit_hi$$Register, -1);
12611       __ lneg($limit_hi$$Register, $limit$$Register);
12612       __ movl($tmp$$Register, -strd);
12613     }
12614     // signed division: (EAX:EDX) / pos_stride
12615     __ idivl($tmp$$Register);
12616     if (strd < 0) {
12617       // restore sign
12618       __ negl($tmp$$Register);
12619     }
12620     // (EAX) * stride
12621     __ mull($tmp$$Register);
12622     // + init (ignore upper bits)
12623     __ addl($limit$$Register, $init$$Register);
12624   %}
12625   ins_pipe( pipe_slow );
12626 %}
12627 
12628 // ============================================================================
12629 // Branch Instructions
12630 // Jump Table
12631 instruct jumpXtnd(rRegI switch_val) %{
12632   match(Jump switch_val);
12633   ins_cost(350);
12634   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12635   ins_encode %{
12636     // Jump to Address(table_base + switch_reg)
12637     Address index(noreg, $switch_val$$Register, Address::times_1);
12638     __ jump(ArrayAddress($constantaddress, index), noreg);
12639   %}
12640   ins_pipe(pipe_jmp);
12641 %}
12642 
12643 // Jump Direct - Label defines a relative address from JMP+1
12644 instruct jmpDir(label labl) %{
12645   match(Goto);
12646   effect(USE labl);
12647 
12648   ins_cost(300);
12649   format %{ "JMP    $labl" %}
12650   size(5);
12651   ins_encode %{
12652     Label* L = $labl$$label;
12653     __ jmp(*L, false); // Always long jump
12654   %}
12655   ins_pipe( pipe_jmp );
12656 %}
12657 
12658 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12659 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12660   match(If cop cr);
12661   effect(USE labl);
12662 
12663   ins_cost(300);
12664   format %{ "J$cop    $labl" %}
12665   size(6);
12666   ins_encode %{
12667     Label* L = $labl$$label;
12668     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12669   %}
12670   ins_pipe( pipe_jcc );
12671 %}
12672 
12673 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12674 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12675   match(CountedLoopEnd cop cr);
12676   effect(USE labl);
12677 
12678   ins_cost(300);
12679   format %{ "J$cop    $labl\t# Loop end" %}
12680   size(6);
12681   ins_encode %{
12682     Label* L = $labl$$label;
12683     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12684   %}
12685   ins_pipe( pipe_jcc );
12686 %}
12687 
12688 // Jump Direct Conditional - using unsigned comparison
12689 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12690   match(If cop cmp);
12691   effect(USE labl);
12692 
12693   ins_cost(300);
12694   format %{ "J$cop,u  $labl" %}
12695   size(6);
12696   ins_encode %{
12697     Label* L = $labl$$label;
12698     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12699   %}
12700   ins_pipe(pipe_jcc);
12701 %}
12702 
12703 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12704   match(If cop cmp);
12705   effect(USE labl);
12706 
12707   ins_cost(200);
12708   format %{ "J$cop,u  $labl" %}
12709   size(6);
12710   ins_encode %{
12711     Label* L = $labl$$label;
12712     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12713   %}
12714   ins_pipe(pipe_jcc);
12715 %}
12716 
12717 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12718   match(If cop cmp);
12719   effect(USE labl);
12720 
12721   ins_cost(200);
12722   format %{ $$template
12723     if ($cop$$cmpcode == Assembler::notEqual) {
12724       $$emit$$"JP,u   $labl\n\t"
12725       $$emit$$"J$cop,u   $labl"
12726     } else {
12727       $$emit$$"JP,u   done\n\t"
12728       $$emit$$"J$cop,u   $labl\n\t"
12729       $$emit$$"done:"
12730     }
12731   %}
12732   ins_encode %{
12733     Label* l = $labl$$label;
12734     if ($cop$$cmpcode == Assembler::notEqual) {
12735       __ jcc(Assembler::parity, *l, false);
12736       __ jcc(Assembler::notEqual, *l, false);
12737     } else if ($cop$$cmpcode == Assembler::equal) {
12738       Label done;
12739       __ jccb(Assembler::parity, done);
12740       __ jcc(Assembler::equal, *l, false);
12741       __ bind(done);
12742     } else {
12743        ShouldNotReachHere();
12744     }
12745   %}
12746   ins_pipe(pipe_jcc);
12747 %}
12748 
12749 // ============================================================================
12750 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12751 // array for an instance of the superklass.  Set a hidden internal cache on a
12752 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12753 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
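// A rough, illustrative C sketch of what enc_PartialSubtypeCheck does (field
// names abbreviated; not the actual generated code):
//
//   for (int i = 0; i < sub->secondary_supers->length; i++) {
//     if (sub->secondary_supers->data[i] == super) {
//       sub->secondary_super_cache = super;  // hit: cache it, result/flags zero
//       return 0;
//     }
//   }
//   return 1;                                // miss: result/flags not-zero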
12754 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12755   match(Set result (PartialSubtypeCheck sub super));
12756   effect( KILL rcx, KILL cr );
12757 
12758   ins_cost(1100);  // slightly larger than the next version
12759   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12760             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12761             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12762             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12763             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12764             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12765             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12766      "miss:\t" %}
12767 
12768   opcode(0x1); // Force a XOR of EDI
12769   ins_encode( enc_PartialSubtypeCheck() );
12770   ins_pipe( pipe_slow );
12771 %}
12772 
12773 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12774   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12775   effect( KILL rcx, KILL result );
12776 
12777   ins_cost(1000);
12778   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12779             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12780             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12781             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12782             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12783             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12784      "miss:\t" %}
12785 
12786   opcode(0x0);  // No need to XOR EDI
12787   ins_encode( enc_PartialSubtypeCheck() );
12788   ins_pipe( pipe_slow );
12789 %}
12790 
12791 // ============================================================================
12792 // Branch Instructions -- short offset versions
12793 //
12794 // These instructions are used to replace jumps of a long offset (the default
12795 // match) with jumps of a shorter offset.  These instructions are all tagged
12796 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12797 // match rules in general matching.  Instead, the ADLC generates a conversion
12798 // method in the MachNode which can be used to do in-place replacement of the
12799 // long variant with the shorter variant.  The compiler will determine if a
12800 // branch can be taken by the is_short_branch_offset() predicate in the machine
12801 // specific code section of the file.
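// For reference, the short forms below encode as 2-byte instructions
// (JMP rel8 = EB cb, Jcc rel8 = 7x cb), whereas the long variants above are
// 5 bytes (JMP rel32 = E9 cd) and 6 bytes (Jcc rel32 = 0F 8x cd), matching
// the size() attributes on each instruct.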
12802 
12803 // Jump Direct - Label defines a relative address from JMP+1
12804 instruct jmpDir_short(label labl) %{
12805   match(Goto);
12806   effect(USE labl);
12807 
12808   ins_cost(300);
12809   format %{ "JMP,s  $labl" %}
12810   size(2);
12811   ins_encode %{
12812     Label* L = $labl$$label;
12813     __ jmpb(*L);
12814   %}
12815   ins_pipe( pipe_jmp );
12816   ins_short_branch(1);
12817 %}
12818 
12819 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12820 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12821   match(If cop cr);
12822   effect(USE labl);
12823 
12824   ins_cost(300);
12825   format %{ "J$cop,s  $labl" %}
12826   size(2);
12827   ins_encode %{
12828     Label* L = $labl$$label;
12829     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12830   %}
12831   ins_pipe( pipe_jcc );
12832   ins_short_branch(1);
12833 %}
12834 
12835 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12836 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12837   match(CountedLoopEnd cop cr);
12838   effect(USE labl);
12839 
12840   ins_cost(300);
12841   format %{ "J$cop,s  $labl\t# Loop end" %}
12842   size(2);
12843   ins_encode %{
12844     Label* L = $labl$$label;
12845     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12846   %}
12847   ins_pipe( pipe_jcc );
12848   ins_short_branch(1);
12849 %}
12850 
12851 // Jump Direct Conditional - using unsigned comparison
12852 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12853   match(If cop cmp);
12854   effect(USE labl);
12855 
12856   ins_cost(300);
12857   format %{ "J$cop,us $labl" %}
12858   size(2);
12859   ins_encode %{
12860     Label* L = $labl$$label;
12861     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12862   %}
12863   ins_pipe( pipe_jcc );
12864   ins_short_branch(1);
12865 %}
12866 
12867 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12868   match(If cop cmp);
12869   effect(USE labl);
12870 
12871   ins_cost(300);
12872   format %{ "J$cop,us $labl" %}
12873   size(2);
12874   ins_encode %{
12875     Label* L = $labl$$label;
12876     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12877   %}
12878   ins_pipe( pipe_jcc );
12879   ins_short_branch(1);
12880 %}
12881 
12882 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12883   match(If cop cmp);
12884   effect(USE labl);
12885 
12886   ins_cost(300);
12887   format %{ $$template
12888     if ($cop$$cmpcode == Assembler::notEqual) {
12889       $$emit$$"JP,u,s   $labl\n\t"
12890       $$emit$$"J$cop,u,s   $labl"
12891     } else {
12892       $$emit$$"JP,u,s   done\n\t"
12893       $$emit$$"J$cop,u,s  $labl\n\t"
12894       $$emit$$"done:"
12895     }
12896   %}
12897   size(4);
12898   ins_encode %{
12899     Label* l = $labl$$label;
12900     if ($cop$$cmpcode == Assembler::notEqual) {
12901       __ jccb(Assembler::parity, *l);
12902       __ jccb(Assembler::notEqual, *l);
12903     } else if ($cop$$cmpcode == Assembler::equal) {
12904       Label done;
12905       __ jccb(Assembler::parity, done);
12906       __ jccb(Assembler::equal, *l);
12907       __ bind(done);
12908     } else {
12909        ShouldNotReachHere();
12910     }
12911   %}
12912   ins_pipe(pipe_jcc);
12913   ins_short_branch(1);
12914 %}
12915 
12916 // ============================================================================
12917 // Long Compare
12918 //
12919 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12920 // is tricky.  The flavor of compare used depends on whether we are testing
12921 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12922 // The GE test is the negated LT test.  The LE test can be had by commuting
12923 // the operands (yielding a GE test) and then negating; negate again for the
12924 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12925 // NE test is negated from that.
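// Illustrative sketch (not generated code) of the tests described above for a
// long held as {hi, lo} in two 32-bit registers:
//   x <  0   <=>  x.hi < 0                // just the sign bit of the high half
//   x == 0   <=>  (x.hi | x.lo) == 0      // OR the halves, test for zero
//   x <  y   <=>  signed "less" condition of the borrow-propagating subtract:
//                 CMP x.lo,y.lo then SBB of the high halves, branched on with
//                 the LT/GE conditions (see the flavors below).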
12926 
12927 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12928 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12929 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12930 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12931 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12932 // foo match ends up with the wrong leaf.  One fix is to not match both
12933 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12934 // both forms beat the trinary form of long-compare and both are very useful
12935 // on Intel which has so few registers.
12936 
12937 // Manifest a CmpL result in an integer register.  Very painful.
12938 // This is the test to avoid.
12939 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12940   match(Set dst (CmpL3 src1 src2));
12941   effect( KILL flags );
12942   ins_cost(1000);
12943   format %{ "XOR    $dst,$dst\n\t"
12944             "CMP    $src1.hi,$src2.hi\n\t"
12945             "JLT,s  m_one\n\t"
12946             "JGT,s  p_one\n\t"
12947             "CMP    $src1.lo,$src2.lo\n\t"
12948             "JB,s   m_one\n\t"
12949             "JEQ,s  done\n"
12950     "p_one:\tINC    $dst\n\t"
12951             "JMP,s  done\n"
12952     "m_one:\tDEC    $dst\n"
12953      "done:" %}
12954   ins_encode %{
12955     Label p_one, m_one, done;
12956     __ xorptr($dst$$Register, $dst$$Register);
12957     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12958     __ jccb(Assembler::less,    m_one);
12959     __ jccb(Assembler::greater, p_one);
12960     __ cmpl($src1$$Register, $src2$$Register);
12961     __ jccb(Assembler::below,   m_one);
12962     __ jccb(Assembler::equal,   done);
12963     __ bind(p_one);
12964     __ incrementl($dst$$Register);
12965     __ jmpb(done);
12966     __ bind(m_one);
12967     __ decrementl($dst$$Register);
12968     __ bind(done);
12969   %}
12970   ins_pipe( pipe_slow );
12971 %}
12972 
12973 //======
12974 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12975 // compares.  Can be used for LE or GT compares by reversing arguments.
12976 // NOT GOOD FOR EQ/NE tests.
12977 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12978   match( Set flags (CmpL src zero ));
12979   ins_cost(100);
12980   format %{ "TEST   $src.hi,$src.hi" %}
12981   opcode(0x85);
12982   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12983   ins_pipe( ialu_cr_reg_reg );
12984 %}
12985 
12986 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12987 // compares.  Can be used for LE or GT compares by reversing arguments.
12988 // NOT GOOD FOR EQ/NE tests.
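// Why only LT/GE: after the CMP/SBB pair below, SF and OF describe the full
// 64-bit signed comparison, so the LT and GE conditions are valid, but ZF only
// reflects the high half (minus the borrow), so EQ/NE cannot be decided here.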
12989 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12990   match( Set flags (CmpL src1 src2 ));
12991   effect( TEMP tmp );
12992   ins_cost(300);
12993   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12994             "MOV    $tmp,$src1.hi\n\t"
12995             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12996   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12997   ins_pipe( ialu_cr_reg_reg );
12998 %}
12999 
// Long compares reg < zero/reg OR reg >= zero/reg.
13001 // Just a wrapper for a normal branch, plus the predicate test.
13002 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13003   match(If cmp flags);
13004   effect(USE labl);
13005   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13006   expand %{
13007     jmpCon(cmp,flags,labl);    // JLT or JGE...
13008   %}
13009 %}
13010 
13011 //======
13012 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13013 // compares.  Can be used for LE or GT compares by reversing arguments.
13014 // NOT GOOD FOR EQ/NE tests.
13015 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13016   match(Set flags (CmpUL src zero));
13017   ins_cost(100);
13018   format %{ "TEST   $src.hi,$src.hi" %}
13019   opcode(0x85);
13020   ins_encode(OpcP, RegReg_Hi2(src, src));
13021   ins_pipe(ialu_cr_reg_reg);
13022 %}
13023 
13024 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13025 // compares.  Can be used for LE or GT compares by reversing arguments.
13026 // NOT GOOD FOR EQ/NE tests.
13027 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13028   match(Set flags (CmpUL src1 src2));
13029   effect(TEMP tmp);
13030   ins_cost(300);
13031   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13032             "MOV    $tmp,$src1.hi\n\t"
13033             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13034   ins_encode(long_cmp_flags2(src1, src2, tmp));
13035   ins_pipe(ialu_cr_reg_reg);
13036 %}
13037 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13039 // Just a wrapper for a normal branch, plus the predicate test.
13040 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13041   match(If cmp flags);
13042   effect(USE labl);
13043   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13044   expand %{
13045     jmpCon(cmp, flags, labl);    // JLT or JGE...
13046   %}
13047 %}
13048 
13049 // Compare 2 longs and CMOVE longs.
13050 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13051   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13052   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13053   ins_cost(400);
13054   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13055             "CMOV$cmp $dst.hi,$src.hi" %}
13056   opcode(0x0F,0x40);
13057   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13058   ins_pipe( pipe_cmov_reg_long );
13059 %}
13060 
13061 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13062   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13063   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13064   ins_cost(500);
13065   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13066             "CMOV$cmp $dst.hi,$src.hi" %}
13067   opcode(0x0F,0x40);
13068   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13069   ins_pipe( pipe_cmov_reg_long );
13070 %}
13071 
13072 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
13073   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13074   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13075   ins_cost(400);
13076   expand %{
13077     cmovLL_reg_LTGE(cmp, flags, dst, src);
13078   %}
13079 %}
13080 
13081 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
13082   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13083   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13084   ins_cost(500);
13085   expand %{
13086     cmovLL_mem_LTGE(cmp, flags, dst, src);
13087   %}
13088 %}
13089 
13090 // Compare 2 longs and CMOVE ints.
13091 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13092   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13093   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13094   ins_cost(200);
13095   format %{ "CMOV$cmp $dst,$src" %}
13096   opcode(0x0F,0x40);
13097   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13098   ins_pipe( pipe_cmov_reg );
13099 %}
13100 
13101 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13102   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13103   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13104   ins_cost(250);
13105   format %{ "CMOV$cmp $dst,$src" %}
13106   opcode(0x0F,0x40);
13107   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13108   ins_pipe( pipe_cmov_mem );
13109 %}
13110 
13111 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
13112   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13113   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13114   ins_cost(200);
13115   expand %{
13116     cmovII_reg_LTGE(cmp, flags, dst, src);
13117   %}
13118 %}
13119 
13120 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
13121   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13122   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13123   ins_cost(250);
13124   expand %{
13125     cmovII_mem_LTGE(cmp, flags, dst, src);
13126   %}
13127 %}
13128 
13129 // Compare 2 longs and CMOVE ptrs.
13130 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13131   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13132   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13133   ins_cost(200);
13134   format %{ "CMOV$cmp $dst,$src" %}
13135   opcode(0x0F,0x40);
13136   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13137   ins_pipe( pipe_cmov_reg );
13138 %}
13139 
13140 // Compare 2 unsigned longs and CMOVE ptrs.
13141 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13142   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13143   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13144   ins_cost(200);
13145   expand %{
13146     cmovPP_reg_LTGE(cmp,flags,dst,src);
13147   %}
13148 %}
13149 
13150 // Compare 2 longs and CMOVE doubles
13151 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
13152   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13153   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13154   ins_cost(200);
13155   expand %{
13156     fcmovDPR_regS(cmp,flags,dst,src);
13157   %}
13158 %}
13159 
13160 // Compare 2 longs and CMOVE doubles
13161 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13162   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13163   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13164   ins_cost(200);
13165   expand %{
13166     fcmovD_regS(cmp,flags,dst,src);
13167   %}
13168 %}
13169 
13170 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
13171   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13172   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13173   ins_cost(200);
13174   expand %{
13175     fcmovFPR_regS(cmp,flags,dst,src);
13176   %}
13177 %}
13178 
13179 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13180   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13181   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13182   ins_cost(200);
13183   expand %{
13184     fcmovF_regS(cmp,flags,dst,src);
13185   %}
13186 %}
13187 
13188 //======
13189 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13190 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13191   match( Set flags (CmpL src zero ));
13192   effect(TEMP tmp);
13193   ins_cost(200);
13194   format %{ "MOV    $tmp,$src.lo\n\t"
13195             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13196   ins_encode( long_cmp_flags0( src, tmp ) );
13197   ins_pipe( ialu_reg_reg_long );
13198 %}
13199 
13200 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13201 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13202   match( Set flags (CmpL src1 src2 ));
13203   ins_cost(200+300);
13204   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13205             "JNE,s  skip\n\t"
13206             "CMP    $src1.hi,$src2.hi\n\t"
13207      "skip:\t" %}
13208   ins_encode( long_cmp_flags1( src1, src2 ) );
13209   ins_pipe( ialu_cr_reg_reg );
13210 %}
13211 
13212 // Long compare reg == zero/reg OR reg != zero/reg
13213 // Just a wrapper for a normal branch, plus the predicate test.
13214 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13215   match(If cmp flags);
13216   effect(USE labl);
13217   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13218   expand %{
13219     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13220   %}
13221 %}
13222 
13223 //======
13224 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13225 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13226   match(Set flags (CmpUL src zero));
13227   effect(TEMP tmp);
13228   ins_cost(200);
13229   format %{ "MOV    $tmp,$src.lo\n\t"
13230             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13231   ins_encode(long_cmp_flags0(src, tmp));
13232   ins_pipe(ialu_reg_reg_long);
13233 %}
13234 
13235 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13236 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13237   match(Set flags (CmpUL src1 src2));
13238   ins_cost(200+300);
13239   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13240             "JNE,s  skip\n\t"
13241             "CMP    $src1.hi,$src2.hi\n\t"
13242      "skip:\t" %}
13243   ins_encode(long_cmp_flags1(src1, src2));
13244   ins_pipe(ialu_cr_reg_reg);
13245 %}
13246 
13247 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13248 // Just a wrapper for a normal branch, plus the predicate test.
13249 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13250   match(If cmp flags);
13251   effect(USE labl);
13252   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13253   expand %{
13254     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13255   %}
13256 %}
13257 
13258 // Compare 2 longs and CMOVE longs.
13259 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13260   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13261   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13262   ins_cost(400);
13263   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13264             "CMOV$cmp $dst.hi,$src.hi" %}
13265   opcode(0x0F,0x40);
13266   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13267   ins_pipe( pipe_cmov_reg_long );
13268 %}
13269 
13270 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13271   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13272   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13273   ins_cost(500);
13274   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13275             "CMOV$cmp $dst.hi,$src.hi" %}
13276   opcode(0x0F,0x40);
13277   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13278   ins_pipe( pipe_cmov_reg_long );
13279 %}
13280 
13281 // Compare 2 longs and CMOVE ints.
13282 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13283   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13284   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13285   ins_cost(200);
13286   format %{ "CMOV$cmp $dst,$src" %}
13287   opcode(0x0F,0x40);
13288   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13289   ins_pipe( pipe_cmov_reg );
13290 %}
13291 
13292 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13293   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13294   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13295   ins_cost(250);
13296   format %{ "CMOV$cmp $dst,$src" %}
13297   opcode(0x0F,0x40);
13298   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13299   ins_pipe( pipe_cmov_mem );
13300 %}
13301 
13302 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13303   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13304   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13305   ins_cost(200);
13306   expand %{
13307     cmovII_reg_EQNE(cmp, flags, dst, src);
13308   %}
13309 %}
13310 
13311 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13312   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13313   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13314   ins_cost(250);
13315   expand %{
13316     cmovII_mem_EQNE(cmp, flags, dst, src);
13317   %}
13318 %}
13319 
13320 // Compare 2 longs and CMOVE ptrs.
13321 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13322   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13323   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13324   ins_cost(200);
13325   format %{ "CMOV$cmp $dst,$src" %}
13326   opcode(0x0F,0x40);
13327   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13328   ins_pipe( pipe_cmov_reg );
13329 %}
13330 
13331 // Compare 2 unsigned longs and CMOVE ptrs.
13332 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13333   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13334   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13335   ins_cost(200);
13336   expand %{
13337     cmovPP_reg_EQNE(cmp,flags,dst,src);
13338   %}
13339 %}
13340 
13341 // Compare 2 longs and CMOVE doubles
13342 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13343   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13344   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13345   ins_cost(200);
13346   expand %{
13347     fcmovDPR_regS(cmp,flags,dst,src);
13348   %}
13349 %}
13350 
13351 // Compare 2 longs and CMOVE doubles
13352 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13353   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13354   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13355   ins_cost(200);
13356   expand %{
13357     fcmovD_regS(cmp,flags,dst,src);
13358   %}
13359 %}
13360 
13361 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13362   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13363   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13364   ins_cost(200);
13365   expand %{
13366     fcmovFPR_regS(cmp,flags,dst,src);
13367   %}
13368 %}
13369 
13370 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13371   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13372   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13373   ins_cost(200);
13374   expand %{
13375     fcmovF_regS(cmp,flags,dst,src);
13376   %}
13377 %}
13378 
13379 //======
13380 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13381 // Same as cmpL_reg_flags_LEGT except must negate src
13382 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13383   match( Set flags (CmpL src zero ));
13384   effect( TEMP tmp );
13385   ins_cost(300);
13386   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13387             "CMP    $tmp,$src.lo\n\t"
13388             "SBB    $tmp,$src.hi\n\t" %}
13389   ins_encode( long_cmp_flags3(src, tmp) );
13390   ins_pipe( ialu_reg_reg_long );
13391 %}
13392 
13393 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13394 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13395 // requires a commuted test to get the same result.
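// In other words, src1 <= src2 is the same test as src2 >= src1, so the flags
// are computed for the swapped subtraction and the branch (a cmpOp_commute
// operand) tests the commuted condition, e.g. LE becomes GE and GT becomes LT.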
13396 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13397   match( Set flags (CmpL src1 src2 ));
13398   effect( TEMP tmp );
13399   ins_cost(300);
13400   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13401             "MOV    $tmp,$src2.hi\n\t"
13402             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13403   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13404   ins_pipe( ialu_cr_reg_reg );
13405 %}
13406 
// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
13409 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13410   match(If cmp flags);
13411   effect(USE labl);
13412   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13413   ins_cost(300);
13414   expand %{
13415     jmpCon(cmp,flags,labl);    // JGT or JLE...
13416   %}
13417 %}
13418 
13419 //======
13420 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13421 // Same as cmpUL_reg_flags_LEGT except must negate src
13422 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13423   match(Set flags (CmpUL src zero));
13424   effect(TEMP tmp);
13425   ins_cost(300);
13426   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13427             "CMP    $tmp,$src.lo\n\t"
13428             "SBB    $tmp,$src.hi\n\t" %}
13429   ins_encode(long_cmp_flags3(src, tmp));
13430   ins_pipe(ialu_reg_reg_long);
13431 %}
13432 
13433 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13434 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13435 // requires a commuted test to get the same result.
13436 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13437   match(Set flags (CmpUL src1 src2));
13438   effect(TEMP tmp);
13439   ins_cost(300);
13440   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13441             "MOV    $tmp,$src2.hi\n\t"
13442             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13443   ins_encode(long_cmp_flags2( src2, src1, tmp));
13444   ins_pipe(ialu_cr_reg_reg);
13445 %}
13446 
13447 // Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13448 // Just a wrapper for a normal branch, plus the predicate test
13449 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13450   match(If cmp flags);
13451   effect(USE labl);
13452   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13453   ins_cost(300);
13454   expand %{
13455     jmpCon(cmp, flags, labl);    // JGT or JLE...
13456   %}
13457 %}
13458 
13459 // Compare 2 longs and CMOVE longs.
13460 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13461   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13462   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13463   ins_cost(400);
13464   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13465             "CMOV$cmp $dst.hi,$src.hi" %}
13466   opcode(0x0F,0x40);
13467   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13468   ins_pipe( pipe_cmov_reg_long );
13469 %}
13470 
13471 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13472   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13473   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13474   ins_cost(500);
13475   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13476             "CMOV$cmp $dst.hi,$src.hi+4" %}
13477   opcode(0x0F,0x40);
13478   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13479   ins_pipe( pipe_cmov_reg_long );
13480 %}
13481 
13482 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13483   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13484   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13485   ins_cost(400);
13486   expand %{
13487     cmovLL_reg_LEGT(cmp, flags, dst, src);
13488   %}
13489 %}
13490 
13491 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13492   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13493   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13494   ins_cost(500);
13495   expand %{
13496     cmovLL_mem_LEGT(cmp, flags, dst, src);
13497   %}
13498 %}
13499 
13500 // Compare 2 longs and CMOVE ints.
13501 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13502   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13503   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13504   ins_cost(200);
13505   format %{ "CMOV$cmp $dst,$src" %}
13506   opcode(0x0F,0x40);
13507   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13508   ins_pipe( pipe_cmov_reg );
13509 %}
13510 
13511 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13512   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13513   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13514   ins_cost(250);
13515   format %{ "CMOV$cmp $dst,$src" %}
13516   opcode(0x0F,0x40);
13517   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13518   ins_pipe( pipe_cmov_mem );
13519 %}
13520 
13521 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13522   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13523   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13524   ins_cost(200);
13525   expand %{
13526     cmovII_reg_LEGT(cmp, flags, dst, src);
13527   %}
13528 %}
13529 
13530 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13531   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13532   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13533   ins_cost(250);
13534   expand %{
13535     cmovII_mem_LEGT(cmp, flags, dst, src);
13536   %}
13537 %}
13538 
13539 // Compare 2 longs and CMOVE ptrs.
13540 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13541   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13542   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13543   ins_cost(200);
13544   format %{ "CMOV$cmp $dst,$src" %}
13545   opcode(0x0F,0x40);
13546   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13547   ins_pipe( pipe_cmov_reg );
13548 %}
13549 
13550 // Compare 2 unsigned longs and CMOVE ptrs.
13551 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13552   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13553   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13554   ins_cost(200);
13555   expand %{
13556     cmovPP_reg_LEGT(cmp,flags,dst,src);
13557   %}
13558 %}
13559 
13560 // Compare 2 longs and CMOVE doubles
13561 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13562   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13563   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13564   ins_cost(200);
13565   expand %{
13566     fcmovDPR_regS(cmp,flags,dst,src);
13567   %}
13568 %}
13569 
13570 // Compare 2 longs and CMOVE doubles
13571 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13572   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13573   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13574   ins_cost(200);
13575   expand %{
13576     fcmovD_regS(cmp,flags,dst,src);
13577   %}
13578 %}
13579 
13580 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13581   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13582   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13583   ins_cost(200);
13584   expand %{
13585     fcmovFPR_regS(cmp,flags,dst,src);
13586   %}
13587 %}
13588 
13589 
13590 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13591   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13592   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13593   ins_cost(200);
13594   expand %{
13595     fcmovF_regS(cmp,flags,dst,src);
13596   %}
13597 %}
13598 
13599 
13600 // ============================================================================
13601 // Procedure Call/Return Instructions
13602 // Call Java Static Instruction
13603 // Note: If this code changes, the corresponding ret_addr_offset() and
13604 //       compute_padding() functions will have to be adjusted.
13605 instruct CallStaticJavaDirect(method meth) %{
13606   match(CallStaticJava);
13607   effect(USE meth);
13608 
13609   ins_cost(300);
13610   format %{ "CALL,static " %}
13611   opcode(0xE8); /* E8 cd */
13612   ins_encode( pre_call_resets,
13613               Java_Static_Call( meth ),
13614               call_epilog,
13615               post_call_FPU );
13616   ins_pipe( pipe_slow );
13617   ins_alignment(4);
13618 %}
13619 
13620 // Call Java Dynamic Instruction
13621 // Note: If this code changes, the corresponding ret_addr_offset() and
13622 //       compute_padding() functions will have to be adjusted.
13623 instruct CallDynamicJavaDirect(method meth) %{
13624   match(CallDynamicJava);
13625   effect(USE meth);
13626 
13627   ins_cost(300);
13628   format %{ "MOV    EAX,(oop)-1\n\t"
13629             "CALL,dynamic" %}
13630   opcode(0xE8); /* E8 cd */
13631   ins_encode( pre_call_resets,
13632               Java_Dynamic_Call( meth ),
13633               call_epilog,
13634               post_call_FPU );
13635   ins_pipe( pipe_slow );
13636   ins_alignment(4);
13637 %}
13638 
13639 // Call Runtime Instruction
13640 instruct CallRuntimeDirect(method meth) %{
13641   match(CallRuntime);
13642   effect(USE meth);
13643 
13644   ins_cost(300);
13645   format %{ "CALL,runtime " %}
13646   opcode(0xE8); /* E8 cd */
13647   // Use FFREEs to clear entries in float stack
13648   ins_encode( pre_call_resets,
13649               FFree_Float_Stack_All,
13650               Java_To_Runtime( meth ),
13651               post_call_FPU );
13652   ins_pipe( pipe_slow );
13653 %}
13654 
13655 // Call runtime without safepoint
13656 instruct CallLeafDirect(method meth) %{
13657   match(CallLeaf);
13658   effect(USE meth);
13659 
13660   ins_cost(300);
13661   format %{ "CALL_LEAF,runtime " %}
13662   opcode(0xE8); /* E8 cd */
13663   ins_encode( pre_call_resets,
13664               FFree_Float_Stack_All,
13665               Java_To_Runtime( meth ),
13666               Verify_FPU_For_Leaf, post_call_FPU );
13667   ins_pipe( pipe_slow );
13668 %}
13669 
13670 instruct CallLeafNoFPDirect(method meth) %{
13671   match(CallLeafNoFP);
13672   effect(USE meth);
13673 
13674   ins_cost(300);
13675   format %{ "CALL_LEAF_NOFP,runtime " %}
13676   opcode(0xE8); /* E8 cd */
13677   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13678   ins_pipe( pipe_slow );
13679 %}
13680 
13681 
13682 // Return Instruction
13683 // Remove the return address & jump to it.
13684 instruct Ret() %{
13685   match(Return);
13686   format %{ "RET" %}
13687   opcode(0xC3);
13688   ins_encode(OpcP);
13689   ins_pipe( pipe_jmp );
13690 %}
13691 
13692 // Tail Call; Jump from runtime stub to Java code.
13693 // Also known as an 'interprocedural jump'.
13694 // Target of jump will eventually return to caller.
13695 // TailJump below removes the return address.
13696 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13697   match(TailCall jump_target method_ptr);
13698   ins_cost(300);
13699   format %{ "JMP    $jump_target \t# EBX holds method" %}
13700   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13701   ins_encode( OpcP, RegOpc(jump_target) );
13702   ins_pipe( pipe_jmp );
13703 %}
13704 
13705 
13706 // Tail Jump; remove the return address; jump to target.
13707 // TailCall above leaves the return address around.
13708 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13709   match( TailJump jump_target ex_oop );
13710   ins_cost(300);
13711   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13712             "JMP    $jump_target " %}
13713   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13714   ins_encode( enc_pop_rdx,
13715               OpcP, RegOpc(jump_target) );
13716   ins_pipe( pipe_jmp );
13717 %}
13718 
13719 // Create exception oop: created by stack-crawling runtime code.
13720 // Created exception is now available to this handler, and is set up
13721 // just prior to jumping to this handler.  No code emitted.
13722 instruct CreateException( eAXRegP ex_oop )
13723 %{
13724   match(Set ex_oop (CreateEx));
13725 
13726   size(0);
13727   // use the following format syntax
13728   format %{ "# exception oop is in EAX; no code emitted" %}
13729   ins_encode();
13730   ins_pipe( empty );
13731 %}
13732 
13733 
13734 // Rethrow exception:
13735 // The exception oop will come in the first argument position.
13736 // Then JUMP (not call) to the rethrow stub code.
13737 instruct RethrowException()
13738 %{
13739   match(Rethrow);
13740 
13741   // use the following format syntax
13742   format %{ "JMP    rethrow_stub" %}
13743   ins_encode(enc_rethrow);
13744   ins_pipe( pipe_jmp );
13745 %}
13746 
13747 // inlined locking and unlocking
13748 
13749 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
13750   predicate(Compile::current()->use_rtm());
13751   match(Set cr (FastLock object box));
13752   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
13753   ins_cost(300);
13754   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13755   ins_encode %{
13756     __ get_thread($thread$$Register);
13757     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13758                  $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
13759                  _rtm_counters, _stack_rtm_counters,
13760                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13761                  true, ra_->C->profile_rtm());
13762   %}
13763   ins_pipe(pipe_slow);
13764 %}
13765 
13766 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
13767   predicate(LockingMode != LM_LIGHTWEIGHT && !Compile::current()->use_rtm());
13768   match(Set cr (FastLock object box));
13769   effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
13770   ins_cost(300);
13771   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13772   ins_encode %{
13773     __ get_thread($thread$$Register);
13774     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13775                  $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
13776   %}
13777   ins_pipe(pipe_slow);
13778 %}
13779 
13780 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp, eRegP scr) %{
13781   predicate(LockingMode != LM_LIGHTWEIGHT);
13782   match(Set cr (FastUnlock object box));
13783   effect(TEMP tmp, TEMP scr, USE_KILL box);
13784   ins_cost(300);
13785   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13786   ins_encode %{
13787     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register, ra_->C->use_rtm());
13788   %}
13789   ins_pipe(pipe_slow);
13790 %}
13791 
13792 instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
13793   predicate(LockingMode == LM_LIGHTWEIGHT);
13794   match(Set cr (FastLock object box));
13795   effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
13796   ins_cost(300);
13797   format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
13798   ins_encode %{
13799     __ get_thread($thread$$Register);
13800     __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
13801   %}
13802   ins_pipe(pipe_slow);
13803 %}
13804 
13805 instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP scr, eRegP thread) %{
13806   predicate(LockingMode == LM_LIGHTWEIGHT);
13807   match(Set cr (FastUnlock object eax_reg));
13808   effect(TEMP tmp, TEMP scr, USE_KILL eax_reg, TEMP thread);
13809   ins_cost(300);
13810   format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
13811   ins_encode %{
13812     __ get_thread($thread$$Register);
13813     __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $scr$$Register, $thread$$Register);
13814   %}
13815   ins_pipe(pipe_slow);
13816 %}
13817 
13818 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13819   predicate(Matcher::vector_length(n) <= 32);
13820   match(Set dst (MaskAll src));
13821   format %{ "mask_all_evexL_LT32 $dst, $src \t" %}
13822   ins_encode %{
13823     int mask_len = Matcher::vector_length(this);
13824     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13825   %}
13826   ins_pipe( pipe_slow );
13827 %}
13828 
13829 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13830   predicate(Matcher::vector_length(n) > 32);
13831   match(Set dst (MaskAll src));
13832   effect(TEMP ktmp);
13833   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13834   ins_encode %{
13835     int mask_len = Matcher::vector_length(this);
13836     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13837   %}
13838   ins_pipe( pipe_slow );
13839 %}
13840 
13841 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13842   predicate(Matcher::vector_length(n) > 32);
13843   match(Set dst (MaskAll src));
13844   effect(TEMP ktmp);
13845   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13846   ins_encode %{
13847     int mask_len = Matcher::vector_length(this);
13848     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13849   %}
13850   ins_pipe( pipe_slow );
13851 %}
13852 
13853 // ============================================================================
13854 // Safepoint Instruction
13855 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13856   match(SafePoint poll);
13857   effect(KILL cr, USE poll);
13858 
13859   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13860   ins_cost(125);
13861   // EBP would need size(3)
13862   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13863   ins_encode %{
13864     __ relocate(relocInfo::poll_type);
13865     address pre_pc = __ pc();
13866     __ testl(rax, Address($poll$$Register, 0));
13867     address post_pc = __ pc();
13868     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13869   %}
13870   ins_pipe(ialu_reg_mem);
13871 %}
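
// Encoding note (general IA-32 ModRM rules, added here for clarity): the
// "TEST EAX,[reg]" form is 85 /r and fits in two bytes only when the base
// register has a mod=00 (no displacement) encoding.  EBP has no such form,
// since its mod=00 slot denotes a bare disp32, so it would need a zero disp8
// and size(3); that is why the poll operand is eRegP_no_EBP and size(2) holds.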
13872 
13873 
13874 // ============================================================================
13875 // This name is KNOWN by the ADLC and cannot be changed.
13876 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13877 // for this guy.
13878 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13879   match(Set dst (ThreadLocal));
13880   effect(DEF dst, KILL cr);
13881 
13882   format %{ "MOV    $dst, Thread::current()" %}
13883   ins_encode %{
13884     Register dstReg = as_Register($dst$$reg);
13885     __ get_thread(dstReg);
13886   %}
13887   ins_pipe( ialu_reg_fat );
13888 %}
13889 
13890 
13891 
13892 //----------PEEPHOLE RULES-----------------------------------------------------
13893 // These must follow all instruction definitions as they use the names
13894 // defined in the instruction definitions.
13895 //
13896 // peepmatch ( root_instr_name [preceding_instruction]* );
13897 //
13898 // peepconstraint %{
13899 // (instruction_number.operand_name relational_op instruction_number.operand_name
13900 //  [, ...] );
13901 // // instruction numbers are zero-based using left to right order in peepmatch
13902 //
13903 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13904 // // provide an instruction_number.operand_name for each operand that appears
13905 // // in the replacement instruction's match rule
13906 //
13907 // ---------VM FLAGS---------------------------------------------------------
13908 //
13909 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13910 //
13911 // Each peephole rule is given an identifying number starting with zero and
13912 // increasing by one in the order seen by the parser.  An individual peephole
13913 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13914 // on the command-line.
13915 //
13916 // ---------CURRENT LIMITATIONS----------------------------------------------
13917 //
13918 // Only match adjacent instructions in same basic block
13919 // Only equality constraints
13920 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13921 // Only one replacement instruction
13922 //
13923 // ---------EXAMPLE----------------------------------------------------------
13924 //
13925 // // pertinent parts of existing instructions in architecture description
13926 // instruct movI(rRegI dst, rRegI src) %{
13927 //   match(Set dst (CopyI src));
13928 // %}
13929 //
13930 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13931 //   match(Set dst (AddI dst src));
13932 //   effect(KILL cr);
13933 // %}
13934 //
13935 // // Change (inc mov) to lea
13936 // peephole %{
13937 //   // increment preceded by register-register move
13938 //   peepmatch ( incI_eReg movI );
13939 //   // require that the destination register of the increment
13940 //   // match the destination register of the move
13941 //   peepconstraint ( 0.dst == 1.dst );
13942 //   // construct a replacement instruction that sets
13943 //   // the destination to ( move's source register + one )
13944 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13945 // %}
13946 //
13947 // The implementation no longer uses movX instructions since the
13948 // machine-independent system no longer uses CopyX nodes.
13949 //
13950 // peephole %{
13951 //   peepmatch ( incI_eReg movI );
13952 //   peepconstraint ( 0.dst == 1.dst );
13953 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13954 // %}
13955 //
13956 // peephole %{
13957 //   peepmatch ( decI_eReg movI );
13958 //   peepconstraint ( 0.dst == 1.dst );
13959 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13960 // %}
13961 //
13962 // peephole %{
13963 //   peepmatch ( addI_eReg_imm movI );
13964 //   peepconstraint ( 0.dst == 1.dst );
13965 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13966 // %}
13967 //
13968 // peephole %{
13969 //   peepmatch ( addP_eReg_imm movP );
13970 //   peepconstraint ( 0.dst == 1.dst );
13971 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13972 // %}
13973 
13974 // // Change load of spilled value to only a spill
13975 // instruct storeI(memory mem, rRegI src) %{
13976 //   match(Set mem (StoreI mem src));
13977 // %}
13978 //
13979 // instruct loadI(rRegI dst, memory mem) %{
13980 //   match(Set dst (LoadI mem));
13981 // %}
13982 //
13983 peephole %{
13984   peepmatch ( loadI storeI );
13985   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13986   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13987 %}
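
// An informal gloss of the rule above: instruction 0 is the loadI root and
// instruction 1 is the preceding storeI.  The constraints require the load to
// read back exactly what the store just wrote (same memory operand, load
// destination equal to the stored register), so the matched pair is replaced
// by the store alone and the redundant reload of the spilled value disappears.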
13988 
13989 //----------SMARTSPILL RULES---------------------------------------------------
13990 // These must follow all instruction definitions as they use the names
13991 // defined in the instruction definitions.