1 //
    2 // Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// NOTE: every reg_def in this file passes one more trailing argument than
// the synopsis above shows: a VMReg expression such as rbx->as_VMReg(), or
// VMRegImpl::Bad().
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here is the trick: FPR1 is really st(0), except in the midst of
// emitting the assembly for a MachNode.  During the emission the FPU stack
// is pushed, making FPR1 == st(1) temporarily.  However, at any safepoint
// the stack will not have this element, so FPR1 == st(0) from the
// oopMap viewpoint.  This same weirdness with numbering forces the
// instruction encoding to play games with the register
// encode to correct for this 0/1 issue.  See MachSpillCopyNode::implementation,
// where it does flt->flt moves, for an example.
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag
// PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_with_ebp and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_with_ebp and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_with_ebp and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding the EBP,EDI pair)
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_with_ebp and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Classes of integer register pairs that align with the calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and not EBP either), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_with_ebp and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
// Declaration only; the definition is not in this part of the file
// (presumably it lives further down in the source block -- TODO confirm).
extern bool is_operand_hi32_zero(Node* n);
%}
  250 
  251 source %{
// Short names for the Assembler operand-format constants; the encoders in
// this block pass a relocation "format" argument to relocate().
#define   RELOC_IMM32    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Shorthand used throughout this block: "__ foo()" expands to "masm->foo()",
// so a variable named 'masm' must be in scope wherever it is used.
#define __ masm->

// How to find the high register of a Long pair, given the low register.
// The high half has encoding low+2; with the reg_def encodings above this
// gives EAX(0)->EDX(2), ECX(1)->EBX(3) and EBP(5)->EDI(7).
#define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
#define   HIGH_FROM_LOW_ENC(x) ((x)+2)
  260 
// The sign-mask pools defined below (float_signmask_pool, double_signmask_pool,
// float_signflip_pool and double_signflip_pool) provide 128-bit aligned
// bitmasks to the XMM instructions, to allow sign-masking or sign-bit
// flipping.  They allow fast versions of NegF/NegD and AbsF/AbsD.
  264 
  265 void reg_mask_init() {}
  266 
  267 // Note: 'double' and 'long long' have 32-bits alignment on x86.
  268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  269   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  270   // of 128-bits operands for SSE instructions.
  271   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  272   // Store the value to a 128-bits operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
  277 
// Buffer for 128-bits masks used by SSE instructions.
// Each mask occupies two jlongs (16 bytes).  One extra 16-byte slot is
// reserved at the front because double_quadword() rounds the address it is
// given *down* to a 16-byte boundary; the masks below are therefore requested
// at indices 1*2 .. 4*2 so that the rounded-down slots stay inside the pool.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// signmask: all bits set except the sign bit of every 32-bit (float) or
//           64-bit (double) lane.
// signflip: only the sign bit of every lane set.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
  299 
  300 // !!!!! Special hack to get all type of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned to
  323 // ensure that it does not span a cache line so that it can be patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned to
  331 // ensure that it does not span a cache line so that it can be patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
  338 
  339 // EMIT_RM()
  340 void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   __ emit_int8(c);
  343 }
  344 
  345 // EMIT_CC()
  346 void emit_cc(C2_MacroAssembler *masm, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   __ emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(C2_MacroAssembler *masm, int code) {
  353   __ emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) {
  358   __ relocate(__ inst_mark() + offset, reloc);
  359   emit_opcode(masm, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(C2_MacroAssembler *masm, int d8) {
  364   __ emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(C2_MacroAssembler *masm, int d16) {
  369   __ emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(C2_MacroAssembler *masm, int d32) {
  374   __ emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   __ relocate(__ inst_mark(), reloc, format);
  381   __ emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   __ relocate(__ inst_mark(), rspec, format);
  393   __ emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) {
  398   emit_opcode( masm, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( masm, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
  402     emit_d8 (masm, disp);     // Displacement  // R/M byte
  403   } else {
  404     emit_rm( masm, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
  406     emit_d32(masm, disp);     // Displacement  // R/M byte
  407   }
  408 }
  409 
  410    // rRegI ereg, memory mem) %{    // emit_reg_mem
  411 void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  412   // There is no index & no scale, use form without SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(masm, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(masm, 0x1, reg_encoding, base);
  423         emit_d8(masm, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(masm, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(masm, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (masm, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(masm, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(masm, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (masm, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(masm, 0x0, reg_encoding, 0x4);
  450       emit_rm(masm, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(masm, 0x1, reg_encoding, 0x4);
  456         emit_rm(masm, scale, index, base);
  457         emit_d8(masm, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(masm, 0x2, reg_encoding, 0x4);
  462           emit_rm(masm, scale, index, 0x04);
  463         } else {
  464           emit_rm(masm, 0x2, reg_encoding, 0x4);
  465           emit_rm(masm, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(masm, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (masm, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
  476 
  477 
  478 void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( masm, 0x8B );
  483     emit_rm(masm, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
  486 
  487 void emit_cmpfp_fixup(MacroAssembler* masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss instructions set ZF,PF,CF flags and
  493   // zero OF,AF,SF for NaN values.
  494   // Fixup flags by zeroing ZF,PF so that compare of NaN
  495   // values returns 'less than' result (CF is set).
  496   // Leave the rest of flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
  506 
  507 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
  516 
  517 
  518 //=============================================================================
// The constant base node's output register mask is the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612 
  613   __ verified_entry(C);
  614 
  615   C->output()->set_frame_complete(__ offset());
  616 
  617   if (C->has_mach_constant_base_node()) {
  618     // NOTE: We set the table base offset here because users might be
  619     // emitted before MachConstantBaseNode.
  620     ConstantTable& constant_table = C->output()->constant_table();
  621     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  622   }
  623 }
  624 
  625 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  626   return MachNode::size(ra_); // too many variables; just compute it the hard way
  627 }
  628 
  629 int MachPrologNode::reloc() const {
  630   return 0; // a large enough number
  631 }
  632 
  633 //=============================================================================
  634 #ifndef PRODUCT
  635 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  636   Compile *C = ra_->C;
  637   int framesize = C->output()->frame_size_in_bytes();
  638   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  639   // Remove two words for return addr and rbp,
  640   framesize -= 2*wordSize;
  641 
  642   if (C->max_vector_size() > 16) {
  643     st->print("VZEROUPPER");
  644     st->cr(); st->print("\t");
  645   }
  646   if (C->in_24_bit_fp_mode()) {
  647     st->print("FLDCW  standard control word");
  648     st->cr(); st->print("\t");
  649   }
  650   if (framesize) {
  651     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  652     st->cr(); st->print("\t");
  653   }
  654   st->print_cr("POPL   EBP"); st->print("\t");
  655   if (do_polling() && C->is_method_compilation()) {
  656     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  657               "JA      #safepoint_stub\t"
  658               "# Safepoint: poll for GC");
  659   }
  660 }
  661 #endif
  662 
  663 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  664   Compile *C = ra_->C;
  665 
  666   if (C->max_vector_size() > 16) {
  667     // Clear upper bits of YMM registers when current compiled code uses
  668     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  669     __ vzeroupper();
  670   }
  671   // If method set FPU control word, restore to standard control word
  672   if (C->in_24_bit_fp_mode()) {
  673     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  674   }
  675 
  676   int framesize = C->output()->frame_size_in_bytes();
  677   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  678   // Remove two words for return addr and rbp,
  679   framesize -= 2*wordSize;
  680 
  681   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  682 
  683   if (framesize >= 128) {
  684     emit_opcode(masm, 0x81); // add  SP, #framesize
  685     emit_rm(masm, 0x3, 0x00, ESP_enc);
  686     emit_d32(masm, framesize);
  687   } else if (framesize) {
  688     emit_opcode(masm, 0x83); // add  SP, #framesize
  689     emit_rm(masm, 0x3, 0x00, ESP_enc);
  690     emit_d8(masm, framesize);
  691   }
  692 
  693   emit_opcode(masm, 0x58 | EBP_enc);
  694 
  695   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  696     __ reserved_stack_check();
  697   }
  698 
  699   if (do_polling() && C->is_method_compilation()) {
  700     Register thread = as_Register(EBX_enc);
  701     __ get_thread(thread);
  702     Label dummy_label;
  703     Label* code_stub = &dummy_label;
  704     if (!C->output()->in_scratch_emit_size()) {
  705       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  706       C->output()->add_stub(stub);
  707       code_stub = &stub->entry();
  708     }
  709     __ set_inst_mark();
  710     __ relocate(relocInfo::poll_return_type);
  711     __ clear_inst_mark();
  712     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  713   }
  714 }
  715 
  716 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  717   return MachNode::size(ra_); // too many variables; just compute it
  718                               // the hard way
  719 }
  720 
  721 int MachEpilogNode::reloc() const {
  722   return 0; // a large enough number
  723 }
  724 
  725 const Pipeline * MachEpilogNode::pipeline() const {
  726   return MachNode::pipeline_class();
  727 }
  728 
  729 //=============================================================================
  730 
  731 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  732 static enum RC rc_class( OptoReg::Name reg ) {
  733 
  734   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  735   if (OptoReg::is_stack(reg)) return rc_stack;
  736 
  737   VMReg r = OptoReg::as_VMReg(reg);
  738   if (r->is_Register()) return rc_int;
  739   if (r->is_FloatRegister()) {
  740     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  741     return rc_float;
  742   }
  743   if (r->is_KRegister()) return rc_kreg;
  744   assert(r->is_XMMRegister(), "must be");
  745   return rc_xmm;
  746 }
  747 
  748 static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg,
  749                         int opcode, const char *op_str, int size, outputStream* st ) {
  750   if( masm ) {
  751     masm->set_inst_mark();
  752     emit_opcode  (masm, opcode );
  753     encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  754     masm->clear_inst_mark();
  755 #ifndef PRODUCT
  756   } else if( !do_size ) {
  757     if( size != 0 ) st->print("\n\t");
  758     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  759       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  760       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  761     } else { // FLD, FST, PUSH, POP
  762       st->print("%s [ESP + #%d]",op_str,offset);
  763     }
  764 #endif
  765   }
  766   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  767   return size+3+offset_size;
  768 }
  769 
  770 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  771 static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load,
  772                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  773   int in_size_in_bits = Assembler::EVEX_32bit;
  774   int evex_encoding = 0;
  775   if (reg_lo+1 == reg_hi) {
  776     in_size_in_bits = Assembler::EVEX_64bit;
  777     evex_encoding = Assembler::VEX_W;
  778   }
  779   if (masm) {
  780     // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
  781     //                          it maps more cases to single byte displacement
  782     __ set_managed();
  783     if (reg_lo+1 == reg_hi) { // double move?
  784       if (is_load) {
  785         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  786       } else {
  787         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  788       }
  789     } else {
  790       if (is_load) {
  791         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  792       } else {
  793         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  794       }
  795     }
  796 #ifndef PRODUCT
  797   } else if (!do_size) {
  798     if (size != 0) st->print("\n\t");
  799     if (reg_lo+1 == reg_hi) { // double move?
  800       if (is_load) st->print("%s %s,[ESP + #%d]",
  801                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  802                               Matcher::regName[reg_lo], offset);
  803       else         st->print("MOVSD  [ESP + #%d],%s",
  804                               offset, Matcher::regName[reg_lo]);
  805     } else {
  806       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  807                               Matcher::regName[reg_lo], offset);
  808       else         st->print("MOVSS  [ESP + #%d],%s",
  809                               offset, Matcher::regName[reg_lo]);
  810     }
  811 #endif
  812   }
  813   bool is_single_byte = false;
  814   if ((UseAVX > 2) && (offset != 0)) {
  815     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  816   }
  817   int offset_size = 0;
  818   if (UseAVX > 2 ) {
  819     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  820   } else {
  821     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  822   }
  823   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  824   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  825   return size+5+offset_size;
  826 }
  827 
  828 
  829 static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
  830                             int src_hi, int dst_hi, int size, outputStream* st ) {
  831   if (masm) {
  832     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  833     __ set_managed();
  834     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  835       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  836                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  837     } else {
  838       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  839                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  840     }
  841 #ifndef PRODUCT
  842   } else if (!do_size) {
  843     if (size != 0) st->print("\n\t");
  844     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
  845       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  846         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  847       } else {
  848         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  849       }
  850     } else {
  851       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  852         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  853       } else {
  854         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  855       }
  856     }
  857 #endif
  858   }
  859   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  860   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  861   int sz = (UseAVX > 2) ? 6 : 4;
  862   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  863       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  864   return size + sz;
  865 }
  866 
  867 static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
  868                             int src_hi, int dst_hi, int size, outputStream* st ) {
  869   // 32-bit
  870   if (masm) {
  871     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  872     __ set_managed();
  873     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  874              as_Register(Matcher::_regEncode[src_lo]));
  875 #ifndef PRODUCT
  876   } else if (!do_size) {
  877     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  878 #endif
  879   }
  880   return (UseAVX> 2) ? 6 : 4;
  881 }
  882 
  883 
  884 static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
  885                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  886   // 32-bit
  887   if (masm) {
  888     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  889     __ set_managed();
  890     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  891              as_XMMRegister(Matcher::_regEncode[src_lo]));
  892 #ifndef PRODUCT
  893   } else if (!do_size) {
  894     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  895 #endif
  896   }
  897   return (UseAVX> 2) ? 6 : 4;
  898 }
  899 
  900 static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) {
  901   if( masm ) {
  902     emit_opcode(masm, 0x8B );
  903     emit_rm    (masm, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  904 #ifndef PRODUCT
  905   } else if( !do_size ) {
  906     if( size != 0 ) st->print("\n\t");
  907     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  908 #endif
  909   }
  910   return size+2;
  911 }
  912 
  913 static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  914                                  int offset, int size, outputStream* st ) {
  915   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  916     if( masm ) {
  917       emit_opcode( masm, 0xD9 );  // FLD (i.e., push it)
  918       emit_d8( masm, 0xC0-1+Matcher::_regEncode[src_lo] );
  919 #ifndef PRODUCT
  920     } else if( !do_size ) {
  921       if( size != 0 ) st->print("\n\t");
  922       st->print("FLD    %s",Matcher::regName[src_lo]);
  923 #endif
  924     }
  925     size += 2;
  926   }
  927 
  928   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  929   const char *op_str;
  930   int op;
  931   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  932     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  933     op = 0xDD;
  934   } else {                   // 32-bit store
  935     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  936     op = 0xD9;
  937     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  938   }
  939 
  940   return impl_helper(masm,do_size,false,offset,st_op,op,op_str,size, st);
  941 }
  942 
  943 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  944 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
  945                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  946 
  947 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
  948                             int stack_offset, int reg, uint ireg, outputStream* st);
  949 
  950 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
  951                                      int dst_offset, uint ireg, outputStream* st) {
  952   if (masm) {
  953     switch (ireg) {
  954     case Op_VecS:
  955       __ pushl(Address(rsp, src_offset));
  956       __ popl (Address(rsp, dst_offset));
  957       break;
  958     case Op_VecD:
  959       __ pushl(Address(rsp, src_offset));
  960       __ popl (Address(rsp, dst_offset));
  961       __ pushl(Address(rsp, src_offset+4));
  962       __ popl (Address(rsp, dst_offset+4));
  963       break;
  964     case Op_VecX:
  965       __ movdqu(Address(rsp, -16), xmm0);
  966       __ movdqu(xmm0, Address(rsp, src_offset));
  967       __ movdqu(Address(rsp, dst_offset), xmm0);
  968       __ movdqu(xmm0, Address(rsp, -16));
  969       break;
  970     case Op_VecY:
  971       __ vmovdqu(Address(rsp, -32), xmm0);
  972       __ vmovdqu(xmm0, Address(rsp, src_offset));
  973       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  974       __ vmovdqu(xmm0, Address(rsp, -32));
  975       break;
  976     case Op_VecZ:
  977       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  978       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  979       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  980       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  981       break;
  982     default:
  983       ShouldNotReachHere();
  984     }
  985 #ifndef PRODUCT
  986   } else {
  987     switch (ireg) {
  988     case Op_VecS:
  989       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  990                 "popl    [rsp + #%d]",
  991                 src_offset, dst_offset);
  992       break;
  993     case Op_VecD:
  994       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
  995                 "popq    [rsp + #%d]\n\t"
  996                 "pushl   [rsp + #%d]\n\t"
  997                 "popq    [rsp + #%d]",
  998                 src_offset, dst_offset, src_offset+4, dst_offset+4);
  999       break;
 1000      case Op_VecX:
 1001       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1002                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1003                 "movdqu  [rsp + #%d], xmm0\n\t"
 1004                 "movdqu  xmm0, [rsp - #16]",
 1005                 src_offset, dst_offset);
 1006       break;
 1007     case Op_VecY:
 1008       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1009                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1010                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1011                 "vmovdqu xmm0, [rsp - #32]",
 1012                 src_offset, dst_offset);
 1013       break;
 1014     case Op_VecZ:
 1015       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1016                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1017                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1018                 "vmovdqu xmm0, [rsp - #64]",
 1019                 src_offset, dst_offset);
 1020       break;
 1021     default:
 1022       ShouldNotReachHere();
 1023     }
 1024 #endif
 1025   }
 1026 }
 1027 
 1028 uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1029   // Get registers to move
 1030   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1031   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1032   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1033   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1034 
 1035   enum RC src_second_rc = rc_class(src_second);
 1036   enum RC src_first_rc = rc_class(src_first);
 1037   enum RC dst_second_rc = rc_class(dst_second);
 1038   enum RC dst_first_rc = rc_class(dst_first);
 1039 
 1040   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1041 
 1042   // Generate spill code!
 1043   int size = 0;
 1044 
 1045   if( src_first == dst_first && src_second == dst_second )
 1046     return size;            // Self copy, no move
 1047 
 1048   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 1049     uint ireg = ideal_reg();
 1050     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1051     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1052     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1053     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1054       // mem -> mem
 1055       int src_offset = ra_->reg2offset(src_first);
 1056       int dst_offset = ra_->reg2offset(dst_first);
 1057       vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
 1058     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1059       vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
 1060     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1061       int stack_offset = ra_->reg2offset(dst_first);
 1062       vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
 1063     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1064       int stack_offset = ra_->reg2offset(src_first);
 1065       vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
 1066     } else {
 1067       ShouldNotReachHere();
 1068     }
 1069     return 0;
 1070   }
 1071 
 1072   // --------------------------------------
 1073   // Check for mem-mem move.  push/pop to move.
 1074   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1075     if( src_second == dst_first ) { // overlapping stack copy ranges
 1076       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1077       size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1078       size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1079       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1080     }
 1081     // move low bits
 1082     size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1083     size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1084     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1085       size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1086       size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1087     }
 1088     return size;
 1089   }
 1090 
 1091   // --------------------------------------
 1092   // Check for integer reg-reg copy
 1093   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1094     size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st);
 1095 
 1096   // Check for integer store
 1097   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1098     size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1099 
 1100   // Check for integer load
 1101   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1102     size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1103 
 1104   // Check for integer reg-xmm reg copy
 1105   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1106     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1107             "no 64 bit integer-float reg moves" );
 1108     return impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1109   }
 1110   // --------------------------------------
 1111   // Check for float reg-reg copy
 1112   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1113     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1114             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1115     if( masm ) {
 1116 
 1117       // Note the mucking with the register encode to compensate for the 0/1
 1118       // indexing issue mentioned in a comment in the reg_def sections
 1119       // for FPR registers many lines above here.
 1120 
 1121       if( src_first != FPR1L_num ) {
 1122         emit_opcode  (masm, 0xD9 );           // FLD    ST(i)
 1123         emit_d8      (masm, 0xC0+Matcher::_regEncode[src_first]-1 );
 1124         emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
 1125         emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
 1126      } else {
 1127         emit_opcode  (masm, 0xDD );           // FST    ST(i)
 1128         emit_d8      (masm, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1129      }
 1130 #ifndef PRODUCT
 1131     } else if( !do_size ) {
 1132       if( size != 0 ) st->print("\n\t");
 1133       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1134       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1135 #endif
 1136     }
 1137     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1138   }
 1139 
 1140   // Check for float store
 1141   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1142     return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1143   }
 1144 
 1145   // Check for float load
 1146   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1147     int offset = ra_->reg2offset(src_first);
 1148     const char *op_str;
 1149     int op;
 1150     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1151       op_str = "FLD_D";
 1152       op = 0xDD;
 1153     } else {                   // 32-bit load
 1154       op_str = "FLD_S";
 1155       op = 0xD9;
 1156       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1157     }
 1158     if( masm ) {
 1159       masm->set_inst_mark();
 1160       emit_opcode  (masm, op );
 1161       encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1162       emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
 1163       emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
 1164       masm->clear_inst_mark();
 1165 #ifndef PRODUCT
 1166     } else if( !do_size ) {
 1167       if( size != 0 ) st->print("\n\t");
 1168       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1169 #endif
 1170     }
 1171     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1172     return size + 3+offset_size+2;
 1173   }
 1174 
 1175   // Check for xmm reg-reg copy
 1176   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1177     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1178             (src_first+1 == src_second && dst_first+1 == dst_second),
 1179             "no non-adjacent float-moves" );
 1180     return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1181   }
 1182 
 1183   // Check for xmm reg-integer reg copy
 1184   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1185     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1186             "no 64 bit float-integer reg moves" );
 1187     return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1188   }
 1189 
 1190   // Check for xmm store
 1191   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1192     return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1193   }
 1194 
 1195   // Check for float xmm load
 1196   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1197     return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1198   }
 1199 
 1200   // Copy from float reg to xmm reg
 1201   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1202     // copy to the top of stack from floating point reg
 1203     // and use LEA to preserve flags
 1204     if( masm ) {
 1205       emit_opcode(masm,0x8D);  // LEA  ESP,[ESP-8]
 1206       emit_rm(masm, 0x1, ESP_enc, 0x04);
 1207       emit_rm(masm, 0x0, 0x04, ESP_enc);
 1208       emit_d8(masm,0xF8);
 1209 #ifndef PRODUCT
 1210     } else if( !do_size ) {
 1211       if( size != 0 ) st->print("\n\t");
 1212       st->print("LEA    ESP,[ESP-8]");
 1213 #endif
 1214     }
 1215     size += 4;
 1216 
 1217     size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1218 
 1219     // Copy from the temp memory to the xmm reg.
 1220     size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st);
 1221 
 1222     if( masm ) {
 1223       emit_opcode(masm,0x8D);  // LEA  ESP,[ESP+8]
 1224       emit_rm(masm, 0x1, ESP_enc, 0x04);
 1225       emit_rm(masm, 0x0, 0x04, ESP_enc);
 1226       emit_d8(masm,0x08);
 1227 #ifndef PRODUCT
 1228     } else if( !do_size ) {
 1229       if( size != 0 ) st->print("\n\t");
 1230       st->print("LEA    ESP,[ESP+8]");
 1231 #endif
 1232     }
 1233     size += 4;
 1234     return size;
 1235   }
 1236 
 1237   // AVX-512 opmask specific spilling.
 1238   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1239     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1240     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1241     int offset = ra_->reg2offset(src_first);
 1242     if (masm != nullptr) {
 1243       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1244 #ifndef PRODUCT
 1245     } else {
 1246       st->print("KMOV    %s, [ESP + %d]", Matcher::regName[dst_first], offset);
 1247 #endif
 1248     }
 1249     return 0;
 1250   }
 1251 
 1252   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1253     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1254     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1255     int offset = ra_->reg2offset(dst_first);
 1256     if (masm != nullptr) {
 1257       __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1258 #ifndef PRODUCT
 1259     } else {
 1260       st->print("KMOV    [ESP + %d], %s", offset, Matcher::regName[src_first]);
 1261 #endif
 1262     }
 1263     return 0;
 1264   }
 1265 
 1266   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1267     Unimplemented();
 1268     return 0;
 1269   }
 1270 
 1271   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1272     Unimplemented();
 1273     return 0;
 1274   }
 1275 
 1276   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1277     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1278     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1279     if (masm != nullptr) {
 1280       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1281 #ifndef PRODUCT
 1282     } else {
 1283       st->print("KMOV    %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
 1284 #endif
 1285     }
 1286     return 0;
 1287   }
 1288 
 1289   assert( size > 0, "missed a case" );
 1290 
 1291   // --------------------------------------------------------------------
 1292   // Check for second bits still needing moving.
 1293   if( src_second == dst_second )
 1294     return size;               // Self copy; no move
 1295   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1296 
 1297   // Check for second word int-int move
 1298   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1299     return impl_mov_helper(masm,do_size,src_second,dst_second,size, st);
 1300 
 1301   // Check for second word integer store
 1302   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1303     return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1304 
 1305   // Check for second word integer load
 1306   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1307     return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1308 
 1309   Unimplemented();
 1310   return 0; // Mute compiler
 1311 }
 1312 
 1313 #ifndef PRODUCT
 1314 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1315   implementation( nullptr, ra_, false, st );
 1316 }
 1317 #endif
 1318 
 1319 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1320   implementation( masm, ra_, false, nullptr );
 1321 }
 1322 
 1323 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1324   return MachNode::size(ra_);
 1325 }
 1326 
 1327 
 1328 //=============================================================================
 1329 #ifndef PRODUCT
 1330 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1331   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1332   int reg = ra_->get_reg_first(this);
 1333   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1334 }
 1335 #endif
 1336 
 1337 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1338   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1339   int reg = ra_->get_encode(this);
 1340   if( offset >= 128 ) {
 1341     emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
 1342     emit_rm(masm, 0x2, reg, 0x04);
 1343     emit_rm(masm, 0x0, 0x04, ESP_enc);
 1344     emit_d32(masm, offset);
 1345   }
 1346   else {
 1347     emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
 1348     emit_rm(masm, 0x1, reg, 0x04);
 1349     emit_rm(masm, 0x0, 0x04, ESP_enc);
 1350     emit_d8(masm, offset);
 1351   }
 1352 }
 1353 
 1354 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1355   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1356   if( offset >= 128 ) {
 1357     return 7;
 1358   }
 1359   else {
 1360     return 4;
 1361   }
 1362 }
 1363 
 1364 //=============================================================================
 1365 #ifndef PRODUCT
 1366 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1367   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1368   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1369   st->print_cr("\tNOP");
 1370   st->print_cr("\tNOP");
 1371   if( !OptoBreakpoint )
 1372     st->print_cr("\tNOP");
 1373 }
 1374 #endif
 1375 
 1376 void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1377   __ ic_check(CodeEntryAlignment);
 1378 }
 1379 
 1380 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1381   return MachNode::size(ra_); // too many variables; just compute it
 1382                               // the hard way
 1383 }
 1384 
 1385 
 1386 //=============================================================================
 1387 
 1388 // Vector calling convention not supported.
 1389 bool Matcher::supports_vector_calling_convention() {
 1390   return false;
 1391 }
 1392 
 1393 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1394   Unimplemented();
 1395   return OptoRegPair(0, 0);
 1396 }
 1397 
 1398 // Is this branch offset short enough that a short branch can be used?
 1399 //
 1400 // NOTE: If the platform does not provide any short branch variants, then
 1401 //       this method should return false for offset 0.
 1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1403   // The passed offset is relative to address of the branch.
 1404   // On 86 a branch displacement is calculated relative to address
 1405   // of a next instruction.
 1406   offset -= br_size;
 1407 
 1408   // the short version of jmpConUCF2 contains multiple branches,
 1409   // making the reach slightly less
 1410   if (rule == jmpConUCF2_rule)
 1411     return (-126 <= offset && offset <= 125);
 1412   return (-128 <= offset && offset <= 127);
 1413 }
 1414 
 1415 // Return whether or not this register is ever used as an argument.  This
 1416 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1417 // Registers not mentioned will be killed by the VM call in the trampoline, and
 1418 // arguments in those registers not be available to the callee.
 1419 bool Matcher::can_be_java_arg( int reg ) {
 1420   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1421   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1422   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1423   return false;
 1424 }
 1425 
 1426 bool Matcher::is_spillable_arg( int reg ) {
 1427   return can_be_java_arg(reg);
 1428 }
 1429 
 1430 uint Matcher::int_pressure_limit()
 1431 {
 1432   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1433 }
 1434 
 1435 uint Matcher::float_pressure_limit()
 1436 {
 1437   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1438 }
 1439 
 1440 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1441   // Use hardware integer DIV instruction when
 1442   // it is faster than a code which use multiply.
 1443   // Only when constant divisor fits into 32 bit
 1444   // (min_jint is excluded to get only correct
 1445   // positive 32 bit values from negative).
 1446   return VM_Version::has_fast_idiv() &&
 1447          (divisor == (int)divisor && divisor != min_jint);
 1448 }
 1449 
 1450 // Register for DIVI projection of divmodI
 1451 RegMask Matcher::divI_proj_mask() {
 1452   return EAX_REG_mask();
 1453 }
 1454 
 1455 // Register for MODI projection of divmodI
 1456 RegMask Matcher::modI_proj_mask() {
 1457   return EDX_REG_mask();
 1458 }
 1459 
 1460 // Register for DIVL projection of divmodL
 1461 RegMask Matcher::divL_proj_mask() {
 1462   ShouldNotReachHere();
 1463   return RegMask();
 1464 }
 1465 
 1466 // Register for MODL projection of divmodL
 1467 RegMask Matcher::modL_proj_mask() {
 1468   ShouldNotReachHere();
 1469   return RegMask();
 1470 }
 1471 
 1472 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1473   return NO_REG_mask();
 1474 }
 1475 
 1476 // Returns true if the high 32 bits of the value is known to be zero.
 1477 bool is_operand_hi32_zero(Node* n) {
 1478   int opc = n->Opcode();
 1479   if (opc == Op_AndL) {
 1480     Node* o2 = n->in(2);
 1481     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1482       return true;
 1483     }
 1484   }
 1485   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1486     return true;
 1487   }
 1488   return false;
 1489 }
 1490 
 1491 %}
 1492 
 1493 //----------ENCODING BLOCK-----------------------------------------------------
 1494 // This block specifies the encoding classes used by the compiler to output
 1495 // byte streams.  Encoding classes generate functions which are called by
 1496 // Machine Instruction Nodes in order to generate the bit encoding of the
 1497 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
 1499 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1500 // operand to generate a function which returns its register number when
 1501 // queried.   CONST_INTER causes an operand to generate a function which
 1502 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1503 // operand to generate four functions which return the Base Register, the
 1504 // Index Register, the Scale Value, and the Offset Value of the operand when
 1505 // queried.  COND_INTER causes an operand to generate six functions which
 1506 // return the encoding code (ie - encoding bits for the instruction)
 1507 // associated with each basic boolean condition for a conditional instruction.
 1508 // Instructions specify two basic values for encoding.  They use the
 1509 // ins_encode keyword to specify their encoding class (which must be one of
 1510 // the class names specified in the encoding block), and they use the
 1511 // opcode keyword to specify, in order, their primary, secondary, and
 1512 // tertiary opcode.  Only the opcode sections which a particular instruction
 1513 // needs for encoding need to be specified.
 1514 encode %{
 1515   // Build emit functions for each basic byte or larger field in the intel
 1516   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1517   // code in the enc_class source block.  Emit functions will live in the
 1518   // main source block for now.  In future, we can generalize this by
 1519   // adding a syntax that specifies the sizes of fields in an order,
 1520   // so that the adlc can build the emit functions automagically
 1521 
 1522   // Set instruction mark in MacroAssembler. This is used only in
  // instructions that emit bytes directly to the CodeBuffer wrapped
 1524   // in the MacroAssembler. Should go away once all "instruct" are
 1525   // patched to emit bytes only using methods in MacroAssembler.
  enc_class SetInstMark %{
    // Set the instruction mark in the MacroAssembler at the current position.
    // Intended to be paired with ClearInstMark around encodings that emit raw bytes.
    __ set_inst_mark();
  %}
 1529 
  enc_class ClearInstMark %{
    // Clear the instruction mark in the MacroAssembler (counterpart of SetInstMark).
    __ clear_inst_mark();
  %}
 1533 
 1534   // Emit primary opcode
 1535   enc_class OpcP %{
 1536     emit_opcode(masm, $primary);
 1537   %}
 1538 
 1539   // Emit secondary opcode
 1540   enc_class OpcS %{
 1541     emit_opcode(masm, $secondary);
 1542   %}
 1543 
 1544   // Emit opcode directly
 1545   enc_class Opcode(immI d8) %{
 1546     emit_opcode(masm, $d8$$constant);
 1547   %}
 1548 
 1549   enc_class SizePrefix %{
 1550     emit_opcode(masm,0x66);
 1551   %}
 1552 
 1553   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1554     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 1555   %}
 1556 
 1557   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1558     emit_opcode(masm,$opcode$$constant);
 1559     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 1560   %}
 1561 
 1562   enc_class mov_r32_imm0( rRegI dst ) %{
 1563     emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1564     emit_d32   ( masm, 0x0  );             //                         imm32==0x0
 1565   %}
 1566 
 1567   enc_class cdq_enc %{
 1568     // Full implementation of Java idiv and irem; checks for
 1569     // special case as described in JVM spec., p.243 & p.271.
 1570     //
 1571     //         normal case                           special case
 1572     //
 1573     // input : rax,: dividend                         min_int
 1574     //         reg: divisor                          -1
 1575     //
 1576     // output: rax,: quotient  (= rax, idiv reg)       min_int
 1577     //         rdx: remainder (= rax, irem reg)       0
 1578     //
 1579     //  Code sequnce:
 1580     //
 1581     //  81 F8 00 00 00 80    cmp         rax,80000000h
 1582     //  0F 85 0B 00 00 00    jne         normal_case
 1583     //  33 D2                xor         rdx,edx
 1584     //  83 F9 FF             cmp         rcx,0FFh
 1585     //  0F 84 03 00 00 00    je          done
 1586     //                  normal_case:
 1587     //  99                   cdq
 1588     //  F7 F9                idiv        rax,ecx
 1589     //                  done:
 1590     //
 1591     emit_opcode(masm,0x81); emit_d8(masm,0xF8);
 1592     emit_opcode(masm,0x00); emit_d8(masm,0x00);
 1593     emit_opcode(masm,0x00); emit_d8(masm,0x80);                     // cmp rax,80000000h
 1594     emit_opcode(masm,0x0F); emit_d8(masm,0x85);
 1595     emit_opcode(masm,0x0B); emit_d8(masm,0x00);
 1596     emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // jne normal_case
 1597     emit_opcode(masm,0x33); emit_d8(masm,0xD2);                     // xor rdx,edx
 1598     emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF); // cmp rcx,0FFh
 1599     emit_opcode(masm,0x0F); emit_d8(masm,0x84);
 1600     emit_opcode(masm,0x03); emit_d8(masm,0x00);
 1601     emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // je done
 1602     // normal_case:
 1603     emit_opcode(masm,0x99);                                         // cdq
 1604     // idiv (note: must be emitted by the user of this rule)
 1605     // normal:
 1606   %}
 1607 
 1608   // Dense encoding for older common ops
 1609   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1610     emit_opcode(masm, $opcode$$constant + $reg$$reg);
 1611   %}
 1612 
 1613 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
 1615   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1616     // Check for 8-bit immediate, and set sign extend bit in opcode
 1617     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1618       emit_opcode(masm, $primary | 0x02);
 1619     }
 1620     else {                          // If 32-bit immediate
 1621       emit_opcode(masm, $primary);
 1622     }
 1623   %}
 1624 
 1625   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1626     // Emit primary opcode and set sign-extend bit
 1627     // Check for 8-bit immediate, and set sign extend bit in opcode
 1628     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1629       emit_opcode(masm, $primary | 0x02);    }
 1630     else {                          // If 32-bit immediate
 1631       emit_opcode(masm, $primary);
 1632     }
 1633     // Emit r/m byte with secondary opcode, after primary opcode.
 1634     emit_rm(masm, 0x3, $secondary, $dst$$reg);
 1635   %}
 1636 
 1637   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1638     // Check for 8-bit immediate, and set sign extend bit in opcode
 1639     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1640       $$$emit8$imm$$constant;
 1641     }
 1642     else {                          // If 32-bit immediate
 1643       // Output immediate
 1644       $$$emit32$imm$$constant;
 1645     }
 1646   %}
 1647 
 1648   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1649     // Emit primary opcode and set sign-extend bit
 1650     // Check for 8-bit immediate, and set sign extend bit in opcode
 1651     int con = (int)$imm$$constant; // Throw away top bits
 1652     emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1653     // Emit r/m byte with secondary opcode, after primary opcode.
 1654     emit_rm(masm, 0x3, $secondary, $dst$$reg);
 1655     if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
 1656     else                               emit_d32(masm,con);
 1657   %}
 1658 
 1659   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1660     // Emit primary opcode and set sign-extend bit
 1661     // Check for 8-bit immediate, and set sign extend bit in opcode
 1662     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1663     emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1664     // Emit r/m byte with tertiary opcode, after primary opcode.
 1665     emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1666     if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
 1667     else                               emit_d32(masm,con);
 1668   %}
 1669 
 1670   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1671     emit_cc(masm, $secondary, $dst$$reg );
 1672   %}
 1673 
 1674   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1675     int destlo = $dst$$reg;
 1676     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1677     // bswap lo
 1678     emit_opcode(masm, 0x0F);
 1679     emit_cc(masm, 0xC8, destlo);
 1680     // bswap hi
 1681     emit_opcode(masm, 0x0F);
 1682     emit_cc(masm, 0xC8, desthi);
 1683     // xchg lo and hi
 1684     emit_opcode(masm, 0x87);
 1685     emit_rm(masm, 0x3, destlo, desthi);
 1686   %}
 1687 
 1688   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1689     emit_rm(masm, 0x3, $secondary, $div$$reg );
 1690   %}
 1691 
 1692   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1693     $$$emit8$primary;
 1694     emit_cc(masm, $secondary, $cop$$cmpcode);
 1695   %}
 1696 
 1697   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1698     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1699     emit_d8(masm, op >> 8 );
 1700     emit_d8(masm, op & 255);
 1701   %}
 1702 
 1703   // emulate a CMOV with a conditional branch around a MOV
 1704   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1705     // Invert sense of branch from sense of CMOV
 1706     emit_cc( masm, 0x70, ($cop$$cmpcode^1) );
 1707     emit_d8( masm, $brOffs$$constant );
 1708   %}
 1709 
 1710   enc_class enc_PartialSubtypeCheck( ) %{
 1711     Register Redi = as_Register(EDI_enc); // result register
 1712     Register Reax = as_Register(EAX_enc); // super class
 1713     Register Recx = as_Register(ECX_enc); // killed
 1714     Register Resi = as_Register(ESI_enc); // sub class
 1715     Label miss;
 1716 
 1717     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1718                                      nullptr, &miss,
 1719                                      /*set_cond_codes:*/ true);
 1720     if ($primary) {
 1721       __ xorptr(Redi, Redi);
 1722     }
 1723     __ bind(miss);
 1724   %}
 1725 
 1726   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1727     int start = __ offset();
 1728     if (UseSSE >= 2) {
 1729       if (VerifyFPU) {
 1730         __ verify_FPU(0, "must be empty in SSE2+ mode");
 1731       }
 1732     } else {
 1733       // External c_calling_convention expects the FPU stack to be 'clean'.
 1734       // Compiled code leaves it dirty.  Do cleanup now.
 1735       __ empty_FPU_stack();
 1736     }
 1737     if (sizeof_FFree_Float_Stack_All == -1) {
 1738       sizeof_FFree_Float_Stack_All = __ offset() - start;
 1739     } else {
 1740       assert(__ offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1741     }
 1742   %}
 1743 
 1744   enc_class Verify_FPU_For_Leaf %{
 1745     if( VerifyFPU ) {
 1746       __ verify_FPU( -3, "Returning from Runtime Leaf call");
 1747     }
 1748   %}
 1749 
 1750   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1751     // This is the instruction starting address for relocation info.
 1752     __ set_inst_mark();
 1753     $$$emit8$primary;
 1754     // CALL directly to the runtime
 1755     emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
 1756                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1757     __ clear_inst_mark();
 1758     __ post_call_nop();
 1759 
 1760     if (UseSSE >= 2) {
 1761       BasicType rt = tf()->return_type();
 1762 
 1763       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1764         // A C runtime call where the return value is unused.  In SSE2+
 1765         // mode the result needs to be removed from the FPU stack.  It's
 1766         // likely that this function call could be removed by the
 1767         // optimizer if the C function is a pure function.
 1768         __ ffree(0);
 1769       } else if (rt == T_FLOAT) {
 1770         __ lea(rsp, Address(rsp, -4));
 1771         __ fstp_s(Address(rsp, 0));
 1772         __ movflt(xmm0, Address(rsp, 0));
 1773         __ lea(rsp, Address(rsp,  4));
 1774       } else if (rt == T_DOUBLE) {
 1775         __ lea(rsp, Address(rsp, -8));
 1776         __ fstp_d(Address(rsp, 0));
 1777         __ movdbl(xmm0, Address(rsp, 0));
 1778         __ lea(rsp, Address(rsp,  8));
 1779       }
 1780     }
 1781   %}
 1782 
 1783   enc_class pre_call_resets %{
 1784     // If method sets FPU control word restore it here
 1785     debug_only(int off0 = __ offset());
 1786     if (ra_->C->in_24_bit_fp_mode()) {
 1787       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1788     }
 1789     // Clear upper bits of YMM registers when current compiled code uses
 1790     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1791     __ vzeroupper();
 1792     debug_only(int off1 = __ offset());
 1793     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1794   %}
 1795 
 1796   enc_class post_call_FPU %{
 1797     // If method sets FPU control word do it here also
 1798     if (Compile::current()->in_24_bit_fp_mode()) {
 1799       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1800     }
 1801   %}
 1802 
 1803   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1804     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1805     // who we intended to call.
 1806     __ set_inst_mark();
 1807     $$$emit8$primary;
 1808 
 1809     if (!_method) {
 1810       emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
 1811                      runtime_call_Relocation::spec(),
 1812                      RELOC_IMM32);
 1813       __ clear_inst_mark();
 1814       __ post_call_nop();
 1815     } else {
 1816       int method_index = resolved_method_index(masm);
 1817       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1818                                                   : static_call_Relocation::spec(method_index);
 1819       emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
 1820                      rspec, RELOC_DISP32);
 1821       __ post_call_nop();
 1822       address mark = __ inst_mark();
 1823       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1824         // Calls of the same statically bound method can share
 1825         // a stub to the interpreter.
 1826         __ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off());
 1827         __ clear_inst_mark();
 1828       } else {
 1829         // Emit stubs for static call.
 1830         address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
 1831         __ clear_inst_mark();
 1832         if (stub == nullptr) {
 1833           ciEnv::current()->record_failure("CodeCache is full");
 1834           return;
 1835         }
 1836       }
 1837     }
 1838   %}
 1839 
 1840   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1841     __ ic_call((address)$meth$$method, resolved_method_index(masm));
 1842     __ post_call_nop();
 1843   %}
 1844 
 1845   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1846     int disp = in_bytes(Method::from_compiled_offset());
 1847     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1848 
 1849     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
 1850     __ set_inst_mark();
 1851     $$$emit8$primary;
 1852     emit_rm(masm, 0x01, $secondary, EAX_enc );  // R/M byte
 1853     emit_d8(masm, disp);             // Displacement
 1854     __ clear_inst_mark();
 1855     __ post_call_nop();
 1856   %}
 1857 
 1858   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1859     $$$emit8$primary;
 1860     emit_rm(masm, 0x3, $secondary, $dst$$reg);
 1861     $$$emit8$shift$$constant;
 1862   %}
 1863 
 1864   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1865     // Load immediate does not have a zero or sign extended version
 1866     // for 8-bit immediates
 1867     emit_opcode(masm, 0xB8 + $dst$$reg);
 1868     $$$emit32$src$$constant;
 1869   %}
 1870 
 1871   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1872     // Load immediate does not have a zero or sign extended version
 1873     // for 8-bit immediates
 1874     emit_opcode(masm, $primary + $dst$$reg);
 1875     $$$emit32$src$$constant;
 1876   %}
 1877 
 1878   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1879     // Load immediate does not have a zero or sign extended version
 1880     // for 8-bit immediates
 1881     int dst_enc = $dst$$reg;
 1882     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1883     if (src_con == 0) {
 1884       // xor dst, dst
 1885       emit_opcode(masm, 0x33);
 1886       emit_rm(masm, 0x3, dst_enc, dst_enc);
 1887     } else {
 1888       emit_opcode(masm, $primary + dst_enc);
 1889       emit_d32(masm, src_con);
 1890     }
 1891   %}
 1892 
 1893   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1894     // Load immediate does not have a zero or sign extended version
 1895     // for 8-bit immediates
 1896     int dst_enc = $dst$$reg + 2;
 1897     int src_con = ((julong)($src$$constant)) >> 32;
 1898     if (src_con == 0) {
 1899       // xor dst, dst
 1900       emit_opcode(masm, 0x33);
 1901       emit_rm(masm, 0x3, dst_enc, dst_enc);
 1902     } else {
 1903       emit_opcode(masm, $primary + dst_enc);
 1904       emit_d32(masm, src_con);
 1905     }
 1906   %}
 1907 
 1908 
 1909   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1910   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1911     encode_Copy( masm, $dst$$reg, $src$$reg );
 1912   %}
 1913 
 1914   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1915     encode_Copy( masm, $dst$$reg, $src$$reg );
 1916   %}
 1917 
 1918   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1919     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 1920   %}
 1921 
 1922   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1923     $$$emit8$primary;
 1924     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 1925   %}
 1926 
 1927   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1928     $$$emit8$secondary;
 1929     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1930   %}
 1931 
 1932   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1933     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 1934   %}
 1935 
 1936   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1937     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1938   %}
 1939 
 1940   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1941     emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1942   %}
 1943 
  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output the constant operand as a 32-bit immediate.
    $$$emit32$src$$constant;
  %}
 1948 
 1949   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1950     // Output Float immediate bits
 1951     jfloat jf = $src$$constant;
 1952     int    jf_as_bits = jint_cast( jf );
 1953     emit_d32(masm, jf_as_bits);
 1954   %}
 1955 
 1956   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1957     // Output Float immediate bits
 1958     jfloat jf = $src$$constant;
 1959     int    jf_as_bits = jint_cast( jf );
 1960     emit_d32(masm, jf_as_bits);
 1961   %}
 1962 
  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output the constant operand as a 16-bit immediate.
    $$$emit16$src$$constant;
  %}
 1967 
 1968   enc_class Con_d32(immI src) %{
 1969     emit_d32(masm,$src$$constant);
 1970   %}
 1971 
 1972   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1973     // Output immediate memory reference
 1974     emit_rm(masm, 0x00, $t1$$reg, 0x05 );
 1975     emit_d32(masm, 0x00);
 1976   %}
 1977 
 1978   enc_class lock_prefix( ) %{
 1979     emit_opcode(masm,0xF0);         // [Lock]
 1980   %}
 1981 
 1982   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high-order word of the new value to store, but
  //       our register encoding uses rbx.
 1987   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 1988 
 1989     // XCHG  rbx,ecx
 1990     emit_opcode(masm,0x87);
 1991     emit_opcode(masm,0xD9);
 1992     // [Lock]
 1993     emit_opcode(masm,0xF0);
 1994     // CMPXCHG8 [Eptr]
 1995     emit_opcode(masm,0x0F);
 1996     emit_opcode(masm,0xC7);
 1997     emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
 1998     // XCHG  rbx,ecx
 1999     emit_opcode(masm,0x87);
 2000     emit_opcode(masm,0xD9);
 2001   %}
 2002 
 2003   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2004     // [Lock]
 2005     emit_opcode(masm,0xF0);
 2006 
 2007     // CMPXCHG [Eptr]
 2008     emit_opcode(masm,0x0F);
 2009     emit_opcode(masm,0xB1);
 2010     emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
 2011   %}
 2012 
 2013   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2014     // [Lock]
 2015     emit_opcode(masm,0xF0);
 2016 
 2017     // CMPXCHGB [Eptr]
 2018     emit_opcode(masm,0x0F);
 2019     emit_opcode(masm,0xB0);
 2020     emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
 2021   %}
 2022 
 2023   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2024     // [Lock]
 2025     emit_opcode(masm,0xF0);
 2026 
 2027     // 16-bit mode
 2028     emit_opcode(masm, 0x66);
 2029 
 2030     // CMPXCHGW [Eptr]
 2031     emit_opcode(masm,0x0F);
 2032     emit_opcode(masm,0xB1);
 2033     emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
 2034   %}
 2035 
 2036   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2037     int res_encoding = $res$$reg;
 2038 
 2039     // MOV  res,0
 2040     emit_opcode( masm, 0xB8 + res_encoding);
 2041     emit_d32( masm, 0 );
 2042     // JNE,s  fail
 2043     emit_opcode(masm,0x75);
 2044     emit_d8(masm, 5 );
 2045     // MOV  res,1
 2046     emit_opcode( masm, 0xB8 + res_encoding);
 2047     emit_d32( masm, 1 );
 2048     // fail:
 2049   %}
 2050 
 2051   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2052     int reg_encoding = $ereg$$reg;
 2053     int base  = $mem$$base;
 2054     int index = $mem$$index;
 2055     int scale = $mem$$scale;
 2056     int displace = $mem$$disp;
 2057     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2058     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2059   %}
 2060 
 2061   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2062     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2063     int base  = $mem$$base;
 2064     int index = $mem$$index;
 2065     int scale = $mem$$scale;
 2066     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2067     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2068     encode_RegMem(masm, reg_encoding, base, index, scale, displace, relocInfo::none);
 2069   %}
 2070 
 2071   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2072     int r1, r2;
 2073     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2074     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2075     emit_opcode(masm,0x0F);
 2076     emit_opcode(masm,$tertiary);
 2077     emit_rm(masm, 0x3, r1, r2);
 2078     emit_d8(masm,$cnt$$constant);
 2079     emit_d8(masm,$primary);
 2080     emit_rm(masm, 0x3, $secondary, r1);
 2081     emit_d8(masm,$cnt$$constant);
 2082   %}
 2083 
 2084   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2085     emit_opcode( masm, 0x8B ); // Move
 2086     emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2087     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2088       emit_d8(masm,$primary);
 2089       emit_rm(masm, 0x3, $secondary, $dst$$reg);
 2090       emit_d8(masm,$cnt$$constant-32);
 2091     }
 2092     emit_d8(masm,$primary);
 2093     emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2094     emit_d8(masm,31);
 2095   %}
 2096 
 2097   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2098     int r1, r2;
 2099     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2100     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2101 
 2102     emit_opcode( masm, 0x8B ); // Move r1,r2
 2103     emit_rm(masm, 0x3, r1, r2);
 2104     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2105       emit_opcode(masm,$primary);
 2106       emit_rm(masm, 0x3, $secondary, r1);
 2107       emit_d8(masm,$cnt$$constant-32);
 2108     }
 2109     emit_opcode(masm,0x33);  // XOR r2,r2
 2110     emit_rm(masm, 0x3, r2, r2);
 2111   %}
 2112 
 2113   // Clone of RegMem but accepts an extra parameter to access each
 2114   // half of a double in memory; it never needs relocation info.
 2115   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2116     emit_opcode(masm,$opcode$$constant);
 2117     int reg_encoding = $rm_reg$$reg;
 2118     int base     = $mem$$base;
 2119     int index    = $mem$$index;
 2120     int scale    = $mem$$scale;
 2121     int displace = $mem$$disp + $disp_for_half$$constant;
 2122     relocInfo::relocType disp_reloc = relocInfo::none;
 2123     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2124   %}
 2125 
 2126   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2127   //
 2128   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2129   // and it never needs relocation information.
 2130   // Frequently used to move data between FPU's Stack Top and memory.
 2131   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2132     int rm_byte_opcode = $rm_opcode$$constant;
 2133     int base     = $mem$$base;
 2134     int index    = $mem$$index;
 2135     int scale    = $mem$$scale;
 2136     int displace = $mem$$disp;
 2137     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2138     encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2139   %}
 2140 
 2141   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2142     int rm_byte_opcode = $rm_opcode$$constant;
 2143     int base     = $mem$$base;
 2144     int index    = $mem$$index;
 2145     int scale    = $mem$$scale;
 2146     int displace = $mem$$disp;
 2147     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2148     encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2149   %}
 2150 
 2151   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2152     int reg_encoding = $dst$$reg;
 2153     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2154     int index        = 0x04;            // 0x04 indicates no index
 2155     int scale        = 0x00;            // 0x00 indicates no scale
 2156     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2157     relocInfo::relocType disp_reloc = relocInfo::none;
 2158     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2159   %}
 2160 
 2161   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2162     // Compare dst,src
 2163     emit_opcode(masm,0x3B);
 2164     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 2165     // jmp dst < src around move
 2166     emit_opcode(masm,0x7C);
 2167     emit_d8(masm,2);
 2168     // move dst,src
 2169     emit_opcode(masm,0x8B);
 2170     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 2171   %}
 2172 
 2173   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2174     // Compare dst,src
 2175     emit_opcode(masm,0x3B);
 2176     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 2177     // jmp dst > src around move
 2178     emit_opcode(masm,0x7F);
 2179     emit_d8(masm,2);
 2180     // move dst,src
 2181     emit_opcode(masm,0x8B);
 2182     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 2183   %}
 2184 
 2185   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2186     // If src is FPR1, we can just FST to store it.
 2187     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2188     int reg_encoding = 0x2; // Just store
 2189     int base  = $mem$$base;
 2190     int index = $mem$$index;
 2191     int scale = $mem$$scale;
 2192     int displace = $mem$$disp;
 2193     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2194     if( $src$$reg != FPR1L_enc ) {
 2195       reg_encoding = 0x3;  // Store & pop
 2196       emit_opcode( masm, 0xD9 ); // FLD (i.e., push it)
 2197       emit_d8( masm, 0xC0-1+$src$$reg );
 2198     }
 2199     __ set_inst_mark();       // Mark start of opcode for reloc info in mem operand
 2200     emit_opcode(masm,$primary);
 2201     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2202     __ clear_inst_mark();
 2203   %}
 2204 
 2205   enc_class neg_reg(rRegI dst) %{
 2206     // NEG $dst
 2207     emit_opcode(masm,0xF7);
 2208     emit_rm(masm, 0x3, 0x03, $dst$$reg );
 2209   %}
 2210 
 2211   enc_class setLT_reg(eCXRegI dst) %{
 2212     // SETLT $dst
 2213     emit_opcode(masm,0x0F);
 2214     emit_opcode(masm,0x9C);
 2215     emit_rm( masm, 0x3, 0x4, $dst$$reg );
 2216   %}
 2217 
 2218   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
          // Branch-free conditional add: p -= q, then y is added back to p
          // only when the subtraction borrowed.
 2219     const int tmp_enc = $tmp$$reg;
          const int p_enc   = $p$$reg;
          const int q_enc   = $q$$reg;
          const int y_enc   = $y$$reg;
 2220
 2221     // SUB $p,$q
 2222     emit_opcode(masm, 0x2B);
 2223     emit_rm(masm, 0x3, p_enc, q_enc);
 2224     // SBB $tmp,$tmp  (tmp = borrow ? all ones : zero)
 2225     emit_opcode(masm, 0x1B);
 2226     emit_rm(masm, 0x3, tmp_enc, tmp_enc);
 2227     // AND $tmp,$y    (tmp = borrow ? y : zero)
 2228     emit_opcode(masm, 0x23);
 2229     emit_rm(masm, 0x3, tmp_enc, y_enc);
 2230     // ADD $p,$tmp
 2231     emit_opcode(masm, 0x03);
 2232     emit_rm(masm, 0x3, p_enc, tmp_enc);
 2233   %}
 2234 
 2235   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
          // Left shift of the 64-bit register pair dst by the count in ECX.
          // Counts with bit 5 set first move lo into hi and clear lo; the
          // SHLD/SHL pair then handles the remaining count.
          const int lo = $dst$$reg;
          const int hi = HIGH_FROM_LOW_ENC(lo);
 2236     // TEST shift,32
 2237     emit_opcode(masm, 0xF7);
 2238     emit_rm(masm, 0x3, 0, ECX_enc);
 2239     emit_d32(masm, 0x20);
 2240     // JEQ,s small  (skips the 2-byte MOV and the 2-byte XOR)
 2241     emit_opcode(masm, 0x74);
 2242     emit_d8(masm, 0x04);
 2243     // MOV    $dst.hi,$dst.lo
 2244     emit_opcode(masm, 0x8B);
 2245     emit_rm(masm, 0x3, hi, lo);
 2246     // CLR    $dst.lo
 2247     emit_opcode(masm, 0x33);
 2248     emit_rm(masm, 0x3, lo, lo);
 2249 // small:
 2250     // SHLD   $dst.hi,$dst.lo,$shift
 2251     emit_opcode(masm, 0x0F);
 2252     emit_opcode(masm, 0xA5);
 2253     emit_rm(masm, 0x3, lo, hi);
 2254     // SHL    $dst.lo,$shift
 2255     emit_opcode(masm, 0xD3);
 2256     emit_rm(masm, 0x3, 0x4, lo);
 2257   %}
 2258 
 2259   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
          // Logical right shift of the 64-bit register pair dst by the count
          // in ECX. Counts with bit 5 set first move hi into lo and clear hi;
          // the SHRD/SHR pair then handles the remaining count.
          const int lo = $dst$$reg;
          const int hi = HIGH_FROM_LOW_ENC(lo);
 2260     // TEST shift,32
 2261     emit_opcode(masm, 0xF7);
 2262     emit_rm(masm, 0x3, 0, ECX_enc);
 2263     emit_d32(masm, 0x20);
 2264     // JEQ,s small  (skips the 2-byte MOV and the 2-byte XOR)
 2265     emit_opcode(masm, 0x74);
 2266     emit_d8(masm, 0x04);
 2267     // MOV    $dst.lo,$dst.hi
 2268     emit_opcode(masm, 0x8B);
 2269     emit_rm(masm, 0x3, lo, hi);
 2270     // CLR    $dst.hi
 2271     emit_opcode(masm, 0x33);
 2272     emit_rm(masm, 0x3, hi, hi);
 2273 // small:
 2274     // SHRD   $dst.lo,$dst.hi,$shift
 2275     emit_opcode(masm, 0x0F);
 2276     emit_opcode(masm, 0xAD);
 2277     emit_rm(masm, 0x3, hi, lo);
 2278     // SHR    $dst.hi,$shift
 2279     emit_opcode(masm, 0xD3);
 2280     emit_rm(masm, 0x3, 0x5, hi);
 2281   %}
 2282 
 2283   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
          // Arithmetic right shift of the 64-bit register pair dst by the
          // count in ECX. Counts with bit 5 set first move hi into lo and
          // fill hi with the sign; the SHRD/SAR pair handles the rest.
          const int lo = $dst$$reg;
          const int hi = HIGH_FROM_LOW_ENC(lo);
 2284     // TEST shift,32
 2285     emit_opcode(masm, 0xF7);
 2286     emit_rm(masm, 0x3, 0, ECX_enc);
 2287     emit_d32(masm, 0x20);
 2288     // JEQ,s small  (skips the 2-byte MOV and the 3-byte SAR imm8)
 2289     emit_opcode(masm, 0x74);
 2290     emit_d8(masm, 0x05);
 2291     // MOV    $dst.lo,$dst.hi
 2292     emit_opcode(masm, 0x8B);
 2293     emit_rm(masm, 0x3, lo, hi);
 2294     // SAR    $dst.hi,31
 2295     emit_opcode(masm, 0xC1);
 2296     emit_rm(masm, 0x3, 7, hi);
 2297     emit_d8(masm, 0x1F);
 2298 // small:
 2299     // SHRD   $dst.lo,$dst.hi,$shift
 2300     emit_opcode(masm, 0x0F);
 2301     emit_opcode(masm, 0xAD);
 2302     emit_rm(masm, 0x3, hi, lo);
 2303     // SAR    $dst.hi,$shift
 2304     emit_opcode(masm, 0xD3);
 2305     emit_rm(masm, 0x3, 0x7, hi);
 2306   %}
 2307 
 2308 
 2309   // ----------------- Encodings for floating point unit -----------------
 2310   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2311   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
          // Emits the instruction's $primary opcode, then a ModRM byte with
          // mod = 3, $secondary in the reg field and the src register in r/m.
          // Which FPU operation results depends on the opcodes supplied by
          // the instruction using this encoding.
 2312     $$$emit8$primary;
 2313     emit_rm(masm, 0x3, $secondary, $src$$reg );
 2314   %}
 2315 
 2316   // Pop argument in FPR0 with FSTP ST(0)
 2317   enc_class PopFPU() %{
          // Fixed two-byte sequence DD D8: FSTP ST(0), which pops the FPU stack.
 2318     emit_opcode( masm, 0xDD );
 2319     emit_d8( masm, 0xD8 );
 2320   %}
 2321 
 2322   // NOTE: emits the same bytes as Pop_Reg_FPR
 2323   enc_class Pop_Reg_DPR( regDPR dst ) %{
          // FSTP ST(i): DD D8+i, with i taken from the dst register encoding
          const int dst_enc = $dst$$reg;
 2324     emit_opcode(masm, 0xDD);
 2325     emit_d8(masm, 0xD8 + dst_enc);
 2326   %}
 2327 
 2328   enc_class Push_Reg_DPR( regDPR dst ) %{
          // FLD ST(i-1): D9 C0+(i-1), with i taken from the dst register encoding
          const int dst_enc = $dst$$reg;
 2329     emit_opcode(masm, 0xD9);
 2330     emit_d8(masm, 0xC0 - 1 + dst_enc);
 2331   %}
 2332 
 2333   enc_class strictfp_bias1( regDPR dst ) %{
          // Loads the 80-bit constant at addr_fpu_subnormal_bias1() and
          // multiplies it into dst, popping the loaded constant.
          const int bias_addr = (int)StubRoutines::x86::addr_fpu_subnormal_bias1();
          const int dst_enc   = $dst$$reg;
 2334     // FLD m80real [bias_addr]
          emit_opcode(masm, 0xDB);
 2335     emit_opcode(masm, 0x2D);
 2336     emit_d32(masm, bias_addr);
 2337     // FMULP ST(dst), ST0
          emit_opcode(masm, 0xDE);
 2338     emit_opcode(masm, 0xC8 + dst_enc);
 2339   %}
 2340 
 2341   enc_class strictfp_bias2( regDPR dst ) %{
          // Loads the 80-bit constant at addr_fpu_subnormal_bias2() and
          // multiplies it into dst, popping the loaded constant.
          const int bias_addr = (int)StubRoutines::x86::addr_fpu_subnormal_bias2();
          const int dst_enc   = $dst$$reg;
 2342     // FLD m80real [bias_addr]
          emit_opcode(masm, 0xDB);
 2343     emit_opcode(masm, 0x2D);
 2344     emit_d32(masm, bias_addr);
 2345     // FMULP ST(dst), ST0
          emit_opcode(masm, 0xDE);
 2346     emit_opcode(masm, 0xC8 + dst_enc);
 2347   %}
 2348 
 2349   // Special case for moving an integer register to a stack slot.
 2350   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
          // Delegates to store_to_stackslot with the instruction's primary
          // opcode, the src register encoding and the slot displacement.
          const int src_enc   = $src$$reg;
          const int slot_disp = $dst$$disp;
 2351     store_to_stackslot(masm, $primary, src_enc, slot_disp);
 2352   %}
 2353 
 2354   // Special case for moving a register to a stack slot.
 2355   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2356     // The opcode byte has already been emitted; this adds the
          // [ESP + disp32] addressing bytes for the stack slot.
          const int src_enc   = $src$$reg;
          const int slot_disp = $dst$$disp;
 2357     emit_rm(masm, 0x02, src_enc, ESP_enc);   // R/M byte
 2358     emit_rm(masm, 0x00, ESP_enc, ESP_enc);   // SIB byte
 2359     emit_d32(masm, slot_disp);               // Displacement
 2360   %}
 2361 
 2362   // Push the integer in stackSlot 'src' onto FP-stack
 2363   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
          // Opcode and reg-field extension come from the instruction's
          // $primary/$secondary; only the displacement of src is used.
          const int src_disp = $src$$disp;
 2364     store_to_stackslot(masm, $primary, $secondary, src_disp);
 2365   %}
 2366 
 2367   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2368   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
          // Opcode D9 with reg-field extension 3 (store single and pop)
          const int slot_disp = $dst$$disp;
 2369     store_to_stackslot(masm, 0xD9, 0x03, slot_disp);
 2370   %}
 2371 
 2372   // Same as Pop_Mem_FPR except for opcode
 2373   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2374   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
          // Opcode DD with reg-field extension 3 (store double and pop)
          const int slot_disp = $dst$$disp;
 2375     store_to_stackslot(masm, 0xDD, 0x03, slot_disp);
 2376   %}
 2377 
 2378   enc_class Pop_Reg_FPR( regFPR dst ) %{
          // FSTP ST(i): DD D8+i, with i taken from the dst register encoding
          const int dst_enc = $dst$$reg;
 2379     emit_opcode(masm, 0xDD);
 2380     emit_d8(masm, 0xD8 + dst_enc);
 2381   %}
 2382 
 2383   enc_class Push_Reg_FPR( regFPR dst ) %{
          // FLD ST(i-1): D9 C0+(i-1), with i taken from the dst register encoding
          const int dst_enc = $dst$$reg;
 2384     emit_opcode(masm, 0xD9);
 2385     emit_d8(masm, 0xC0 - 1 + dst_enc);
 2386   %}
 2387 
 2388   // Push FPU's float to a stack-slot, and pop FPU-stack
 2389   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
          // Stores src as a float to the stack slot. A src outside FPR1 is
          // first pushed with FLD and then stored with the popping form.
          const int src_enc = $src$$reg;
          const bool pushed = (src_enc != FPR1L_enc);
 2391     if (pushed) {
 2392       emit_opcode(masm, 0xD9);         // FLD    ST(i-1)
 2393       emit_d8(masm, 0xC0 - 1 + src_enc);
 2395     }
          // reg field 2 = FST, 3 = FSTP
 2396     store_to_stackslot(masm, 0xD9, pushed ? 0x03 : 0x02, $dst$$disp); // FST<P>_S  [ESP+dst]
 2397   %}
 2398 
 2399   // Push FPU's double to a stack-slot, and pop FPU-stack
 2400   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
          // Stores src as a double to the stack slot. A src outside FPR1 is
          // first pushed with FLD and then stored with the popping form.
          const int src_enc = $src$$reg;
          const bool pushed = (src_enc != FPR1L_enc);
 2402     if (pushed) {
 2403       emit_opcode(masm, 0xD9);         // FLD    ST(i-1)
 2404       emit_d8(masm, 0xC0 - 1 + src_enc);
 2406     }
          // reg field 2 = FST, 3 = FSTP
 2407     store_to_stackslot(masm, 0xDD, pushed ? 0x03 : 0x02, $dst$$disp); // FST<P>_D  [ESP+dst]
 2408   %}
 2409 
 2410   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2411   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
          // Copies src into dst on the FPU stack. A src outside FPR1 is
          // pushed with FLD and stored with FSTP; otherwise a plain FST is
          // used, whose base is one lower because no FLD shifted the stack.
          const int src_enc = $src$$reg;
          const int dst_enc = $dst$$reg;
          const bool pushed = (src_enc != FPR1L_enc);
 2413     if (pushed) {
 2414       emit_opcode(masm, 0xD9);         // FLD    ST(src-1)
 2415       emit_d8(masm, 0xC0 - 1 + src_enc);
 2417     }
          const int store_base = pushed ? 0xD8 : (0xD0 - 1); // -1 since we skip FLD
 2418     emit_opcode(masm, 0xDD);
 2419     emit_d8(masm, store_base + dst_enc);      // FST<P> ST(i)
 2420   %}
 2421 
 2422 
 2423   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
          const int dst_enc = $dst$$reg;
          const int src_enc = $src$$reg;
 2424     // FLD: load dst in FPR0
 2425     emit_opcode(masm, 0xD9);
 2426     emit_d8(masm, 0xC0 - 1 + dst_enc);
          // Bring src into FPR1 unless it is already there
 2427     if (src_enc != FPR1L_enc) {
 2428       // FINCSTP
 2429       emit_opcode(masm, 0xD9);
 2430       emit_opcode(masm, 0xF7);
 2432       // FXCH: swap FPR1 with src
 2433       emit_opcode(masm, 0xD9);
 2434       emit_d8(masm, 0xC8 - 1 + src_enc);
 2435       // FDECSTP
 2436       emit_opcode(masm, 0xD9);
 2437       emit_opcode(masm, 0xF6);
 2438     }
 2439   %}
 2440 
 2441   enc_class Push_ModD_encoding(regD src0, regD src1) %{
          // Transfers src1 and then src0 from XMM registers to the x87 stack
          // through an 8-byte scratch slot at [rsp]. The slot is left
          // allocated when this encoding ends.
          XMMRegister first  = $src1$$XMMRegister;
          XMMRegister second = $src0$$XMMRegister;
 2442     __ subptr(rsp, 8);
 2443     __ movdbl(Address(rsp, 0), first);
 2444     __ fld_d(Address(rsp, 0));
 2445     __ movdbl(Address(rsp, 0), second);
 2446     __ fld_d(Address(rsp, 0));
 2447   %}
 2448 
 2449   enc_class Push_ModF_encoding(regF src0, regF src1) %{
          // Transfers src1 and then src0 from XMM registers to the x87 stack
          // through a 4-byte scratch slot at [rsp]. The slot is left
          // allocated when this encoding ends.
          XMMRegister first  = $src1$$XMMRegister;
          XMMRegister second = $src0$$XMMRegister;
 2450     __ subptr(rsp, 4);
 2451     __ movflt(Address(rsp, 0), first);
 2452     __ fld_s(Address(rsp, 0));
 2453     __ movflt(Address(rsp, 0), second);
 2454     __ fld_s(Address(rsp, 0));
 2455   %}
 2456 
 2457   enc_class Push_ResultD(regD dst) %{
          // Pops the x87 top-of-stack as a double into [rsp], reloads it into
          // the dst XMM register and releases 8 bytes of stack.
          XMMRegister result = $dst$$XMMRegister;
 2458     __ fstp_d(Address(rsp, 0));
 2459     __ movdbl(result, Address(rsp, 0));
 2460     __ addptr(rsp, 8);
 2461   %}
 2462 
 2463   enc_class Push_ResultF(regF dst, immI d8) %{
          // Pops the x87 top-of-stack as a float into [rsp], reloads it into
          // the dst XMM register and releases $d8 bytes of stack.
          XMMRegister result = $dst$$XMMRegister;
 2464     __ fstp_s(Address(rsp, 0));
 2465     __ movflt(result, Address(rsp, 0));
 2466     __ addptr(rsp, $d8$$constant);
 2467   %}
 2468 
 2469   enc_class Push_SrcD(regD src) %{
          // Allocates 8 bytes at [rsp], spills the src XMM register there and
          // loads it onto the x87 stack. The slot is left allocated.
          XMMRegister value = $src$$XMMRegister;
 2470     __ subptr(rsp, 8);
 2471     __ movdbl(Address(rsp, 0), value);
 2472     __ fld_d(Address(rsp, 0));
 2473   %}
 2474 
 2475   enc_class push_stack_temp_qword() %{
          // Reserves 8 bytes of stack by subtracting 8 from rsp.
 2476     __ subptr(rsp, 8);
 2477   %}
 2478 
 2479   enc_class pop_stack_temp_qword() %{
          // Releases 8 bytes of stack by adding 8 to rsp.
 2480     __ addptr(rsp, 8);
 2481   %}
 2482 
 2483   enc_class push_xmm_to_fpr1(regD src) %{
          // Spills the src XMM register to [rsp] and loads it onto the x87
          // stack. rsp itself is not adjusted here.
          XMMRegister value = $src$$XMMRegister;
 2484     __ movdbl(Address(rsp, 0), value);
 2485     __ fld_d(Address(rsp, 0));
 2486   %}
 2487 
 2488   enc_class Push_Result_Mod_DPR( regDPR src) %{
          // Emits nothing when src is FPR1; otherwise swaps FPR1 with src
          // underneath the current top of the FPU stack.
          const int src_enc = $src$$reg;
 2489     if (src_enc != FPR1L_enc) {
 2490       // FINCSTP
 2491       emit_opcode(masm, 0xD9);
 2492       emit_opcode(masm, 0xF7);
 2493       // FXCH: swap FPR1 with src
 2494       emit_opcode(masm, 0xD9);
 2495       emit_d8(masm, 0xC8 - 1 + src_enc);
 2496       // FDECSTP
 2497       emit_opcode(masm, 0xD9);
 2498       emit_opcode(masm, 0xF6);
 2499     }
 2500   %}
 2501 
 2502   enc_class fnstsw_sahf_skip_parity() %{
          // Copies the FPU status word into the CPU flags (via AX) and then
          // emits a short JNP with a hard-coded displacement of 5 bytes.
          // The skipped bytes are emitted by whatever encoding follows this
          // one in the instruction, so that encoding must be exactly 5 bytes.
 2503     // fnstsw ax
 2504     emit_opcode( masm, 0xDF );
 2505     emit_opcode( masm, 0xE0 );
 2506     // sahf
 2507     emit_opcode( masm, 0x9E );
 2508     // jnp  ::skip
 2509     emit_opcode( masm, 0x7B );
 2510     emit_opcode( masm, 0x05 );
 2511   %}
 2512 
 2513   enc_class emitModDPR() %{
          // Emits a self-contained 12-byte loop that repeats FPREM until the
          // parity flag (copied from the FPU status word) is clear.
 2514     // fprem must be iterative
 2515     // :: loop
 2516     // fprem
 2517     emit_opcode( masm, 0xD9 );
 2518     emit_opcode( masm, 0xF8 );
 2519     // wait
 2520     emit_opcode( masm, 0x9b );
 2521     // fnstsw ax
 2522     emit_opcode( masm, 0xDF );
 2523     emit_opcode( masm, 0xE0 );
 2524     // sahf
 2525     emit_opcode( masm, 0x9E );
 2526     // jp  ::loop
          // Near JP (0F 8A) with rel32 = 0xFFFFFFF4 (-12), little-endian:
          // jumps back over the 12 bytes emitted by this encoding.
 2527     emit_opcode( masm, 0x0F );
 2528     emit_opcode( masm, 0x8A );
 2529     emit_opcode( masm, 0xF4 );
 2530     emit_opcode( masm, 0xFF );
 2531     emit_opcode( masm, 0xFF );
 2532     emit_opcode( masm, 0xFF );
 2533   %}
 2534 
 2535   enc_class fpu_flags() %{
          // Moves the FPU compare result into the CPU flags. When bit 0x0400
          // of the status word is set (unordered), AH is forced to 1 first so
          // that SAHF reports the comparison as "less than" (carry set).
 2536     // fnstsw_ax
 2537     emit_opcode( masm, 0xDF);
 2538     emit_opcode( masm, 0xE0);
 2539     // test ax,0x0400
 2540     emit_opcode( masm, 0x66 );   // operand-size prefix for 16-bit immediate
 2541     emit_opcode( masm, 0xA9 );
 2542     emit_d16   ( masm, 0x0400 );
 2543     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2544     // // test rax,0x0400
 2545     // emit_opcode( masm, 0xA9 );
 2546     // emit_d32   ( masm, 0x00000400 );
 2547     //
 2548     // jz exit (no unordered comparison)
          // The displacement of 2 skips only the 2-byte "mov ah,1"; the sahf
          // below is executed on both paths.
 2549     emit_opcode( masm, 0x74 );
 2550     emit_d8    ( masm, 0x02 );
 2551     // mov ah,1 - treat as LT case (set carry flag)
 2552     emit_opcode( masm, 0xB4 );
 2553     emit_d8    ( masm, 0x01 );
 2554     // sahf
 2555     emit_opcode( masm, 0x9E);
 2556   %}
 2557 
 2558   enc_class cmpF_P6_fixup() %{
 2559     // Fixup the integer flags in case comparison involved a NaN
 2560     //
 2561     // JNP exit (no unordered comparison, P-flag is set by NaN)
          // The displacement of 3 skips MOV AH,1 (2 bytes) and SAHF (1 byte)
          // and lands on the trailing NOP.
 2562     emit_opcode( masm, 0x7B );
 2563     emit_d8    ( masm, 0x03 );
 2564     // MOV AH,1 - treat as LT case (set carry flag)
 2565     emit_opcode( masm, 0xB4 );
 2566     emit_d8    ( masm, 0x01 );
 2567     // SAHF
 2568     emit_opcode( masm, 0x9E);
 2569     // NOP     // target for branch to avoid branch to branch
 2570     emit_opcode( masm, 0x90);
 2571   %}
 2572 
 2573 //     fnstsw_ax();
 2574 //     sahf();
 2575 //     movl(dst, nan_result);
 2576 //     jcc(Assembler::parity, exit);
 2577 //     movl(dst, less_result);
 2578 //     jcc(Assembler::below, exit);
 2579 //     movl(dst, equal_result);
 2580 //     jcc(Assembler::equal, exit);
 2581 //     movl(dst, greater_result);
 2582 
 2583 // less_result     = -1;
 2584 // greater_result  =  1;
 2585 // equal_result    =  0;
 2586 // nan_result      = -1;
 2587 
 2588   enc_class CmpF_Result(rRegI dst) %{
          // Turns an FPU compare into an int in dst: -1 for unordered (NaN)
          // or less, 0 for equal, 1 for greater.
          // Each MOV dst,imm32 below is 5 bytes and each short Jcc 2 bytes;
          // the hard-coded displacements depend on those sizes.
          const int mov_dst_imm32 = 0xB8 + $dst$$reg;
 2589     // fnstsw_ax();
 2590     emit_opcode(masm, 0xDF);
 2591     emit_opcode(masm, 0xE0);
 2592     // sahf
 2593     emit_opcode(masm, 0x9E);
 2594     // movl(dst, nan_result);
 2595     emit_opcode(masm, mov_dst_imm32);
 2596     emit_d32(masm, -1);
 2597     // jcc(Assembler::parity, exit);   skips 5+2+5+2+5 bytes
 2598     emit_opcode(masm, 0x7A);
 2599     emit_d8(masm, 0x13);
 2600     // movl(dst, less_result);
 2601     emit_opcode(masm, mov_dst_imm32);
 2602     emit_d32(masm, -1);
 2603     // jcc(Assembler::below, exit);    skips 5+2+5 bytes
 2604     emit_opcode(masm, 0x72);
 2605     emit_d8(masm, 0x0C);
 2606     // movl(dst, equal_result);
 2607     emit_opcode(masm, mov_dst_imm32);
 2608     emit_d32(masm, 0);
 2609     // jcc(Assembler::equal, exit);    skips 5 bytes
 2610     emit_opcode(masm, 0x74);
 2611     emit_d8(masm, 0x05);
 2612     // movl(dst, greater_result);
 2613     emit_opcode(masm, mov_dst_imm32);
 2614     emit_d32(masm, 1);
 2615   %}
 2616 
 2617 
 2618   // Compare the longs and set flags
 2619   // BROKEN!  Do Not use as-is
 2620   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
          // Compares the high halves and, only if they are equal, the low
          // halves as well.
          const int src1_lo = $src1$$reg;
          const int src2_lo = $src2$$reg;
 2621     // CMP    $src1.hi,$src2.hi
 2622     emit_opcode(masm, 0x3B);
 2623     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC(src1_lo), HIGH_FROM_LOW_ENC(src2_lo));
 2624     // JNE,s  done  (skips the 2-byte CMP below)
 2625     emit_opcode(masm, 0x75);
 2626     emit_d8(masm, 2);
 2627     // CMP    $src1.lo,$src2.lo
 2628     emit_opcode(masm, 0x3B);
 2629     emit_rm(masm, 0x3, src1_lo, src2_lo);
 2630 // done:
 2631   %}
 2632 
 2633   enc_class convert_int_long( regL dst, rRegI src ) %{
          // Sign-extends src into the dst register pair: both halves get a
          // copy of src, then the high half is shifted right by 31.
 2635     const int dst_lo  = $dst$$reg;
          const int dst_hi  = HIGH_FROM_LOW_ENC(dst_lo);
 2636     const int src_enc = $src$$reg;
 2634     // mov $dst.lo,$src
 2637     encode_Copy(masm, dst_lo, src_enc);
 2638     // mov $dst.hi,$src
 2639     encode_Copy(masm, dst_hi, src_enc);
 2640     // sar $dst.hi,31
 2641     emit_opcode(masm, 0xC1);
 2642     emit_rm(masm, 0x3, 7, dst_hi);
 2643     emit_d8(masm, 0x1F);
 2644   %}
 2645 
 2646   enc_class convert_long_double( eRegL src ) %{
          // Pushes the 64-bit src pair on the CPU stack, loads it onto the
          // FPU stack as an integer and releases the 8 bytes again.
          const int src_lo = $src$$reg;
          const int src_hi = HIGH_FROM_LOW_ENC(src_lo);
 2647     // push $src.hi
 2648     emit_opcode(masm, 0x50 + src_hi);
 2649     // push $src.lo
 2650     emit_opcode(masm, 0x50 + src_lo);
 2651     // fild 64-bits at [SP]
 2652     emit_opcode(masm, 0xdf);
 2653     emit_d8(masm, 0x6C);
 2654     emit_d8(masm, 0x24);
 2655     emit_d8(masm, 0x00);
 2656     // pop stack: add  SP, #8
 2657     emit_opcode(masm, 0x83);
 2658     emit_rm(masm, 0x3, 0x00, ESP_enc);
 2659     emit_d8(masm, 0x8);
 2660   %}
 2661 
 2662   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
          // Widening multiply by src1 into EDX:EAX, then an arithmetic right
          // shift of dst by cnt-32. No shift is emitted when cnt is exactly 32.
          const int src1_enc    = $src1$$reg;
          const int extra_shift = ((int)$cnt$$constant) - 32;
 2663     // IMUL   EDX:EAX,$src1
 2664     emit_opcode(masm, 0xF7);
 2665     emit_rm(masm, 0x3, 0x5, src1_enc);
 2666     // SAR    EDX,$cnt-32
 2668     if (extra_shift > 0) {
 2669       emit_opcode(masm, 0xC1);
 2670       emit_rm(masm, 0x3, 7, $dst$$reg);
 2671       emit_d8(masm, extra_shift);
 2672     }
 2673   %}
 2674 
 2675   // this version doesn't have add sp, 8
 2676   enc_class convert_long_double2( eRegL src ) %{
          // Pushes the 64-bit src pair on the CPU stack and loads it onto the
          // FPU stack as an integer. The 8 pushed bytes are left on the stack.
          const int src_lo = $src$$reg;
          const int src_hi = HIGH_FROM_LOW_ENC(src_lo);
 2677     // push $src.hi
 2678     emit_opcode(masm, 0x50 + src_hi);
 2679     // push $src.lo
 2680     emit_opcode(masm, 0x50 + src_lo);
 2681     // fild 64-bits at [SP]
 2682     emit_opcode(masm, 0xdf);
 2683     emit_d8(masm, 0x6C);
 2684     emit_d8(masm, 0x24);
 2685     emit_d8(masm, 0x00);
 2686   %}
 2687 
 2688   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2689     // Basic idea: long = (long)int * (long)int
          const int src_enc = $src$$reg;
 2690     // IMUL EDX:EAX, src  (F7 /5)
 2691     emit_opcode(masm, 0xF7);
 2692     emit_rm(masm, 0x3, 0x5, src_enc);
 2693   %}
 2694 
 2695   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2696     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
          const int src_enc = $src$$reg;
 2697     // MUL EDX:EAX, src  (F7 /4)
 2698     emit_opcode(masm, 0xF7);
 2699     emit_rm(masm, 0x3, 0x4, src_enc);
 2700   %}
 2701 
 2702   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2703     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2704     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
          // The inline comments name dst as EDX:EAX, its fixed register pair.
          const int dst_lo  = $dst$$reg;
          const int dst_hi  = HIGH_FROM_LOW_ENC(dst_lo);
          const int src_lo  = $src$$reg;
          const int src_hi  = HIGH_FROM_LOW_ENC(src_lo);
          const int tmp_enc = $tmp$$reg;
 2705     // MOV    $tmp,$src.lo
 2706     encode_Copy(masm, tmp_enc, src_lo);
 2707     // IMUL   $tmp,EDX
 2708     emit_opcode(masm, 0x0F);
 2709     emit_opcode(masm, 0xAF);
 2710     emit_rm(masm, 0x3, tmp_enc, dst_hi);
 2711     // MOV    EDX,$src.hi
 2712     encode_Copy(masm, dst_hi, src_hi);
 2713     // IMUL   EDX,EAX
 2714     emit_opcode(masm, 0x0F);
 2715     emit_opcode(masm, 0xAF);
 2716     emit_rm(masm, 0x3, dst_hi, dst_lo);
 2717     // ADD    $tmp,EDX
 2718     emit_opcode(masm, 0x03);
 2719     emit_rm(masm, 0x3, tmp_enc, dst_hi);
 2720     // MUL   EDX:EAX,$src.lo
 2721     emit_opcode(masm, 0xF7);
 2722     emit_rm(masm, 0x3, 0x4, src_lo);
 2723     // ADD    EDX,$tmp
 2724     emit_opcode(masm, 0x03);
 2725     emit_rm(masm, 0x3, dst_hi, tmp_enc);
 2726   %}
 2727 
 2728   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2729     // Basic idea: lo(result) = lo(src * y_lo)
 2730     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
          const int tmp_enc = $tmp$$reg;
          const int con     = (int)$src$$constant;
 2731     // IMUL   $tmp,EDX,$src
 2732     emit_opcode(masm, 0x6B);
 2733     emit_rm(masm, 0x3, tmp_enc, HIGH_FROM_LOW_ENC($dst$$reg));
 2734     emit_d8(masm, con);
 2735     // MOV    EDX,$src
 2736     emit_opcode(masm, 0xB8 + EDX_enc);
 2737     emit_d32(masm, con);
 2738     // MUL   EDX:EAX,EDX
 2739     emit_opcode(masm, 0xF7);
 2740     emit_rm(masm, 0x3, 0x4, EDX_enc);
 2741     // ADD    EDX,$tmp
 2742     emit_opcode(masm, 0x03);
 2743     emit_rm(masm, 0x3, EDX_enc, tmp_enc);
 2744   %}
 2745 
 2746   enc_class long_div( eRegL src1, eRegL src2 ) %{
          // Pushes both 64-bit operands (src1 then src2, hi word before lo
          // word), calls SharedRuntime::ldiv and pops the four words again.
          // PUSH r32 is 0x50 + register encoding, so HIGH_FROM_LOW_ENC is
          // applied to the register encoding (as convert_long_double does),
          // not to the finished opcode byte.
          const int src1_lo = $src1$$reg;
          const int src2_lo = $src2$$reg;
 2747     // PUSH src1.hi
 2748     emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC(src1_lo) );
 2749     // PUSH src1.lo
 2750     emit_opcode(masm, 0x50+src1_lo );
 2751     // PUSH src2.hi
 2752     emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC(src2_lo) );
 2753     // PUSH src2.lo
 2754     emit_opcode(masm, 0x50+src2_lo );
 2755     // CALL directly to the runtime
 2756     __ set_inst_mark();
 2757     emit_opcode(masm,0xE8);       // Call into runtime
          // rel32 is relative to the end of the 4-byte displacement, hence the -4
 2758     emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2759     __ clear_inst_mark();
 2760     __ post_call_nop();
 2761     // Restore stack
 2762     emit_opcode(masm, 0x83); // add  SP, #framesize
 2763     emit_rm(masm, 0x3, 0x00, ESP_enc);
 2764     emit_d8(masm, 4*4);      // four pushed 4-byte words
 2765   %}
 2766 
 2767   enc_class long_mod( eRegL src1, eRegL src2 ) %{
          // Pushes both 64-bit operands (src1 then src2, hi word before lo
          // word), calls SharedRuntime::lrem and pops the four words again.
          // PUSH r32 is 0x50 + register encoding, so HIGH_FROM_LOW_ENC is
          // applied to the register encoding (as convert_long_double does),
          // not to the finished opcode byte.
          const int src1_lo = $src1$$reg;
          const int src2_lo = $src2$$reg;
 2768     // PUSH src1.hi
 2769     emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC(src1_lo) );
 2770     // PUSH src1.lo
 2771     emit_opcode(masm, 0x50+src1_lo );
 2772     // PUSH src2.hi
 2773     emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC(src2_lo) );
 2774     // PUSH src2.lo
 2775     emit_opcode(masm, 0x50+src2_lo );
 2776     // CALL directly to the runtime
 2777     __ set_inst_mark();
 2778     emit_opcode(masm,0xE8);       // Call into runtime
          // rel32 is relative to the end of the 4-byte displacement, hence the -4
 2779     emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2780     __ clear_inst_mark();
 2781     __ post_call_nop();
 2782     // Restore stack
 2783     emit_opcode(masm, 0x83); // add  SP, #framesize
 2784     emit_rm(masm, 0x3, 0x00, ESP_enc);
 2785     emit_d8(masm, 4*4);      // four pushed 4-byte words
 2786   %}
 2787 
 2788   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
          // ORs both halves of src into tmp; the OR leaves the flags set
          // according to whether the 64-bit value is zero.
          const int src_lo  = $src$$reg;
          const int tmp_enc = $tmp$$reg;
 2789     // MOV   $tmp,$src.lo
 2790     emit_opcode(masm, 0x8B);
 2791     emit_rm(masm, 0x3, tmp_enc, src_lo);
 2792     // OR    $tmp,$src.hi
 2793     emit_opcode(masm, 0x0B);
 2794     emit_rm(masm, 0x3, tmp_enc, HIGH_FROM_LOW_ENC(src_lo));
 2795   %}
 2796 
 2797   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
          // Compares the low halves and, only if they are equal, the high
          // halves as well.
          const int src1_lo = $src1$$reg;
          const int src2_lo = $src2$$reg;
 2798     // CMP    $src1.lo,$src2.lo
 2799     emit_opcode(masm, 0x3B);
 2800     emit_rm(masm, 0x3, src1_lo, src2_lo);
 2801     // JNE,s  skip  (skips the 2-byte CMP below)
 2802     emit_cc(masm, 0x70, 0x5);
 2803     emit_d8(masm, 2);
 2804     // CMP    $src1.hi,$src2.hi
 2805     emit_opcode(masm, 0x3B);
 2806     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC(src1_lo), HIGH_FROM_LOW_ENC(src2_lo));
 2807   %}
 2808 
 2809   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
          // 64-bit compare as CMP on the low halves followed by SBB on the
          // high halves, using tmp so that src1.hi is not modified.
          const int src1_lo = $src1$$reg;
          const int src2_lo = $src2$$reg;
          const int tmp_enc = $tmp$$reg;
 2810     // CMP    $src1.lo,$src2.lo   ! Long compare; set flags for low bits
 2811     emit_opcode(masm, 0x3B);
 2812     emit_rm(masm, 0x3, src1_lo, src2_lo);
 2813     // MOV    $tmp,$src1.hi
 2814     emit_opcode(masm, 0x8B);
 2815     emit_rm(masm, 0x3, tmp_enc, HIGH_FROM_LOW_ENC(src1_lo));
 2816     // SBB    $tmp,$src2.hi       ! Compute flags for long compare
 2817     emit_opcode(masm, 0x1B);
 2818     emit_rm(masm, 0x3, tmp_enc, HIGH_FROM_LOW_ENC(src2_lo));
 2819   %}
 2820 
 2821   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
          // Compares zero against the 64-bit src: tmp is cleared, then CMP on
          // the low half and SBB on the high half set the flags.
          const int src_lo  = $src$$reg;
          const int tmp_enc = $tmp$$reg;
 2822     // XOR    $tmp,$tmp
 2823     emit_opcode(masm, 0x33);
 2824     emit_rm(masm, 0x3, tmp_enc, tmp_enc);
 2825     // CMP    $tmp,$src.lo
 2826     emit_opcode(masm, 0x3B);
 2827     emit_rm(masm, 0x3, tmp_enc, src_lo);
 2828     // SBB    $tmp,$src.hi
 2829     emit_opcode(masm, 0x1B);
 2830     emit_rm(masm, 0x3, tmp_enc, HIGH_FROM_LOW_ENC(src_lo));
 2831   %}
 2832 
 2833  // Sniff, sniff... smells like Gnu Superoptimizer
 2834   enc_class neg_long( eRegL dst ) %{
          // 64-bit negate: NEG both halves, then subtract the borrow that
          // NEG lo produced from the high half.
          const int lo = $dst$$reg;
          const int hi = HIGH_FROM_LOW_ENC(lo);
 2835     // NEG hi
          emit_opcode(masm, 0xF7);
 2836     emit_rm(masm, 0x3, 0x3, hi);
 2837     // NEG lo
          emit_opcode(masm, 0xF7);
 2838     emit_rm(masm, 0x3, 0x3, lo);
 2839     // SBB hi,0
          emit_opcode(masm, 0x83);
 2840     emit_rm(masm, 0x3, 0x3, hi);
 2841     emit_d8(masm, 0);
 2842   %}
 2843 
 2844   enc_class enc_pop_rdx() %{
          // Single-byte 0x5A: POP into EDX (the register the name calls rdx).
 2845     emit_opcode(masm,0x5A);
 2846   %}
 2847 
 2848   enc_class enc_rethrow() %{
          // Emits a near JMP (E9 rel32) to OptoRuntime::rethrow_stub(), with
          // a runtime-call relocation on the displacement.
 2849     __ set_inst_mark();
 2850     emit_opcode(masm, 0xE9);        // jmp    entry
          // rel32 is relative to the end of the 4-byte displacement, hence the -4
 2851     emit_d32_reloc(masm, (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4,
 2852                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2853     __ clear_inst_mark();
 2854     __ post_call_nop();
 2855   %}
 2856 
 2857 
 2858   // Convert a double to an int.  Java semantics require complex
 2859   // manipulations in the corner cases.  So we set the rounding mode to
 2860   // 'zero', store the darned double down as an int, and restore the
 2861   // previous rounding mode.  Corner cases are detected by testing for the
 2862   // invalid value (0x80000000) and are fixed up by calling the d2i wrapper.
 2863   enc_class DPR2I_encoding( regDPR src ) %{
          // Fast path: FISTP the value in FPR0 to a stack word under the
          // truncating control word and pop it into EAX. If EAX holds
          // 0x80000000, src is reloaded and d2i_wrapper is called instead.
 2864     // Flip to round-to-zero mode.  We attempted to allow invalid-op
 2865     // exceptions here, so that a NAN or other corner-case value will
 2866     // throw an exception (but normal values get converted at full speed).
 2867     // However, I2C adapters and other float-stack manglers leave pending
 2868     // invalid-op exceptions hanging.  We would have to clear them before
 2869     // enabling them and that is more expensive than just testing for the
 2870     // invalid value Intel stores down in the corner cases.
 2871     emit_opcode(masm,0xD9);            // FLDCW  trunc
 2872     emit_opcode(masm,0x2D);
 2873     emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2874     // Allocate a word
 2875     emit_opcode(masm,0x83);            // SUB ESP,4
 2876     emit_opcode(masm,0xEC);
 2877     emit_d8(masm,0x04);
 2878     // Encoding assumes a double has been pushed into FPR0.
 2879     // Store down the double as an int, popping the FPU stack
 2880     emit_opcode(masm,0xDB);            // FISTP [ESP]
 2881     emit_opcode(masm,0x1C);
 2882     emit_d8(masm,0x24);
 2883     // Restore the rounding mode; mask the exception
 2884     emit_opcode(masm,0xD9);            // FLDCW   std/24-bit mode
 2885     emit_opcode(masm,0x2D);
 2886     emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
 2887         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2888         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2889
 2890     // Load the converted int; adjust CPU stack
 2891     emit_opcode(masm,0x58);       // POP EAX
 2892     emit_opcode(masm,0x3D);       // CMP EAX,imm
 2893     emit_d32   (masm,0x80000000); //         0x80000000
 2894     emit_opcode(masm,0x75);       // JNE around_slow_call
          // 7 = 2-byte FLD + 5-byte CALL emitted below
 2895     emit_d8    (masm,0x07);       // Size of slow_call
 2896     // Push src onto stack slow-path
 2897     emit_opcode(masm,0xD9 );      // FLD     ST(i)
 2898     emit_d8    (masm,0xC0-1+$src$$reg );
 2899     // CALL directly to the runtime
 2900     __ set_inst_mark();
 2901     emit_opcode(masm,0xE8);       // Call into runtime
 2902     emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2903     __ clear_inst_mark();
 2904     __ post_call_nop();
 2905     // Carry on here...
 2906   %}
 2907 
 2908   enc_class DPR2L_encoding( regDPR src ) %{
          // Fast path: FISTP the value in FPR0 to an 8-byte stack slot under
          // the truncating control word and pop it into EDX:EAX. If the
          // result is 0x80000000:00000000, src is reloaded and d2l_wrapper
          // is called instead.
 2909     emit_opcode(masm,0xD9);            // FLDCW  trunc
 2910     emit_opcode(masm,0x2D);
 2911     emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2912     // Allocate two words
 2913     emit_opcode(masm,0x83);            // SUB ESP,8
 2914     emit_opcode(masm,0xEC);
 2915     emit_d8(masm,0x08);
 2916     // Encoding assumes a double has been pushed into FPR0.
 2917     // Store down the double as a long, popping the FPU stack
 2918     emit_opcode(masm,0xDF);            // FISTP [ESP]
 2919     emit_opcode(masm,0x3C);
 2920     emit_d8(masm,0x24);
 2921     // Restore the rounding mode; mask the exception
 2922     emit_opcode(masm,0xD9);            // FLDCW   std/24-bit mode
 2923     emit_opcode(masm,0x2D);
 2924     emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
 2925         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2926         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2927
 2928     // Load the converted long; adjust CPU stack
 2929     emit_opcode(masm,0x58);       // POP EAX
 2930     emit_opcode(masm,0x5A);       // POP EDX
 2931     emit_opcode(masm,0x81);       // CMP EDX,imm
 2932     emit_d8    (masm,0xFA);       // rdx
 2933     emit_d32   (masm,0x80000000); //         0x80000000
 2934     emit_opcode(masm,0x75);       // JNE around_slow_call
          // 7 for the slow call plus 4 for the TEST and second JNE below
 2935     emit_d8    (masm,0x07+4);     // Size of slow_call
 2936     emit_opcode(masm,0x85);       // TEST EAX,EAX
 2937     emit_opcode(masm,0xC0);       // 2/rax,/rax,
 2938     emit_opcode(masm,0x75);       // JNE around_slow_call
          // 7 = 2-byte FLD + 5-byte CALL emitted below
 2939     emit_d8    (masm,0x07);       // Size of slow_call
 2940     // Push src onto stack slow-path
 2941     emit_opcode(masm,0xD9 );      // FLD     ST(i)
 2942     emit_d8    (masm,0xC0-1+$src$$reg );
 2943     // CALL directly to the runtime
 2944     __ set_inst_mark();
 2945     emit_opcode(masm,0xE8);       // Call into runtime
 2946     emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2947     __ clear_inst_mark();
 2948     __ post_call_nop();
 2949     // Carry on here...
 2950   %}
 2951 
 2952   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2953     // Operand was loaded from memory into fp ST (stack top)
          const int src1_enc = $src1$$reg;
 2954     // FMUL   ST,$src  /* D8 C8+i */
 2955     emit_opcode(masm, 0xD8);
 2956     emit_opcode(masm, 0xC8 + src1_enc);
 2957   %}
 2958 
 2959   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
          const int src2_enc = $src2$$reg;
 2960     // FADD   ST,src2  /* D8 C0+i */  (non-popping form)
 2961     emit_opcode(masm, 0xD8);
 2962     emit_opcode(masm, 0xC0 + src2_enc);
 2963     // could use FADDP  src2,fpST  /* DE C0+i */
 2964   %}
 2965 
 2966   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
          const int src2_enc = $src2$$reg;
 2967     // FADDP  src2,ST  /* DE C0+i */
 2968     emit_opcode(masm, 0xDE);
 2969     emit_opcode(masm, 0xC0 + src2_enc);
 2970   %}
 2971 
 2972   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 2973     // Operand has been loaded into fp ST (stack top)
          const int src1_enc = $src1$$reg;
          const int src2_enc = $src2$$reg;
 2974     // FSUB   ST,$src1  /* D8 E0+i */
 2975     emit_opcode(masm, 0xD8);
 2976     emit_opcode(masm, 0xE0 + src1_enc);
 2977
 2978     // FDIV   ST,$src2  /* D8 F0+i */
 2979     emit_opcode(masm, 0xD8);
 2980     emit_opcode(masm, 0xF0 + src2_enc);
 2981   %}
 2982 
 2983   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 2984     // Operand was loaded from memory into fp ST (stack top)
          const int src1_enc = $src1$$reg;
          const int src2_enc = $src2$$reg;
 2985     // FADD   ST,$src1  /* D8 C0+i */
 2986     emit_opcode(masm, 0xD8);
 2987     emit_opcode(masm, 0xC0 + src1_enc);
 2988
 2989     // FMUL   ST,$src2  /* D8 C8+i */
 2990     emit_opcode(masm, 0xD8);
 2991     emit_opcode(masm, 0xC8 + src2_enc);
 2992   %}
 2993 
 2994 
 2995   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 2996     // Operand was loaded from memory into fp ST (stack top)
          const int src1_enc = $src1$$reg;
          const int src2_enc = $src2$$reg;
 2997     // FADD   ST,$src1  /* D8 C0+i */
 2998     emit_opcode(masm, 0xD8);
 2999     emit_opcode(masm, 0xC0 + src1_enc);
 3000
 3001     // FMULP  $src2,ST  /* DE C8+i */
 3002     emit_opcode(masm, 0xDE);
 3003     emit_opcode(masm, 0xC8 + src2_enc);
 3004   %}
 3005 
 3006   // Atomically load the volatile long
 3007   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
          // Moves the 64-bit value through the FPU: opcode DF with reg field
          // 5 reads it from mem, opcode DF with reg field 7 writes it to the
          // dst stack slot.
 3010     const int mem_base  = $mem$$base;
 3011     const int mem_index = $mem$$index;
 3012     const int mem_scale = $mem$$scale;
 3013     const int mem_disp  = $mem$$disp;
 3014     relocInfo::relocType mem_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3008     emit_opcode(masm, 0xDF);
 3015     encode_RegMem(masm, 0x05, mem_base, mem_index, mem_scale, mem_disp, mem_reloc);
 3016     store_to_stackslot(masm, 0x0DF, 0x07, $dst$$disp);
 3017   %}
 3018 
 3019   // Volatile Store Long.  Must be atomic, so move it into
 3020   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3021   // target address before the store (for null-ptr checks)
 3022   // so the memory operand is used twice in the encoding.
 3023   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
          // Moves the 64-bit value through the FPU: opcode DF with reg field
          // 5 reads it from the src stack slot, opcode DF with reg field 7
          // writes it to mem.
 3028     const int mem_base  = $mem$$base;
 3029     const int mem_index = $mem$$index;
 3030     const int mem_scale = $mem$$scale;
 3031     const int mem_disp  = $mem$$disp;
 3032     relocInfo::relocType mem_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3024     store_to_stackslot(masm, 0x0DF, 0x05, $src$$disp);
 3025     __ set_inst_mark();            // Mark start of FIST in case $mem has an oop
 3026     emit_opcode(masm, 0xDF);
 3033     encode_RegMem(masm, 0x07, mem_base, mem_index, mem_scale, mem_disp, mem_reloc);
 3034     __ clear_inst_mark();
 3035   %}
 3036 
 3037 %}
 3038 
 3039 
 3040 //----------FRAME--------------------------------------------------------------
 3041 // Definition of frame structure and management information.
 3042 //
 3043 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3044 //                             |   (to get allocators register number
 3045 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3046 //  r   CALLER     |        |
 3047 //  o     |        +--------+      pad to even-align allocators stack-slot
 3048 //  w     V        |  pad0  |        numbers; owned by CALLER
 3049 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3050 //  h     ^        |   in   |  5
 3051 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3052 //  |     |        |        |  3
 3053 //  |     |        +--------+
 3054 //  V     |        | old out|      Empty on Intel, window on Sparc
 3055 //        |    old |preserve|      Must be even aligned.
 3056 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3057 //        |        |   in   |  3   area for Intel ret address
 3058 //     Owned by    |preserve|      Empty on Sparc.
 3059 //       SELF      +--------+
 3060 //        |        |  pad2  |  2   pad to align old SP
 3061 //        |        +--------+  1
 3062 //        |        | locks  |  0
 3063 //        |        +--------+----> OptoReg::stack0(), even aligned
 3064 //        |        |  pad1  | 11   pad to align new SP
 3065 //        |        +--------+
 3066 //        |        |        | 10
 3067 //        |        | spills |  9   spills
 3068 //        V        |        |  8   (pad0 slot for callee)
 3069 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3070 //        ^        |  out   |  7
 3071 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3072 //     Owned by    +--------+
 3073 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3074 //        |    new |preserve|      Must be even-aligned.
 3075 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3076 //        |        |        |
 3077 //
 3078 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3079 //         known from SELF's arguments and the Java calling convention.
 3080 //         Region 6-7 is determined per call site.
 3081 // Note 2: If the calling convention leaves holes in the incoming argument
 3082 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3083 //         are owned by the CALLEE.  Holes should not be necessary in the
 3084 //         incoming area, as the Java calling convention is completely under
 3085 //         the control of the AD file.  Doubles can be sorted and packed to
 3086 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3087 //         varargs C calling conventions.
 3088 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3089 //         even aligned with pad0 as needed.
 3090 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3091 //         region 6-11 is even aligned; it may be padded out more so that
 3092 //         the region from SP to FP meets the minimum stack alignment.
 3093 
 3094 frame %{
 3095   // This register defines part of the calling convention
 3096   // between compiled code and the interpreter.
 3097   inline_cache_reg(EAX);                // Inline Cache Register
 3098 
 3099   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3100   cisc_spilling_operand_name(indOffset32);
 3101 
 3102   // Number of stack slots consumed by locking an object
 3103   sync_stack_slots(1);
 3104 
 3105   // Compiled code's Frame Pointer
 3106   frame_pointer(ESP);
 3107   // Interpreter stores its frame pointer in a register which is
 3108   // stored to the stack by I2CAdaptors.
 3109   // I2CAdaptors convert from interpreted java to compiled java.
 3110   interpreter_frame_pointer(EBP);
 3111 
 3112   // Stack alignment requirement
 3113   // Alignment size in bytes, taken from StackAlignmentInBytes (e.g. 128-bit -> 16 bytes)
 3114   stack_alignment(StackAlignmentInBytes);
 3115 
 3116   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3117   // for calls to C.  Supports the var-args backing area for register parms.
 3118   varargs_C_out_slots_killed(0);
 3119 
 3120   // The after-PROLOG location of the return address.  Location of
 3121   // return address specifies a type (REG or STACK) and a number
 3122   // representing the register number (i.e. - use a register name) or
 3123   // stack slot.
 3124   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3125   // Otherwise, it is above the locks and verification slot and alignment word
 3126   return_addr(STACK - 1 +
 3127               align_up((Compile::current()->in_preserve_stack_slots() +
 3128                         Compile::current()->fixed_slots()),
 3129                        stack_alignment_in_slots()));
 3130 
 3131   // Location of C & interpreter return values, returned as OptoRegPair(hi, lo) selected by ideal_reg
 3132   c_return_value %{
 3133     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3134     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3135     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3136 
 3137     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3138     // that C functions return float and double results in XMM0.
 3139     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3140       return OptoRegPair(XMM0b_num,XMM0_num);
 3141     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3142       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3143 
 3144     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);  // otherwise use the tables, indexed by ideal_reg
 3145   %}
 3146 
 3147   // Location of return values; unlike c_return_value, a float result already uses XMM0 when UseSSE>=1
 3148   return_value %{
 3149     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3150     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3151     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3152     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3153       return OptoRegPair(XMM0b_num,XMM0_num);
 3154     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3155       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3156     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);  // otherwise use the tables, indexed by ideal_reg
 3157   %}
 3158 
 3159 %}
 3160 
 3161 //----------ATTRIBUTES---------------------------------------------------------
 3162 //----------Operand Attributes-------------------------------------------------
 3163 op_attrib op_cost(0);        // Required cost attribute
 3164 
 3165 //----------Instruction Attributes---------------------------------------------
 3166 ins_attrib ins_cost(100);       // Required cost attribute
 3167 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3168 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3169                                 // non-matching short branch variant of some
 3170                                 // long branch?
 3171 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3172                                 // specifies the alignment that some part of the instruction (not
 3173                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3174                                 // function must be provided for the instruction
 3175 
 3176 //----------OPERANDS-----------------------------------------------------------
 3177 // Operand definitions must precede instruction definitions for correct parsing
 3178 // in the ADLC because operands constitute user defined types which are used in
 3179 // instruction definitions.
 3180 
 3181 //----------Simple Operands----------------------------------------------------
 3182 // Immediate Operands
 3183 // Integer Immediate: matches any ConI
 3184 operand immI() %{
 3185   match(ConI);
 3186 
 3187   op_cost(10);
 3188   format %{ %}
 3189   interface(CONST_INTER);
 3190 %}
 3191 
 3192 // Constant for test vs zero: ConI whose value is 0
 3193 operand immI_0() %{
 3194   predicate(n->get_int() == 0);
 3195   match(ConI);
 3196 
 3197   op_cost(0);
 3198   format %{ %}
 3199   interface(CONST_INTER);
 3200 %}
 3201 
 3202 // Constant for increment: ConI whose value is 1
 3203 operand immI_1() %{
 3204   predicate(n->get_int() == 1);
 3205   match(ConI);
 3206 
 3207   op_cost(0);
 3208   format %{ %}
 3209   interface(CONST_INTER);
 3210 %}
 3211 
 3212 // Constant for decrement: ConI whose value is -1
 3213 operand immI_M1() %{
 3214   predicate(n->get_int() == -1);
 3215   match(ConI);
 3216 
 3217   op_cost(0);
 3218   format %{ %}
 3219   interface(CONST_INTER);
 3220 %}
 3221 
 3222 // Valid scale values for addressing modes: ConI in the range 0..3
 3223 operand immI2() %{
 3224   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3225   match(ConI);
 3226 
 3227   format %{ %}
 3228   interface(CONST_INTER);
 3229 %}
 3230 
 3231 operand immI8() %{  // Int immediate in the signed 8-bit range -128..127
 3232   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3233   match(ConI);
 3234 
 3235   op_cost(5);
 3236   format %{ %}
 3237   interface(CONST_INTER);
 3238 %}
 3239 
 3240 operand immU8() %{  // Int immediate in the unsigned 8-bit range 0..255
 3241   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3242   match(ConI);
 3243 
 3244   op_cost(5);
 3245   format %{ %}
 3246   interface(CONST_INTER);
 3247 %}
 3248 
 3249 operand immI16() %{  // Int immediate in the signed 16-bit range -32768..32767
 3250   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3251   match(ConI);
 3252 
 3253   op_cost(10);
 3254   format %{ %}
 3255   interface(CONST_INTER);
 3256 %}
 3257 
 3258 // Int Immediate non-negative: ConI whose value is >= 0
 3259 operand immU31()
 3260 %{
 3261   predicate(n->get_int() >= 0);
 3262   match(ConI);
 3263 
 3264   op_cost(0);
 3265   format %{ %}
 3266   interface(CONST_INTER);
 3267 %}
 3268 
 3269 // Constant for long shifts: ConI whose value is 32
 3270 operand immI_32() %{
 3271   predicate( n->get_int() == 32 );
 3272   match(ConI);
 3273 
 3274   op_cost(0);
 3275   format %{ %}
 3276   interface(CONST_INTER);
 3277 %}
 3278 
 3279 operand immI_1_31() %{  // Int immediate in the range 1..31
 3280   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3281   match(ConI);
 3282 
 3283   op_cost(0);
 3284   format %{ %}
 3285   interface(CONST_INTER);
 3286 %}
 3287 
 3288 operand immI_32_63() %{  // Int immediate in the range 32..63
 3289   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3290   match(ConI);
 3291   op_cost(0);
 3292 
 3293   format %{ %}
 3294   interface(CONST_INTER);
 3295 %}
 3296 
 3297 operand immI_2() %{  // Int immediate equal to 2
 3298   predicate( n->get_int() == 2 );
 3299   match(ConI);
 3300 
 3301   op_cost(0);
 3302   format %{ %}
 3303   interface(CONST_INTER);
 3304 %}
 3305 
 3306 operand immI_3() %{  // Int immediate equal to 3
 3307   predicate( n->get_int() == 3 );
 3308   match(ConI);
 3309 
 3310   op_cost(0);
 3311   format %{ %}
 3312   interface(CONST_INTER);
 3313 %}
 3314 
 3315 operand immI_4()  // Int immediate equal to 4
 3316 %{
 3317   predicate(n->get_int() == 4);
 3318   match(ConI);
 3319 
 3320   op_cost(0);
 3321   format %{ %}
 3322   interface(CONST_INTER);
 3323 %}
 3324 
 3325 operand immI_8()  // Int immediate equal to 8
 3326 %{
 3327   predicate(n->get_int() == 8);
 3328   match(ConI);
 3329 
 3330   op_cost(0);
 3331   format %{ %}
 3332   interface(CONST_INTER);
 3333 %}
 3334 
 3335 // Pointer Immediate: matches any ConP
 3336 operand immP() %{
 3337   match(ConP);
 3338 
 3339   op_cost(10);
 3340   format %{ %}
 3341   interface(CONST_INTER);
 3342 %}
 3343 
 3344 // Null Pointer Immediate: ConP whose get_ptr() is 0
 3345 operand immP0() %{
 3346   predicate( n->get_ptr() == 0 );
 3347   match(ConP);
 3348   op_cost(0);
 3349 
 3350   format %{ %}
 3351   interface(CONST_INTER);
 3352 %}
 3353 
 3354 // Long Immediate: matches any ConL
 3355 operand immL() %{
 3356   match(ConL);
 3357 
 3358   op_cost(20);
 3359   format %{ %}
 3360   interface(CONST_INTER);
 3361 %}
 3362 
 3363 // Long Immediate zero: ConL whose value is 0
 3364 operand immL0() %{
 3365   predicate( n->get_long() == 0L );
 3366   match(ConL);
 3367   op_cost(0);
 3368 
 3369   format %{ %}
 3370   interface(CONST_INTER);
 3371 %}
 3372 
 3373 // Long Immediate minus one: ConL whose value is -1
 3374 operand immL_M1() %{
 3375   predicate( n->get_long() == -1L );
 3376   match(ConL);
 3377   op_cost(0);
 3378 
 3379   format %{ %}
 3380   interface(CONST_INTER);
 3381 %}
 3382 
 3383 // Long immediate from 0 to 127 (inclusive).
 3384 // Used for a shorter form of long mul by 10.
 3385 operand immL_127() %{
 3386   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3387   match(ConL);
 3388   op_cost(0);
 3389 
 3390   format %{ %}
 3391   interface(CONST_INTER);
 3392 %}
 3393 
 3394 // Long Immediate: low 32-bit mask (exactly 0xFFFFFFFF)
 3395 operand immL_32bits() %{
 3396   predicate(n->get_long() == 0xFFFFFFFFL);
 3397   match(ConL);
 3398   op_cost(0);
 3399 
 3400   format %{ %}
 3401   interface(CONST_INTER);
 3402 %}
 3403 
 3404 // Long Immediate: value that survives a round trip through int (fits in a signed 32-bit int)
 3405 operand immL32() %{
 3406   predicate(n->get_long() == (int)(n->get_long()));
 3407   match(ConL);
 3408   op_cost(20);
 3409 
 3410   format %{ %}
 3411   interface(CONST_INTER);
 3412 %}
 3413 
 3414 // Double Immediate zero (only when UseSSE<=1)
 3415 operand immDPR0() %{
 3416   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3417   // bug that generates code such that NaNs compare equal to 0.0
 3418   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3419   match(ConD);
 3420 
 3421   op_cost(5);
 3422   format %{ %}
 3423   interface(CONST_INTER);
 3424 %}
 3425 
 3426 // Double Immediate one (only when UseSSE<=1)
 3427 operand immDPR1() %{
 3428   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3429   match(ConD);
 3430 
 3431   op_cost(5);
 3432   format %{ %}
 3433   interface(CONST_INTER);
 3434 %}
 3435 
 3436 // Double Immediate: any ConD (only when UseSSE<=1)
 3437 operand immDPR() %{
 3438   predicate(UseSSE<=1);
 3439   match(ConD);
 3440 
 3441   op_cost(5);
 3442   format %{ %}
 3443   interface(CONST_INTER);
 3444 %}
 3445 
 3446 operand immD() %{  // Double Immediate: any ConD (only when UseSSE>=2)
 3447   predicate(UseSSE>=2);
 3448   match(ConD);
 3449 
 3450   op_cost(5);
 3451   format %{ %}
 3452   interface(CONST_INTER);
 3453 %}
 3454 
 3455 // Double Immediate zero (only when UseSSE>=2)
 3456 operand immD0() %{
 3457   // Tests jlong_cast() of the constant against 0 rather than using a floating-point
 3458   // == test (historically a workaround for a VC++ bug where NaNs compared equal to
 3459   // 0.0); presumably a raw-bits comparison so that -0.0 does not match -- confirm.
 3460   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3461   match(ConD);
 3462 
 3463   format %{ %}
 3464   interface(CONST_INTER);
 3465 %}
 3466 
 3467 // Float Immediate zero (only when UseSSE == 0)
 3468 operand immFPR0() %{
 3469   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3470   match(ConF);
 3471 
 3472   op_cost(5);
 3473   format %{ %}
 3474   interface(CONST_INTER);
 3475 %}
 3476 
 3477 // Float Immediate one (only when UseSSE == 0)
 3478 operand immFPR1() %{
 3479   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3480   match(ConF);
 3481 
 3482   op_cost(5);
 3483   format %{ %}
 3484   interface(CONST_INTER);
 3485 %}
 3486 
 3487 // Float Immediate: any ConF (only when UseSSE == 0)
 3488 operand immFPR() %{
 3489   predicate( UseSSE == 0 );
 3490   match(ConF);
 3491 
 3492   op_cost(5);
 3493   format %{ %}
 3494   interface(CONST_INTER);
 3495 %}
 3496 
 3497 // Float Immediate: any ConF (only when UseSSE >= 1)
 3498 operand immF() %{
 3499   predicate(UseSSE >= 1);
 3500   match(ConF);
 3501 
 3502   op_cost(5);
 3503   format %{ %}
 3504   interface(CONST_INTER);
 3505 %}
 3506 
 3507 // Float Immediate zero.  Zero and not -0.0 (tested via jint_cast, only when UseSSE >= 1)
 3508 operand immF0() %{
 3509   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3510   match(ConF);
 3511 
 3512   op_cost(5);
 3513   format %{ %}
 3514   interface(CONST_INTER);
 3515 %}
 3516 
 3517 // Immediates for special shifts (sign extend)
 3518 
 3519 // Int immediate equal to 16
 3520 operand immI_16() %{
 3521   predicate( n->get_int() == 16 );
 3522   match(ConI);
 3523 
 3524   format %{ %}
 3525   interface(CONST_INTER);
 3526 %}
 3527 
 3528 operand immI_24() %{  // Int immediate equal to 24
 3529   predicate( n->get_int() == 24 );
 3530   match(ConI);
 3531 
 3532   format %{ %}
 3533   interface(CONST_INTER);
 3534 %}
 3535 
 3536 // Constant for byte-wide masking: ConI whose value is 255
 3537 operand immI_255() %{
 3538   predicate( n->get_int() == 255 );
 3539   match(ConI);
 3540 
 3541   format %{ %}
 3542   interface(CONST_INTER);
 3543 %}
 3544 
 3545 // Constant for short-wide masking: ConI whose value is 65535
 3546 operand immI_65535() %{
 3547   predicate(n->get_int() == 65535);
 3548   match(ConI);
 3549 
 3550   format %{ %}
 3551   interface(CONST_INTER);
 3552 %}
 3553 
 3554 operand kReg()  // Register operand matching RegVectMask, allocated from class vectmask_reg
 3555 %{
 3556   constraint(ALLOC_IN_RC(vectmask_reg));
 3557   match(RegVectMask);
 3558   format %{%}
 3559   interface(REG_INTER);
 3560 %}
 3561 
 3562 // Register Operands
 3563 // Integer Register: allocated from class int_reg; also matches the narrower int operands listed below
 3564 operand rRegI() %{
 3565   constraint(ALLOC_IN_RC(int_reg));
 3566   match(RegI);
 3567   match(xRegI);
 3568   match(eAXRegI);
 3569   match(eBXRegI);
 3570   match(eCXRegI);
 3571   match(eDXRegI);
 3572   match(eDIRegI);
 3573   match(eSIRegI);
 3574 
 3575   format %{ %}
 3576   interface(REG_INTER);
 3577 %}
 3578 
 3579 // Subset of Integer Register: allocated from class int_x_reg; also matches the EAX/EBX/ECX/EDX operands
 3580 operand xRegI(rRegI reg) %{
 3581   constraint(ALLOC_IN_RC(int_x_reg));
 3582   match(reg);
 3583   match(eAXRegI);
 3584   match(eBXRegI);
 3585   match(eCXRegI);
 3586   match(eDXRegI);
 3587 
 3588   format %{ %}
 3589   interface(REG_INTER);
 3590 %}
 3591 
 3592 // Special Registers: int operand pinned to class eax_reg
 3593 operand eAXRegI(xRegI reg) %{
 3594   constraint(ALLOC_IN_RC(eax_reg));
 3595   match(reg);
 3596   match(rRegI);
 3597 
 3598   format %{ "EAX" %}
 3599   interface(REG_INTER);
 3600 %}
 3601 
 3602 // Special Registers: int operand pinned to class ebx_reg
 3603 operand eBXRegI(xRegI reg) %{
 3604   constraint(ALLOC_IN_RC(ebx_reg));
 3605   match(reg);
 3606   match(rRegI);
 3607 
 3608   format %{ "EBX" %}
 3609   interface(REG_INTER);
 3610 %}
 3611 
 3612 operand eCXRegI(xRegI reg) %{  // Int operand pinned to class ecx_reg
 3613   constraint(ALLOC_IN_RC(ecx_reg));
 3614   match(reg);
 3615   match(rRegI);
 3616 
 3617   format %{ "ECX" %}
 3618   interface(REG_INTER);
 3619 %}
 3620 
 3621 operand eDXRegI(xRegI reg) %{  // Int operand pinned to class edx_reg
 3622   constraint(ALLOC_IN_RC(edx_reg));
 3623   match(reg);
 3624   match(rRegI);
 3625 
 3626   format %{ "EDX" %}
 3627   interface(REG_INTER);
 3628 %}
 3629 
 3630 operand eDIRegI(xRegI reg) %{  // Int operand pinned to class edi_reg
 3631   constraint(ALLOC_IN_RC(edi_reg));
 3632   match(reg);
 3633   match(rRegI);
 3634 
 3635   format %{ "EDI" %}
 3636   interface(REG_INTER);
 3637 %}
 3638 
 3639 operand nadxRegI() %{  // Int operand allocated from class nadx_reg; matches the EBX/ECX/ESI/EDI operands but not EAX/EDX
 3640   constraint(ALLOC_IN_RC(nadx_reg));
 3641   match(RegI);
 3642   match(eBXRegI);
 3643   match(eCXRegI);
 3644   match(eSIRegI);
 3645   match(eDIRegI);
 3646 
 3647   format %{ %}
 3648   interface(REG_INTER);
 3649 %}
 3650 
 3651 operand ncxRegI() %{  // Int operand allocated from class ncx_reg; matches the EAX/EDX/ESI/EDI operands but not ECX
 3652   constraint(ALLOC_IN_RC(ncx_reg));
 3653   match(RegI);
 3654   match(eAXRegI);
 3655   match(eDXRegI);
 3656   match(eSIRegI);
 3657   match(eDIRegI);
 3658 
 3659   format %{ %}
 3660   interface(REG_INTER);
 3661 %}
 3662 
 3663 // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3664 // Int operand pinned to class esi_reg
 3665 operand eSIRegI(xRegI reg) %{
 3666    constraint(ALLOC_IN_RC(esi_reg));
 3667    match(reg);
 3668    match(rRegI);
 3669 
 3670    format %{ "ESI" %}
 3671    interface(REG_INTER);
 3672 %}
 3673 
 3674 // Pointer Register: allocated from class any_reg
 3675 operand anyRegP() %{
 3676   constraint(ALLOC_IN_RC(any_reg));
 3677   match(RegP);
 3678   match(eAXRegP);
 3679   match(eBXRegP);
 3680   match(eCXRegP);
 3681   match(eDIRegP);
 3682   match(eRegP);
 3683 
 3684   format %{ %}
 3685   interface(REG_INTER);
 3686 %}
 3687 
 3688 operand eRegP() %{  // Pointer operand allocated from class int_reg
 3689   constraint(ALLOC_IN_RC(int_reg));
 3690   match(RegP);
 3691   match(eAXRegP);
 3692   match(eBXRegP);
 3693   match(eCXRegP);
 3694   match(eDIRegP);
 3695 
 3696   format %{ %}
 3697   interface(REG_INTER);
 3698 %}
 3699 
 3700 operand rRegP() %{  // Pointer operand allocated from class int_reg; same constraint and match rules as eRegP
 3701   constraint(ALLOC_IN_RC(int_reg));
 3702   match(RegP);
 3703   match(eAXRegP);
 3704   match(eBXRegP);
 3705   match(eCXRegP);
 3706   match(eDIRegP);
 3707 
 3708   format %{ %}
 3709   interface(REG_INTER);
 3710 %}
 3711 
 3712 // On windows95, EBP is not safe to use for implicit null tests; this pointer operand allocates from int_reg_no_ebp.
 3713 operand eRegP_no_EBP() %{
 3714   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3715   match(RegP);
 3716   match(eAXRegP);
 3717   match(eBXRegP);
 3718   match(eCXRegP);
 3719   match(eDIRegP);
 3720 
 3721   op_cost(100);
 3722   format %{ %}
 3723   interface(REG_INTER);
 3724 %}
 3725 
 3726 operand pRegP() %{  // Pointer operand allocated from class p_reg; matches the EBX/EDX/ESI/EDI pointer operands
 3727   constraint(ALLOC_IN_RC(p_reg));
 3728   match(RegP);
 3729   match(eBXRegP);
 3730   match(eDXRegP);
 3731   match(eSIRegP);
 3732   match(eDIRegP);
 3733 
 3734   format %{ %}
 3735   interface(REG_INTER);
 3736 %}
 3737 
 3738 // Special Registers: pointer operand pinned to class eax_reg
 3739 // Return a pointer value
 3740 operand eAXRegP(eRegP reg) %{
 3741   constraint(ALLOC_IN_RC(eax_reg));
 3742   match(reg);
 3743   format %{ "EAX" %}
 3744   interface(REG_INTER);
 3745 %}
 3746 
 3747 // Used in AtomicAdd: pointer operand pinned to class ebx_reg
 3748 operand eBXRegP(eRegP reg) %{
 3749   constraint(ALLOC_IN_RC(ebx_reg));
 3750   match(reg);
 3751   format %{ "EBX" %}
 3752   interface(REG_INTER);
 3753 %}
 3754 
 3755 // Tail-call (interprocedural jump) to interpreter: pointer operand pinned to class ecx_reg
 3756 operand eCXRegP(eRegP reg) %{
 3757   constraint(ALLOC_IN_RC(ecx_reg));
 3758   match(reg);
 3759   format %{ "ECX" %}
 3760   interface(REG_INTER);
 3761 %}
 3762 
 3763 operand eDXRegP(eRegP reg) %{  // Pointer operand pinned to class edx_reg
 3764   constraint(ALLOC_IN_RC(edx_reg));
 3765   match(reg);
 3766   format %{ "EDX" %}
 3767   interface(REG_INTER);
 3768 %}
 3769 
 3770 operand eSIRegP(eRegP reg) %{  // Pointer operand pinned to class esi_reg
 3771   constraint(ALLOC_IN_RC(esi_reg));
 3772   match(reg);
 3773   format %{ "ESI" %}
 3774   interface(REG_INTER);
 3775 %}
 3776 
 3777 // Used in rep stosw: pointer operand pinned to class edi_reg
 3778 operand eDIRegP(eRegP reg) %{
 3779   constraint(ALLOC_IN_RC(edi_reg));
 3780   match(reg);
 3781   format %{ "EDI" %}
 3782   interface(REG_INTER);
 3783 %}
 3784 
 3785 operand eRegL() %{  // Long register operand allocated from class long_reg
 3786   constraint(ALLOC_IN_RC(long_reg));
 3787   match(RegL);
 3788   match(eADXRegL);
 3789 
 3790   format %{ %}
 3791   interface(REG_INTER);
 3792 %}
 3793 
 3794 operand eADXRegL( eRegL reg ) %{  // Long operand pinned to class eadx_reg (printed as EDX:EAX)
 3795   constraint(ALLOC_IN_RC(eadx_reg));
 3796   match(reg);
 3797 
 3798   format %{ "EDX:EAX" %}
 3799   interface(REG_INTER);
 3800 %}
 3801 
 3802 operand eBCXRegL( eRegL reg ) %{  // Long operand pinned to class ebcx_reg (printed as EBX:ECX)
 3803   constraint(ALLOC_IN_RC(ebcx_reg));
 3804   match(reg);
 3805 
 3806   format %{ "EBX:ECX" %}
 3807   interface(REG_INTER);
 3808 %}
 3809 
 3810 operand eBDPRegL( eRegL reg ) %{  // Long operand pinned to class ebpd_reg (printed as EBP:EDI)
 3811   constraint(ALLOC_IN_RC(ebpd_reg));
 3812   match(reg);
 3813 
 3814   format %{ "EBP:EDI" %}
 3815   interface(REG_INTER);
 3816 %}
 3817 // Special case for integer high multiply: allocated from eadx_reg like eADXRegL, but printed as EAX only
 3818 operand eADXRegL_low_only() %{
 3819   constraint(ALLOC_IN_RC(eadx_reg));
 3820   match(RegL);
 3821 
 3822   format %{ "EAX" %}
 3823   interface(REG_INTER);
 3824 %}
 3825 
 3826 // Flags register, used as output of compare instructions (class int_flags)
 3827 operand rFlagsReg() %{
 3828   constraint(ALLOC_IN_RC(int_flags));
 3829   match(RegFlags);
 3830 
 3831   format %{ "EFLAGS" %}
 3832   interface(REG_INTER);
 3833 %}
 3834 
 3835 // Flags register, used as output of compare instructions (same constraint, match and format as rFlagsReg)
 3836 operand eFlagsReg() %{
 3837   constraint(ALLOC_IN_RC(int_flags));
 3838   match(RegFlags);
 3839 
 3840   format %{ "EFLAGS" %}
 3841   interface(REG_INTER);
 3842 %}
 3843 
 3844 // Flags register, used as output of FLOATING POINT compare instructions (printed as EFLAGS_U)
 3845 operand eFlagsRegU() %{
 3846   constraint(ALLOC_IN_RC(int_flags));
 3847   match(RegFlags);
 3848 
 3849   format %{ "EFLAGS_U" %}
 3850   interface(REG_INTER);
 3851 %}
 3852 
 3853 operand eFlagsRegUCF() %{  // Flags operand printed as EFLAGS_U_CF
 3854   constraint(ALLOC_IN_RC(int_flags));
 3855   match(RegFlags);
 3856   predicate(false);  // predicate is constant false
 3857 
 3858   format %{ "EFLAGS_U_CF" %}
 3859   interface(REG_INTER);
 3860 %}
 3861 
 3862 // Condition Code Register used by long compare (printed as FLAGS_LTGE)
 3863 operand flagsReg_long_LTGE() %{
 3864   constraint(ALLOC_IN_RC(int_flags));
 3865   match(RegFlags);
 3866   format %{ "FLAGS_LTGE" %}
 3867   interface(REG_INTER);
 3868 %}
 3869 operand flagsReg_long_EQNE() %{  // Flags operand printed as FLAGS_EQNE
 3870   constraint(ALLOC_IN_RC(int_flags));
 3871   match(RegFlags);
 3872   format %{ "FLAGS_EQNE" %}
 3873   interface(REG_INTER);
 3874 %}
 3875 operand flagsReg_long_LEGT() %{  // Flags operand printed as FLAGS_LEGT
 3876   constraint(ALLOC_IN_RC(int_flags));
 3877   match(RegFlags);
 3878   format %{ "FLAGS_LEGT" %}
 3879   interface(REG_INTER);
 3880 %}
 3881 
 3882 // Condition Code Register used by unsigned long compare (printed as FLAGS_U_LTGE)
 3883 operand flagsReg_ulong_LTGE() %{
 3884   constraint(ALLOC_IN_RC(int_flags));
 3885   match(RegFlags);
 3886   format %{ "FLAGS_U_LTGE" %}
 3887   interface(REG_INTER);
 3888 %}
 3889 operand flagsReg_ulong_EQNE() %{  // Flags operand printed as FLAGS_U_EQNE
 3890   constraint(ALLOC_IN_RC(int_flags));
 3891   match(RegFlags);
 3892   format %{ "FLAGS_U_EQNE" %}
 3893   interface(REG_INTER);
 3894 %}
 3895 operand flagsReg_ulong_LEGT() %{  // Flags operand printed as FLAGS_U_LEGT
 3896   constraint(ALLOC_IN_RC(int_flags));
 3897   match(RegFlags);
 3898   format %{ "FLAGS_U_LEGT" %}
 3899   interface(REG_INTER);
 3900 %}
 3901 
 3902 // Double register operands allocated from class fp_dbl_reg (only when UseSSE < 2)
 3903 operand regDPR() %{
 3904   predicate( UseSSE < 2 );
 3905   constraint(ALLOC_IN_RC(fp_dbl_reg));
 3906   match(RegD);
 3907   match(regDPR1);
 3908   match(regDPR2);
 3909   format %{ %}
 3910   interface(REG_INTER);
 3911 %}
 3912 
 3913 operand regDPR1(regDPR reg) %{  // Double operand pinned to class fp_dbl_reg0 (printed as FPR1)
 3914   predicate( UseSSE < 2 );
 3915   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 3916   match(reg);
 3917   format %{ "FPR1" %}
 3918   interface(REG_INTER);
 3919 %}
 3920 
 3921 operand regDPR2(regDPR reg) %{  // Double operand pinned to class fp_dbl_reg1 (printed as FPR2)
 3922   predicate( UseSSE < 2 );
 3923   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 3924   match(reg);
 3925   format %{ "FPR2" %}
 3926   interface(REG_INTER);
 3927 %}
 3928 
 3929 operand regnotDPR1(regDPR reg) %{  // Double operand allocated from class fp_dbl_notreg0
 3930   predicate( UseSSE < 2 );
 3931   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 3932   match(reg);
 3933   format %{ %}
 3934   interface(REG_INTER);
 3935 %}
 3936 
 3937 // Float register operands allocated from class fp_flt_reg (only when UseSSE < 2)
 3938 operand regFPR() %{
 3939   predicate( UseSSE < 2 );
 3940   constraint(ALLOC_IN_RC(fp_flt_reg));
 3941   match(RegF);
 3942   match(regFPR1);
 3943   format %{ %}
 3944   interface(REG_INTER);
 3945 %}
 3946 
 3947 // Float register operand pinned to class fp_flt_reg0 (printed as FPR1)
 3948 operand regFPR1(regFPR reg) %{
 3949   predicate( UseSSE < 2 );
 3950   constraint(ALLOC_IN_RC(fp_flt_reg0));
 3951   match(reg);
 3952   format %{ "FPR1" %}
 3953   interface(REG_INTER);
 3954 %}
 3955 
 3956 // XMM Float register operands allocated from class float_reg_legacy (only when UseSSE>=1)
 3957 operand regF() %{
 3958   predicate( UseSSE>=1 );
 3959   constraint(ALLOC_IN_RC(float_reg_legacy));
 3960   match(RegF);
 3961   format %{ %}
 3962   interface(REG_INTER);
 3963 %}
 3964 
 3965 operand legRegF() %{  // Float operand with the same predicate, register class and match rule as regF
 3966   predicate( UseSSE>=1 );
 3967   constraint(ALLOC_IN_RC(float_reg_legacy));
 3968   match(RegF);
 3969   format %{ %}
 3970   interface(REG_INTER);
 3971 %}
 3972 
 3973 // Float register operands allocated from class float_reg_vl (no UseSSE predicate)
 3974 operand vlRegF() %{
 3975    constraint(ALLOC_IN_RC(float_reg_vl));
 3976    match(RegF);
 3977 
 3978    format %{ %}
 3979    interface(REG_INTER);
 3980 %}
 3981 
 3982 // XMM Double register operands allocated from class double_reg_legacy (only when UseSSE>=2)
 3983 operand regD() %{
 3984   predicate( UseSSE>=2 );
 3985   constraint(ALLOC_IN_RC(double_reg_legacy));
 3986   match(RegD);
 3987   format %{ %}
 3988   interface(REG_INTER);
 3989 %}
 3990 
 3991 // Double register operands with the same predicate, register class and match rule as regD
 3992 operand legRegD() %{
 3993   predicate( UseSSE>=2 );
 3994   constraint(ALLOC_IN_RC(double_reg_legacy));
 3995   match(RegD);
 3996   format %{ %}
 3997   interface(REG_INTER);
 3998 %}
 3999 
 4000 operand vlRegD() %{  // Double operand allocated from class double_reg_vl (no UseSSE predicate)
 4001    constraint(ALLOC_IN_RC(double_reg_vl));
 4002    match(RegD);
 4003 
 4004    format %{ %}
 4005    interface(REG_INTER);
 4006 %}
 4007 
 4008 //----------Memory Operands----------------------------------------------------
 4009 // Direct Memory Operand: the address is the pointer constant addr, used as the displacement
 4010 operand direct(immP addr) %{
 4011   match(addr);
 4012 
 4013   format %{ "[$addr]" %}
 4014   interface(MEMORY_INTER) %{
 4015     base(0xFFFFFFFF);  // presumably a no-base marker -- TODO confirm
 4016     index(0x4);        // presumably the no-index encoding (0x4) -- TODO confirm
 4017     scale(0x0);
 4018     disp($addr);
 4019   %}
 4020 %}
 4021 
 4022 // Indirect Memory Operand: address held in pointer register reg, zero displacement
 4023 operand indirect(eRegP reg) %{
 4024   constraint(ALLOC_IN_RC(int_reg));
 4025   match(reg);
 4026 
 4027   format %{ "[$reg]" %}
 4028   interface(MEMORY_INTER) %{
 4029     base($reg);
 4030     index(0x4);  // presumably the no-index encoding (0x4) -- TODO confirm
 4031     scale(0x0);
 4032     disp(0x0);
 4033   %}
 4034 %}
 4035 
 4036 // Indirect Memory Plus Short Offset Operand: base register plus an immI8 displacement
 4037 operand indOffset8(eRegP reg, immI8 off) %{
 4038   match(AddP reg off);
 4039 
 4040   format %{ "[$reg + $off]" %}
 4041   interface(MEMORY_INTER) %{
 4042     base($reg);
 4043     index(0x4);
 4044     scale(0x0);
 4045     disp($off);
 4046   %}
 4047 %}
 4048 
 4049 // Indirect Memory Plus Long Offset Operand: base register plus an immI displacement
 4050 operand indOffset32(eRegP reg, immI off) %{
 4051   match(AddP reg off);
 4052 
 4053   format %{ "[$reg + $off]" %}
 4054   interface(MEMORY_INTER) %{
 4055     base($reg);
 4056     index(0x4);
 4057     scale(0x0);
 4058     disp($off);
 4059   %}
 4060 %}
 4061 
 4062 // Indirect Memory Plus Long Offset Operand: pointer constant off as displacement, int register reg as base
 4063 operand indOffset32X(rRegI reg, immP off) %{
 4064   match(AddP off reg);
 4065 
 4066   format %{ "[$reg + $off]" %}
 4067   interface(MEMORY_INTER) %{
 4068     base($reg);
 4069     index(0x4);
 4070     scale(0x0);
 4071     disp($off);
 4072   %}
 4073 %}
 4074 
 4075 // Indirect Memory Plus Index Register Plus Offset Operand (index is unscaled: scale 0)
 4076 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4077   match(AddP (AddP reg ireg) off);
 4078 
 4079   op_cost(10);
 4080   format %{"[$reg + $off + $ireg]" %}
 4081   interface(MEMORY_INTER) %{
 4082     base($reg);
 4083     index($ireg);
 4084     scale(0x0);
 4085     disp($off);
 4086   %}
 4087 %}
 4088 
 4089 // Indirect Memory Plus Index Register Operand (no offset; index is unscaled: scale 0)
 4090 operand indIndex(eRegP reg, rRegI ireg) %{
 4091   match(AddP reg ireg);
 4092 
 4093   op_cost(10);
 4094   format %{"[$reg + $ireg]" %}
 4095   interface(MEMORY_INTER) %{
 4096     base($reg);
 4097     index($ireg);
 4098     scale(0x0);
 4099     disp(0x0);
 4100   %}
 4101 %}
 4102 
 4103 // // -------------------------------------------------------------------------
 4104 // // 486 architecture doesn't support "scale * index + offset" without a base
 4105 // // -------------------------------------------------------------------------
 4106 // // Scaled Memory Operands
 4107 // // Indirect Memory Times Scale Plus Offset Operand
 4108 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4109 //   match(AddP off (LShiftI ireg scale));
 4110 //
 4111 //   op_cost(10);
 4112 //   format %{"[$off + $ireg << $scale]" %}
 4113 //   interface(MEMORY_INTER) %{
 4114 //     base(0x4);
 4115 //     index($ireg);
 4116 //     scale($scale);
 4117 //     disp($off);
 4118 //   %}
 4119 // %}
 4120 
 4121 // Indirect Memory Times Scale Plus Index Register: base register plus (ireg << scale), zero displacement
 4122 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4123   match(AddP reg (LShiftI ireg scale));
 4124 
 4125   op_cost(10);
 4126   format %{"[$reg + $ireg << $scale]" %}
 4127   interface(MEMORY_INTER) %{
 4128     base($reg);
 4129     index($ireg);
 4130     scale($scale);
 4131     disp(0x0);
 4132   %}
 4133 %}
 4134 
 4135 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand: base + (ireg << scale) + off
 4136 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4137   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4138 
 4139   op_cost(10);
 4140   format %{"[$reg + $off + $ireg << $scale]" %}
 4141   interface(MEMORY_INTER) %{
 4142     base($reg);
 4143     index($ireg);
 4144     scale($scale);
 4145     disp($off);
 4146   %}
 4147 %}
 4148 
 4149 //----------Load Long Memory Operands------------------------------------------
 4150 // The load-long idiom will use its address expression again after loading
 4151 // the first word of the long.  If the load-long destination overlaps with
 4152 // registers used in the addressing expression, the 2nd half will be loaded
 4153 // from a clobbered address.  Fix this by requiring that load-long use
 4154 // address registers that do not overlap with the load-long target.
 4155 
 4156 // load-long support: pointer register operand whose allocation is confined to the esi_reg class
 4157 operand load_long_RegP() %{
 4158   constraint(ALLOC_IN_RC(esi_reg));
 4159   match(RegP);
 4160   match(eSIRegP);
 4161   op_cost(100);  // NOTE(review): presumably high so the matcher prefers other pointer operands - confirm
 4162   format %{  %}
 4163   interface(REG_INTER);
 4164 %}
 4165 
 4166 // Load-long Indirect Memory Operand: [$reg], with the base register confined to esi_reg
 4167 operand load_long_indirect(load_long_RegP reg) %{
 4168   constraint(ALLOC_IN_RC(esi_reg));
 4169   match(reg);
 4170 
 4171   format %{ "[$reg]" %}
 4172   interface(MEMORY_INTER) %{
 4173     base($reg);
 4174     index(0x4);   // 0x4 = no index (the stackSlot operands annotate the same value as "No Index")
 4175     scale(0x0);   // no scale
 4176     disp(0x0);    // no displacement
 4177   %}
 4178 %}
 4179 
 4180 // Load-long Indirect Memory Plus Offset Operand: [$reg + $off], base is a load_long_RegP, offset an immI
 4181 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4182   match(AddP reg off);
 4183 
 4184   format %{ "[$reg + $off]" %}
 4185   interface(MEMORY_INTER) %{
 4186     base($reg);
 4187     index(0x4);   // 0x4 = no index (the stackSlot operands annotate the same value as "No Index")
 4188     scale(0x0);   // no scale
 4189     disp($off);   // immediate offset from the matched AddP
 4190   %}
 4191 %}
 4192 
 4193 opclass load_long_memory(load_long_indirect, load_long_indOffset32);  // address forms accepted by the load-long idiom
 4194 
 4195 
 4196 //----------Special Memory Operands--------------------------------------------
 4197 // Stack Slot Operand - This operand is used for loading and storing temporary
 4198 //                      values on the stack where a match requires a value to
 4199 //                      flow through memory.
 4200 operand stackSlotP(sRegP reg) %{  // sRegP variant, addressed as [ESP + stack offset]
 4201   constraint(ALLOC_IN_RC(stack_slots));
 4202   // No match rule because this operand is only generated in matching
 4203   format %{ "[$reg]" %}
 4204   interface(MEMORY_INTER) %{
 4205     base(0x4);   // ESP
 4206     index(0x4);  // No Index
 4207     scale(0x0);  // No Scale
 4208     disp($reg);  // Stack Offset
 4209   %}
 4210 %}
 4211 
 4212 operand stackSlotI(sRegI reg) %{  // Stack slot operand, sRegI variant, addressed as [ESP + stack offset]
 4213   constraint(ALLOC_IN_RC(stack_slots));
 4214   // No match rule because this operand is only generated in matching
 4215   format %{ "[$reg]" %}
 4216   interface(MEMORY_INTER) %{
 4217     base(0x4);   // ESP
 4218     index(0x4);  // No Index
 4219     scale(0x0);  // No Scale
 4220     disp($reg);  // Stack Offset
 4221   %}
 4222 %}
 4223 
 4224 operand stackSlotF(sRegF reg) %{  // Stack slot operand, sRegF variant, addressed as [ESP + stack offset]
 4225   constraint(ALLOC_IN_RC(stack_slots));
 4226   // No match rule because this operand is only generated in matching
 4227   format %{ "[$reg]" %}
 4228   interface(MEMORY_INTER) %{
 4229     base(0x4);   // ESP
 4230     index(0x4);  // No Index
 4231     scale(0x0);  // No Scale
 4232     disp($reg);  // Stack Offset
 4233   %}
 4234 %}
 4235 
 4236 operand stackSlotD(sRegD reg) %{  // Stack slot operand, sRegD variant, addressed as [ESP + stack offset]
 4237   constraint(ALLOC_IN_RC(stack_slots));
 4238   // No match rule because this operand is only generated in matching
 4239   format %{ "[$reg]" %}
 4240   interface(MEMORY_INTER) %{
 4241     base(0x4);   // ESP
 4242     index(0x4);  // No Index
 4243     scale(0x0);  // No Scale
 4244     disp($reg);  // Stack Offset
 4245   %}
 4246 %}
 4247 
 4248 operand stackSlotL(sRegL reg) %{  // Stack slot operand, sRegL variant, addressed as [ESP + stack offset]
 4249   constraint(ALLOC_IN_RC(stack_slots));
 4250   // No match rule because this operand is only generated in matching
 4251   format %{ "[$reg]" %}
 4252   interface(MEMORY_INTER) %{
 4253     base(0x4);   // ESP
 4254     index(0x4);  // No Index
 4255     scale(0x0);  // No Scale
 4256     disp($reg);  // Stack Offset
 4257   %}
 4258 %}
 4259 
 4260 //----------Conditional Branch Operands----------------------------------------
 4261 // Comparison Op  - This is the operation of the comparison, and is limited to
 4262 //                  the following set of codes:
 4263 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4264 //
 4265 // Other attributes of the comparison, such as unsignedness, are specified
 4266 // by the comparison instruction that sets a condition code flags register.
 4267 // That result is represented by a flags operand whose subtype is appropriate
 4268 // to the unsignedness (etc.) of the comparison.
 4269 //
 4270 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4271 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4272 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4273 
 4274 // Comparison Code: maps each Bool test to an x86 condition-code value and mnemonic suffix (signed forms l/ge/le/g)
 4275 operand cmpOp() %{
 4276   match(Bool);
 4277 
 4278   format %{ "" %}
 4279   interface(COND_INTER) %{
 4280     equal(0x4, "e");
 4281     not_equal(0x5, "ne");
 4282     less(0xC, "l");            // signed <
 4283     greater_equal(0xD, "ge");  // signed >=
 4284     less_equal(0xE, "le");     // signed <=
 4285     greater(0xF, "g");         // signed >
 4286     overflow(0x0, "o");
 4287     no_overflow(0x1, "no");
 4288   %}
 4289 %}
 4290 
 4291 // Comparison Code, unsigned compare.  Used by FP also, with
 4292 // C2 (unordered) turned into GT or LT already.  The other bits
 4293 // C0 and C3 are turned into Carry & Zero flags.
 4294 operand cmpOpU() %{
 4295   match(Bool);
 4296 
 4297   format %{ "" %}
 4298   interface(COND_INTER) %{
 4299     equal(0x4, "e");
 4300     not_equal(0x5, "ne");
 4301     less(0x2, "b");            // below: unsigned <
 4302     greater_equal(0x3, "nb");  // not below: unsigned >=
 4303     less_equal(0x6, "be");     // below or equal: unsigned <=
 4304     greater(0x7, "nbe");       // not below or equal: unsigned >
 4305     overflow(0x0, "o");
 4306     no_overflow(0x1, "no");
 4307   %}
 4308 %}
 4309 
 4310 // Floating comparisons that don't require any fixup for the unordered case (only lt, ge, le, gt tests match)
 4311 operand cmpOpUCF() %{
 4312   match(Bool);
 4313   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4314             n->as_Bool()->_test._test == BoolTest::ge ||
 4315             n->as_Bool()->_test._test == BoolTest::le ||
 4316             n->as_Bool()->_test._test == BoolTest::gt);
 4317   format %{ "" %}
 4318   interface(COND_INTER) %{  // same code table as cmpOpU (b/nb/be/nbe forms)
 4319     equal(0x4, "e");
 4320     not_equal(0x5, "ne");
 4321     less(0x2, "b");
 4322     greater_equal(0x3, "nb");
 4323     less_equal(0x6, "be");
 4324     greater(0x7, "nbe");
 4325     overflow(0x0, "o");
 4326     no_overflow(0x1, "no");
 4327   %}
 4328 %}
 4329 
 4330 
 4331 // Floating comparisons that can be fixed up with extra conditional jumps (only ne and eq tests match)
 4332 operand cmpOpUCF2() %{
 4333   match(Bool);
 4334   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4335             n->as_Bool()->_test._test == BoolTest::eq);
 4336   format %{ "" %}
 4337   interface(COND_INTER) %{  // same code table as cmpOpU (b/nb/be/nbe forms)
 4338     equal(0x4, "e");
 4339     not_equal(0x5, "ne");
 4340     less(0x2, "b");
 4341     greater_equal(0x3, "nb");
 4342     less_equal(0x6, "be");
 4343     greater(0x7, "nbe");
 4344     overflow(0x0, "o");
 4345     no_overflow(0x1, "no");
 4346   %}
 4347 %}
 4348 
 4349 // Comparison Code for FP conditional move; overflow / no_overflow tests are excluded by the predicate
 4350 operand cmpOp_fcmov() %{
 4351   match(Bool);
 4352 
 4353   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4354             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4355   format %{ "" %}
 4356   interface(COND_INTER) %{  // NOTE(review): these values differ from the other cmpOp tables; they look like FCMOVcc opcode bits - confirm against the encoder
 4357     equal        (0x0C8);
 4358     not_equal    (0x1C8);
 4359     less         (0x0C0);
 4360     greater_equal(0x1C0);
 4361     less_equal   (0x0D0);
 4362     greater      (0x1D0);
 4363     overflow(0x0, "o"); // not really supported by the instruction
 4364     no_overflow(0x1, "no"); // not really supported by the instruction
 4365   %}
 4366 %}
 4367 
 4368 // Comparison Code used in long compares: the ordering tests are swapped relative to cmpOp (less<->greater, le<->ge)
 4369 operand cmpOp_commute() %{
 4370   match(Bool);
 4371 
 4372   format %{ "" %}
 4373   interface(COND_INTER) %{
 4374     equal(0x4, "e");
 4375     not_equal(0x5, "ne");
 4376     less(0xF, "g");            // cmpOp's "greater" code
 4377     greater_equal(0xE, "le");  // cmpOp's "less_equal" code
 4378     less_equal(0xD, "ge");     // cmpOp's "greater_equal" code
 4379     greater(0xC, "l");         // cmpOp's "less" code
 4380     overflow(0x0, "o");
 4381     no_overflow(0x1, "no");
 4382   %}
 4383 %}
 4384 
 4385 // Comparison Code used in unsigned long compares: the ordering tests are swapped relative to cmpOpU
 4386 operand cmpOpU_commute() %{
 4387   match(Bool);
 4388 
 4389   format %{ "" %}
 4390   interface(COND_INTER) %{
 4391     equal(0x4, "e");
 4392     not_equal(0x5, "ne");
 4393     less(0x7, "nbe");          // cmpOpU's "greater" code
 4394     greater_equal(0x6, "be");  // cmpOpU's "less_equal" code
 4395     less_equal(0x3, "nb");     // cmpOpU's "greater_equal" code
 4396     greater(0x2, "b");         // cmpOpU's "less" code
 4397     overflow(0x0, "o");
 4398     no_overflow(0x1, "no");
 4399   %}
 4400 %}
 4401 
 4402 //----------OPERAND CLASSES----------------------------------------------------
 4403 // Operand Classes are groups of operands that are used to simplify
 4404 // instruction definitions by not requiring the AD writer to specify separate
 4405 // instructions for every form of operand when the instruction accepts
 4406 // multiple operand types with the same basic encoding and format.  The classic
 4407 // case of this is memory operands.
 4408 
 4409 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4410                indIndex, indIndexScale, indIndexScaleOffset);  // general memory operand class
 4411 
 4412 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4413 // This means some kind of offset is always required and you cannot use
 4414 // an oop as the offset (done when working on static globals).
 4415 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4416                     indIndex, indIndexScale, indIndexScaleOffset);  // same members as memory except indOffset32X
 4417 
 4418 
 4419 //----------PIPELINE-----------------------------------------------------------
 4420 // Rules which define the behavior of the target architecture's pipeline.
 4421 pipeline %{
 4422 
 4423 //----------ATTRIBUTES---------------------------------------------------------
 4424 attributes %{
 4425   variable_size_instructions;        // Variable size instructions
 4426   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4427   instruction_unit_size = 1;         // Instructions are sized in units of 1 byte
 4428   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4429   instruction_fetch_units = 1;       // of 16 bytes
 4430 
 4431   // List of nop instructions
 4432   nops( MachNop );
 4433 %}
 4434 
 4435 //----------RESOURCES----------------------------------------------------------
 4436 // Resources are the functional units available to the machine
 4437 
 4438 // Generic P2/P3 pipeline
 4439 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4440 // 3 instructions decoded per cycle.
 4441 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4442 // 2 ALU op, only ALU0 handles mul/div instructions.
 4443 resources( D0, D1, D2, DECODE = D0 | D1 | D2,  // DECODE = any of the three decoders
 4444            MS0, MS1, MEM = MS0 | MS1,          // MEM = either load/store unit
 4445            BR, FPU,
 4446            ALU0, ALU1, ALU = ALU0 | ALU1 );    // ALU = either integer ALU
 4447 
 4448 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4449 // Pipeline Description specifies the stages in the machine's pipeline
 4450 
 4451 // Generic P2/P3 pipeline: six stages, referenced by name (S0..S5) in the pipe_class definitions
 4452 pipe_desc(S0, S1, S2, S3, S4, S5);
 4453 
 4454 //----------PIPELINE CLASSES---------------------------------------------------
 4455 // Pipeline Classes describe the stages in which input and output are
 4456 // referenced by the hardware pipeline.
 4457 
 4458 // Naming convention: ialu or fpu
 4459 // Then: _reg
 4460 // Then: _reg if there is a 2nd register
 4461 // Then: _long if it's a pair of instructions implementing a long
 4462 // Then: _fat if it requires the big decoder
 4463 //   Or: _mem if it requires the big decoder and a memory unit.
 4464 
 4465 // Integer ALU reg operation (dst is both source and destination)
 4466 pipe_class ialu_reg(rRegI dst) %{
 4467     single_instruction;
 4468     dst    : S4(write);
 4469     dst    : S3(read);
 4470     DECODE : S0;        // any decoder
 4471     ALU    : S3;        // any alu
 4472 %}
 4473 
 4474 // Long ALU reg operation (two instructions; dst is both source and destination)
 4475 pipe_class ialu_reg_long(eRegL dst) %{
 4476     instruction_count(2);
 4477     dst    : S4(write);
 4478     dst    : S3(read);
 4479     DECODE : S0(2);     // any 2 decoders
 4480     ALU    : S3(2);     // both alus
 4481 %}
 4482 
 4483 // Integer ALU reg operation using big decoder (dst is both source and destination)
 4484 pipe_class ialu_reg_fat(rRegI dst) %{
 4485     single_instruction;
 4486     dst    : S4(write);
 4487     dst    : S3(read);
 4488     D0     : S0;        // big decoder only
 4489     ALU    : S3;        // any alu
 4490 %}
 4491 
 4492 // Long ALU reg operation using big decoder (two instructions; dst is both source and destination)
 4493 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4494     instruction_count(2);
 4495     dst    : S4(write);
 4496     dst    : S3(read);
 4497     D0     : S0(2);     // big decoder only; twice
 4498     ALU    : S3(2);     // any 2 alus
 4499 %}
 4500 
 4501 // Integer ALU reg-reg operation (src read in S3, dst written in S4)
 4502 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4503     single_instruction;
 4504     dst    : S4(write);
 4505     src    : S3(read);
 4506     DECODE : S0;        // any decoder
 4507     ALU    : S3;        // any alu
 4508 %}
 4509 
 4510 // Long ALU reg-reg operation (two instructions)
 4511 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4512     instruction_count(2);
 4513     dst    : S4(write);
 4514     src    : S3(read);
 4515     DECODE : S0(2);     // any 2 decoders
 4516     ALU    : S3(2);     // both alus
 4517 %}
 4518 
 4519 // Integer ALU reg-reg operation using big decoder (NOTE(review): src is declared as a memory operand despite the reg_reg name - confirm)
 4520 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4521     single_instruction;
 4522     dst    : S4(write);
 4523     src    : S3(read);
 4524     D0     : S0;        // big decoder only
 4525     ALU    : S3;        // any alu
 4526 %}
 4527 
 4528 // Long ALU reg-reg operation using big decoder (two instructions)
 4529 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4530     instruction_count(2);
 4531     dst    : S4(write);
 4532     src    : S3(read);
 4533     D0     : S0(2);     // big decoder only; twice
 4534     ALU    : S3(2);     // both alus
 4535 %}
 4536 
 4537 // Integer ALU reg-mem operation (memory read in S3, ALU in S4, dst written in S5)
 4538 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4539     single_instruction;
 4540     dst    : S5(write);
 4541     mem    : S3(read);
 4542     D0     : S0;        // big decoder only
 4543     ALU    : S4;        // any alu
 4544     MEM    : S3;        // any mem
 4545 %}
 4546 
 4547 // Long ALU reg-mem operation (two instructions; mem is a load_long_memory operand)
 4548 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4549     instruction_count(2);
 4550     dst    : S5(write);
 4551     mem    : S3(read);
 4552     D0     : S0(2);     // big decoder only; twice
 4553     ALU    : S4(2);     // any 2 alus
 4554     MEM    : S3(2);     // both mems
 4555 %}
 4556 
 4557 // Integer mem operation (prefetch): memory operand only, no register written and no ALU used
 4558 pipe_class ialu_mem(memory mem)
 4559 %{
 4560     single_instruction;
 4561     mem    : S3(read);
 4562     D0     : S0;        // big decoder only
 4563     MEM    : S3;        // any mem
 4564 %}
 4565 
 4566 // Integer Store to Memory (register source)
 4567 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4568     single_instruction;
 4569     mem    : S3(read);
 4570     src    : S5(read);
 4571     D0     : S0;        // big decoder only
 4572     ALU    : S4;        // any alu
 4573     MEM    : S3;        // any mem
 4574 %}
 4575 
 4576 // Long Store to Memory (two instructions)
 4577 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4578     instruction_count(2);
 4579     mem    : S3(read);
 4580     src    : S5(read);
 4581     D0     : S0(2);     // big decoder only; twice
 4582     ALU    : S4(2);     // any 2 alus
 4583     MEM    : S3(2);     // Both mems
 4584 %}
 4585 
 4586 // Integer Store to Memory with no register source (the _imm suffix suggests an immediate value)
 4587 pipe_class ialu_mem_imm(memory mem) %{
 4588     single_instruction;
 4589     mem    : S3(read);
 4590     D0     : S0;        // big decoder only
 4591     ALU    : S4;        // any alu
 4592     MEM    : S3;        // any mem
 4593 %}
 4594 
 4595 // Integer ALU0 reg-reg operation (restricted to ALU0 and the big decoder)
 4596 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4597     single_instruction;
 4598     dst    : S4(write);
 4599     src    : S3(read);
 4600     D0     : S0;        // Big decoder only
 4601     ALU0   : S3;        // only alu0
 4602 %}
 4603 
 4604 // Integer ALU0 reg-mem operation (restricted to ALU0 and the big decoder)
 4605 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4606     single_instruction;
 4607     dst    : S5(write);
 4608     mem    : S3(read);
 4609     D0     : S0;        // big decoder only
 4610     ALU0   : S4;        // ALU0 only
 4611     MEM    : S3;        // any mem
 4612 %}
 4613 
 4614 // Integer ALU reg-reg operation whose result is the flags register (cr)
 4615 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4616     single_instruction;
 4617     cr     : S4(write);
 4618     src1   : S3(read);
 4619     src2   : S3(read);
 4620     DECODE : S0;        // any decoder
 4621     ALU    : S3;        // any alu
 4622 %}
 4623 
 4624 // Integer ALU reg-imm operation whose result is the flags register (cr)
 4625 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4626     single_instruction;
 4627     cr     : S4(write);
 4628     src1   : S3(read);
 4629     DECODE : S0;        // any decoder
 4630     ALU    : S3;        // any alu
 4631 %}
 4632 
 4633 // Integer ALU reg-mem operation whose result is the flags register (cr)
 4634 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4635     single_instruction;
 4636     cr     : S4(write);
 4637     src1   : S3(read);
 4638     src2   : S3(read);
 4639     D0     : S0;        // big decoder only
 4640     ALU    : S4;        // any alu
 4641     MEM    : S3;        // any mem
 4642 %}
 4643 
 4644 // Four-instruction idiom reading p, q and y (name suggests a compare-less-than sequence); NOTE(review): no written operand is declared - confirm
 4645 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4646     instruction_count(4);
 4647     y      : S4(read);
 4648     q      : S3(read);
 4649     p      : S3(read);
 4650     DECODE : S0(4);     // any decoder
 4651 %}
 4652 
 4653 // Conditional move reg-reg (reads src and the flags, writes dst; no ALU resource declared)
 4654 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4655     single_instruction;
 4656     dst    : S4(write);
 4657     src    : S3(read);
 4658     cr     : S3(read);
 4659     DECODE : S0;        // any decoder
 4660 %}
 4661 
 4662 // Conditional move reg-mem (reads a memory src and the flags, writes dst)
 4663 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4664     single_instruction;
 4665     dst    : S4(write);
 4666     src    : S3(read);
 4667     cr     : S3(read);
 4668     DECODE : S0;        // any decoder
 4669     MEM    : S3;        // any mem
 4670 %}
 4671 
 4672 // Conditional move reg-reg long
 4673 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4674     single_instruction;  // NOTE(review): declared single_instruction although DECODE below is S0(2) - confirm intended
 4675     dst    : S4(write);
 4676     src    : S3(read);
 4677     cr     : S3(read);
 4678     DECODE : S0(2);     // any 2 decoders
 4679 %}
 4680 
 4681 // Conditional move double reg-reg (dst is a regDPR1, src a regDPR)
 4682 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4683     single_instruction;
 4684     dst    : S4(write);
 4685     src    : S3(read);
 4686     cr     : S3(read);
 4687     DECODE : S0;        // any decoder
 4688 %}
 4689 
 4690 // Float single-register operation (two instructions; dst is only declared as read)
 4691 pipe_class fpu_reg(regDPR dst) %{
 4692     instruction_count(2);
 4693     dst    : S3(read);
 4694     DECODE : S0(2);     // any 2 decoders
 4695     FPU    : S3;
 4696 %}
 4697 
 4698 // Float reg-reg operation (two instructions)
 4699 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4700     instruction_count(2);
 4701     dst    : S4(write);
 4702     src    : S3(read);
 4703     DECODE : S0(2);     // any 2 decoders
 4704     FPU    : S3;
 4705 %}
 4706 
 4707 // Float reg-reg-reg operation (three instructions)
 4708 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4709     instruction_count(3);
 4710     dst    : S4(write);
 4711     src1   : S3(read);
 4712     src2   : S3(read);
 4713     DECODE : S0(3);     // any 3 decoders
 4714     FPU    : S3(2);
 4715 %}
 4716 
 4717 // Float reg-reg-reg-reg operation (four instructions)
 4718 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4719     instruction_count(4);
 4720     dst    : S4(write);
 4721     src1   : S3(read);
 4722     src2   : S3(read);
 4723     src3   : S3(read);
 4724     DECODE : S0(4);     // count is 4; NOTE(review): DECODE comprises only 3 decoders (D0-D2) - confirm
 4725     FPU    : S3(2);
 4726 %}
 4727 
 4728 // Float reg-mem-reg-reg operation (four instructions; src1 is a memory operand)
 4729 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4730     instruction_count(4);
 4731     dst    : S4(write);
 4732     src1   : S3(read);
 4733     src2   : S3(read);
 4734     src3   : S3(read);
 4735     DECODE : S1(3);     // any 3 decoders
 4736     D0     : S0;        // Big decoder only
 4737     FPU    : S3(2);
 4738     MEM    : S3;        // any mem
 4739 %}
 4740 
 4741 // Float reg-mem operation (two instructions: a memory access plus an FPU POP)
 4742 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4743     instruction_count(2);
 4744     dst    : S5(write);
 4745     mem    : S3(read);
 4746     D0     : S0;        // big decoder only
 4747     DECODE : S1;        // any decoder for FPU POP
 4748     FPU    : S4;
 4749     MEM    : S3;        // any mem
 4750 %}
 4751 
 4752 // Float reg-reg-mem operation (three instructions)
 4753 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4754     instruction_count(3);
 4755     dst    : S5(write);
 4756     src1   : S3(read);
 4757     mem    : S3(read);
 4758     D0     : S0;        // big decoder only
 4759     DECODE : S1(2);     // any decoder for FPU POP
 4760     FPU    : S4;
 4761     MEM    : S3;        // any mem
 4762 %}
 4763 
 4764 // Float mem-reg operation (two instructions: an FPU PUSH plus a memory access)
 4765 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4766     instruction_count(2);
 4767     src    : S5(read);
 4768     mem    : S3(read);
 4769     DECODE : S0;        // any decoder for FPU PUSH
 4770     D0     : S1;        // big decoder only
 4771     FPU    : S4;
 4772     MEM    : S3;        // any mem
 4773 %}
 4774 
 4775 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{  // Float mem-reg-reg operation (three instructions)
 4776     instruction_count(3);
 4777     src1   : S3(read);
 4778     src2   : S3(read);
 4779     mem    : S3(read);
 4780     DECODE : S0(2);     // any decoder for FPU PUSH
 4781     D0     : S1;        // big decoder only
 4782     FPU    : S4;
 4783     MEM    : S3;        // any mem
 4784 %}
 4785 
 4786 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{  // Float mem-reg-mem operation (three instructions, two memory operands)
 4787     instruction_count(3);
 4788     src1   : S3(read);
 4789     src2   : S3(read);
 4790     mem    : S4(read);
 4791     DECODE : S0;        // any decoder for FPU PUSH
 4792     D0     : S0(2);     // big decoder only
 4793     FPU    : S4;
 4794     MEM    : S3(2);     // any mem
 4795 %}
 4796 
 4797 pipe_class fpu_mem_mem(memory dst, memory src1) %{  // Float mem-mem operation (two instructions; no FPU resource declared)
 4798     instruction_count(2);
 4799     src1   : S3(read);
 4800     dst    : S4(read);
 4801     D0     : S0(2);     // big decoder only
 4802     MEM    : S3(2);     // any mem
 4803 %}
 4804 
 4805 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{  // Float mem-mem-mem operation (three instructions)
 4806     instruction_count(3);
 4807     src1   : S3(read);
 4808     src2   : S3(read);
 4809     dst    : S4(read);
 4810     D0     : S0(3);     // big decoder only
 4811     FPU    : S4;
 4812     MEM    : S3(3);     // any mem
 4813 %}
 4814 
 4815 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{  // Float mem-reg operation, three instructions (the _con suffix suggests a constant operand)
 4816     instruction_count(3);
 4817     src1   : S4(read);
 4818     mem    : S4(read);
 4819     DECODE : S0;        // any decoder for FPU PUSH
 4820     D0     : S0(2);     // big decoder only
 4821     FPU    : S4;
 4822     MEM    : S3(2);     // any mem
 4823 %}
 4824 
 4825 // Float load constant into a register (two instructions: the load plus an FPU POP)
 4826 pipe_class fpu_reg_con(regDPR dst) %{
 4827     instruction_count(2);
 4828     dst    : S5(write);
 4829     D0     : S0;        // big decoder only for the load
 4830     DECODE : S1;        // any decoder for FPU POP
 4831     FPU    : S4;
 4832     MEM    : S3;        // any mem
 4833 %}
 4834 
 4835 // Float reg-reg operation that also loads a constant (three instructions)
 4836 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4837     instruction_count(3);
 4838     dst    : S5(write);
 4839     src    : S3(read);
 4840     D0     : S0;        // big decoder only for the load
 4841     DECODE : S1(2);     // any decoder for FPU POP
 4842     FPU    : S4;
 4843     MEM    : S3;        // any mem
 4844 %}
 4845 
 4846 // Unconditional branch: uses only the branch unit, in stage S3
 4847 pipe_class pipe_jmp( label labl ) %{
 4848     single_instruction;
 4849     BR   : S3;
 4850 %}
 4851 
 4852 // Conditional branch: reads the flags in S1, uses the branch unit in S3
 4853 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4854     single_instruction;
 4855     cr    : S1(read);
 4856     BR    : S3;
 4857 %}
 4858 
 4859 // Allocation idiom (compare-and-exchange, per the class name): serializing, with a fixed latency of 6
 4860 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4861     instruction_count(1); force_serialization;
 4862     fixed_latency(6);
 4863     heap_ptr : S3(read);
 4864     DECODE   : S0(3);
 4865     D0       : S2;
 4866     MEM      : S3;
 4867     ALU      : S3(2);
 4868     dst      : S5(write);
 4869     BR       : S5;
 4870 %}
 4871 
 4872 // Generic big/slow expanded idiom: 10 instructions over multiple bundles, serializing, fixed latency of 100
 4873 pipe_class pipe_slow(  ) %{
 4874     instruction_count(10); multiple_bundles; force_serialization;
 4875     fixed_latency(100);
 4876     D0  : S0(2);
 4877     MEM : S3(2);
 4878 %}
 4879 
 4880 // The real do-nothing guy: zero instructions and no resources (bound to MachNop in the define block)
 4881 pipe_class empty( ) %{
 4882     instruction_count(0);
 4883 %}
 4884 
 4885 // Define the class for the Nop node: MachNop (listed in nops() above) uses the empty pipe_class
 4886 define %{
 4887    MachNop = empty;
 4888 %}
 4889 
 4890 %}
 4891 
 4892 //----------INSTRUCTIONS-------------------------------------------------------
 4893 //
 4894 // match      -- States which machine-independent subtree may be replaced
 4895 //               by this instruction.
 4896 // ins_cost   -- The estimated cost of this instruction is used by instruction
 4897 //               selection to identify a minimum cost tree of machine
 4898 //               instructions that matches a tree of machine-independent
 4899 //               instructions.
 4900 // format     -- A string providing the disassembly for this instruction.
 4901 //               The value of an instruction's operand may be inserted
 4902 //               by referring to it with a '$' prefix.
 4903 // opcode     -- Three instruction opcodes may be provided.  These are referred
 4904 //               to within an encode class as $primary, $secondary, and $tertiary
 4905 //               respectively.  The primary opcode is commonly used to
 4906 //               indicate the type of machine instruction, while secondary
 4907 //               and tertiary are often used for prefix options or addressing
 4908 //               modes.
 4909 // ins_encode -- A list of encode classes with parameters. The encode class
 4910 //               name must have been defined in an 'enc_class' specification
 4911 //               in the encode section of the architecture description.
 4912 
 4913 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 4914 // Dummy float move regF -> legRegF; the encoding must never be emitted (ShouldNotReachHere)
 4915 instruct MoveF2LEG(legRegF dst, regF src) %{
 4916   match(Set dst src);
 4917   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4918   ins_encode %{
 4919     ShouldNotReachHere();
 4920   %}
 4921   ins_pipe( fpu_reg_reg );
 4922 %}
 4923 
 4924 // Dummy float move legRegF -> regF; the encoding must never be emitted (ShouldNotReachHere)
 4925 instruct MoveLEG2F(regF dst, legRegF src) %{
 4926   match(Set dst src);
 4927   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4928   ins_encode %{
 4929     ShouldNotReachHere();
 4930   %}
 4931   ins_pipe( fpu_reg_reg );
 4932 %}
 4933 
 4934 // Dummy float move regF -> vlRegF; the encoding must never be emitted (ShouldNotReachHere)
 4935 instruct MoveF2VL(vlRegF dst, regF src) %{
 4936   match(Set dst src);
 4937   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4938   ins_encode %{
 4939     ShouldNotReachHere();
 4940   %}
 4941   ins_pipe( fpu_reg_reg );
 4942 %}
 4943 
 4944 // Dummy float move vlRegF -> regF; the encoding must never be emitted (ShouldNotReachHere)
 4945 instruct MoveVL2F(regF dst, vlRegF src) %{
 4946   match(Set dst src);
 4947   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4948   ins_encode %{
 4949     ShouldNotReachHere();
 4950   %}
 4951   ins_pipe( fpu_reg_reg );
 4952 %}
 4953 
 4954 
 4955 
 4956 // Dummy double move regD -> legRegD; the encoding must never be emitted (ShouldNotReachHere)
 4957 instruct MoveD2LEG(legRegD dst, regD src) %{
 4958   match(Set dst src);
 4959   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4960   ins_encode %{
 4961     ShouldNotReachHere();
 4962   %}
 4963   ins_pipe( fpu_reg_reg );
 4964 %}
 4965 
 4966 // Dummy double move legRegD -> regD; the encoding must never be emitted (ShouldNotReachHere)
 4967 instruct MoveLEG2D(regD dst, legRegD src) %{
 4968   match(Set dst src);
 4969   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4970   ins_encode %{
 4971     ShouldNotReachHere();
 4972   %}
 4973   ins_pipe( fpu_reg_reg );
 4974 %}
 4975 
 4976 // Dummy double move regD -> vlRegD; the encoding must never be emitted (ShouldNotReachHere)
 4977 instruct MoveD2VL(vlRegD dst, regD src) %{
 4978   match(Set dst src);
 4979   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4980   ins_encode %{
 4981     ShouldNotReachHere();
 4982   %}
 4983   ins_pipe( fpu_reg_reg );
 4984 %}
 4985 
 4986 // Dummy double move vlRegD -> regD; the encoding must never be emitted (ShouldNotReachHere)
 4987 instruct MoveVL2D(regD dst, vlRegD src) %{
 4988   match(Set dst src);
 4989   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4990   ins_encode %{
 4991     ShouldNotReachHere();
 4992   %}
 4993   ins_pipe( fpu_reg_reg );
 4994 %}
 4995 
 4996 //----------BSWAP-Instruction--------------------------------------------------
 4997 instruct bytes_reverse_int(rRegI dst) %{  // In-place byte reversal of a 32-bit int via BSWAP
 4998   match(Set dst (ReverseBytesI dst));
 4999 
 5000   format %{ "BSWAP  $dst" %}
 5001   opcode(0x0F, 0xC8);  // primary 0x0F, secondary 0xC8
 5002   ins_encode( OpcP, OpcSReg(dst) );  // presumably emits the primary byte, then the secondary byte combined with the register number - confirm against the enc_class definitions
 5003   ins_pipe( ialu_reg );
 5004 %}
 5005 
 5006 instruct bytes_reverse_long(eRegL dst) %{  // In-place byte reversal of a 64-bit long held in a register pair
 5007   match(Set dst (ReverseBytesL dst));
 5008 
 5009   format %{ "BSWAP  $dst.lo\n\t"
 5010             "BSWAP  $dst.hi\n\t"
 5011             "XCHG   $dst.lo $dst.hi" %}
 5012 
 5013   ins_cost(125);
 5014   ins_encode( bswap_long_bytes(dst) );  // enc_class defined elsewhere; per the format: swap each half, then exchange the halves
 5015   ins_pipe( ialu_reg_reg);  // NOTE(review): ialu_reg_reg declares (rRegI dst, rRegI src) while this instruct has a single eRegL operand - confirm
 5016 %}
 5017 
 5018 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{  // Byte-swap the low 16 bits of dst, zero-extending the result
 5019   match(Set dst (ReverseBytesUS dst));
 5020   effect(KILL cr);  // the shift modifies the flags
 5021 
 5022   format %{ "BSWAP  $dst\n\t"
 5023             "SHR    $dst,16" %}
 5024   ins_encode %{
 5025     __ bswapl($dst$$Register);    // 32-bit swap leaves the reversed halfword in the upper 16 bits
 5026     __ shrl($dst$$Register, 16);  // logical shift moves it down and clears the upper bits
 5027   %}
 5028   ins_pipe( ialu_reg );
 5029 %}
 5030 
 5031 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{  // Byte-swap the low 16 bits of dst, sign-extending the result
 5032   match(Set dst (ReverseBytesS dst));
 5033   effect(KILL cr);  // the shift modifies the flags
 5034 
 5035   format %{ "BSWAP  $dst\n\t"
 5036             "SAR    $dst,16" %}
 5037   ins_encode %{
 5038     __ bswapl($dst$$Register);    // 32-bit swap leaves the reversed halfword in the upper 16 bits
 5039     __ sarl($dst$$Register, 16);  // arithmetic shift moves it down and replicates its sign bit
 5040   %}
 5041   ins_pipe( ialu_reg );
 5042 %}
 5043 
 5044 
 5045 //---------- Zeros Count Instructions ------------------------------------------
 5046 
 5047 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{  // Leading-zero count of an int using LZCNT
 5048   predicate(UseCountLeadingZerosInstruction);  // selected only when the LZCNT flag is on; the _bsr variant covers the other case
 5049   match(Set dst (CountLeadingZerosI src));
 5050   effect(KILL cr);  // LZCNT modifies the flags
 5051 
 5052   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5053   ins_encode %{
 5054     __ lzcntl($dst$$Register, $src$$Register);
 5055   %}
 5056   ins_pipe(ialu_reg);
 5057 %}
 5058 
 5059 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{  // Leading-zero count of an int using BSR (fallback when LZCNT is not used)
 5060   predicate(!UseCountLeadingZerosInstruction);
 5061   match(Set dst (CountLeadingZerosI src));
 5062   effect(KILL cr);
 5063 
 5064   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5065             "JNZ    skip\n\t"
 5066             "MOV    $dst, -1\n"
 5067       "skip:\n\t"
 5068             "NEG    $dst\n\t"
 5069             "ADD    $dst, 31" %}
 5070   ins_encode %{
 5071     Register Rdst = $dst$$Register;
 5072     Register Rsrc = $src$$Register;
 5073     Label skip;
 5074     __ bsrl(Rdst, Rsrc);                 // index of the highest set bit; ZF is set when the source is zero
 5075     __ jccb(Assembler::notZero, skip);
 5076     __ movl(Rdst, -1);                   // zero input: use index -1 so the result below becomes 32
 5077     __ bind(skip);
 5078     __ negl(Rdst);
 5079     __ addl(Rdst, BitsPerInt - 1);       // result = (BitsPerInt - 1) - index
 5080   %}
 5081   ins_pipe(ialu_reg);
 5082 %}
 5083 
 5084 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{  // Leading-zero count of a long (register pair) using LZCNT
 5085   predicate(UseCountLeadingZerosInstruction);
 5086   match(Set dst (CountLeadingZerosL src));
 5087   effect(TEMP dst, KILL cr);  // dst is written before the low half of src is read
 5088 
 5089   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5090             "JNC    done\n\t"
 5091             "LZCNT  $dst, $src.lo\n\t"
 5092             "ADD    $dst, 32\n"
 5093       "done:" %}
 5094   ins_encode %{
 5095     Register Rdst = $dst$$Register;
 5096     Register Rsrc = $src$$Register;
 5097     Label done;
 5098     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));  // count in the high word; LZCNT sets CF when its source is zero
 5099     __ jccb(Assembler::carryClear, done);  // high word non-zero: its count is the answer
 5100     __ lzcntl(Rdst, Rsrc);                 // high word zero: count in the low word
 5101     __ addl(Rdst, BitsPerInt);             // plus the BitsPerInt zeros of the high word
 5102     __ bind(done);
 5103   %}
 5104   ins_pipe(ialu_reg);
 5105 %}
 5106 
 5107 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
        // CountLeadingZerosL fallback (no LZCNT) on a long register pair.
        // Builds a 0..63 bit index with BSR (high word index + 32, else low
        // word index, else -1 when both JZ/JNZ tests report zero) and returns
        // 63 - index, i.e. 64 when no bit index was found.
        // The format text no longer ends with a stray "\n".
 5108   predicate(!UseCountLeadingZerosInstruction);
 5109   match(Set dst (CountLeadingZerosL src));
 5110   effect(TEMP dst, KILL cr);
 5111 
 5112   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5113             "JZ     msw_is_zero\n\t"
 5114             "ADD    $dst, 32\n\t"
 5115             "JMP    not_zero\n"
 5116       "msw_is_zero:\n\t"
 5117             "BSR    $dst, $src.lo\n\t"
 5118             "JNZ    not_zero\n\t"
 5119             "MOV    $dst, -1\n"
 5120       "not_zero:\n\t"
 5121             "NEG    $dst\n\t"
 5122             "ADD    $dst, 63" %}
 5123   ins_encode %{
 5124     Register Rdst = $dst$$Register;
 5125     Register Rsrc = $src$$Register;
 5126     Label msw_is_zero;
 5127     Label not_zero;
 5128     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5129     __ jccb(Assembler::zero, msw_is_zero);
 5130     __ addl(Rdst, BitsPerInt);          // index came from the high word
 5131     __ jmpb(not_zero);
 5132     __ bind(msw_is_zero);
 5133     __ bsrl(Rdst, Rsrc);
 5134     __ jccb(Assembler::notZero, not_zero);
 5135     __ movl(Rdst, -1);                  // neither word yielded an index
 5136     __ bind(not_zero);
 5137     __ negl(Rdst);
 5138     __ addl(Rdst, BitsPerLong - 1);     // 63 - index
 5139   %}
 5140   ins_pipe(ialu_reg);
 5141 %}
 5142 
 5143 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
        // CountTrailingZerosI as a single TZCNT. Only selected when
        // UseCountTrailingZerosInstruction is set; the flags register is killed.
 5144   predicate(UseCountTrailingZerosInstruction);
 5145   match(Set dst (CountTrailingZerosI src));
 5146   effect(KILL cr);
 5147 
 5148   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5149   ins_encode %{
          Register result = $dst$$Register;
          Register value  = $src$$Register;
 5150     __ tzcntl(result, value);
 5151   %}
 5152   ins_pipe(ialu_reg);
 5153 %}
 5154 
 5155 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
        // CountTrailingZerosI fallback used when
        // UseCountTrailingZerosInstruction is off: BSF gives the index
        // directly; when the JNZ after BSF is not taken, 32 is loaded instead.
 5156   predicate(!UseCountTrailingZerosInstruction);
 5157   match(Set dst (CountTrailingZerosI src));
 5158   effect(KILL cr);
 5159 
 5160   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5161             "JNZ    done\n\t"
 5162             "MOV    $dst, 32\n"
 5163       "done:" %}
 5164   ins_encode %{
 5165     Register result = $dst$$Register;
          Register value  = $src$$Register;
 5166     Label have_index;
 5167     __ bsfl(result, value);
 5168     __ jccb(Assembler::notZero, have_index);
 5169     __ movl(result, BitsPerInt);
 5170     __ bind(have_index);
 5171   %}
 5172   ins_pipe(ialu_reg);
 5173 %}
 5174 
 5175 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
        // CountTrailingZerosL on a long register pair, using TZCNT. The low
        // word is counted first; if the carry flag is clear afterwards that
        // count is the answer, otherwise the high word is counted and 32 added.
 5176   predicate(UseCountTrailingZerosInstruction);
 5177   match(Set dst (CountTrailingZerosL src));
 5178   effect(TEMP dst, KILL cr);
 5179 
 5180   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5181             "JNC    done\n\t"
 5182             "TZCNT  $dst, $src.hi\n\t"
 5183             "ADD    $dst, 32\n"
 5184             "done:" %}
 5185   ins_encode %{
 5186     Register result = $dst$$Register;
 5187     Register src_lo = $src$$Register;
          Register src_hi = HIGH_FROM_LOW(src_lo);
 5188     Label finished;
 5189     __ tzcntl(result, src_lo);
 5190     __ jccb(Assembler::carryClear, finished);
 5191     __ tzcntl(result, src_hi);
 5192     __ addl(result, BitsPerInt);
 5193     __ bind(finished);
 5194   %}
 5195   ins_pipe(ialu_reg);
 5196 %}
 5197 
 5198 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
        // CountTrailingZerosL fallback (no TZCNT) on a long register pair.
        // Low-word BSF index is returned as is; otherwise the high-word BSF
        // index plus 32; if neither JNZ is taken the result is 32 + 32 = 64.
 5199   predicate(!UseCountTrailingZerosInstruction);
 5200   match(Set dst (CountTrailingZerosL src));
 5201   effect(TEMP dst, KILL cr);
 5202 
 5203   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5204             "JNZ    done\n\t"
 5205             "BSF    $dst, $src.hi\n\t"
 5206             "JNZ    msw_not_zero\n\t"
 5207             "MOV    $dst, 32\n"
 5208       "msw_not_zero:\n\t"
 5209             "ADD    $dst, 32\n"
 5210       "done:" %}
 5211   ins_encode %{
 5212     Register result = $dst$$Register;
 5213     Register src_lo = $src$$Register;
          Register src_hi = HIGH_FROM_LOW(src_lo);
 5214     Label add_high_bias;
 5215     Label finished;
 5216     __ bsfl(result, src_lo);
 5217     __ jccb(Assembler::notZero, finished);
 5218     __ bsfl(result, src_hi);
 5219     __ jccb(Assembler::notZero, add_high_bias);
 5220     __ movl(result, BitsPerInt);
 5221     __ bind(add_high_bias);
 5222     __ addl(result, BitsPerInt);
 5223     __ bind(finished);
 5224   %}
 5225   ins_pipe(ialu_reg);
 5226 %}
 5227 
 5228 
 5229 //---------- Population Count Instructions -------------------------------------
 5230 
 5231 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
        // PopCountI register form: one POPCNT, gated on UsePopCountInstruction.
        // The flags register is declared killed.
 5232   predicate(UsePopCountInstruction);
 5233   match(Set dst (PopCountI src));
 5234   effect(KILL cr);
 5235 
 5236   format %{ "POPCNT $dst, $src" %}
 5237   ins_encode %{
          Register bit_count = $dst$$Register;
          Register value     = $src$$Register;
 5238     __ popcntl(bit_count, value);
 5239   %}
 5240   ins_pipe(ialu_reg);
 5241 %}
 5242 
 5243 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
        // PopCountI with the LoadI folded in: POPCNT reads its operand
        // straight from memory. Gated on UsePopCountInstruction.
 5244   predicate(UsePopCountInstruction);
 5245   match(Set dst (PopCountI (LoadI mem)));
 5246   effect(KILL cr);
 5247 
 5248   format %{ "POPCNT $dst, $mem" %}
 5249   ins_encode %{
          Register bit_count = $dst$$Register;
 5250     __ popcntl(bit_count, $mem$$Address);
 5251   %}
 5252   ins_pipe(ialu_reg);
 5253 %}
 5254 
 5255 // Note: Long.bitCount(long) returns an int, hence the rRegI result.
 5256 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // PopCountL on a long register pair: POPCNT each 32-bit half
        // separately (low into dst, high into tmp) and add the two counts.
 5257   predicate(UsePopCountInstruction);
 5258   match(Set dst (PopCountL src));
 5259   effect(KILL cr, TEMP tmp, TEMP dst);
 5260 
 5261   format %{ "POPCNT $dst, $src.lo\n\t"
 5262             "POPCNT $tmp, $src.hi\n\t"
 5263             "ADD    $dst, $tmp" %}
 5264   ins_encode %{
          Register total    = $dst$$Register;
          Register hi_count = $tmp$$Register;
          Register src_lo   = $src$$Register;
 5265     __ popcntl(total, src_lo);
 5266     __ popcntl(hi_count, HIGH_FROM_LOW(src_lo));
 5267     __ addl(total, hi_count);
 5268   %}
 5269   ins_pipe(ialu_reg);
 5270 %}
 5271 
 5272 // Note: Long.bitCount(long) returns an int, hence the rRegI result.
 5273 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
        // PopCountL with the LoadL folded in: POPCNT the 32-bit word at mem
        // and the word at mem+4 separately, then add the two counts.
 5274   predicate(UsePopCountInstruction);
 5275   match(Set dst (PopCountL (LoadL mem)));
 5276   effect(KILL cr, TEMP tmp, TEMP dst);
 5277 
 5278   format %{ "POPCNT $dst, $mem\n\t"
 5279             "POPCNT $tmp, $mem+4\n\t"
 5280             "ADD    $dst, $tmp" %}
 5281   ins_encode %{
          // Both addresses are assembled by hand from the operand's components
          // so that the second one can carry the +4 displacement.
          Address lo_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
          Address hi_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
          Register total    = $dst$$Register;
          Register hi_count = $tmp$$Register;
 5284     __ popcntl(total, lo_word);
 5285     __ popcntl(hi_count, hi_word);
 5286     __ addl(total, hi_count);
 5287   %}
 5288   ins_pipe(ialu_reg);
 5289 %}
 5290 
 5291 
 5292 //----------Load/Store/Move Instructions---------------------------------------
 5293 //----------Load Instructions--------------------------------------------------
 5294 // Load Byte (8 bit signed): sign-extending byte load into an int register.
 5295 instruct loadB(xRegI dst, memory mem) %{
 5296   match(Set dst (LoadB mem));
 5297 
 5298   ins_cost(125);
 5299   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5300 
 5301   ins_encode %{
          Register dst_reg = $dst$$Register;
 5302     __ movsbl(dst_reg, $mem$$Address);
 5303   %}
 5304 
 5305   ins_pipe(ialu_reg_mem);
 5306 %}
 5307 
 5308 // Load Byte (8 bit signed) into Long Register: ConvI2L folded into the load.
 5309 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
        // The low half receives the sign-extended byte; the high half is a copy
        // of it shifted arithmetically so that it holds only sign bits.
 5310   match(Set dst (ConvI2L (LoadB mem)));
 5311   effect(KILL cr);
 5312 
 5313   ins_cost(375);
 5314   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5315             "MOV    $dst.hi,$dst.lo\n\t"
 5316             "SAR    $dst.hi,7" %}
 5317 
 5318   ins_encode %{
          Register dst_lo = $dst$$Register;
          Register dst_hi = HIGH_FROM_LOW(dst_lo); // This is always a different register.
 5319     __ movsbl(dst_lo, $mem$$Address);
 5320     __ movl(dst_hi, dst_lo);
 5321     __ sarl(dst_hi, 7); // 24+1 MSB are already signed extended.
 5322   %}
 5323 
 5324   ins_pipe(ialu_reg_mem);
 5325 %}
 5326 
 5327 // Load Unsigned Byte (8 bit unsigned): zero-extending byte load into an int register.
 5328 instruct loadUB(xRegI dst, memory mem) %{
 5329   match(Set dst (LoadUB mem));
 5330 
 5331   ins_cost(125);
 5332   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5333 
 5334   ins_encode %{
          Register dst_reg = $dst$$Register;
 5335     __ movzbl(dst_reg, $mem$$Address);
 5336   %}
 5337 
 5338   ins_pipe(ialu_reg_mem);
 5339 %}
 5340 
 5341 // Load Unsigned Byte (8 bit unsigned) into Long Register
 5342 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
        // Zero-extending byte load into the low half; the high half is cleared
        // with XOR, which is why the flags register is declared killed.
 5343   match(Set dst (ConvI2L (LoadUB mem)));
 5344   effect(KILL cr);
 5345 
 5346   ins_cost(250);
 5347   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5348             "XOR    $dst.hi,$dst.hi" %}
 5349 
 5350   ins_encode %{
 5351     Register dst_lo = $dst$$Register;
          Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5352     __ movzbl(dst_lo, $mem$$Address);
 5353     __ xorl(dst_hi, dst_hi);
 5354   %}
 5355 
 5356   ins_pipe(ialu_reg_mem);
 5357 %}
 5358 
 5359 // Load Unsigned Byte (8 bit unsigned) with an int mask into Long Register
 5360 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
        // Zero-extending byte load, high half cleared, then the low half is
        // ANDed with the mask constant truncated to its low 8 bits.
 5361   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5362   effect(KILL cr);
 5363 
 5364   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5365             "XOR    $dst.hi,$dst.hi\n\t"
 5366             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5367   ins_encode %{
 5368     Register dst_lo = $dst$$Register;
          Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5369     __ movzbl(dst_lo, $mem$$Address);
 5370     __ xorl(dst_hi, dst_hi);
 5371     __ andl(dst_lo, $mask$$constant & right_n_bits(8));
 5372   %}
 5373   ins_pipe(ialu_reg_mem);
 5374 %}
 5375 
 5376 // Load Short (16 bit signed): sign-extending 16-bit load into an int register.
 5377 instruct loadS(rRegI dst, memory mem) %{
 5378   match(Set dst (LoadS mem));
 5379 
 5380   ins_cost(125);
 5381   format %{ "MOVSX  $dst,$mem\t# short" %}
 5382 
 5383   ins_encode %{
          Register dst_reg = $dst$$Register;
 5384     __ movswl(dst_reg, $mem$$Address);
 5385   %}
 5386 
 5387   ins_pipe(ialu_reg_mem);
 5388 %}
 5389 
 5390 // Load Short (16 bit signed) to Byte (8 bit signed)
 5391 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
        // Folds (LoadS << 24) >> 24 into one sign-extending byte load from mem.
        // The format mnemonic is MOVSX8 so the listing reflects the byte-sized
        // MOVSBL emitted here, as loadB's format does for the same instruction.
 5392   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5393 
 5394   ins_cost(125);
 5395   format %{ "MOVSX8 $dst, $mem\t# short -> byte" %}
 5396   ins_encode %{
 5397     __ movsbl($dst$$Register, $mem$$Address);
 5398   %}
 5399   ins_pipe(ialu_reg_mem);
 5400 %}
 5401 
 5402 // Load Short (16 bit signed) into Long Register: ConvI2L folded into the load.
 5403 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
        // The low half receives the sign-extended short; the high half is a
        // copy of it shifted arithmetically so that it holds only sign bits.
 5404   match(Set dst (ConvI2L (LoadS mem)));
 5405   effect(KILL cr);
 5406 
 5407   ins_cost(375);
 5408   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5409             "MOV    $dst.hi,$dst.lo\n\t"
 5410             "SAR    $dst.hi,15" %}
 5411 
 5412   ins_encode %{
          Register dst_lo = $dst$$Register;
          Register dst_hi = HIGH_FROM_LOW(dst_lo); // This is always a different register.
 5413     __ movswl(dst_lo, $mem$$Address);
 5414     __ movl(dst_hi, dst_lo);
 5415     __ sarl(dst_hi, 15); // 16+1 MSB are already signed extended.
 5416   %}
 5417 
 5418   ins_pipe(ialu_reg_mem);
 5419 %}
 5420 
 5421 // Load Unsigned Short/Char (16 bit unsigned): zero-extending 16-bit load.
 5422 instruct loadUS(rRegI dst, memory mem) %{
 5423   match(Set dst (LoadUS mem));
 5424 
 5425   ins_cost(125);
 5426   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5427 
 5428   ins_encode %{
          Register dst_reg = $dst$$Register;
 5429     __ movzwl(dst_reg, $mem$$Address);
 5430   %}
 5431 
 5432   ins_pipe(ialu_reg_mem);
 5433 %}
 5434 
 5435 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5436 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
        // Folds (LoadUS << 24) >> 24 into one sign-extending byte load from mem.
        // The format mnemonic is MOVSX8 so the listing reflects the byte-sized
        // MOVSBL emitted here, as loadB's format does for the same instruction.
 5437   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5438 
 5439   ins_cost(125);
 5440   format %{ "MOVSX8 $dst, $mem\t# ushort -> byte" %}
 5441   ins_encode %{
 5442     __ movsbl($dst$$Register, $mem$$Address);
 5443   %}
 5444   ins_pipe(ialu_reg_mem);
 5445 %}
 5446 
 5447 // Load Unsigned Short/Char (16 bit unsigned) into Long Register
 5448 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
        // Zero-extending 16-bit load into the low half; the high half is
        // cleared with XOR (flags killed).
 5449   match(Set dst (ConvI2L (LoadUS mem)));
 5450   effect(KILL cr);
 5451 
 5452   ins_cost(250);
 5453   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5454             "XOR    $dst.hi,$dst.hi" %}
 5455 
 5456   ins_encode %{
          Register dst_lo = $dst$$Register;
          Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5457     __ movzwl(dst_lo, $mem$$Address);
 5458     __ xorl(dst_hi, dst_hi);
 5459   %}
 5460 
 5461   ins_pipe(ialu_reg_mem);
 5462 %}
 5463 
 5464 // Load Unsigned Short/Char (16 bit unsigned) with mask 0xFF into Long Register
 5465 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
        // With a 0xFF mask only the first byte at mem matters, so a
        // zero-extending byte load replaces the load + AND; high half cleared.
 5466   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5467   effect(KILL cr);
 5468 
 5469   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5470             "XOR    $dst.hi,$dst.hi" %}
 5471   ins_encode %{
 5472     Register dst_lo = $dst$$Register;
          Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5473     __ movzbl(dst_lo, $mem$$Address);
 5474     __ xorl(dst_hi, dst_hi);
 5475   %}
 5476   ins_pipe(ialu_reg_mem);
 5477 %}
 5478 
 5479 // Load Unsigned Short/Char (16 bit unsigned) with a 32-bit mask into Long Register
 5480 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
        // Zero-extending 16-bit load, high half cleared, then the low half is
        // ANDed with the mask constant truncated to its low 16 bits.
 5481   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5482   effect(KILL cr);
 5483 
 5484   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5485             "XOR    $dst.hi,$dst.hi\n\t"
 5486             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5487   ins_encode %{
 5488     Register dst_lo = $dst$$Register;
          Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5489     __ movzwl(dst_lo, $mem$$Address);
 5490     __ xorl(dst_hi, dst_hi);
 5491     __ andl(dst_lo, $mask$$constant & right_n_bits(16));
 5492   %}
 5493   ins_pipe(ialu_reg_mem);
 5494 %}
 5495 
 5496 // Load Integer: plain 32-bit load into an int register.
 5497 instruct loadI(rRegI dst, memory mem) %{
 5498   match(Set dst (LoadI mem));
 5499 
 5500   ins_cost(125);
 5501   format %{ "MOV    $dst,$mem\t# int" %}
 5502 
 5503   ins_encode %{
          Register dst_reg = $dst$$Register;
 5504     __ movl(dst_reg, $mem$$Address);
 5505   %}
 5506 
 5507   ins_pipe(ialu_reg_mem);
 5508 %}
 5509 
 5510 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5511 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
        // Folds (LoadI << 24) >> 24 into one sign-extending byte load from mem.
        // The format mnemonic is MOVSX8 so the listing reflects the byte-sized
        // MOVSBL emitted here, as loadB's format does for the same instruction.
 5512   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5513 
 5514   ins_cost(125);
 5515   format %{ "MOVSX8 $dst, $mem\t# int -> byte" %}
 5516   ins_encode %{
 5517     __ movsbl($dst$$Register, $mem$$Address);
 5518   %}
 5519   ins_pipe(ialu_reg_mem);
 5520 %}
 5521 
 5522 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5523 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
        // Folds (LoadI & 0xFF) into one zero-extending byte load from mem.
        // The format mnemonic is MOVZX8 so the listing reflects the byte-sized
        // MOVZBL emitted here, as loadUB's format does for the same instruction.
 5524   match(Set dst (AndI (LoadI mem) mask));
 5525 
 5526   ins_cost(125);
 5527   format %{ "MOVZX8 $dst, $mem\t# int -> ubyte" %}
 5528   ins_encode %{
 5529     __ movzbl($dst$$Register, $mem$$Address);
 5530   %}
 5531   ins_pipe(ialu_reg_mem);
 5532 %}
 5533 
 5534 // Load Integer (32 bit signed) to Short (16 bit signed)
 5535 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
        // Folds (LoadI << 16) >> 16 into one sign-extending 16-bit load.
 5536   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5537 
 5538   ins_cost(125);
 5539   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5540   ins_encode %{
          Register dst_reg = $dst$$Register;
 5541     __ movswl(dst_reg, $mem$$Address);
 5542   %}
 5543   ins_pipe(ialu_reg_mem);
 5544 %}
 5545 
 5546 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit unsigned)
 5547 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
        // Folds (LoadI & 0xFFFF) into one zero-extending 16-bit load.
 5548   match(Set dst (AndI (LoadI mem) mask));
 5549 
 5550   ins_cost(125);
 5551   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5552   ins_encode %{
          Register dst_reg = $dst$$Register;
 5553     __ movzwl(dst_reg, $mem$$Address);
 5554   %}
 5555   ins_pipe(ialu_reg_mem);
 5556 %}
 5557 
 5558 // Load Integer into Long Register: ConvI2L folded into the load.
 5559 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
        // The low half receives the int; the high half is a copy of it shifted
        // arithmetically right by 31, i.e. filled with the sign bit.
 5560   match(Set dst (ConvI2L (LoadI mem)));
 5561   effect(KILL cr);
 5562 
 5563   ins_cost(375);
 5564   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5565             "MOV    $dst.hi,$dst.lo\n\t"
 5566             "SAR    $dst.hi,31" %}
 5567 
 5568   ins_encode %{
          Register dst_lo = $dst$$Register;
          Register dst_hi = HIGH_FROM_LOW(dst_lo); // This is always a different register.
 5569     __ movl(dst_lo, $mem$$Address);
 5570     __ movl(dst_hi, dst_lo);
 5571     __ sarl(dst_hi, 31);
 5572   %}
 5573 
 5574   ins_pipe(ialu_reg_mem);
 5575 %}
 5576 
 5577 // Load Integer with mask 0xFF into Long Register
 5578 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
        // The 0xFF mask lets a zero-extending byte load replace load + AND;
        // the high half is cleared with XOR (flags killed).
 5579   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5580   effect(KILL cr);
 5581 
 5582   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5583             "XOR    $dst.hi,$dst.hi" %}
 5584   ins_encode %{
 5585     Register dst_lo = $dst$$Register;
          Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5586     __ movzbl(dst_lo, $mem$$Address);
 5587     __ xorl(dst_hi, dst_hi);
 5588   %}
 5589   ins_pipe(ialu_reg_mem);
 5590 %}
 5591 
 5592 // Load Integer with mask 0xFFFF into Long Register
 5593 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
        // The 0xFFFF mask lets a zero-extending 16-bit load replace load + AND;
        // the high half is cleared with XOR (flags killed).
 5594   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5595   effect(KILL cr);
 5596 
 5597   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5598             "XOR    $dst.hi,$dst.hi" %}
 5599   ins_encode %{
 5600     Register dst_lo = $dst$$Register;
          Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5601     __ movzwl(dst_lo, $mem$$Address);
 5602     __ xorl(dst_hi, dst_hi);
 5603   %}
 5604   ins_pipe(ialu_reg_mem);
 5605 %}
 5606 
 5607 // Load Integer with 31-bit mask into Long Register
 5608 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
        // 32-bit load into the low half, high half cleared with XOR, then the
        // low half is ANDed with the mask constant.
 5609   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5610   effect(KILL cr);
 5611 
 5612   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5613             "XOR    $dst.hi,$dst.hi\n\t"
 5614             "AND    $dst.lo,$mask" %}
 5615   ins_encode %{
 5616     Register dst_lo = $dst$$Register;
          Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5617     __ movl(dst_lo, $mem$$Address);
 5618     __ xorl(dst_hi, dst_hi);
 5619     __ andl(dst_lo, $mask$$constant);
 5620   %}
 5621   ins_pipe(ialu_reg_mem);
 5622 %}
 5623 
 5624 // Load Unsigned Integer into Long Register
 5625 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
        // Matches ConvI2L(LoadI) ANDed with an immL_32bits mask: the int goes
        // into the low half and the high half is simply cleared with XOR.
 5626   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5627   effect(KILL cr);
 5628 
 5629   ins_cost(250);
 5630   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5631             "XOR    $dst.hi,$dst.hi" %}
 5632 
 5633   ins_encode %{
          Register dst_lo = $dst$$Register;
          Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5634     __ movl(dst_lo, $mem$$Address);
 5635     __ xorl(dst_hi, dst_hi);
 5636   %}
 5637 
 5638   ins_pipe(ialu_reg_mem);
 5639 %}
 5640 
 5641 // Load Long (non-atomic).  The address must not be clobbered while the two
 5642 // halves are loaded, so the address register is restricted to ESI.
 5643 instruct loadL(eRegL dst, load_long_memory mem) %{
        // Only used when the LoadL node does not require atomic access: the
        // long is read as two separate 32-bit moves (mem, then mem+4).
 5644   predicate(!((LoadLNode*)n)->require_atomic_access());
 5645   match(Set dst (LoadL mem));
 5646 
 5647   ins_cost(250);
 5648   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5649             "MOV    $dst.hi,$mem+4" %}
 5650 
 5651   ins_encode %{
          Register dst_lo = $dst$$Register;
          Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5652     Address lo_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5653     Address hi_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5654     __ movl(dst_lo, lo_word);
 5655     __ movl(dst_hi, hi_word);
 5656   %}
 5657 
 5658   ins_pipe(ialu_reg_long_mem);
 5659 %}
 5660 
 5661 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5662 // then store it down to the stack and reload on the int
 5663 // side.
 5664 instruct loadL_volatile(stackSlotL dst, memory mem) %{
        // Selected when UseSSE<=1 and the LoadL node requires atomic access.
        // The result is produced in a long stack slot (dst), not a register
        // pair. The byte encoding is delegated to enc_loadL_volatile, which is
        // defined elsewhere in this file; per the format text it is a FILD from
        // $mem followed by a FISTp to $dst.
 5665   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5666   match(Set dst (LoadL mem));
 5667 
 5668   ins_cost(200);
 5669   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5670             "FISTp  $dst" %}
 5671   ins_encode(enc_loadL_volatile(mem,dst));
 5672   ins_pipe( fpu_reg_mem );
 5673 %}
 5674 
 5675 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
        // Atomic long load for UseSSE>=2 with a stack-slot result: one 64-bit
        // MOVSD from memory into an XMM temp, then one MOVSD from the temp to
        // the rsp-relative slot of dst.
 5676   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5677   match(Set dst (LoadL mem));
 5678   effect(TEMP tmp);
 5679   ins_cost(180);
 5680   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5681             "MOVSD  $dst,$tmp" %}
 5682   ins_encode %{
          XMMRegister xmm_tmp = $tmp$$XMMRegister;
          Address dst_slot(rsp, $dst$$disp);
 5683     __ movdbl(xmm_tmp, $mem$$Address);
 5684     __ movdbl(dst_slot, xmm_tmp);
 5685   %}
 5686   ins_pipe( pipe_slow );
 5687 %}
 5688 
 5689 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
        // Atomic long load for UseSSE>=2 with a register-pair result: one
        // 64-bit MOVSD into an XMM temp, MOVD of its low 32 bits into dst.lo,
        // a 32-bit logical right shift of the temp, and MOVD into dst.hi.
 5690   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5691   match(Set dst (LoadL mem));
 5692   effect(TEMP tmp);
 5693   ins_cost(160);
 5694   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5695             "MOVD   $dst.lo,$tmp\n\t"
 5696             "PSRLQ  $tmp,32\n\t"
 5697             "MOVD   $dst.hi,$tmp" %}
 5698   ins_encode %{
          XMMRegister xmm_tmp = $tmp$$XMMRegister;
          Register dst_lo = $dst$$Register;
          Register dst_hi = HIGH_FROM_LOW(dst_lo);
 5699     __ movdbl(xmm_tmp, $mem$$Address);
 5700     __ movdl(dst_lo, xmm_tmp);
 5701     __ psrlq(xmm_tmp, 32);
 5702     __ movdl(dst_hi, xmm_tmp);
 5703   %}
 5704   ins_pipe( pipe_slow );
 5705 %}
 5706 
 5707 // Load Range: matches a LoadRange node and loads it into an int register.
 5708 instruct loadRange(rRegI dst, memory mem) %{
        // Encoded through encoding classes defined elsewhere in this file:
        // primary opcode 0x8B (MOV per the format text) plus a reg/mem
        // operand (RegMem), bracketed by SetInstMark / ClearInstMark.
 5709   match(Set dst (LoadRange mem));
 5710 
 5711   ins_cost(125);
 5712   format %{ "MOV    $dst,$mem" %}
 5713   opcode(0x8B);
 5714   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5715   ins_pipe( ialu_reg_mem );
 5716 %}
 5717 
 5718 
 5719 // Load Pointer: matches a LoadP node and loads it into a pointer register.
 5720 instruct loadP(eRegP dst, memory mem) %{
        // Encoded through encoding classes defined elsewhere in this file:
        // primary opcode 0x8B (MOV per the format text) plus a reg/mem
        // operand (RegMem), bracketed by SetInstMark / ClearInstMark.
 5721   match(Set dst (LoadP mem));
 5722 
 5723   ins_cost(125);
 5724   format %{ "MOV    $dst,$mem" %}
 5725   opcode(0x8B);
 5726   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5727   ins_pipe( ialu_reg_mem );
 5728 %}
 5729 
 5730 // Load Klass Pointer: matches a LoadKlass node; same encoding shape as a pointer load.
 5731 instruct loadKlass(eRegP dst, memory mem) %{
        // Encoded through encoding classes defined elsewhere in this file:
        // primary opcode 0x8B (MOV per the format text) plus a reg/mem
        // operand (RegMem), bracketed by SetInstMark / ClearInstMark.
 5732   match(Set dst (LoadKlass mem));
 5733 
 5734   ins_cost(125);
 5735   format %{ "MOV    $dst,$mem" %}
 5736   opcode(0x8B);
 5737   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5738   ins_pipe( ialu_reg_mem );
 5739 %}
 5740 
 5741 // Load Double into an x87 register (regDPR); used when UseSSE<=1.
 5742 instruct loadDPR(regDPR dst, memory mem) %{
        // Per the format text: FLD_D pushes the memory operand onto the FPU
        // stack, then FSTP pops it into $dst. The bytes come from encoding
        // classes defined elsewhere in this file (OpcP with opcode 0xDD,
        // RMopc_Mem with /0, then Pop_Reg_DPR).
 5743   predicate(UseSSE<=1);
 5744   match(Set dst (LoadD mem));
 5745 
 5746   ins_cost(150);
 5747   format %{ "FLD_D  ST,$mem\n\t"
 5748             "FSTP   $dst" %}
 5749   opcode(0xDD);               /* DD /0 */
 5750   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
 5751               Pop_Reg_DPR(dst), ClearInstMark );
 5752   ins_pipe( fpu_reg_mem );
 5753 %}
 5754 
 5755 // Load Double to XMM; variant chosen when UseXmmLoadAndClearUpper is set.
 5756 instruct loadD(regD dst, memory mem) %{
 5757   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5758   match(Set dst (LoadD mem));
 5759   ins_cost(145);
 5760   format %{ "MOVSD  $dst,$mem" %}
 5761   ins_encode %{
          XMMRegister dst_xmm = $dst$$XMMRegister;
 5762     __ movdbl(dst_xmm, $mem$$Address);
 5763   %}
 5764   ins_pipe( pipe_slow );
 5765 %}
 5766 
 5767 instruct loadD_partial(regD dst, memory mem) %{
        // Load Double to XMM; variant chosen when UseXmmLoadAndClearUpper is
        // off. The format shows MOVLPD; the body issues the same movdbl
        // macro-assembler call as loadD.
 5768   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5769   match(Set dst (LoadD mem));
 5770   ins_cost(145);
 5771   format %{ "MOVLPD $dst,$mem" %}
 5772   ins_encode %{
          XMMRegister dst_xmm = $dst$$XMMRegister;
 5773     __ movdbl(dst_xmm, $mem$$Address);
 5774   %}
 5775   ins_pipe( pipe_slow );
 5776 %}
 5777 
 5778 // Load Float to XMM register (single-precision floating point),
 5779 // shown as MOVSS; used when UseSSE>=1.
 5780 instruct loadF(regF dst, memory mem) %{
 5781   predicate(UseSSE>=1);
 5782   match(Set dst (LoadF mem));
 5783   ins_cost(145);
 5784   format %{ "MOVSS  $dst,$mem" %}
 5785   ins_encode %{
          XMMRegister dst_xmm = $dst$$XMMRegister;
 5786     __ movflt(dst_xmm, $mem$$Address);
 5787   %}
 5788   ins_pipe( pipe_slow );
 5789 %}
 5790 
 5791 // Load Float into an x87 register (regFPR); used when UseSSE==0.
 5792 instruct loadFPR(regFPR dst, memory mem) %{
        // Per the format text: FLD_S pushes the memory operand onto the FPU
        // stack, then FSTP pops it into $dst. The bytes come from encoding
        // classes defined elsewhere in this file (OpcP with opcode 0xD9,
        // RMopc_Mem with /0, then Pop_Reg_FPR).
 5793   predicate(UseSSE==0);
 5794   match(Set dst (LoadF mem));
 5795 
 5796   ins_cost(150);
 5797   format %{ "FLD_S  ST,$mem\n\t"
 5798             "FSTP   $dst" %}
 5799   opcode(0xD9);               /* D9 /0 */
 5800   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
 5801               Pop_Reg_FPR(dst), ClearInstMark );
 5802   ins_pipe( fpu_reg_mem );
 5803 %}
 5804 
 5805 // Load Effective Address for an indOffset8 address operand.
 5806 instruct leaP8(eRegP dst, indOffset8 mem) %{
        // The match rule sets dst from the address operand itself (no load).
        // Encoded as opcode 0x8D (LEA per the format text) plus a reg/mem
        // operand via encoding classes defined elsewhere in this file.
 5807   match(Set dst mem);
 5808 
 5809   ins_cost(110);
 5810   format %{ "LEA    $dst,$mem" %}
 5811   opcode(0x8D);
 5812   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5813   ins_pipe( ialu_reg_reg_fat );
 5814 %}
 5815 
 5816 instruct leaP32(eRegP dst, indOffset32 mem) %{
        // Load Effective Address for an indOffset32 address operand: the
        // match rule sets dst from the address operand itself (no load).
        // Encoded as opcode 0x8D (LEA per the format text) plus a reg/mem
        // operand via encoding classes defined elsewhere in this file.
 5817   match(Set dst mem);
 5818 
 5819   ins_cost(110);
 5820   format %{ "LEA    $dst,$mem" %}
 5821   opcode(0x8D);
 5822   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5823   ins_pipe( ialu_reg_reg_fat );
 5824 %}
 5825 
 5826 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
        // Load Effective Address for an indIndexOffset address operand: the
        // match rule sets dst from the address operand itself (no load).
        // Encoded as opcode 0x8D (LEA per the format text) plus a reg/mem
        // operand via encoding classes defined elsewhere in this file.
 5827   match(Set dst mem);
 5828 
 5829   ins_cost(110);
 5830   format %{ "LEA    $dst,$mem" %}
 5831   opcode(0x8D);
 5832   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5833   ins_pipe( ialu_reg_reg_fat );
 5834 %}
 5835 
 5836 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
        // Load Effective Address for an indIndexScale address operand: the
        // match rule sets dst from the address operand itself (no load).
        // Encoded as opcode 0x8D (LEA per the format text) plus a reg/mem
        // operand via encoding classes defined elsewhere in this file.
 5837   match(Set dst mem);
 5838 
 5839   ins_cost(110);
 5840   format %{ "LEA    $dst,$mem" %}
 5841   opcode(0x8D);
 5842   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5843   ins_pipe( ialu_reg_reg_fat );
 5844 %}
 5845 
 5846 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
        // Load Effective Address for an indIndexScaleOffset address operand:
        // the match rule sets dst from the address operand itself (no load).
        // Encoded as opcode 0x8D (LEA per the format text) plus a reg/mem
        // operand via encoding classes defined elsewhere in this file.
 5847   match(Set dst mem);
 5848 
 5849   ins_cost(110);
 5850   format %{ "LEA    $dst,$mem" %}
 5851   opcode(0x8D);
 5852   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5853   ins_pipe( ialu_reg_reg_fat );
 5854 %}
 5855 
 5856 // Load Constant: materialize an int immediate in an int register.
 5857 instruct loadConI(rRegI dst, immI src) %{
        // No flags effect is declared. The bytes are produced by the LdImmI
        // encoding class (defined elsewhere in this file), shown as MOV.
 5858   match(Set dst src);
 5859 
 5860   format %{ "MOV    $dst,$src" %}
 5861   ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark );
 5862   ins_pipe( ialu_reg_fat );
 5863 %}
 5864 
 5865 // Load Constant zero: cheaper form for the immI_0 operand (ins_cost 50).
 5866 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
        // Zeroes dst with XOR dst,dst (opcode 0x33 with dst as both operands),
        // so the flags register is declared killed.
 5867   match(Set dst src);
 5868   effect(KILL cr);
 5869 
 5870   ins_cost(50);
 5871   format %{ "XOR    $dst,$dst" %}
 5872   opcode(0x33);  /* + rd */
 5873   ins_encode( OpcP, RegReg( dst, dst ) );
 5874   ins_pipe( ialu_reg );
 5875 %}
 5876 
 5877 instruct loadConP(eRegP dst, immP src) %{
        // Materialize a pointer immediate in a pointer register, shown as
        // MOV. The bytes are produced by the LdImmP encoding class (defined
        // elsewhere in this file) with 0xB8 declared as the primary opcode.
 5878   match(Set dst src);
 5879 
 5880   format %{ "MOV    $dst,$src" %}
 5881   opcode(0xB8);  /* + rd */
 5882   ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark );
 5883   ins_pipe( ialu_reg_fat );
 5884 %}
 5885 
 5886 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
        // Materialize a long immediate in a register pair: the low and high
        // halves are loaded by separate encoding classes (LdImmL_Lo and
        // LdImmL_Hi, defined elsewhere in this file). Flags are declared
        // killed. NOTE(review): presumably because those classes may emit a
        // flag-clobbering form for some constants; confirm in their
        // definitions.
 5887   match(Set dst src);
 5888   effect(KILL cr);
 5889   ins_cost(200);
 5890   format %{ "MOV    $dst.lo,$src.lo\n\t"
 5891             "MOV    $dst.hi,$src.hi" %}
 5892   opcode(0xB8);
 5893   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 5894   ins_pipe( ialu_reg_long_fat );
 5895 %}
 5896 
 5897 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
        // Long zero constant: cheaper form (ins_cost 150) for the immL0
        // operand. Each half of the pair is cleared with XOR (opcode 0x33 for
        // both), so the flags register is declared killed.
 5898   match(Set dst src);
 5899   effect(KILL cr);
 5900   ins_cost(150);
 5901   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 5902             "XOR    $dst.hi,$dst.hi" %}
 5903   opcode(0x33,0x33);
 5904   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 5905   ins_pipe( ialu_reg_long );
 5906 %}
 5907 
 5908 // Float constant into an x87 register. Applicability is guarded by the
      // predicate in operand immFPR().
 5909 instruct loadConFPR(regFPR dst, immFPR con) %{
        // Pushes the constant from the constant table, then pops it into dst.
 5910   match(Set dst con);
 5911   ins_cost(125);
 5912   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 5913             "FSTP   $dst" %}
 5914   ins_encode %{
          int dst_fpr = $dst$$reg;
 5915     __ fld_s($constantaddress($con));
 5916     __ fstp_d(dst_fpr);
 5917   %}
 5918   ins_pipe(fpu_reg_con);
 5919 %}
 5920 
 5921 // Float constant 0.0 into an x87 register. Applicability is guarded by the
      // predicate in operand immFPR0().
 5922 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
        // FLDZ pushes zero without touching the constant table; pop into dst.
 5923   match(Set dst con);
 5924   ins_cost(125);
 5925   format %{ "FLDZ   ST\n\t"
 5926             "FSTP   $dst" %}
 5927   ins_encode %{
          int dst_fpr = $dst$$reg;
 5928     __ fldz();
 5929     __ fstp_d(dst_fpr);
 5930   %}
 5931   ins_pipe(fpu_reg_con);
 5932 %}
 5933 
 5934 // Float constant 1.0 into an x87 register. Applicability is guarded by the
      // predicate in operand immFPR1().
 5935 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
        // FLD1 pushes one without touching the constant table; pop into dst.
 5936   match(Set dst con);
 5937   ins_cost(125);
 5938   format %{ "FLD1   ST\n\t"
 5939             "FSTP   $dst" %}
 5940   ins_encode %{
          int dst_fpr = $dst$$reg;
 5941     __ fld1();
 5942     __ fstp_d(dst_fpr);
 5943   %}
 5944   ins_pipe(fpu_reg_con);
 5945 %}
 5946 
 5947 // Float constant into an XMM register, read from the constant table.
      // Applicability is guarded by the predicate in operand immF().
 5948 instruct loadConF(regF dst, immF con) %{
 5949   match(Set dst con);
 5950   ins_cost(125);
 5951   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 5952   ins_encode %{
          XMMRegister dst_xmm = $dst$$XMMRegister;
 5953     __ movflt(dst_xmm, $constantaddress($con));
 5954   %}
 5955   ins_pipe(pipe_slow);
 5956 %}
 5957 
 5958 // Float constant 0.0 into an XMM register by XORing it with itself.
      // Applicability is guarded by the predicate in operand immF0().
 5959 instruct loadConF0(regF dst, immF0 src) %{
 5960   match(Set dst src);
 5961   ins_cost(100);
 5962   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 5963   ins_encode %{
          XMMRegister dst_xmm = $dst$$XMMRegister;
 5964     __ xorps(dst_xmm, dst_xmm);
 5965   %}
 5966   ins_pipe(pipe_slow);
 5967 %}
 5968 
 5969 // Double constant into an x87 register. Applicability is guarded by the
      // predicate in operand immDPR().
 5970 instruct loadConDPR(regDPR dst, immDPR con) %{
        // Pushes the constant from the constant table, then pops it into dst.
 5971   match(Set dst con);
 5972   ins_cost(125);
 5973 
 5974   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 5975             "FSTP   $dst" %}
 5976   ins_encode %{
          int dst_fpr = $dst$$reg;
 5977     __ fld_d($constantaddress($con));
 5978     __ fstp_d(dst_fpr);
 5979   %}
 5980   ins_pipe(fpu_reg_con);
 5981 %}
 5982 
 5983 // Double constant 0.0 into an x87 register. Applicability is guarded by
      // the predicate in operand immDPR0().
 5984 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
        // FLDZ pushes zero without touching the constant table; pop into dst.
 5985   match(Set dst con);
 5986   ins_cost(125);
 5987 
 5988   format %{ "FLDZ   ST\n\t"
 5989             "FSTP   $dst" %}
 5990   ins_encode %{
          int dst_fpr = $dst$$reg;
 5991     __ fldz();
 5992     __ fstp_d(dst_fpr);
 5993   %}
 5994   ins_pipe(fpu_reg_con);
 5995 %}
 5996 
 5997 // Double constant 1.0 into an x87 register. Applicability is guarded by
      // the predicate in operand immDPR1().
 5998 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
        // FLD1 pushes one without touching the constant table; pop into dst.
 5999   match(Set dst con);
 6000   ins_cost(125);
 6001 
 6002   format %{ "FLD1   ST\n\t"
 6003             "FSTP   $dst" %}
 6004   ins_encode %{
          int dst_fpr = $dst$$reg;
 6005     __ fld1();
 6006     __ fstp_d(dst_fpr);
 6007   %}
 6008   ins_pipe(fpu_reg_con);
 6009 %}
 6010 
 6011 // Double constant into an XMM register, read from the constant table.
      // Applicability is guarded by the predicate in operand immD().
 6012 instruct loadConD(regD dst, immD con) %{
 6013   match(Set dst con);
 6014   ins_cost(125);
 6015   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6016   ins_encode %{
          XMMRegister dst_xmm = $dst$$XMMRegister;
 6017     __ movdbl(dst_xmm, $constantaddress($con));
 6018   %}
 6019   ins_pipe(pipe_slow);
 6020 %}
 6021 
 6022 // Double constant 0.0 into an XMM register by XORing it with itself.
      // Applicability is guarded by the predicate in operand immD0().
 6023 instruct loadConD0(regD dst, immD0 src) %{
 6024   match(Set dst src);
 6025   ins_cost(100);
 6026   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6027   ins_encode %{
          XMMRegister dst_xmm = $dst$$XMMRegister;
 6028     __ xorpd(dst_xmm, dst_xmm);
 6029   %}
 6030   ins_pipe( pipe_slow );
 6031 %}
 6032 
 6033 // Load Stack Slot
      // Integer variant: a single MOV r32, m32 (opcode 0x8B) from the stack slot to $dst.
 6034 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6035   match(Set dst src);
 6036   ins_cost(125);
 6037 
 6038   format %{ "MOV    $dst,$src" %}
 6039   opcode(0x8B);
 6040   ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
 6041   ins_pipe( ialu_reg_mem );
 6042 %}
 6043 
 6044 instruct loadSSL(eRegL dst, stackSlotL src) %{
        // Load a long from a stack slot as two 32-bit MOVs (both opcode 0x8B):
        // the low word first (RegMem), then the high word (RegMem_Hi).
 6045   match(Set dst src);
 6046 
 6047   ins_cost(200);
 6048   format %{ "MOV    $dst,$src.lo\n\t"
 6049             "MOV    $dst+4,$src.hi" %}
 6050   opcode(0x8B, 0x8B);
 6051   ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark );
 6052   ins_pipe( ialu_mem_long_reg );
 6053 %}
 6054 
 6055 // Load Stack Slot
      // Pointer variant: same single MOV (opcode 0x8B) as loadSSI, with pointer operands.
 6056 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6057   match(Set dst src);
 6058   ins_cost(125);
 6059 
 6060   format %{ "MOV    $dst,$src" %}
 6061   opcode(0x8B);
 6062   ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
 6063   ins_pipe( ialu_reg_mem );
 6064 %}
 6065 
 6066 // Load Stack Slot
      // Float variant for the x87 register file: FLD_S from the stack slot, then
      // Pop_Reg_FPR stores the loaded value into $dst (shown as FSTP in the format).
 6067 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6068   match(Set dst src);
 6069   ins_cost(125);
 6070 
 6071   format %{ "FLD_S  $src\n\t"
 6072             "FSTP   $dst" %}
 6073   opcode(0xD9);               /* D9 /0, FLD m32real */
 6074   ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
 6075               Pop_Reg_FPR(dst), ClearInstMark );
 6076   ins_pipe( fpu_reg_mem );
 6077 %}
 6078 
 6079 // Load Stack Slot
      // Double variant for the x87 register file: FLD_D from the stack slot, then
      // Pop_Reg_DPR stores the loaded value into $dst (shown as FSTP in the format).
 6080 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6081   match(Set dst src);
 6082   ins_cost(125);
 6083 
 6084   format %{ "FLD_D  $src\n\t"
 6085             "FSTP   $dst" %}
 6086   opcode(0xDD);               /* DD /0, FLD m64real */
 6087   ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
 6088               Pop_Reg_DPR(dst), ClearInstMark );
 6089   ins_pipe( fpu_reg_mem );
 6090 %}
 6091 
 6092 // Prefetch instructions for allocation.
 6093 // Must be safe to execute with invalid address (cannot fault).
 6094 
 6095 instruct prefetchAlloc0( memory mem ) %{
        // Allocation prefetch when no SSE is available (UseSSE==0) and PREFETCHW was
        // not requested (AllocatePrefetchInstr!=3): matches the node but emits nothing.
 6096   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6097   match(PrefetchAllocation mem);
 6098   ins_cost(0);
 6099   size(0);
 6100   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6101   ins_encode();
 6102   ins_pipe(empty);
 6103 %}
 6104 
 6105 instruct prefetchAlloc( memory mem ) %{
        // Allocation prefetch using PREFETCHW; guarded by AllocatePrefetchInstr==3.
        // Note the predicate does not test UseSSE, unlike the other prefetch variants.
 6106   predicate(AllocatePrefetchInstr==3);
 6107   match( PrefetchAllocation mem );
 6108   ins_cost(100);
 6109 
 6110   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6111   ins_encode %{
 6112     __ prefetchw($mem$$Address);
 6113   %}
 6114   ins_pipe(ialu_mem);
 6115 %}
 6116 
 6117 instruct prefetchAllocNTA( memory mem ) %{
        // Allocation prefetch using PREFETCHNTA; guarded by UseSSE>=1 and
        // AllocatePrefetchInstr==0.
 6118   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6119   match(PrefetchAllocation mem);
 6120   ins_cost(100);
 6121 
 6122   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6123   ins_encode %{
 6124     __ prefetchnta($mem$$Address);
 6125   %}
 6126   ins_pipe(ialu_mem);
 6127 %}
 6128 
 6129 instruct prefetchAllocT0( memory mem ) %{
        // Allocation prefetch using PREFETCHT0; guarded by UseSSE>=1 and
        // AllocatePrefetchInstr==1.
 6130   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6131   match(PrefetchAllocation mem);
 6132   ins_cost(100);
 6133 
 6134   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6135   ins_encode %{
 6136     __ prefetcht0($mem$$Address);
 6137   %}
 6138   ins_pipe(ialu_mem);
 6139 %}
 6140 
 6141 instruct prefetchAllocT2( memory mem ) %{
        // Allocation prefetch using PREFETCHT2; guarded by UseSSE>=1 and
        // AllocatePrefetchInstr==2.
 6142   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6143   match(PrefetchAllocation mem);
 6144   ins_cost(100);
 6145 
 6146   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6147   ins_encode %{
 6148     __ prefetcht2($mem$$Address);
 6149   %}
 6150   ins_pipe(ialu_mem);
 6151 %}
 6152 
 6153 //----------Store Instructions-------------------------------------------------
 6154 
 6155 // Store Byte
      // 8-bit MOV (opcode 0x88) from a register to memory. The source uses the xRegI
      // operand class rather than rRegI -- presumably to restrict it to byte-addressable
      // registers; confirm against the xRegI definition.
 6156 instruct storeB(memory mem, xRegI src) %{
 6157   match(Set mem (StoreB mem src));
 6158 
 6159   ins_cost(125);
 6160   format %{ "MOV8   $mem,$src" %}
 6161   opcode(0x88);
 6162   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
 6163   ins_pipe( ialu_mem_reg );
 6164 %}
 6165 
 6166 // Store Char/Short
      // 16-bit MOV from a register to memory. The encoding lists OpcS before OpcP, so
      // the second opcode() argument (0x66, the x86 operand-size prefix) is emitted
      // ahead of the 0x89 MOV opcode.
 6167 instruct storeC(memory mem, rRegI src) %{
 6168   match(Set mem (StoreC mem src));
 6169 
 6170   ins_cost(125);
 6171   format %{ "MOV16  $mem,$src" %}
 6172   opcode(0x89, 0x66);
 6173   ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark );
 6174   ins_pipe( ialu_mem_reg );
 6175 %}
 6176 
 6177 // Store Integer
      // 32-bit MOV (opcode 0x89) from a register to memory.
 6178 instruct storeI(memory mem, rRegI src) %{
 6179   match(Set mem (StoreI mem src));
 6180 
 6181   ins_cost(125);
 6182   format %{ "MOV    $mem,$src" %}
 6183   opcode(0x89);
 6184   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
 6185   ins_pipe( ialu_mem_reg );
 6186 %}
 6187 
 6188 // Store Long
      // Non-atomic form: guarded by the StoreL node NOT requiring atomic access, so the
      // value may be written as two separate 32-bit MOVs (low word, then high word at
      // $mem+4). Atomic stores are handled by the *_volatile instructs.
 6189 instruct storeL(long_memory mem, eRegL src) %{
 6190   predicate(!((StoreLNode*)n)->require_atomic_access());
 6191   match(Set mem (StoreL mem src));
 6192 
 6193   ins_cost(200);
 6194   format %{ "MOV    $mem,$src.lo\n\t"
 6195             "MOV    $mem+4,$src.hi" %}
 6196   opcode(0x89, 0x89);
 6197   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark );
 6198   ins_pipe( ialu_mem_long_reg );
 6199 %}
 6200 
 6201 // Store Long to Integer
      // Folds the ConvL2I into the store: a single 32-bit MOV of $src$$Register, which
      // the format shows as the low word ($src.lo) of the long register pair.
 6202 instruct storeL2I(memory mem, eRegL src) %{
 6203   match(Set mem (StoreI mem (ConvL2I src)));
 6204 
 6205   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6206   ins_encode %{
 6207     __ movl($mem$$Address, $src$$Register);
 6208   %}
 6209   ins_pipe(ialu_mem_reg);
 6210 %}
 6211 
 6212 // Volatile Store Long.  Must be atomic, so move it into
 6213 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6214 // target address before the store (for null-ptr checks)
 6215 // so the memory operand is used twice in the encoding.
      // Guarded by UseSSE<=1 and the StoreL node requiring atomic access. The probe is
      // the opcode 0x3B compare of EAX against $mem, which is why the flags register
      // is declared KILLed. The source long must be in a stack slot for the FILD.
 6216 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6217   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6218   match(Set mem (StoreL mem src));
 6219   effect( KILL cr );
 6220   ins_cost(400);
 6221   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6222             "FILD   $src\n\t"
 6223             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6224   opcode(0x3B);
 6225   ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark);
 6226   ins_pipe( fpu_reg_mem );
 6227 %}
 6228 
 6229 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
        // Atomic (volatile) long store using SSE2 (guarded by UseSSE>=2 and the node
        // requiring atomic access). The long is read from its stack slot into the
        // XMM temp and written to memory with one 64-bit MOVSD.
 6230   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6231   match(Set mem (StoreL mem src));
 6232   effect( TEMP tmp, KILL cr );
 6233   ins_cost(380);
 6234   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6235             "MOVSD  $tmp,$src\n\t"
 6236             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6237   ins_encode %{
 6238     __ cmpl(rax, $mem$$Address);  // address probe; this compare is why cr is KILLed
 6239     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));  // stack slot addressed off rsp
 6240     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6241   %}
 6242   ins_pipe( pipe_slow );
 6243 %}
 6244 
 6245 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
        // Atomic (volatile) long store using SSE2 when the long is in a register pair.
        // The two 32-bit halves are moved into XMM temps, interleaved with PUNPCKLDQ
        // into one 64-bit value, and written with a single MOVSD. Cost 360 is lower
        // than the stack-slot variant storeLX_volatile (380).
 6246   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6247   match(Set mem (StoreL mem src));
 6248   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6249   ins_cost(360);
 6250   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6251             "MOVD   $tmp,$src.lo\n\t"
 6252             "MOVD   $tmp2,$src.hi\n\t"
 6253             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6254             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6255   ins_encode %{
 6256     __ cmpl(rax, $mem$$Address);  // address probe; this compare is why cr is KILLed
 6257     __ movdl($tmp$$XMMRegister, $src$$Register);                  // low word
 6258     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));  // high word
 6259     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);          // tmp = hi:lo
 6260     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6261   %}
 6262   ins_pipe( pipe_slow );
 6263 %}
 6264 
 6265 // Store Pointer; for storing unknown oops and raw pointers
      // 32-bit MOV (opcode 0x89) from any pointer register (anyRegP) to memory.
 6266 instruct storeP(memory mem, anyRegP src) %{
 6267   match(Set mem (StoreP mem src));
 6268 
 6269   ins_cost(125);
 6270   format %{ "MOV    $mem,$src" %}
 6271   opcode(0x89);
 6272   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
 6273   ins_pipe( ialu_mem_reg );
 6274 %}
 6275 
 6276 // Store Integer Immediate
      // MOV m32, imm32 (C7 /0): the constant is emitted as a full 32-bit immediate
      // (Con32), avoiding a register for the value.
 6277 instruct storeImmI(memory mem, immI src) %{
 6278   match(Set mem (StoreI mem src));
 6279 
 6280   ins_cost(150);
 6281   format %{ "MOV    $mem,$src" %}
 6282   opcode(0xC7);               /* C7 /0 */
 6283   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark);
 6284   ins_pipe( ialu_mem_imm );
 6285 %}
 6286 
 6287 // Store Short/Char Immediate
      // Guarded by the UseStoreImmI16 flag. Same C7 /0 opcode as storeImmI, but
      // preceded by SizePrefix and followed by a 16-bit immediate (Con16).
 6288 instruct storeImmI16(memory mem, immI16 src) %{
 6289   predicate(UseStoreImmI16);
 6290   match(Set mem (StoreC mem src));
 6291 
 6292   ins_cost(150);
 6293   format %{ "MOV16  $mem,$src" %}
 6294   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6295   ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark);
 6296   ins_pipe( ialu_mem_imm );
 6297 %}
 6298 
 6299 // Store Pointer Immediate; null pointers or constant oops that do not
 6300 // need card-mark barriers.
      // Encoded like storeImmI: C7 /0 followed by the pointer as a 32-bit immediate.
 6301 instruct storeImmP(memory mem, immP src) %{
 6302   match(Set mem (StoreP mem src));
 6303 
 6304   ins_cost(150);
 6305   format %{ "MOV    $mem,$src" %}
 6306   opcode(0xC7);               /* C7 /0 */
 6307   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark);
 6308   ins_pipe( ialu_mem_imm );
 6309 %}
 6310 
 6311 // Store Byte Immediate
      // MOV m8, imm8 (C6 /0). The immediate is emitted through Con8or32; since the
      // operand is immI8 this is presumably always the 8-bit form -- confirm against
      // the Con8or32 encoding class.
 6312 instruct storeImmB(memory mem, immI8 src) %{
 6313   match(Set mem (StoreB mem src));
 6314 
 6315   ins_cost(150);
 6316   format %{ "MOV8   $mem,$src" %}
 6317   opcode(0xC6);               /* C6 /0 */
 6318   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
 6319   ins_pipe( ialu_mem_imm );
 6320 %}
 6321 
 6322 // Store Double
      // x87 form, guarded by UseSSE<=1: FST_D (DD /2) of $src to memory via the shared
      // enc_FPR_store encoding. The source is constrained to the regDPR1 operand class.
 6323 instruct storeDPR( memory mem, regDPR1 src) %{
 6324   predicate(UseSSE<=1);
 6325   match(Set mem (StoreD mem src));
 6326 
 6327   ins_cost(100);
 6328   format %{ "FST_D  $mem,$src" %}
 6329   opcode(0xDD);       /* DD /2 */
 6330   ins_encode( enc_FPR_store(mem,src) );
 6331   ins_pipe( fpu_mem_reg );
 6332 %}
 6333 
 6334 // Store double does rounding on x86
      // Matches a StoreD whose value is a RoundDouble node and emits the same FST_D
      // encoding as storeDPR, so no separate rounding instruction is generated.
 6335 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6336   predicate(UseSSE<=1);
 6337   match(Set mem (StoreD mem (RoundDouble src)));
 6338 
 6339   ins_cost(100);
 6340   format %{ "FST_D  $mem,$src\t# round" %}
 6341   opcode(0xDD);       /* DD /2 */
 6342   ins_encode( enc_FPR_store(mem,src) );
 6343   ins_pipe( fpu_mem_reg );
 6344 %}
 6345 
 6346 // Store XMM register to memory (double-precision floating points)
 6347 // MOVSD instruction
      // Guarded by UseSSE>=2; cost 95 is slightly below the x87 store (100).
 6348 instruct storeD(memory mem, regD src) %{
 6349   predicate(UseSSE>=2);
 6350   match(Set mem (StoreD mem src));
 6351   ins_cost(95);
 6352   format %{ "MOVSD  $mem,$src" %}
 6353   ins_encode %{
 6354     __ movdbl($mem$$Address, $src$$XMMRegister);
 6355   %}
 6356   ins_pipe( pipe_slow );
 6357 %}
 6358 
 6359 // Store XMM register to memory (single-precision floating point)
 6360 // MOVSS instruction
      // Guarded by UseSSE>=1; cost 95 is slightly below the x87 store (100).
 6361 instruct storeF(memory mem, regF src) %{
 6362   predicate(UseSSE>=1);
 6363   match(Set mem (StoreF mem src));
 6364   ins_cost(95);
 6365   format %{ "MOVSS  $mem,$src" %}
 6366   ins_encode %{
 6367     __ movflt($mem$$Address, $src$$XMMRegister);
 6368   %}
 6369   ins_pipe( pipe_slow );
 6370 %}
 6371 
 6372 
 6373 // Store Float
      // x87 form, guarded by UseSSE==0: FST_S (D9 /2) of $src to memory via the shared
      // enc_FPR_store encoding.
 6374 instruct storeFPR( memory mem, regFPR1 src) %{
 6375   predicate(UseSSE==0);
 6376   match(Set mem (StoreF mem src));
 6377 
 6378   ins_cost(100);
 6379   format %{ "FST_S  $mem,$src" %}
 6380   opcode(0xD9);       /* D9 /2 */
 6381   ins_encode( enc_FPR_store(mem,src) );
 6382   ins_pipe( fpu_mem_reg );
 6383 %}
 6384 
 6385 // Store Float does rounding on x86
      // Matches a StoreF whose value is a RoundFloat node and emits the same FST_S
      // encoding as storeFPR, so no separate rounding instruction is generated.
 6386 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6387   predicate(UseSSE==0);
 6388   match(Set mem (StoreF mem (RoundFloat src)));
 6389 
 6390   ins_cost(100);
 6391   format %{ "FST_S  $mem,$src\t# round" %}
 6392   opcode(0xD9);       /* D9 /2 */
 6393   ins_encode( enc_FPR_store(mem,src) );
 6394   ins_pipe( fpu_mem_reg );
 6395 %}
 6396 
 6397 // Store Float does rounding on x86
      // Matches a StoreF whose value is a ConvD2F of an x87 double and folds the
      // conversion into the FST_S store. Guarded by UseSSE<=1 (not ==0 as in the
      // other float x87 stores), matching the predicate used for x87 doubles.
 6398 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6399   predicate(UseSSE<=1);
 6400   match(Set mem (StoreF mem (ConvD2F src)));
 6401 
 6402   ins_cost(100);
 6403   format %{ "FST_S  $mem,$src\t# D-round" %}
 6404   opcode(0xD9);       /* D9 /2 */
 6405   ins_encode( enc_FPR_store(mem,src) );
 6406   ins_pipe( fpu_mem_reg );
 6407 %}
 6408 
 6409 // Store immediate Float value (it is faster than store from FPU register)
 6410 // The instruction usage is guarded by predicate in operand immFPR().
      // Emitted as an integer MOV m32, imm32 (C7 /0) whose immediate is the float's
      // bit pattern (Con32FPR_as_bits); cost 50 is below the register store forms.
 6411 instruct storeFPR_imm( memory mem, immFPR src) %{
 6412   match(Set mem (StoreF mem src));
 6413 
 6414   ins_cost(50);
 6415   format %{ "MOV    $mem,$src\t# store float" %}
 6416   opcode(0xC7);               /* C7 /0 */
 6417   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits(src), ClearInstMark);
 6418   ins_pipe( ialu_mem_imm );
 6419 %}
 6420 
 6421 // Store immediate Float value (it is faster than store from XMM register)
 6422 // The instruction usage is guarded by predicate in operand immF().
      // Emitted as an integer MOV m32, imm32 (C7 /0) whose immediate is the float's
      // bit pattern (Con32F_as_bits); cost 50 is below the register store forms.
 6423 instruct storeF_imm( memory mem, immF src) %{
 6424   match(Set mem (StoreF mem src));
 6425 
 6426   ins_cost(50);
 6427   format %{ "MOV    $mem,$src\t# store float" %}
 6428   opcode(0xC7);               /* C7 /0 */
 6429   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits(src), ClearInstMark);
 6430   ins_pipe( ialu_mem_imm );
 6431 %}
 6432 
 6433 // Store Integer to stack slot
      // 32-bit MOV (opcode 0x89) from a register to a stack slot, using the dedicated
      // OpcPRegSS encoding.
 6434 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6435   match(Set dst src);
 6436 
 6437   ins_cost(100);
 6438   format %{ "MOV    $dst,$src" %}
 6439   opcode(0x89);
 6440   ins_encode( OpcPRegSS( dst, src ) );
 6441   ins_pipe( ialu_mem_reg );
 6442 %}
 6443 
 6444 // Store Pointer to stack slot
      // 32-bit MOV (opcode 0x89) from a pointer register to a stack slot; same
      // OpcPRegSS encoding as storeSSI, with pointer operand classes.
 6445 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6446   match(Set dst src);
 6447 
 6448   ins_cost(100);
 6449   format %{ "MOV    $dst,$src" %}
 6450   opcode(0x89);
 6451   ins_encode( OpcPRegSS( dst, src ) );
 6452   ins_pipe( ialu_mem_reg );
 6453 %}
 6454 
 6455 // Store Long to stack slot
      // Written as two 32-bit MOVs (both opcode 0x89): the low word (RegMem), then
      // the high word at $dst+4 (RegMem_Hi).
 6456 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6457   match(Set dst src);
 6458 
 6459   ins_cost(200);
 6460   format %{ "MOV    $dst,$src.lo\n\t"
 6461             "MOV    $dst+4,$src.hi" %}
 6462   opcode(0x89, 0x89);
 6463   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
 6464   ins_pipe( ialu_mem_long_reg );
 6465 %}
 6466 
 6467 //----------MemBar Instructions-----------------------------------------------
 6468 // Memory barrier flavors
 6469 
 6470 instruct membar_acquire() %{
        // Acquire barrier; matches both MemBarAcquire and LoadFence nodes.
        // Emits no machine code: size(0) with an empty ins_encode.
 6471   match(MemBarAcquire);
 6472   match(LoadFence);
 6473   ins_cost(400);
 6474 
 6475   size(0);
 6476   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6477   ins_encode();
 6478   ins_pipe(empty);
 6479 %}
 6480 
 6481 instruct membar_acquire_lock() %{
        // Acquire barrier associated with a lock. Emits no code: per the format text,
        // the CMPXCHG issued earlier by FastLock makes a separate barrier unnecessary.
 6482   match(MemBarAcquireLock);
 6483   ins_cost(0);
 6484 
 6485   size(0);
 6486   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6487   ins_encode( );
 6488   ins_pipe(empty);
 6489 %}
 6490 
 6491 instruct membar_release() %{
        // Release barrier; matches both MemBarRelease and StoreFence nodes.
        // Emits no machine code: size(0) with an empty ins_encode.
 6492   match(MemBarRelease);
 6493   match(StoreFence);
 6494   ins_cost(400);
 6495 
 6496   size(0);
 6497   format %{ "MEMBAR-release ! (empty encoding)" %}
 6498   ins_encode( );
 6499   ins_pipe(empty);
 6500 %}
 6501 
 6502 instruct membar_release_lock() %{
        // Release barrier associated with an unlock. Emits no code: per the format
        // text, the FastUnlock that follows makes a separate barrier unnecessary.
 6503   match(MemBarReleaseLock);
 6504   ins_cost(0);
 6505 
 6506   size(0);
 6507   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6508   ins_encode( );
 6509   ins_pipe(empty);
 6510 %}
 6511 
 6512 instruct membar_volatile(eFlagsReg cr) %{
        // Volatile (StoreLoad) barrier: the only membar in this group that emits code.
        // The format shows it as a LOCKed ADDL of 0 to the top of the stack, which
        // modifies the flags -- hence the KILL cr effect.
 6513   match(MemBarVolatile);
 6514   effect(KILL cr);
 6515   ins_cost(400);
 6516 
 6517   format %{
 6518     $$template
 6519     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6520   %}
 6521   ins_encode %{
 6522     __ membar(Assembler::StoreLoad);
 6523   %}
 6524   ins_pipe(pipe_slow);
 6525 %}
 6526 
 6527 instruct unnecessary_membar_volatile() %{
        // Zero-cost, zero-size alternative to membar_volatile, guarded by
        // Matcher::post_store_load_barrier(n): when that predicate holds the barrier
        // is treated as redundant and nothing is emitted.
 6528   match(MemBarVolatile);
 6529   predicate(Matcher::post_store_load_barrier(n));
 6530   ins_cost(0);
 6531 
 6532   size(0);
 6533   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6534   ins_encode( );
 6535   ins_pipe(empty);
 6536 %}
 6537 
 6538 instruct membar_storestore() %{
        // StoreStore barrier; matches both MemBarStoreStore and StoreStoreFence nodes.
        // Emits no machine code: size(0) with an empty ins_encode.
 6539   match(MemBarStoreStore);
 6540   match(StoreStoreFence);
 6541   ins_cost(0);
 6542 
 6543   size(0);
 6544   format %{ "MEMBAR-storestore (empty encoding)" %}
 6545   ins_encode( );
 6546   ins_pipe(empty);
 6547 %}
 6548 
 6549 //----------Move Instructions--------------------------------------------------
 6550 instruct castX2P(eAXRegP dst, eAXRegI src) %{
        // Reinterpret an integer as a pointer (CastX2P). Both operands use the
        // eAX-specific operand classes and the encoding is empty, so no instruction
        // is emitted -- presumably because source and result share one register;
        // confirm against the eAXRegP/eAXRegI definitions.
 6551   match(Set dst (CastX2P src));
 6552   format %{ "# X2P  $dst, $src" %}
 6553   ins_encode( /*empty encoding*/ );
 6554   ins_cost(0);
 6555   ins_pipe(empty);
 6556 %}
 6557 
 6558 instruct castP2X(rRegI dst, eRegP src ) %{
        // Reinterpret a pointer as an integer (CastP2X): a register-to-register copy
        // via the enc_Copy encoding, shown as a MOV in the format.
 6559   match(Set dst (CastP2X src));
 6560   ins_cost(50);
 6561   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6562   ins_encode( enc_Copy( dst, src) );
 6563   ins_pipe( ialu_reg_reg );
 6564 %}
 6565 
 6566 //----------Conditional Move---------------------------------------------------
 6567 // Conditional move
      // Integer conditional move for CPUs without CMOV (guarded by
      // !VM_Version::supports_cmov()), signed-compare flavor. Emulated with a short
      // conditional branch over a plain MOV.
 6568 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6569   predicate(!VM_Version::supports_cmov() );
 6570   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6571   ins_cost(200);
 6572   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6573             "MOV    $dst,$src\n"
 6574       "skip:" %}
 6575   ins_encode %{
 6576     Label Lskip;
 6577     // Invert sense of branch from sense of CMOV
      //   (flipping the low bit of the condition code yields the opposite condition,
      //    so the MOV is skipped exactly when the cmove condition is false)
 6578     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6579     __ movl($dst$$Register, $src$$Register);
 6580     __ bind(Lskip);
 6581   %}
 6582   ins_pipe( pipe_cmov_reg );
 6583 %}
 6584 
 6585 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
        // Unsigned-compare counterpart of jmovI_reg: integer conditional move for
        // CPUs without CMOV, emulated with a short branch over a MOV.
 6586   predicate(!VM_Version::supports_cmov() );
 6587   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6588   ins_cost(200);
 6589   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6590             "MOV    $dst,$src\n"
 6591       "skip:" %}
 6592   ins_encode %{
 6593     Label Lskip;
 6594     // Invert sense of branch from sense of CMOV
 6595     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6596     __ movl($dst$$Register, $src$$Register);
 6597     __ bind(Lskip);
 6598   %}
 6599   ins_pipe( pipe_cmov_reg );
 6600 %}
 6601 
 6602 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
        // Integer register-to-register conditional move using the hardware CMOVcc
        // instruction (guarded by VM_Version::supports_cmov()), signed-compare flavor.
        // enc_cmov(cop) supplies the condition-specific opcode bytes from 0x0F,0x40.
 6603   predicate(VM_Version::supports_cmov() );
 6604   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6605   ins_cost(200);
 6606   format %{ "CMOV$cop $dst,$src" %}
 6607   opcode(0x0F,0x40);
 6608   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6609   ins_pipe( pipe_cmov_reg );
 6610 %}
 6611 
 6612 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
        // Unsigned-compare counterpart of cmovI_reg (cmpOpU / eFlagsRegU operands);
        // identical CMOVcc encoding. Also the expansion target of cmovI_regUCF.
 6613   predicate(VM_Version::supports_cmov() );
 6614   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6615   ins_cost(200);
 6616   format %{ "CMOV$cop $dst,$src" %}
 6617   opcode(0x0F,0x40);
 6618   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6619   ins_pipe( pipe_cmov_reg );
 6620 %}
 6621 
 6622 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
        // Variant for the cmpOpUCF / eFlagsRegUCF operand classes; has no encoding of
        // its own and simply expands to cmovI_regU with the same operands.
 6623   predicate(VM_Version::supports_cmov() );
 6624   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6625   ins_cost(200);
 6626   expand %{
 6627     cmovI_regU(cop, cr, dst, src);
 6628   %}
 6629 %}
 6630 
 6631 // Conditional move
      // Integer CMOVcc with a memory source: the LoadI is folded into the instruction
      // (RegMem). Signed-compare flavor; cost 250 versus 200 for the register form.
 6632 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6633   predicate(VM_Version::supports_cmov() );
 6634   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6635   ins_cost(250);
 6636   format %{ "CMOV$cop $dst,$src" %}
 6637   opcode(0x0F,0x40);
 6638   ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
 6639   ins_pipe( pipe_cmov_mem );
 6640 %}
 6641 
 6642 // Conditional move
      // Unsigned-compare counterpart of cmovI_mem: CMOVcc with the LoadI folded in as
      // a memory operand. Also the expansion target of cmovI_memUCF.
 6643 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6644   predicate(VM_Version::supports_cmov() );
 6645   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6646   ins_cost(250);
 6647   format %{ "CMOV$cop $dst,$src" %}
 6648   opcode(0x0F,0x40);
 6649   ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
 6650   ins_pipe( pipe_cmov_mem );
 6651 %}
 6652 
 6653 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
        // Variant for the cmpOpUCF / eFlagsRegUCF operand classes; has no encoding of
        // its own and simply expands to cmovI_memU with the same operands.
 6654   predicate(VM_Version::supports_cmov() );
 6655   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6656   ins_cost(250);
 6657   expand %{
 6658     cmovI_memU(cop, cr, dst, src);
 6659   %}
 6660 %}
 6661 
 6662 // Conditional move
      // Pointer register-to-register CMOVcc (guarded by VM_Version::supports_cmov()),
      // signed-compare flavor. Cost 200 makes it cheaper than the branch-based
      // fallback cmovP_reg_nonP6 (300), which has no predicate.
 6663 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6664   predicate(VM_Version::supports_cmov() );
 6665   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6666   ins_cost(200);
 6667   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6668   opcode(0x0F,0x40);
 6669   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6670   ins_pipe( pipe_cmov_reg );
 6671 %}
 6672 
 6673 // Conditional move (non-P6 version)
 6674 // Note:  a CMoveP is generated for  stubs and native wrappers
 6675 //        regardless of whether we are on a P6, so we
 6676 //        emulate a cmov here
      // There is deliberately no predicate, so this rule is always available; its
      // higher cost (300) lets cmovP_reg (200) win when CMOV is supported.
      // Emitted as a conditional branch (enc_cmov_branch) over a MOV r32, r32 (0x8B).
      // NOTE(review): the 0x2 argument is presumably the byte length of the MOV being
      // skipped -- confirm against the enc_cmov_branch definition.
 6677 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6678   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6679   ins_cost(300);
 6680   format %{ "Jn$cop   skip\n\t"
 6681           "MOV    $dst,$src\t# pointer\n"
 6682       "skip:" %}
 6683   opcode(0x8b);
 6684   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6685   ins_pipe( pipe_cmov_reg );
 6686 %}
 6687 
 6688 // Conditional move
      // Unsigned-compare counterpart of cmovP_reg: pointer CMOVcc with cmpOpU /
      // eFlagsRegU operands. Also the expansion target of cmovP_regUCF.
 6689 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6690   predicate(VM_Version::supports_cmov() );
 6691   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6692   ins_cost(200);
 6693   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6694   opcode(0x0F,0x40);
 6695   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6696   ins_pipe( pipe_cmov_reg );
 6697 %}
 6698 
 6699 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
        // Variant for the cmpOpUCF / eFlagsRegUCF operand classes; has no encoding of
        // its own and simply expands to cmovP_regU with the same operands.
 6700   predicate(VM_Version::supports_cmov() );
 6701   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6702   ins_cost(200);
 6703   expand %{
 6704     cmovP_regU(cop, cr, dst, src);
 6705   %}
 6706 %}
 6707 
 6708 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6709 // correctly meets the two pointer arguments; one is an incoming
 6710 // register but the other is a memory operand.  ALSO appears to
 6711 // be buggy with implicit null checks.
 6712 //
 6713 //// Conditional move
 6714 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6715 //  predicate(VM_Version::supports_cmov() );
 6716 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6717 //  ins_cost(250);
 6718 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6719 //  opcode(0x0F,0x40);
 6720 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6721 //  ins_pipe( pipe_cmov_mem );
 6722 //%}
 6723 //
 6724 //// Conditional move
 6725 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6726 //  predicate(VM_Version::supports_cmov() );
 6727 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6728 //  ins_cost(250);
 6729 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6730 //  opcode(0x0F,0x40);
 6731 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6732 //  ins_pipe( pipe_cmov_mem );
 6733 //%}
 6734 
 6735 // Conditional move
      // x87 double conditional move using FCMOVcc (opcode 0xDA), guarded by UseSSE<=1.
      // Only matches the cmpOp_fcmov condition class with unsigned flags; the
      // destination is constrained to the regDPR1 operand class.
 6736 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6737   predicate(UseSSE<=1);
 6738   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6739   ins_cost(200);
 6740   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6741   opcode(0xDA);
 6742   ins_encode( enc_cmov_dpr(cop,src) );
 6743   ins_pipe( pipe_cmovDPR_reg );
 6744 %}
 6745 
 6746 // Conditional move
      // x87 float conditional move using FCMOVcc (opcode 0xDA), guarded by UseSSE==0.
      // Reuses the double variant's encoding class (enc_cmov_dpr) and pipeline class
      // (pipe_cmovDPR_reg); only the operand classes and predicate differ.
 6747 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6748   predicate(UseSSE==0);
 6749   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6750   ins_cost(200);
 6751   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6752   opcode(0xDA);
 6753   ins_encode( enc_cmov_dpr(cop,src) );
 6754   ins_pipe( pipe_cmovDPR_reg );
 6755 %}
 6756 
 6757 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
      // So for signed compares of x87 doubles (UseSSE<=1) the move is emulated: a
      // conditional branch (enc_cmov_branch) skips a push of $src followed by a
      // store-and-pop into $dst (DD /3).
      // NOTE(review): the 0x4 argument is presumably the total byte length of the two
      // skipped instructions -- confirm against the enc_cmov_branch definition.
 6758 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6759   predicate(UseSSE<=1);
 6760   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6761   ins_cost(200);
 6762   format %{ "Jn$cop   skip\n\t"
 6763             "MOV    $dst,$src\t# double\n"
 6764       "skip:" %}
 6765   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6766   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6767   ins_pipe( pipe_cmovDPR_reg );
 6768 %}
 6769 
 6770 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
      // So for signed compares of x87 floats (UseSSE==0) the move is emulated: a
      // conditional branch (enc_cmov_branch) skips a push of $src followed by a
      // store-and-pop into $dst (DD /3).
      // NOTE(review): the 0x4 argument is presumably the total byte length of the two
      // skipped instructions -- confirm against the enc_cmov_branch definition.
 6771 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6772   predicate(UseSSE==0);
 6773   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6774   ins_cost(200);
 6775   format %{ "Jn$cop    skip\n\t"
 6776             "MOV    $dst,$src\t# float\n"
 6777       "skip:" %}
 6778   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6779   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6780   ins_pipe( pipe_cmovDPR_reg );
 6781 %}
 6782 
 6783 // No CMOVE with SSE/SSE2
      // Conditional move of a float held in an XMM register, signed-compare flavor
      // (guarded by UseSSE>=1). Emulated with a short branch over a MOVSS.
 6784 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6785   predicate (UseSSE>=1);
 6786   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6787   ins_cost(200);
 6788   format %{ "Jn$cop   skip\n\t"
 6789             "MOVSS  $dst,$src\t# float\n"
 6790       "skip:" %}
 6791   ins_encode %{
 6792     Label skip;
 6793     // Invert sense of branch from sense of CMOV
 6794     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6795     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6796     __ bind(skip);
 6797   %}
 6798   ins_pipe( pipe_slow );
 6799 %}
 6800 
 6801 // No CMOVE with SSE/SSE2
      // Conditional move of a double held in an XMM register, signed-compare flavor
      // (guarded by UseSSE>=2). Emulated with a short branch over a MOVSD.
 6802 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6803   predicate (UseSSE>=2);
 6804   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6805   ins_cost(200);
 6806   format %{ "Jn$cop   skip\n\t"
 6807             "MOVSD  $dst,$src\t# double\n"
 6808       "skip:" %}
 6809   ins_encode %{
 6810     Label skip;
 6811     // Invert sense of branch from sense of CMOV
 6812     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6813     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6814     __ bind(skip);
 6815   %}
 6816   ins_pipe( pipe_slow );
 6817 %}
 6818 
 6819 // unsigned version
      // Conditional move of a float held in an XMM register for unsigned compares
      // (guarded by UseSSE>=1): short branch over a MOVSS. Also the expansion target
      // of fcmovF_regUCF.
 6820 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6821   predicate (UseSSE>=1);
 6822   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6823   ins_cost(200);
 6824   format %{ "Jn$cop   skip\n\t"
 6825             "MOVSS  $dst,$src\t# float\n"
 6826       "skip:" %}
 6827   ins_encode %{
 6828     Label skip;
 6829     // Invert sense of branch from sense of CMOV
 6830     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6831     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6832     __ bind(skip);
 6833   %}
 6834   ins_pipe( pipe_slow );
 6835 %}
 6836 
 6837 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
        // Variant for the cmpOpUCF / eFlagsRegUCF operand classes; has no encoding of
        // its own and simply expands to fcmovF_regU with the same operands.
 6838   predicate (UseSSE>=1);
 6839   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6840   ins_cost(200);
 6841   expand %{
 6842     fcmovF_regU(cop, cr, dst, src);
 6843   %}
 6844 %}
 6845 
 6846 // unsigned version
      // Conditional move of a double held in an XMM register for unsigned compares
      // (guarded by UseSSE>=2): short branch over a MOVSD. Also the expansion target
      // of fcmovD_regUCF.
 6847 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6848   predicate (UseSSE>=2);
 6849   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6850   ins_cost(200);
 6851   format %{ "Jn$cop   skip\n\t"
 6852             "MOVSD  $dst,$src\t# double\n"
 6853       "skip:" %}
 6854   ins_encode %{
 6855     Label skip;
 6856     // Invert sense of branch from sense of CMOV
 6857     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6858     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6859     __ bind(skip);
 6860   %}
 6861   ins_pipe( pipe_slow );
 6862 %}
 6863 
 6864 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
        // Variant for the cmpOpUCF / eFlagsRegUCF operand classes; has no encoding of
        // its own and simply expands to fcmovD_regU with the same operands.
 6865   predicate (UseSSE>=2);
 6866   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6867   ins_cost(200);
 6868   expand %{
 6869     fcmovD_regU(cop, cr, dst, src);
 6870   %}
 6871 %}
 6872 
 6873 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
        // Long conditional move (guarded by VM_Version::supports_cmov()), signed-compare
        // flavor. A long occupies a register pair, so two CMOVcc instructions with the
        // same condition are emitted: one for the low halves, one for the high halves.
 6874   predicate(VM_Version::supports_cmov() );
 6875   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6876   ins_cost(200);
 6877   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6878             "CMOV$cop $dst.hi,$src.hi" %}
 6879   opcode(0x0F,0x40);
 6880   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 6881   ins_pipe( pipe_cmov_reg_long );
 6882 %}
 6883 
 6884 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
        // Unsigned-compare counterpart of cmovL_reg: two CMOVcc instructions, one per
        // half of the long register pair. Also the expansion target of cmovL_regUCF.
 6885   predicate(VM_Version::supports_cmov() );
 6886   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6887   ins_cost(200);
 6888   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6889             "CMOV$cop $dst.hi,$src.hi" %}
 6890   opcode(0x0F,0x40);
 6891   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 6892   ins_pipe( pipe_cmov_reg_long );
 6893 %}
 6894 
 6895 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
        // Variant for the cmpOpUCF / eFlagsRegUCF operand classes; has no encoding of
        // its own and simply expands to cmovL_regU with the same operands.
 6896   predicate(VM_Version::supports_cmov() );
 6897   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6898   ins_cost(200);
 6899   expand %{
 6900     cmovL_regU(cop, cr, dst, src);
 6901   %}
 6902 %}
 6903 
 6904 //----------Arithmetic Instructions--------------------------------------------
 6905 //----------Addition Instructions----------------------------------------------
 6906 
 6907 // Integer Addition Instructions
 6908 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Integer add, register += register: ADD r32, r/m32 (opcode 0x03), a fixed
        // 2-byte instruction. Two-address form (dst is also the left input) and the
        // flags register is declared KILLed.
 6909   match(Set dst (AddI dst src));
 6910   effect(KILL cr);
 6911 
 6912   size(2);
 6913   format %{ "ADD    $dst,$src" %}
 6914   opcode(0x03);
 6915   ins_encode( OpcP, RegReg( dst, src) );
 6916   ins_pipe( ialu_reg_reg );
 6917 %}
 6918 
 6919 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
        // Integer add, register += immediate (ADD r/m32, imm; 81 /0). Flags are KILLed.
        // No size() is declared. NOTE(review): OpcSErm / Con8or32 presumably select
        // between the sign-extended 8-bit and full 32-bit immediate encodings, which
        // would make the length vary -- confirm against those encoding classes.
 6920   match(Set dst (AddI dst src));
 6921   effect(KILL cr);
 6922 
 6923   format %{ "ADD    $dst,$src" %}
 6924   opcode(0x81, 0x00); /* /0 id */
 6925   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 6926   ins_pipe( ialu_reg );
 6927 %}
 6928 
 6929 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
        // Int add of the constant operand immI_1, printed as INC. Only selected
        // when UseIncDec is set. src takes part in matching only; the encoding
        // uses just the primary opcode and dst.
 6930   predicate(UseIncDec);
 6931   match(Set dst (AddI dst src));
 6932   effect(KILL cr);
 6933 
 6934   size(1);
 6935   format %{ "INC    $dst" %}
 6936   opcode(0x40); /*  */
 6937   ins_encode( Opc_plus( primary, dst ) );
 6938   ins_pipe( ialu_reg );
 6939 %}
 6940 
 6941 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
        // Three-operand int add via LEA: dst = src0 + src1 where dst may differ
        // from src0. Unlike the ADD forms there is no flags operand and no KILL
        // effect. Cost 110 is set explicitly.
 6942   match(Set dst (AddI src0 src1));
 6943   ins_cost(110);
 6944 
 6945   format %{ "LEA    $dst,[$src0 + $src1]" %}
 6946   opcode(0x8D); /* 0x8D /r */
 6947   ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
 6948   ins_pipe( ialu_reg_reg );
 6949 %}
 6950 
 6951 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
        // Pointer counterpart of leaI_eReg_immI: dst = src0 + src1 (AddP) via
        // LEA, with no flags operand and no KILL effect.
 6952   match(Set dst (AddP src0 src1));
 6953   ins_cost(110);
 6954 
 6955   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 6956   opcode(0x8D); /* 0x8D /r */
 6957   ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
 6958   ins_pipe( ialu_reg_reg );
 6959 %}
 6960 
 6961 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
        // Int add of the constant operand immI_M1, printed as DEC. Only selected
        // when UseIncDec is set. src takes part in matching only; the encoding
        // uses just the primary opcode and dst.
 6962   predicate(UseIncDec);
 6963   match(Set dst (AddI dst src));
 6964   effect(KILL cr);
 6965 
 6966   size(1);
 6967   format %{ "DEC    $dst" %}
 6968   opcode(0x48); /*  */
 6969   ins_encode( Opc_plus( primary, dst ) );
 6970   ins_pipe( ialu_reg );
 6971 %}
 6972 
 6973 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
        // Pointer plus int register: dst = dst + src (AddP). Uses the same
        // opcode and RegReg encoding as addI_eReg; flags are declared killed.
 6974   match(Set dst (AddP dst src));
 6975   effect(KILL cr);
 6976 
 6977   size(2);
 6978   format %{ "ADD    $dst,$src" %}
 6979   opcode(0x03);
 6980   ins_encode( OpcP, RegReg( dst, src) );
 6981   ins_pipe( ialu_reg_reg );
 6982 %}
 6983 
 6984 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
        // Pointer plus int immediate: dst = dst + src (AddP). Uses the same
        // opcode and encoding as addI_eReg_imm; flags are declared killed.
 6985   match(Set dst (AddP dst src));
 6986   effect(KILL cr);
 6987 
 6988   format %{ "ADD    $dst,$src" %}
 6989   opcode(0x81,0x00); /* Opcode 81 /0 id */
 6990   // ins_encode( RegImm( dst, src) );
 6991   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 6992   ins_pipe( ialu_reg );
 6993 %}
 6994 
 6995 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
        // Int add with a memory source: dst = dst + LoadI(src). The load is
        // folded into the ADD's memory operand. Flags are declared killed.
 6996   match(Set dst (AddI dst (LoadI src)));
 6997   effect(KILL cr);
 6998 
 6999   ins_cost(150);
 7000   format %{ "ADD    $dst,$src" %}
 7001   opcode(0x03);
 7002   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 7003   ins_pipe( ialu_reg_mem );
 7004 %}
 7005 
 7006 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
        // Read-modify-write int add on memory: matches a StoreI to dst of
        // (LoadI dst) + src, i.e. load, add and store all on the same memory
        // operand. RegMem takes the register (src) first and the memory
        // operand (dst) second. Flags are declared killed.
 7007   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7008   effect(KILL cr);
 7009 
 7010   ins_cost(150);
 7011   format %{ "ADD    $dst,$src" %}
 7012   opcode(0x01);  /* Opcode 01 /r */
 7013   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 7014   ins_pipe( ialu_mem_reg );
 7015 %}
 7016 
 7017 // Add Memory with Immediate
 7018 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
        // Read-modify-write int add of an immediate to memory: matches a StoreI
        // to dst of (LoadI dst) + src. RMopc_Mem(0x00, dst) supplies the /0
        // opcode extension together with the memory operand. Flags are killed.
 7019   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7020   effect(KILL cr);
 7021 
 7022   ins_cost(125);
 7023   format %{ "ADD    $dst,$src" %}
 7024   opcode(0x81);               /* Opcode 81 /0 id */
 7025   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark );
 7026   ins_pipe( ialu_mem_imm );
 7027 %}
 7028 
 7029 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
        // Read-modify-write increment of an int in memory: matches a StoreI to
        // dst of (LoadI dst) + immI_1, printed as INC. src is used for matching
        // only. Flags are declared killed.
        // NOTE(review): unlike incI_eReg, no UseIncDec predicate is declared
        // here -- confirm that this is intentional.
 7030   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7031   effect(KILL cr);
 7032 
 7033   ins_cost(125);
 7034   format %{ "INC    $dst" %}
 7035   opcode(0xFF);               /* Opcode FF /0 */
 7036   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark);
 7037   ins_pipe( ialu_mem_imm );
 7038 %}
 7039 
 7040 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
        // Read-modify-write decrement of an int in memory: matches a StoreI to
        // dst of (LoadI dst) + immI_M1, printed as DEC. src is used for matching
        // only. Flags are declared killed.
        // NOTE(review): unlike decI_eReg, no UseIncDec predicate is declared
        // here -- confirm that this is intentional.
 7041   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7042   effect(KILL cr);
 7043 
 7044   ins_cost(125);
 7045   format %{ "DEC    $dst" %}
 7046   opcode(0xFF);               /* Opcode FF /1 */
 7047   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark);
 7048   ins_pipe( ialu_mem_imm );
 7049 %}
 7050 
 7051 
 7052 instruct checkCastPP( eRegP dst ) %{
        // CheckCastPP on a pointer register, input and output in the same
        // register. Emits nothing: size 0 and an empty encoding.
 7053   match(Set dst (CheckCastPP dst));
 7054 
 7055   size(0);
 7056   format %{ "#checkcastPP of $dst" %}
 7057   ins_encode( /*empty encoding*/ );
 7058   ins_pipe( empty );
 7059 %}
 7060 
 7061 instruct castPP( eRegP dst ) %{
        // CastPP on a pointer register, input and output in the same register.
        // Empty encoding: no machine code is emitted.
 7062   match(Set dst (CastPP dst));
 7063   format %{ "#castPP of $dst" %}
 7064   ins_encode( /*empty encoding*/ );
 7065   ins_pipe( empty );
 7066 %}
 7067 
 7068 instruct castII( rRegI dst ) %{
        // CastII on an int register, input and output in the same register.
        // Empty encoding and zero cost: no machine code is emitted.
 7069   match(Set dst (CastII dst));
 7070   format %{ "#castII of $dst" %}
 7071   ins_encode( /*empty encoding*/ );
 7072   ins_cost(0);
 7073   ins_pipe( empty );
 7074 %}
 7075 
 7076 instruct castLL( eRegL dst ) %{
        // CastLL on a long register, input and output in the same register.
        // Empty encoding and zero cost: no machine code is emitted.
 7077   match(Set dst (CastLL dst));
 7078   format %{ "#castLL of $dst" %}
 7079   ins_encode( /*empty encoding*/ );
 7080   ins_cost(0);
 7081   ins_pipe( empty );
 7082 %}
 7083 
 7084 instruct castFF( regF dst ) %{
        // CastFF for the regF operand class, used when UseSSE >= 1
        // (castFF_PR covers UseSSE < 1). Empty encoding and zero cost.
 7085   predicate(UseSSE >= 1);
 7086   match(Set dst (CastFF dst));
 7087   format %{ "#castFF of $dst" %}
 7088   ins_encode( /*empty encoding*/ );
 7089   ins_cost(0);
 7090   ins_pipe( empty );
 7091 %}
 7092 
 7093 instruct castDD( regD dst ) %{
        // CastDD for the regD operand class, used when UseSSE >= 2
        // (castDD_PR covers UseSSE < 2). Empty encoding and zero cost.
 7094   predicate(UseSSE >= 2);
 7095   match(Set dst (CastDD dst));
 7096   format %{ "#castDD of $dst" %}
 7097   ins_encode( /*empty encoding*/ );
 7098   ins_cost(0);
 7099   ins_pipe( empty );
 7100 %}
 7101 
 7102 instruct castFF_PR( regFPR dst ) %{
        // CastFF for the regFPR operand class, used when UseSSE < 1
        // (castFF covers UseSSE >= 1). Empty encoding and zero cost.
 7103   predicate(UseSSE < 1);
 7104   match(Set dst (CastFF dst));
 7105   format %{ "#castFF of $dst" %}
 7106   ins_encode( /*empty encoding*/ );
 7107   ins_cost(0);
 7108   ins_pipe( empty );
 7109 %}
 7110 
 7111 instruct castDD_PR( regDPR dst ) %{
        // CastDD for the regDPR operand class, used when UseSSE < 2
        // (castDD covers UseSSE >= 2). Empty encoding and zero cost.
 7112   predicate(UseSSE < 2);
 7113   match(Set dst (CastDD dst));
 7114   format %{ "#castDD of $dst" %}
 7115   ins_encode( /*empty encoding*/ );
 7116   ins_cost(0);
 7117   ins_pipe( empty );
 7118 %}
 7119 
 7120 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7121 
 7122 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
        // 64-bit compare-and-swap producing a boolean in res. Matches both
        // CompareAndSwapL and WeakCompareAndSwapL with the same code.
        // Operands are pinned through their operand classes (the format shows
        // the expected value in EDX:EAX). oldval and the flags are declared
        // killed. Per the format, res is set to 0, then to 1 unless the
        // exchange reported "not equal".
 7123   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7124   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7125   effect(KILL cr, KILL oldval);
 7126   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7127             "MOV    $res,0\n\t"
 7128             "JNE,s  fail\n\t"
 7129             "MOV    $res,1\n"
 7130           "fail:" %}
 7131   ins_encode( enc_cmpxchg8(mem_ptr),
 7132               enc_flags_ne_to_boolean(res) );
 7133   ins_pipe( pipe_cmpxchg );
 7134 %}
 7135 
 7136 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
        // Pointer compare-and-swap producing a boolean in res. Matches both
        // CompareAndSwapP and WeakCompareAndSwapP with the same code.
        // The format shows the expected value in EAX. oldval and the flags are
        // declared killed. Per the format, res is set to 0, then to 1 unless
        // the exchange reported "not equal".
 7137   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7138   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7139   effect(KILL cr, KILL oldval);
 7140   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7141             "MOV    $res,0\n\t"
 7142             "JNE,s  fail\n\t"
 7143             "MOV    $res,1\n"
 7144           "fail:" %}
 7145   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7146   ins_pipe( pipe_cmpxchg );
 7147 %}
 7148 
 7149 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
        // Byte compare-and-swap producing a boolean in res. Matches both
        // CompareAndSwapB and WeakCompareAndSwapB with the same code, using the
        // byte-sized enc_cmpxchgb encoding. oldval and the flags are declared
        // killed. Per the format, res is set to 0, then to 1 unless the
        // exchange reported "not equal".
 7150   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7151   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7152   effect(KILL cr, KILL oldval);
 7153   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7154             "MOV    $res,0\n\t"
 7155             "JNE,s  fail\n\t"
 7156             "MOV    $res,1\n"
 7157           "fail:" %}
 7158   ins_encode( enc_cmpxchgb(mem_ptr),
 7159               enc_flags_ne_to_boolean(res) );
 7160   ins_pipe( pipe_cmpxchg );
 7161 %}
 7162 
 7163 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
        // Short compare-and-swap producing a boolean in res. Matches both
        // CompareAndSwapS and WeakCompareAndSwapS with the same code, using the
        // word-sized enc_cmpxchgw encoding. oldval and the flags are declared
        // killed. Per the format, res is set to 0, then to 1 unless the
        // exchange reported "not equal".
 7164   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7165   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7166   effect(KILL cr, KILL oldval);
 7167   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7168             "MOV    $res,0\n\t"
 7169             "JNE,s  fail\n\t"
 7170             "MOV    $res,1\n"
 7171           "fail:" %}
 7172   ins_encode( enc_cmpxchgw(mem_ptr),
 7173               enc_flags_ne_to_boolean(res) );
 7174   ins_pipe( pipe_cmpxchg );
 7175 %}
 7176 
 7177 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Int compare-and-swap producing a boolean in res. Matches both
        // CompareAndSwapI and WeakCompareAndSwapI with the same code; shares
        // the enc_cmpxchg encoding with compareAndSwapP. oldval and the flags
        // are declared killed. Per the format, res is set to 0, then to 1
        // unless the exchange reported "not equal".
 7178   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7179   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7180   effect(KILL cr, KILL oldval);
 7181   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7182             "MOV    $res,0\n\t"
 7183             "JNE,s  fail\n\t"
 7184             "MOV    $res,1\n"
 7185           "fail:" %}
 7186   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7187   ins_pipe( pipe_cmpxchg );
 7188 %}
 7189 
 7190 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
        // 64-bit compare-and-exchange. Unlike compareAndSwapL, the result is
        // delivered in the oldval operand itself (Set oldval ...), so no
        // boolean is materialized and only the flags are declared killed.
 7191   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7192   effect(KILL cr);
 7193   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7194   ins_encode( enc_cmpxchg8(mem_ptr) );
 7195   ins_pipe( pipe_cmpxchg );
 7196 %}
 7197 
 7198 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
        // Pointer compare-and-exchange. The result is delivered in the oldval
        // operand itself (Set oldval ...); only the flags are declared killed.
 7199   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7200   effect(KILL cr);
 7201   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7202   ins_encode( enc_cmpxchg(mem_ptr) );
 7203   ins_pipe( pipe_cmpxchg );
 7204 %}
 7205 
 7206 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Byte compare-and-exchange using the enc_cmpxchgb encoding. The result
        // is delivered in the oldval operand itself (Set oldval ...); only the
        // flags are declared killed.
 7207   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7208   effect(KILL cr);
 7209   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7210   ins_encode( enc_cmpxchgb(mem_ptr) );
 7211   ins_pipe( pipe_cmpxchg );
 7212 %}
 7213 
 7214 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Short compare-and-exchange using the enc_cmpxchgw encoding. The
        // result is delivered in the oldval operand itself (Set oldval ...);
        // only the flags are declared killed.
 7215   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7216   effect(KILL cr);
 7217   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7218   ins_encode( enc_cmpxchgw(mem_ptr) );
 7219   ins_pipe( pipe_cmpxchg );
 7220 %}
 7221 
 7222 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Int compare-and-exchange; shares the enc_cmpxchg encoding with the
        // pointer form. The result is delivered in the oldval operand itself
        // (Set oldval ...); only the flags are declared killed.
 7223   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7224   effect(KILL cr);
 7225   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7226   ins_encode( enc_cmpxchg(mem_ptr) );
 7227   ins_pipe( pipe_cmpxchg );
 7228 %}
 7229 
 7230 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // GetAndAddB whose result is unused (predicate result_not_used()).
        // Since the old value is not needed, a LOCK-prefixed byte ADD of the
        // immediate to memory is emitted instead of XADD. The result is bound
        // to the placeholder operand dummy. Flags are declared killed.
 7231   predicate(n->as_LoadStore()->result_not_used());
 7232   match(Set dummy (GetAndAddB mem add));
 7233   effect(KILL cr);
 7234   format %{ "ADDB  [$mem],$add" %}
 7235   ins_encode %{
 7236     __ lock();
 7237     __ addb($mem$$Address, $add$$constant);
 7238   %}
 7239   ins_pipe( pipe_cmpxchg );
 7240 %}
 7241 
 7242 // Important to match to xRegI: only 8-bit regs.
 7243 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
        // GetAndAddB with the result used: LOCK-prefixed byte XADD. newval is
        // both the addend (input) and the destination of the result (output).
        // Flags are declared killed.
 7244   match(Set newval (GetAndAddB mem newval));
 7245   effect(KILL cr);
 7246   format %{ "XADDB  [$mem],$newval" %}
 7247   ins_encode %{
 7248     __ lock();
 7249     __ xaddb($mem$$Address, $newval$$Register);
 7250   %}
 7251   ins_pipe( pipe_cmpxchg );
 7252 %}
 7253 
 7254 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // GetAndAddS whose result is unused (predicate result_not_used()).
        // Emits a LOCK-prefixed word add (addw) of the immediate to memory
        // instead of XADD; the format prints it as "ADDS". The result is bound
        // to the placeholder operand dummy. Flags are declared killed.
 7255   predicate(n->as_LoadStore()->result_not_used());
 7256   match(Set dummy (GetAndAddS mem add));
 7257   effect(KILL cr);
 7258   format %{ "ADDS  [$mem],$add" %}
 7259   ins_encode %{
 7260     __ lock();
 7261     __ addw($mem$$Address, $add$$constant);
 7262   %}
 7263   ins_pipe( pipe_cmpxchg );
 7264 %}
 7265 
 7266 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
        // GetAndAddS with the result used: LOCK-prefixed word XADD (xaddw).
        // newval is both the addend (input) and the destination of the result
        // (output). Flags are declared killed.
 7267   match(Set newval (GetAndAddS mem newval));
 7268   effect(KILL cr);
 7269   format %{ "XADDS  [$mem],$newval" %}
 7270   ins_encode %{
 7271     __ lock();
 7272     __ xaddw($mem$$Address, $newval$$Register);
 7273   %}
 7274   ins_pipe( pipe_cmpxchg );
 7275 %}
 7276 
 7277 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // GetAndAddI whose result is unused (predicate result_not_used()).
        // Emits a LOCK-prefixed 32-bit ADD of the immediate to memory instead
        // of XADD. The result is bound to the placeholder operand dummy.
        // Flags are declared killed.
 7278   predicate(n->as_LoadStore()->result_not_used());
 7279   match(Set dummy (GetAndAddI mem add));
 7280   effect(KILL cr);
 7281   format %{ "ADDL  [$mem],$add" %}
 7282   ins_encode %{
 7283     __ lock();
 7284     __ addl($mem$$Address, $add$$constant);
 7285   %}
 7286   ins_pipe( pipe_cmpxchg );
 7287 %}
 7288 
 7289 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
        // GetAndAddI with the result used: LOCK-prefixed 32-bit XADD (xaddl).
        // newval is both the addend (input) and the destination of the result
        // (output). Flags are declared killed.
 7290   match(Set newval (GetAndAddI mem newval));
 7291   effect(KILL cr);
 7292   format %{ "XADDL  [$mem],$newval" %}
 7293   ins_encode %{
 7294     __ lock();
 7295     __ xaddl($mem$$Address, $newval$$Register);
 7296   %}
 7297   ins_pipe( pipe_cmpxchg );
 7298 %}
 7299 
 7300 // Important to match to xRegI: only 8-bit regs.
 7301 instruct xchgB( memory mem, xRegI newval) %{
        // GetAndSetB: byte exchange between newval and memory; newval is both
        // the value to store and the destination of the previous value.
        // No explicit lock() is emitted and no flags operand is declared
        // (x86 XCHG with a memory operand is implicitly locked and leaves the
        // flags untouched, per the Intel SDM).
 7302   match(Set newval (GetAndSetB mem newval));
 7303   format %{ "XCHGB  $newval,[$mem]" %}
 7304   ins_encode %{
 7305     __ xchgb($newval$$Register, $mem$$Address);
 7306   %}
 7307   ins_pipe( pipe_cmpxchg );
 7308 %}
 7309 
 7310 instruct xchgS( memory mem, rRegI newval) %{
        // GetAndSetS: word exchange (xchgw) between newval and memory; newval
        // is both the value to store and the destination of the previous value.
        // No explicit lock() is emitted and no flags operand is declared
        // (x86 XCHG with a memory operand is implicitly locked, per the Intel
        // SDM).
 7311   match(Set newval (GetAndSetS mem newval));
 7312   format %{ "XCHGW  $newval,[$mem]" %}
 7313   ins_encode %{
 7314     __ xchgw($newval$$Register, $mem$$Address);
 7315   %}
 7316   ins_pipe( pipe_cmpxchg );
 7317 %}
 7318 
 7319 instruct xchgI( memory mem, rRegI newval) %{
        // GetAndSetI: 32-bit exchange (xchgl) between newval and memory; newval
        // is both the value to store and the destination of the previous value.
        // No explicit lock() is emitted and no flags operand is declared
        // (x86 XCHG with a memory operand is implicitly locked, per the Intel
        // SDM).
 7320   match(Set newval (GetAndSetI mem newval));
 7321   format %{ "XCHGL  $newval,[$mem]" %}
 7322   ins_encode %{
 7323     __ xchgl($newval$$Register, $mem$$Address);
 7324   %}
 7325   ins_pipe( pipe_cmpxchg );
 7326 %}
 7327 
 7328 instruct xchgP( memory mem, pRegP newval) %{
        // GetAndSetP: pointer exchange between newval and memory. Uses the same
        // 32-bit xchgl as xchgI, with a pointer register operand class.
        // No explicit lock() is emitted and no flags operand is declared
        // (x86 XCHG with a memory operand is implicitly locked, per the Intel
        // SDM).
 7329   match(Set newval (GetAndSetP mem newval));
 7330   format %{ "XCHGL  $newval,[$mem]" %}
 7331   ins_encode %{
 7332     __ xchgl($newval$$Register, $mem$$Address);
 7333   %}
 7334   ins_pipe( pipe_cmpxchg );
 7335 %}
 7336 
 7337 //----------Subtraction Instructions-------------------------------------------
 7338 
 7339 // Integer Subtraction Instructions
 7340 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Register-register int subtract: dst = dst - src (SubI), printed as
        // SUB. The flags register operand cr is declared killed.
 7341   match(Set dst (SubI dst src));
 7342   effect(KILL cr);
 7343 
 7344   size(2);
 7345   format %{ "SUB    $dst,$src" %}
 7346   opcode(0x2B);
 7347   ins_encode( OpcP, RegReg( dst, src) );
 7348   ins_pipe( ialu_reg_reg );
 7349 %}
 7350 
 7351 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
        // Int subtract of an immediate from a register: dst = dst - src (SubI).
        // Same encoding scheme as addI_eReg_imm, with opcode extension /5.
        // Flags are declared killed.
 7352   match(Set dst (SubI dst src));
 7353   effect(KILL cr);
 7354 
 7355   format %{ "SUB    $dst,$src" %}
 7356   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7357   // ins_encode( RegImm( dst, src) );
 7358   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7359   ins_pipe( ialu_reg );
 7360 %}
 7361 
 7362 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
        // Int subtract with a memory source: dst = dst - LoadI(src). The load
        // is folded into the SUB's memory operand. Flags are declared killed.
 7363   match(Set dst (SubI dst (LoadI src)));
 7364   effect(KILL cr);
 7365 
 7366   ins_cost(150);
 7367   format %{ "SUB    $dst,$src" %}
 7368   opcode(0x2B);
 7369   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 7370   ins_pipe( ialu_reg_mem );
 7371 %}
 7372 
 7373 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
        // Read-modify-write int subtract on memory: matches a StoreI to dst of
        // (LoadI dst) - src. RegMem takes the register (src) first and the
        // memory operand (dst) second. Flags are declared killed.
 7374   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7375   effect(KILL cr);
 7376 
 7377   ins_cost(150);
 7378   format %{ "SUB    $dst,$src" %}
 7379   opcode(0x29);  /* Opcode 29 /r */
 7380   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 7381   ins_pipe( ialu_mem_reg );
 7382 %}
 7383 
 7384 // Subtract from a pointer
 7385 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
        // Matches "pointer + (0 - src)" (AddP dst (SubI zero src)) and emits a
        // single SUB dst,src instead of a negate followed by an add. The zero
        // operand only participates in matching. Flags are declared killed.
 7386   match(Set dst (AddP dst (SubI zero src)));
 7387   effect(KILL cr);
 7388 
 7389   size(2);
 7390   format %{ "SUB    $dst,$src" %}
 7391   opcode(0x2B);
 7392   ins_encode( OpcP, RegReg( dst, src) );
 7393   ins_pipe( ialu_reg_reg );
 7394 %}
 7395 
 7396 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
        // Int negation: matches "0 - dst" (SubI zero dst) and emits NEG dst.
        // The zero operand only participates in matching. Flags are declared
        // killed.
 7397   match(Set dst (SubI zero dst));
 7398   effect(KILL cr);
 7399 
 7400   size(2);
 7401   format %{ "NEG    $dst" %}
 7402   opcode(0xF7,0x03);  // Opcode F7 /3
 7403   ins_encode( OpcP, RegOpc( dst ) );
 7404   ins_pipe( ialu_reg );
 7405 %}
 7406 
 7407 //----------Multiplication/Division Instructions-------------------------------
 7408 // Integer Multiplication Instructions
 7409 // Multiply Register
 7410 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Register-register int multiply: dst = dst * src (MulI), printed as
        // two-operand IMUL. Both opcode() values are emitted (OpcS, then OpcP)
        // ahead of the register operands. Flags are declared killed.
 7411   match(Set dst (MulI dst src));
 7412   effect(KILL cr);
 7413 
 7414   size(3);
 7415   ins_cost(300);
 7416   format %{ "IMUL   $dst,$src" %}
 7417   opcode(0xAF, 0x0F);
 7418   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7419   ins_pipe( ialu_reg_reg_alu0 );
 7420 %}
 7421 
 7422 // Multiply 32-bit Immediate
 7423 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
        // Three-operand int multiply by an immediate: dst = src * imm (MulI),
        // where dst may differ from src. Flags are declared killed.
 7424   match(Set dst (MulI src imm));
 7425   effect(KILL cr);
 7426 
 7427   ins_cost(300);
 7428   format %{ "IMUL   $dst,$src,$imm" %}
 7429   opcode(0x69);  /* 69 /r id */
 7430   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7431   ins_pipe( ialu_reg_reg_alu0 );
 7432 %}
 7433 
 7434 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
        // Loads only the low word of a long constant (immL32) into an
        // eADXRegL_low_only operand; the format prints the destination as EAX.
        // This operand class is consumed by mulI_imm_high and
        // mulI_imm_RShift_high, which subtract 150 from their own cost to
        // offset the inflated cost set here. Flags are declared killed.
 7435   match(Set dst src);
 7436   effect(KILL cr);
 7437 
 7438   // Note that this is artificially increased to make it more expensive than loadConL
 7439   ins_cost(250);
 7440   format %{ "MOV    EAX,$src\t// low word only" %}
 7441   opcode(0xB8);
 7442   ins_encode( LdImmL_Lo(dst, src) );
 7443   ins_pipe( ialu_reg_fat );
 7444 %}
 7445 
 7446 // Multiply by 32-bit Immediate, taking the shifted high order results
 7447 //  (special case for shift by 32)
 7448 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
        // Computes (int)(((long)src1 * src2) >> 32): the high 32 bits of a
        // widening multiply, with the shift count fixed to 32 by immI_32.
        // The predicate walks the match tree down to the MulL's second input
        // and requires it to be a long constant (Op_ConL) whose value lies in
        // [min_jint, max_jint]. The result is produced in an eDXRegI operand;
        // the format shows a single IMUL writing EDX:EAX.
 7449   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7450   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7451              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7452              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7453   effect(USE src1, KILL cr);
 7454 
 7455   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7456   ins_cost(0*100 + 1*400 - 150);
 7457   format %{ "IMUL   EDX:EAX,$src1" %}
 7458   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7459   ins_pipe( pipe_slow );
 7460 %}
 7461 
 7462 // Multiply by 32-bit Immediate, taking the shifted high order results
 7463 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
        // Computes (int)(((long)src1 * src2) >> cnt) for a shift count matched
        // by immI_32_63. Same pattern, predicate and encoding as mulI_imm_high;
        // per the format, the high word in EDX is additionally shifted right
        // arithmetically by cnt-32. The cost carries one extra 100 for that
        // shift.
 7464   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7465   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7466              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7467              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7468   effect(USE src1, KILL cr);
 7469 
 7470   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7471   ins_cost(1*100 + 1*400 - 150);
 7472   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7473             "SAR    EDX,$cnt-32" %}
 7474   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7475   ins_pipe( pipe_slow );
 7476 %}
 7477 
 7478 // Multiply Memory 32-bit Immediate
 7479 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
        // Three-operand int multiply of a memory operand by an immediate:
        // dst = LoadI(src) * imm. The load is folded into the IMUL.
        // Flags are declared killed.
 7480   match(Set dst (MulI (LoadI src) imm));
 7481   effect(KILL cr);
 7482 
 7483   ins_cost(300);
 7484   format %{ "IMUL   $dst,$src,$imm" %}
 7485   opcode(0x69);  /* 69 /r id */
 7486   ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark );
 7487   ins_pipe( ialu_reg_mem_alu0 );
 7488 %}
 7489 
 7490 // Multiply Memory
 7491 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
        // Int multiply with a memory source: dst = dst * LoadI(src). Same
        // opcode pair as mulI_eReg but with a RegMem operand encoding.
        // Flags are declared killed.
 7492   match(Set dst (MulI dst (LoadI src)));
 7493   effect(KILL cr);
 7494 
 7495   ins_cost(350);
 7496   format %{ "IMUL   $dst,$src" %}
 7497   opcode(0xAF, 0x0F);
 7498   ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark );
 7499   ins_pipe( ialu_reg_mem_alu0 );
 7500 %}
 7501 
 7502 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7503 %{
        // MulAddS2I: dst = dst*src1 + src2*src3. Has no encoding of its own; it
        // expands into two mulI_eReg and one addI_eReg. The second multiply
        // writes its product into src2, which is why src2 is declared killed
        // along with the flags.
 7504   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7505   effect(KILL cr, KILL src2);
 7506 
 7507   expand %{ mulI_eReg(dst, src1, cr);
 7508            mulI_eReg(src2, src3, cr);
 7509            addI_eReg(dst, src2, cr); %}
 7510 %}
 7511 
 7512 // Multiply Register Int to Long
 7513 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
        // Signed widening multiply: both inputs are ints converted to long
        // (ConvI2L) and the product is a long. src is an eAXRegI operand and
        // dst an eADXRegL operand; only dst and src1 are passed to the
        // encoding, so src is an implicit input. The effect list spells out
        // DEF dst / USE src / USE src1 explicitly and kills the flags.
 7514   // Basic Idea: long = (long)int * (long)int
 7515   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7516   effect(DEF dst, USE src, USE src1, KILL flags);
 7517 
 7518   ins_cost(300);
 7519   format %{ "IMUL   $dst,$src1" %}
 7520 
 7521   ins_encode( long_int_multiply( dst, src1 ) );
 7522   ins_pipe( ialu_reg_reg_alu0 );
 7523 %}
 7524 
 7525 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
        // Unsigned widening multiply: both inputs are ints converted to long
        // and masked with the immL_32bits constant, so the pattern is a
        // 32x32->64 unsigned product, printed as MUL. The mask operand only
        // participates in matching; only dst and src1 reach the encoding.
        // Flags are declared killed.
 7526   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7527   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7528   effect(KILL flags);
 7529 
 7530   ins_cost(300);
 7531   format %{ "MUL    $dst,$src1" %}
 7532 
 7533   ins_encode( long_uint_multiply(dst, src1) );
 7534   ins_pipe( ialu_reg_reg_alu0 );
 7535 %}
 7536 
 7537 // Multiply Register Long
 7538 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // General 64x64->64 long multiply: dst = dst * src, with dst in an
        // eADXRegL operand (shown as EDX:EAX in the format). Needs one int
        // scratch register (TEMP tmp) for the cross products; flags are killed.
        // In the comments below x is dst and y is src.
 7539   match(Set dst (MulL dst src));
 7540   effect(KILL cr, TEMP tmp);
 7541   ins_cost(4*100+3*400);
 7542 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7543 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7544   format %{ "MOV    $tmp,$src.lo\n\t"
 7545             "IMUL   $tmp,EDX\n\t"
 7546             "MOV    EDX,$src.hi\n\t"
 7547             "IMUL   EDX,EAX\n\t"
 7548             "ADD    $tmp,EDX\n\t"
 7549             "MUL    EDX:EAX,$src.lo\n\t"
 7550             "ADD    EDX,$tmp" %}
 7551   ins_encode( long_multiply( dst, src, tmp ) );
 7552   ins_pipe( pipe_slow );
 7553 %}
 7554 
 7555 // Multiply Register Long where the left operand's high 32 bits are zero
 7556 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // Cheaper long multiply used when the predicate reports that the left
        // input (n->in(1), i.e. dst) has zero high 32 bits: the x_hi * y_lo
        // cross product is dropped. In the comments below x is dst, y is src.
 7557   predicate(is_operand_hi32_zero(n->in(1)));
 7558   match(Set dst (MulL dst src));
 7559   effect(KILL cr, TEMP tmp);
 7560   ins_cost(2*100+2*400);
 7561 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7562 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7563   format %{ "MOV    $tmp,$src.hi\n\t"
 7564             "IMUL   $tmp,EAX\n\t"
 7565             "MUL    EDX:EAX,$src.lo\n\t"
 7566             "ADD    EDX,$tmp" %}
 7567   ins_encode %{
          // tmp = y_hi * x_lo (the only surviving cross product)
 7568     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7569     __ imull($tmp$$Register, rax);
          // EDX:EAX = x_lo * y_lo (unsigned), then fold the cross product into
          // the high word
 7570     __ mull($src$$Register);
 7571     __ addl(rdx, $tmp$$Register);
 7572   %}
 7573   ins_pipe( pipe_slow );
 7574 %}
 7575 
 7576 // Multiply Register Long where the right operand's high 32 bits are zero
 7577 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // Cheaper long multiply used when the predicate reports that the right
        // input (n->in(2), i.e. src) has zero high 32 bits: the x_lo * y_hi
        // cross product is dropped. In the comments below x is dst, y is src.
 7578   predicate(is_operand_hi32_zero(n->in(2)));
 7579   match(Set dst (MulL dst src));
 7580   effect(KILL cr, TEMP tmp);
 7581   ins_cost(2*100+2*400);
 7582 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7583 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7584   format %{ "MOV    $tmp,$src.lo\n\t"
 7585             "IMUL   $tmp,EDX\n\t"
 7586             "MUL    EDX:EAX,$src.lo\n\t"
 7587             "ADD    EDX,$tmp" %}
 7588   ins_encode %{
          // tmp = y_lo * x_hi (the only surviving cross product); EDX must be
          // read before the MUL below overwrites it
 7589     __ movl($tmp$$Register, $src$$Register);
 7590     __ imull($tmp$$Register, rdx);
          // EDX:EAX = x_lo * y_lo (unsigned), then fold the cross product into
          // the high word
 7591     __ mull($src$$Register);
 7592     __ addl(rdx, $tmp$$Register);
 7593   %}
 7594   ins_pipe( pipe_slow );
 7595 %}
 7596 
 7597 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7598 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
        // Cheapest long multiply, used when the predicate reports that both
        // inputs have zero high 32 bits: a single unsigned MUL of the low words
        // yields the whole result, so no scratch register is needed.
        // In the comments below x is dst and y is src.
 7599   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7600   match(Set dst (MulL dst src));
 7601   effect(KILL cr);
 7602   ins_cost(1*400);
 7603 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7604 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7605   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7606   ins_encode %{
 7607     __ mull($src$$Register);
 7608   %}
 7609   ins_pipe( pipe_slow );
 7610 %}
 7611 
 7612 // Multiply Register Long by small constant
 7613 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
        // Long multiply by a small constant matched by the immL_127 operand:
        // dst = dst * src. Because the constant has no high word, only one
        // cross product (src * EDX) is needed; it is kept in the scratch
        // register tmp and added to the high word at the end. Flags are killed.
 7614   match(Set dst (MulL dst src));
 7615   effect(KILL cr, TEMP tmp);
 7616   ins_cost(2*100+2*400);
 7617   size(12);
 7618 // Basic idea: lo(result) = lo(src * EAX)
 7619 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7620   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7621             "MOV    EDX,$src\n\t"
 7622             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7623             "ADD    EDX,$tmp" %}
 7624   ins_encode( long_multiply_con( dst, src, tmp ) );
 7625   ins_pipe( pipe_slow );
 7626 %}
 7627 
 7628 // Integer DIV with Register
 7629 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
        // Int division: rax = rax / div, with the divisor in an eCXRegI operand.
        // rdx and the flags are declared killed. As the format shows, the
        // sequence guards the case dividend == 0x80000000 with divisor == -1:
        // EDX is zeroed and the IDIV is skipped, leaving EAX unchanged.
        // Otherwise CDQ sign-extends EAX into EDX before IDIV.
 7630   match(Set rax (DivI rax div));
 7631   effect(KILL rdx, KILL cr);
 7632   size(26);
 7633   ins_cost(30*100+10*100);
 7634   format %{ "CMP    EAX,0x80000000\n\t"
 7635             "JNE,s  normal\n\t"
 7636             "XOR    EDX,EDX\n\t"
 7637             "CMP    ECX,-1\n\t"
 7638             "JE,s   done\n"
 7639     "normal: CDQ\n\t"
 7640             "IDIV   $div\n\t"
 7641     "done:"        %}
 7642   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7643   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7644   ins_pipe( ialu_reg_reg_alu0 );
 7645 %}
 7646 
 7647 // Divide Register Long
 7648 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
        // General long division: dst = src1 / src2. Not done inline; per the
        // format, both operands are pushed on the stack (high word first),
        // SharedRuntime::ldiv is called, and the 16 bytes of arguments are
        // popped afterwards. Declared with effect(CALL) and a very high cost.
 7649   match(Set dst (DivL src1 src2));
 7650   effect(CALL);
 7651   ins_cost(10000);
 7652   format %{ "PUSH   $src1.hi\n\t"
 7653             "PUSH   $src1.lo\n\t"
 7654             "PUSH   $src2.hi\n\t"
 7655             "PUSH   $src2.lo\n\t"
 7656             "CALL   SharedRuntime::ldiv\n\t"
 7657             "ADD    ESP,16" %}
 7658   ins_encode( long_div(src1,src2) );
 7659   ins_pipe( pipe_slow );
 7660 %}
 7661 
 7662 // Integer DIVMOD with Register, both quotient and mod results
 7663 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
        // Combined int divide/modulo (DivModI): the match rule has no "Set",
        // and rdx is an operand rather than a KILL, since both results are
        // produced by the one IDIV. Uses the same guarded encoding and size as
        // divI_eReg. Only the flags are declared killed.
 7664   match(DivModI rax div);
 7665   effect(KILL cr);
 7666   size(26);
 7667   ins_cost(30*100+10*100);
 7668   format %{ "CMP    EAX,0x80000000\n\t"
 7669             "JNE,s  normal\n\t"
 7670             "XOR    EDX,EDX\n\t"
 7671             "CMP    ECX,-1\n\t"
 7672             "JE,s   done\n"
 7673     "normal: CDQ\n\t"
 7674             "IDIV   $div\n\t"
 7675     "done:"        %}
 7676   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7677   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7678   ins_pipe( pipe_slow );
 7679 %}
 7680 
 7681 // Integer MOD with Register
 7682 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
        // Int remainder: rdx = rax % div, with the divisor in an eCXRegI
        // operand. rax and the flags are declared killed.
        // NOTE(review): the format below shows only CDQ/IDIV, but size(26) and
        // the ins_encode are identical to divI_eReg, whose format prints the
        // full guarded sequence (CMP EAX,0x80000000 / ... / IDIV). The printed
        // form here appears to be abbreviated -- confirm against cdq_enc.
 7683   match(Set rdx (ModI rax div));
 7684   effect(KILL rax, KILL cr);
 7685 
 7686   size(26);
 7687   ins_cost(300);
 7688   format %{ "CDQ\n\t"
 7689             "IDIV   $div" %}
 7690   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7691   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7692   ins_pipe( ialu_reg_reg_alu0 );
 7693 %}
 7694 
 7695 // Remainder Register Long
 7696 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
        // General long remainder: dst = src1 % src2. Not done inline; per the
        // format, both operands are pushed on the stack (high word first),
        // SharedRuntime::lrem is called, and the 16 bytes of arguments are
        // popped afterwards. Declared with effect(CALL) and a very high cost.
 7697   match(Set dst (ModL src1 src2));
 7698   effect(CALL);
 7699   ins_cost(10000);
 7700   format %{ "PUSH   $src1.hi\n\t"
 7701             "PUSH   $src1.lo\n\t"
 7702             "PUSH   $src2.hi\n\t"
 7703             "PUSH   $src2.lo\n\t"
 7704             "CALL   SharedRuntime::lrem\n\t"
 7705             "ADD    ESP,16" %}
 7706   ins_encode( long_mod(src1,src2) );
 7707   ins_pipe( pipe_slow );
 7708 %}
 7709 
 7710 // Divide Register Long (no special case since divisor != -1)
 7711 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
        // Inline long division by a 32-bit constant: dst = dst / imm, avoiding
        // the runtime call made by divL_eReg. The divisor is asserted to be
        // none of 0, -1 and min_jint. The division is carried out with
        // unsigned 32-bit DIVs on the absolute value of the divisor: a single
        // DIV when the high word is below |imm| (fast path), otherwise two
        // chained DIVs (high word first, then low word using the remainder),
        // negating a negative dividend before and the quotient after. Finally
        // the quotient is negated if the constant itself is negative.
        // tmp holds |imm|; tmp2 holds the high word of the quotient.
 7712   match(Set dst (DivL dst imm));
 7713   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7714   ins_cost(1000);
 7715   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7716             "XOR    $tmp2,$tmp2\n\t"
 7717             "CMP    $tmp,EDX\n\t"
 7718             "JA,s   fast\n\t"
 7719             "MOV    $tmp2,EAX\n\t"
 7720             "MOV    EAX,EDX\n\t"
 7721             "MOV    EDX,0\n\t"
 7722             "JLE,s  pos\n\t"
 7723             "LNEG   EAX : $tmp2\n\t"
 7724             "DIV    $tmp # unsigned division\n\t"
 7725             "XCHG   EAX,$tmp2\n\t"
 7726             "DIV    $tmp\n\t"
 7727             "LNEG   $tmp2 : EAX\n\t"
 7728             "JMP,s  done\n"
 7729     "pos:\n\t"
 7730             "DIV    $tmp\n\t"
 7731             "XCHG   EAX,$tmp2\n"
 7732     "fast:\n\t"
 7733             "DIV    $tmp\n"
 7734     "done:\n\t"
 7735             "MOV    EDX,$tmp2\n\t"
 7736             "NEG    EDX:EAX # if $imm < 0" %}
 7737   ins_encode %{
 7738     int con = (int)$imm$$constant;
 7739     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
          // |con|; the negation cannot overflow because min_jint is excluded above
 7740     int pcon = (con > 0) ? con : -con;
 7741     Label Lfast, Lpos, Ldone;
 7742 
 7743     __ movl($tmp$$Register, pcon);
          // tmp2 = 0: high word of the quotient on the fast path
 7744     __ xorl($tmp2$$Register,$tmp2$$Register);
 7745     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7746     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7747 
 7748     __ movl($tmp2$$Register, $dst$$Register); // save
 7749     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
          // MOV rather than XOR so the flags set by the CMP above are still
          // valid for the branch below
 7750     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7751     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7752 
 7753     // Negative dividend.
 7754     // convert value to positive to use unsigned division
 7755     __ lneg($dst$$Register, $tmp2$$Register);
 7756     __ divl($tmp$$Register);
 7757     __ xchgl($dst$$Register, $tmp2$$Register);
 7758     __ divl($tmp$$Register);
 7759     // revert result back to negative
 7760     __ lneg($tmp2$$Register, $dst$$Register);
 7761     __ jmpb(Ldone);
 7762 
 7763     __ bind(Lpos);
 7764     __ divl($tmp$$Register); // Use unsigned division
 7765     __ xchgl($dst$$Register, $tmp2$$Register);
 7766     // Fall through for final divide, tmp2 has 32 bit hi result
 7767 
 7768     __ bind(Lfast);
 7769     // fast path: src is positive
 7770     __ divl($tmp$$Register); // Use unsigned division
 7771 
 7772     __ bind(Ldone);
          // Assemble the 64-bit quotient: high word from tmp2, low word already
          // in the low half of dst
 7773     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7774     if (con < 0) {
            // The division used |con|; flip the sign of the quotient
 7775       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7776     }
 7777   %}
 7778   ins_pipe( pipe_slow );
 7779 %}
 7780 
 7781 // Remainder Register Long (remainder fit into 32 bits)
      // Long remainder of EDX:EAX by a constant 32-bit divisor $imm, computed with
      // 32-bit unsigned DIVs by abs($imm).  The remainder ends up in the low word
      // and is sign-extended into the high word.  The encoding asserts that the
      // divisor is not 0, -1 or min_jint.
 7782 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7783   match(Set dst (ModL dst imm));
 7784   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7785   ins_cost(1000);
 7786   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7787             "CMP    $tmp,EDX\n\t"
 7788             "JA,s   fast\n\t"
 7789             "MOV    $tmp2,EAX\n\t"
 7790             "MOV    EAX,EDX\n\t"
 7791             "MOV    EDX,0\n\t"
 7792             "JLE,s  pos\n\t"
 7793             "LNEG   EAX : $tmp2\n\t"
 7794             "DIV    $tmp # unsigned division\n\t"
 7795             "MOV    EAX,$tmp2\n\t"
 7796             "DIV    $tmp\n\t"
 7797             "NEG    EDX\n\t"
 7798             "JMP,s  done\n"
 7799     "pos:\n\t"
 7800             "DIV    $tmp\n\t"
 7801             "MOV    EAX,$tmp2\n"
 7802     "fast:\n\t"
 7803             "DIV    $tmp\n"
 7804     "done:\n\t"
 7805             "MOV    EAX,EDX\n\t"
 7806             "SAR    EDX,31\n\t" %}
 7807   ins_encode %{
 7808     int con = (int)$imm$$constant;
 7809     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
          // Only the magnitude of the divisor is used below; the sign of con is
          // never consulted again in this encoding.
 7810     int pcon = (con > 0) ? con : -con;
 7811     Label  Lfast, Lpos, Ldone;
 7812 
 7813     __ movl($tmp$$Register, pcon);
          // Unsigned compare of the divisor against the dividend's high word.
 7814     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7815     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7816 
          // Slow path: divide the high word first, then the low word with the
          // first remainder left in the high register.
 7817     __ movl($tmp2$$Register, $dst$$Register); // save
 7818     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7819     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
          // This branch still tests the flags of the cmpl above (the MOVs in
          // between were chosen so as not to disturb them).
 7820     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7821 
 7822     // Negative dividend.
 7823     // convert value to positive to use unsigned division
 7824     __ lneg($dst$$Register, $tmp2$$Register);
 7825     __ divl($tmp$$Register);
 7826     __ movl($dst$$Register, $tmp2$$Register);
 7827     __ divl($tmp$$Register);
 7828     // revert remainder back to negative
 7829     __ negl(HIGH_FROM_LOW($dst$$Register));
 7830     __ jmpb(Ldone);
 7831 
 7832     __ bind(Lpos);
 7833     __ divl($tmp$$Register);
 7834     __ movl($dst$$Register, $tmp2$$Register);
          // Falls through into the final divide of the low word.
 7835 
 7836     __ bind(Lfast);
 7837     // fast path: src is positive
 7838     __ divl($tmp$$Register);
 7839 
 7840     __ bind(Ldone);
          // The 32-bit remainder is in the high register: move it to the low word
          // and rebuild the high word as its sign extension.
 7841     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7842     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 7843 
 7844   %}
 7845   ins_pipe( pipe_slow );
 7846 %}
 7847 
 7848 // Integer Shift Instructions
 7849 // Shift Left by one
      // In-place LShiftI of $dst by the immI_1 constant; the flags are killed.
      // Uses the dedicated shift-by-one opcode (D1 /4), declared as 2 bytes.
 7850 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7851   match(Set dst (LShiftI dst shift));
 7852   effect(KILL cr);
 7853 
 7854   size(2);
 7855   format %{ "SHL    $dst,$shift" %}
 7856   opcode(0xD1, 0x4);  /* D1 /4 */
 7857   ins_encode( OpcP, RegOpc( dst ) );
 7858   ins_pipe( ialu_reg );
 7859 %}
 7860 
 7861 // Shift Left by 8-bit immediate
      // In-place LShiftI of $dst by an immI8 constant; the flags are killed.
      // Opcode C1 /4 followed by the immediate byte, declared as 3 bytes.
 7862 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 7863   match(Set dst (LShiftI dst shift));
 7864   effect(KILL cr);
 7865 
 7866   size(3);
 7867   format %{ "SHL    $dst,$shift" %}
 7868   opcode(0xC1, 0x4);  /* C1 /4 ib */
 7869   ins_encode( RegOpcImm( dst, shift) );
 7870   ins_pipe( ialu_reg );
 7871 %}
 7872 
 7873 // Shift Left by variable
      // In-place LShiftI of $dst by a run-time count.  The count operand is
      // constrained to the eCXRegI class because opcode D3 /4 takes its count
      // implicitly; the flags are killed.
 7874 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 7875   match(Set dst (LShiftI dst shift));
 7876   effect(KILL cr);
 7877 
 7878   size(2);
 7879   format %{ "SHL    $dst,$shift" %}
 7880   opcode(0xD3, 0x4);  /* D3 /4 */
 7881   ins_encode( OpcP, RegOpc( dst ) );
 7882   ins_pipe( ialu_reg_reg );
 7883 %}
 7884 
 7885 // Arithmetic shift right by one
      // In-place RShiftI (sign-preserving) of $dst by the immI_1 constant; the
      // flags are killed.  Shift-by-one opcode D1 /7, declared as 2 bytes.
 7886 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7887   match(Set dst (RShiftI dst shift));
 7888   effect(KILL cr);
 7889 
 7890   size(2);
 7891   format %{ "SAR    $dst,$shift" %}
 7892   opcode(0xD1, 0x7);  /* D1 /7 */
 7893   ins_encode( OpcP, RegOpc( dst ) );
 7894   ins_pipe( ialu_reg );
 7895 %}
 7896 
 7897 // Arithmetic shift right by one, directly on memory
      // Folds the load / RShiftI-by-one / store of the same address $dst into a
      // single read-modify-write instruction (D1 /7 with a memory operand).
      // The flags are killed.
 7898 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 7899   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 7900   effect(KILL cr);
 7901   format %{ "SAR    $dst,$shift" %}
 7902   opcode(0xD1, 0x7);  /* D1 /7 */
 7903   ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark );
 7904   ins_pipe( ialu_mem_imm );
 7905 %}
 7906 
 7907 // Arithmetic Shift Right by 8-bit immediate
      // In-place RShiftI (sign-preserving) of $dst by an immI8 constant; the flags
      // are killed.  Opcode C1 /7 followed by the immediate byte, declared as
      // 3 bytes.  This is a register-only operation, so it uses the same
      // register pipeline class (ialu_reg) as the sibling immediate shifts
      // salI_eReg_imm and shrI_eReg_imm rather than the memory class.
 7908 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 7909   match(Set dst (RShiftI dst shift));
 7910   effect(KILL cr);
 7911 
 7912   size(3);
 7913   format %{ "SAR    $dst,$shift" %}
 7914   opcode(0xC1, 0x7);  /* C1 /7 ib */
 7915   ins_encode( RegOpcImm( dst, shift ) );
 7916   ins_pipe( ialu_reg );
 7917 %}
 7918 
 7919 // Arithmetic Shift Right by 8-bit immediate, directly on memory
      // Folds the load / RShiftI / store of the same address $dst into one
      // read-modify-write instruction (C1 /7 with a memory operand and an
      // immediate).  The flags are killed.
 7920 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 7921   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 7922   effect(KILL cr);
 7923 
 7924   format %{ "SAR    $dst,$shift" %}
 7925   opcode(0xC1, 0x7);  /* C1 /7 ib */
 7926   ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark );
 7927   ins_pipe( ialu_mem_imm );
 7928 %}
 7929 
 7930 // Arithmetic Shift Right by variable
      // In-place RShiftI of $dst by a run-time count held in the eCXRegI operand
      // (opcode D3 /7 takes its count implicitly).  The flags are killed.
 7931 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 7932   match(Set dst (RShiftI dst shift));
 7933   effect(KILL cr);
 7934 
 7935   size(2);
 7936   format %{ "SAR    $dst,$shift" %}
 7937   opcode(0xD3, 0x7);  /* D3 /7 */
 7938   ins_encode( OpcP, RegOpc( dst ) );
 7939   ins_pipe( ialu_reg_reg );
 7940 %}
 7941 
 7942 // Logical shift right by one
      // In-place URShiftI (zero-filling) of $dst by the immI_1 constant; the
      // flags are killed.  Shift-by-one opcode D1 /5, declared as 2 bytes.
 7943 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7944   match(Set dst (URShiftI dst shift));
 7945   effect(KILL cr);
 7946 
 7947   size(2);
 7948   format %{ "SHR    $dst,$shift" %}
 7949   opcode(0xD1, 0x5);  /* D1 /5 */
 7950   ins_encode( OpcP, RegOpc( dst ) );
 7951   ins_pipe( ialu_reg );
 7952 %}
 7953 
 7954 // Logical Shift Right by 8-bit immediate
      // In-place URShiftI (zero-filling) of $dst by an immI8 constant; the flags
      // are killed.  Opcode C1 /5 followed by the immediate byte (3 bytes).
 7955 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 7956   match(Set dst (URShiftI dst shift));
 7957   effect(KILL cr);
 7958 
 7959   size(3);
 7960   format %{ "SHR    $dst,$shift" %}
 7961   opcode(0xC1, 0x5);  /* C1 /5 ib */
 7962   ins_encode( RegOpcImm( dst, shift) );
 7963   ins_pipe( ialu_reg );
 7964 %}
 7965 
 7966 
 7967 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 7968 // This idiom is used by the compiler for the i2b bytecode.
      // The shift pair is replaced by a single byte sign-extending move.  $src is
      // restricted to the xRegI class (presumably the registers that have a
      // byte-addressable low part -- confirm against the operand definition).
      // No flags operand is declared for this instruct.
 7969 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 7970   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 7971 
 7972   size(3);
 7973   format %{ "MOVSX  $dst,$src :8" %}
 7974   ins_encode %{
 7975     __ movsbl($dst$$Register, $src$$Register);
 7976   %}
 7977   ins_pipe(ialu_reg_reg);
 7978 %}
 7979 
 7980 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 7981 // This idiom is used by the compiler for the i2s bytecode.
      // The shift pair is replaced by a single 16-bit sign-extending move.
      // No flags operand is declared for this instruct.
 7982 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 7983   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 7984 
 7985   size(3);
 7986   format %{ "MOVSX  $dst,$src :16" %}
 7987   ins_encode %{
 7988     __ movswl($dst$$Register, $src$$Register);
 7989   %}
 7990   ins_pipe(ialu_reg_reg);
 7991 %}
 7992 
 7993 
 7994 // Logical Shift Right by variable
      // In-place URShiftI of $dst by a run-time count held in the eCXRegI operand
      // (opcode D3 /5 takes its count implicitly).  The flags are killed.
 7995 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 7996   match(Set dst (URShiftI dst shift));
 7997   effect(KILL cr);
 7998 
 7999   size(2);
 8000   format %{ "SHR    $dst,$shift" %}
 8001   opcode(0xD3, 0x5);  /* D3 /5 */
 8002   ins_encode( OpcP, RegOpc( dst ) );
 8003   ins_pipe( ialu_reg_reg );
 8004 %}
 8005 
 8006 
 8007 //----------Logical Instructions-----------------------------------------------
 8008 //----------Integer Logical Instructions---------------------------------------
 8009 // And Instructions
 8010 // And Register with Register
      // Two-address form: $dst = $dst & $src (opcode 0x23, 2 bytes).
      // The flags are killed.
 8011 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8012   match(Set dst (AndI dst src));
 8013   effect(KILL cr);
 8014 
 8015   size(2);
 8016   format %{ "AND    $dst,$src" %}
 8017   opcode(0x23);
 8018   ins_encode( OpcP, RegReg( dst, src) );
 8019   ins_pipe( ialu_reg_reg );
 8020 %}
 8021 
 8022 // And Register with Immediate
      // $dst = $dst & constant (opcode 81 /4).  No size() is declared; the
      // immediate is emitted through Con8or32, so the length presumably depends
      // on whether the constant fits in 8 bits.  The flags are killed.
 8023 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8024   match(Set dst (AndI dst src));
 8025   effect(KILL cr);
 8026 
 8027   format %{ "AND    $dst,$src" %}
 8028   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8029   // ins_encode( RegImm( dst, src) );
 8030   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8031   ins_pipe( ialu_reg );
 8032 %}
 8033 
 8034 // And Register with Memory
      // Folds the LoadI into the AND: $dst = $dst & [src] (opcode 0x23 with a
      // memory source).  Cost 150; the flags are killed.
 8035 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8036   match(Set dst (AndI dst (LoadI src)));
 8037   effect(KILL cr);
 8038 
 8039   ins_cost(150);
 8040   format %{ "AND    $dst,$src" %}
 8041   opcode(0x23);
 8042   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 8043   ins_pipe( ialu_reg_mem );
 8044 %}
 8045 
 8046 // And Memory with Register
      // Read-modify-write form: [dst] = [dst] & $src, matching a StoreI of an
      // AndI whose LoadI uses the same address.  Note the operand order passed
      // to RegMem is (src, dst) because the register is the source here.
 8047 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8048   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8049   effect(KILL cr);
 8050 
 8051   ins_cost(150);
 8052   format %{ "AND    $dst,$src" %}
 8053   opcode(0x21);  /* Opcode 21 /r */
 8054   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 8055   ins_pipe( ialu_mem_reg );
 8056 %}
 8057 
 8058 // And Memory with Immediate
      // Read-modify-write form: [dst] = [dst] & constant (opcode 81 /4 with a
      // memory operand).  Cost 125; the flags are killed.
 8059 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8060   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8061   effect(KILL cr);
 8062 
 8063   ins_cost(125);
 8064   format %{ "AND    $dst,$src" %}
 8065   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8066   // ins_encode( MemImm( dst, src) );
 8067   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
 8068   ins_pipe( ialu_mem_imm );
 8069 %}
 8070 
 8071 // BMI1 instructions
 8072 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
        // AND-NOT: $dst = (~$src1) & $src2.  The complement is recognised as an
        // XorI with the constant -1.  Only selected when UseBMI1Instructions is
        // set; three-operand form, so $dst need not equal a source.
 8073   match(Set dst (AndI (XorI src1 minus_1) src2));
 8074   predicate(UseBMI1Instructions);
 8075   effect(KILL cr);
 8076 
 8077   format %{ "ANDNL  $dst, $src1, $src2" %}
 8078 
 8079   ins_encode %{
 8080     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8081   %}
 8082   ins_pipe(ialu_reg);
 8083 %}
 8084 
 8085 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
        // AND-NOT with the second source loaded from memory:
        // $dst = (~$src1) & [src2].  Requires UseBMI1Instructions; cost 125.
 8086   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8087   predicate(UseBMI1Instructions);
 8088   effect(KILL cr);
 8089 
 8090   ins_cost(125);
 8091   format %{ "ANDNL  $dst, $src1, $src2" %}
 8092 
 8093   ins_encode %{
 8094     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8095   %}
 8096   ins_pipe(ialu_reg_mem);
 8097 %}
 8098 
 8099 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
        // Recognises (0 - $src) & $src, i.e. isolation of the lowest set bit,
        // and emits a single BLSI.  Requires UseBMI1Instructions.
 8100   match(Set dst (AndI (SubI imm_zero src) src));
 8101   predicate(UseBMI1Instructions);
 8102   effect(KILL cr);
 8103 
 8104   format %{ "BLSIL  $dst, $src" %}
 8105 
 8106   ins_encode %{
 8107     __ blsil($dst$$Register, $src$$Register);
 8108   %}
 8109   ins_pipe(ialu_reg);
 8110 %}
 8111 
 8112 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
        // Memory-source variant of the lowest-set-bit isolation: both uses of the
        // value in (0 - x) & x are LoadI of the same $src operand.
        // Requires UseBMI1Instructions; cost 125.
 8113   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8114   predicate(UseBMI1Instructions);
 8115   effect(KILL cr);
 8116 
 8117   ins_cost(125);
 8118   format %{ "BLSIL  $dst, $src" %}
 8119 
 8120   ins_encode %{
 8121     __ blsil($dst$$Register, $src$$Address);
 8122   %}
 8123   ins_pipe(ialu_reg_mem);
 8124 %}
 8125 
 8126 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8127 %{
        // Recognises ($src + (-1)) ^ $src, i.e. a mask of all bits up to and
        // including the lowest set bit, and emits a single BLSMSK.
        // Requires UseBMI1Instructions.
 8128   match(Set dst (XorI (AddI src minus_1) src));
 8129   predicate(UseBMI1Instructions);
 8130   effect(KILL cr);
 8131 
 8132   format %{ "BLSMSKL $dst, $src" %}
 8133 
 8134   ins_encode %{
 8135     __ blsmskl($dst$$Register, $src$$Register);
 8136   %}
 8137 
 8138   ins_pipe(ialu_reg);
 8139 %}
 8140 
 8141 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8142 %{
        // Memory-source variant of (x + (-1)) ^ x where both uses of x are LoadI
        // of the same $src operand.  Requires UseBMI1Instructions; cost 125.
 8143   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8144   predicate(UseBMI1Instructions);
 8145   effect(KILL cr);
 8146 
 8147   ins_cost(125);
 8148   format %{ "BLSMSKL $dst, $src" %}
 8149 
 8150   ins_encode %{
 8151     __ blsmskl($dst$$Register, $src$$Address);
 8152   %}
 8153 
 8154   ins_pipe(ialu_reg_mem);
 8155 %}
 8156 
 8157 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8158 %{
        // Recognises ($src + (-1)) & $src, i.e. clearing the lowest set bit, and
        // emits a single BLSR.  Requires UseBMI1Instructions.
 8159   match(Set dst (AndI (AddI src minus_1) src) );
 8160   predicate(UseBMI1Instructions);
 8161   effect(KILL cr);
 8162 
 8163   format %{ "BLSRL  $dst, $src" %}
 8164 
 8165   ins_encode %{
 8166     __ blsrl($dst$$Register, $src$$Register);
 8167   %}
 8168 
 8169   ins_pipe(ialu_reg);
 8170 %}
 8171 
 8172 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8173 %{
        // Memory-source variant of (x + (-1)) & x where both uses of x are LoadI
        // of the same $src operand.  Requires UseBMI1Instructions; cost 125.
 8174   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8175   predicate(UseBMI1Instructions);
 8176   effect(KILL cr);
 8177 
 8178   ins_cost(125);
 8179   format %{ "BLSRL  $dst, $src" %}
 8180 
 8181   ins_encode %{
 8182     __ blsrl($dst$$Register, $src$$Address);
 8183   %}
 8184 
 8185   ins_pipe(ialu_reg_mem);
 8186 %}
 8187 
 8188 // Or Instructions
 8189 // Or Register with Register
      // Two-address form: $dst = $dst | $src (opcode 0x0B, 2 bytes).
      // The flags are killed.
 8190 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8191   match(Set dst (OrI dst src));
 8192   effect(KILL cr);
 8193 
 8194   size(2);
 8195   format %{ "OR     $dst,$src" %}
 8196   opcode(0x0B);
 8197   ins_encode( OpcP, RegReg( dst, src) );
 8198   ins_pipe( ialu_reg_reg );
 8199 %}
 8200 
 8201 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
        // OR of an int with a pointer reinterpreted as an integer (CastP2X).
        // The cast needs no instruction of its own here: the pointer register is
        // used directly as the source of the same OR encoding as orI_eReg.
 8202   match(Set dst (OrI dst (CastP2X src)));
 8203   effect(KILL cr);
 8204 
 8205   size(2);
 8206   format %{ "OR     $dst,$src" %}
 8207   opcode(0x0B);
 8208   ins_encode( OpcP, RegReg( dst, src) );
 8209   ins_pipe( ialu_reg_reg );
 8210 %}
 8211 
 8212 
 8213 // Or Register with Immediate
      // $dst = $dst | constant (opcode 81 /1).  The immediate is emitted through
      // Con8or32; no fixed size() is declared.  The flags are killed.
 8214 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8215   match(Set dst (OrI dst src));
 8216   effect(KILL cr);
 8217 
 8218   format %{ "OR     $dst,$src" %}
 8219   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8220   // ins_encode( RegImm( dst, src) );
 8221   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8222   ins_pipe( ialu_reg );
 8223 %}
 8224 
 8225 // Or Register with Memory
      // Folds the LoadI into the OR: $dst = $dst | [src] (opcode 0x0B with a
      // memory source).  Cost 150; the flags are killed.
 8226 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8227   match(Set dst (OrI dst (LoadI src)));
 8228   effect(KILL cr);
 8229 
 8230   ins_cost(150);
 8231   format %{ "OR     $dst,$src" %}
 8232   opcode(0x0B);
 8233   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 8234   ins_pipe( ialu_reg_mem );
 8235 %}
 8236 
 8237 // Or Memory with Register
      // Read-modify-write form: [dst] = [dst] | $src, matching a StoreI of an
      // OrI whose LoadI uses the same address.  RegMem is given (src, dst)
      // because the register is the source operand here.
 8238 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8239   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8240   effect(KILL cr);
 8241 
 8242   ins_cost(150);
 8243   format %{ "OR     $dst,$src" %}
 8244   opcode(0x09);  /* Opcode 09 /r */
 8245   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 8246   ins_pipe( ialu_mem_reg );
 8247 %}
 8248 
 8249 // Or Memory with Immediate
      // Read-modify-write form: [dst] = [dst] | constant (opcode 81 /1 with a
      // memory operand).  Cost 125; the flags are killed.
 8250 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8251   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8252   effect(KILL cr);
 8253 
 8254   ins_cost(125);
 8255   format %{ "OR     $dst,$src" %}
 8256   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8257   // ins_encode( MemImm( dst, src) );
 8258   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
 8259   ins_pipe( ialu_mem_imm );
 8260 %}
 8261 
 8262 // ROL/ROR
 8263 // ROL expand
 8264 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
        // Helper with no match rule: it is only instantiated from the expand of
        // rolI_eReg_i1.  Rotates $dst left by one in place (opcode D1 /0).
 8265   effect(USE_DEF dst, USE shift, KILL cr);
 8266 
 8267   format %{ "ROL    $dst, $shift" %}
 8268   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8269   ins_encode( OpcP, RegOpc( dst ));
 8270   ins_pipe( ialu_reg );
 8271 %}
 8272 
 8273 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
        // Helper with no match rule: it is only instantiated from the expand of
        // rolI_eReg_i8.  Rotates $dst left in place by an 8-bit immediate
        // (opcode C1 /0 ib).
 8274   effect(USE_DEF dst, USE shift, KILL cr);
 8275 
 8276   format %{ "ROL    $dst, $shift" %}
 8277   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
 8278   ins_encode( RegOpcImm(dst, shift) );
 8279   ins_pipe(ialu_reg);
 8280 %}
 8281 
 8282 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
        // Helper with no match rule: it is only instantiated from the expands of
        // rolI_eReg_Var_C0 / rolI_eReg_Var_C32.  Rotates $dst left by a run-time
        // count (opcode D3 /0).  $dst is drawn from ncxRegI so that it cannot be
        // the register holding the count.
 8283   effect(USE_DEF dst, USE shift, KILL cr);
 8284 
 8285   format %{ "ROL    $dst, $shift" %}
 8286   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8287   ins_encode(OpcP, RegOpc(dst));
 8288   ins_pipe( ialu_reg_reg );
 8289 %}
 8290 // end of ROL expand
 8290 // end of ROL expand
 8291 
 8292 // ROL 32bit by one once
      // Recognises the rotate idiom (dst << 1) | (dst >>> -1) and expands it to
      // rolI_eReg_imm1.  The right-shift count is the constant -1 (immI_M1);
      // presumably this is the 5-bit-masked equivalent of 31 -- confirm.
 8293 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8294   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8295 
 8296   expand %{
 8297     rolI_eReg_imm1(dst, lshift, cr);
 8298   %}
 8299 %}
 8300 
 8301 // ROL 32bit var by imm8 once
      // Recognises (dst << lshift) | (dst >>> rshift) as a rotate-left by lshift.
      // The predicate reads both shift constants from the matched node and
      // accepts the pattern only when their sum is 0 modulo 32.
 8302 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8303   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8304   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8305 
 8306   expand %{
 8307     rolI_eReg_imm8(dst, lshift, cr);
 8308   %}
 8309 %}
 8310 
 8311 // ROL 32bit var by var once
      // Variable rotate-left written as (dst << s) | (dst >>> (0 - s)); expands
      // to the count-in-register rotate rolI_eReg_CL.
 8312 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8313   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8314 
 8315   expand %{
 8316     rolI_eReg_CL(dst, shift, cr);
 8317   %}
 8318 %}
 8319 
 8320 // ROL 32bit var by var once
      // Variable rotate-left written as (dst << s) | (dst >>> (32 - s)); expands
      // to the count-in-register rotate rolI_eReg_CL.
 8321 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8322   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8323 
 8324   expand %{
 8325     rolI_eReg_CL(dst, shift, cr);
 8326   %}
 8327 %}
 8328 
 8329 // ROR expand
 8330 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
        // Helper with no match rule: it is only instantiated from the expand of
        // rorI_eReg_i1.  Rotates $dst right by one in place (opcode D1 /1).
 8331   effect(USE_DEF dst, USE shift, KILL cr);
 8332 
 8333   format %{ "ROR    $dst, $shift" %}
 8334   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8335   ins_encode( OpcP, RegOpc( dst ) );
 8336   ins_pipe( ialu_reg );
 8337 %}
 8338 
 8339 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
        // Helper with no match rule: it is only instantiated from the expand of
        // rorI_eReg_i8.  Rotates $dst right in place by an 8-bit immediate
        // (opcode C1 /1 ib).
 8340   effect (USE_DEF dst, USE shift, KILL cr);
 8341 
 8342   format %{ "ROR    $dst, $shift" %}
 8343   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
 8344   ins_encode( RegOpcImm(dst, shift) );
 8345   ins_pipe( ialu_reg );
 8346 %}
 8347 
 8348 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
        // Helper with no match rule: it is only instantiated from the expands of
        // rorI_eReg_Var_C0 / rorI_eReg_Var_C32.  Rotates $dst right by a run-time
        // count (opcode D3 /1).  $dst is drawn from ncxRegI so that it cannot be
        // the register holding the count.
 8349   effect(USE_DEF dst, USE shift, KILL cr);
 8350 
 8351   format %{ "ROR    $dst, $shift" %}
 8352   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8353   ins_encode(OpcP, RegOpc(dst));
 8354   ins_pipe( ialu_reg_reg );
 8355 %}
 8356 // end of ROR expand
 8357 
 8358 // ROR 32bit by one once
      // Recognises the rotate idiom (dst >>> 1) | (dst << -1) and expands it to
      // rorI_eReg_imm1.  The left-shift count is the constant -1 (immI_M1);
      // presumably this is the 5-bit-masked equivalent of 31 -- confirm.
 8359 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8360   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8361 
 8362   expand %{
 8363     rorI_eReg_imm1(dst, rshift, cr);
 8364   %}
 8365 %}
 8366 
 8367 // ROR 32bit by immI8 once
      // Recognises (dst >>> rshift) | (dst << lshift) as a rotate-right by
      // rshift.  The predicate reads both shift constants from the matched node
      // and accepts the pattern only when their sum is 0 modulo 32.
 8368 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8369   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8370   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8371 
 8372   expand %{
 8373     rorI_eReg_imm8(dst, rshift, cr);
 8374   %}
 8375 %}
 8376 
 8377 // ROR 32bit var by var once
      // Variable rotate-right written as (dst >>> s) | (dst << (0 - s)); expands
      // to the count-in-register rotate rorI_eReg_CL.
 8378 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8379   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8380 
 8381   expand %{
 8382     rorI_eReg_CL(dst, shift, cr);
 8383   %}
 8384 %}
 8385 
 8386 // ROR 32bit var by var once
      // Variable rotate-right written as (dst >>> s) | (dst << (32 - s)); expands
      // to the count-in-register rotate rorI_eReg_CL.
 8387 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8388   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8389 
 8390   expand %{
 8391     rorI_eReg_CL(dst, shift, cr);
 8392   %}
 8393 %}
 8394 
 8395 // Xor Instructions
 8396 // Xor Register with Register
      // Two-address form: $dst = $dst ^ $src (opcode 0x33, 2 bytes).
      // The flags are killed.
 8397 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8398   match(Set dst (XorI dst src));
 8399   effect(KILL cr);
 8400 
 8401   size(2);
 8402   format %{ "XOR    $dst,$src" %}
 8403   opcode(0x33);
 8404   ins_encode( OpcP, RegReg( dst, src) );
 8405   ins_pipe( ialu_reg_reg );
 8406 %}
 8407 
 8408 // Xor Register with Immediate -1
      // XOR with all ones is a bitwise complement, so a 2-byte NOT is emitted
      // instead.  Unlike the other XOR forms, this instruct declares no flags
      // operand and no KILL effect.
 8409 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8410   match(Set dst (XorI dst imm));
 8411 
 8412   size(2);
 8413   format %{ "NOT    $dst" %}
 8414   ins_encode %{
 8415      __ notl($dst$$Register);
 8416   %}
 8417   ins_pipe( ialu_reg );
 8418 %}
 8419 
 8420 // Xor Register with Immediate
      // $dst = $dst ^ constant (opcode 81 /6).  The immediate is emitted through
      // Con8or32; no fixed size() is declared.  The flags are killed.
 8421 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8422   match(Set dst (XorI dst src));
 8423   effect(KILL cr);
 8424 
 8425   format %{ "XOR    $dst,$src" %}
 8426   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8427   // ins_encode( RegImm( dst, src) );
 8428   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8429   ins_pipe( ialu_reg );
 8430 %}
 8431 
 8432 // Xor Register with Memory
      // Folds the LoadI into the XOR: $dst = $dst ^ [src] (opcode 0x33 with a
      // memory source).  Cost 150; the flags are killed.
 8433 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8434   match(Set dst (XorI dst (LoadI src)));
 8435   effect(KILL cr);
 8436 
 8437   ins_cost(150);
 8438   format %{ "XOR    $dst,$src" %}
 8439   opcode(0x33);
 8440   ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark );
 8441   ins_pipe( ialu_reg_mem );
 8442 %}
 8443 
 8444 // Xor Memory with Register
      // Read-modify-write form: [dst] = [dst] ^ $src, matching a StoreI of a
      // XorI whose LoadI uses the same address.  RegMem is given (src, dst)
      // because the register is the source operand here.
 8445 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8446   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8447   effect(KILL cr);
 8448 
 8449   ins_cost(150);
 8450   format %{ "XOR    $dst,$src" %}
 8451   opcode(0x31);  /* Opcode 31 /r */
 8452   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 8453   ins_pipe( ialu_mem_reg );
 8454 %}
 8455 
 8456 // Xor Memory with Immediate
      // Read-modify-write form: [dst] = [dst] ^ constant (opcode 81 /6 with a
      // memory operand).  Cost 125; the flags are killed.
 8457 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8458   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8459   effect(KILL cr);
 8460 
 8461   ins_cost(125);
 8462   format %{ "XOR    $dst,$src" %}
 8463   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8464   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
 8465   ins_pipe( ialu_mem_imm );
 8466 %}
 8467 
 8468 //----------Convert Int to Boolean---------------------------------------------
 8469 
 8470 instruct movI_nocopy(rRegI dst, rRegI src) %{
        // Helper with no match rule: first step of the convI2B expand.  Defines
        // $dst from $src through the enc_Copy encoding class.
 8471   effect( DEF dst, USE src );
 8472   format %{ "MOV    $dst,$src" %}
 8473   ins_encode( enc_Copy( dst, src) );
 8474   ins_pipe( ialu_reg_reg );
 8475 %}
 8476 
 8477 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
        // Helper with no match rule: second step of the convI2B expand, entered
        // with $dst already holding a copy of $src.  NEG $dst sets the carry flag
        // exactly when the value is non-zero, and ADC $dst,$src then adds the
        // original value back plus that carry, leaving 0 or 1 in $dst.
 8478   effect( USE_DEF dst, USE src, KILL cr );
 8479 
 8480   size(4);
 8481   format %{ "NEG    $dst\n\t"
 8482             "ADC    $dst,$src" %}
 8483   ins_encode( neg_reg(dst),
 8484               OpcRegReg(0x13,dst,src) );
 8485   ins_pipe( ialu_reg_reg_long );
 8486 %}
 8487 
 8488 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
        // Int to boolean (Conv2B).  Expanded into a register copy followed by the
        // NEG/ADC pair of ci2b, so no branch is needed.
 8489   match(Set dst (Conv2B src));
 8490 
 8491   expand %{
 8492     movI_nocopy(dst,src);
 8493     ci2b(dst,src,cr);
 8494   %}
 8495 %}
 8496 
 8497 instruct movP_nocopy(rRegI dst, eRegP src) %{
        // Helper with no match rule: first step of the convP2B expand.  Defines
        // the int register $dst from the pointer register $src through enc_Copy.
 8498   effect( DEF dst, USE src );
 8499   format %{ "MOV    $dst,$src" %}
 8500   ins_encode( enc_Copy( dst, src) );
 8501   ins_pipe( ialu_reg_reg );
 8502 %}
 8503 
 8504 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
        // Helper with no match rule: second step of the convP2B expand, entered
        // with $dst already holding a copy of the pointer.  Same NEG/ADC sequence
        // as ci2b, leaving 0 for a null pointer and 1 otherwise.  Unlike ci2b, no
        // size() is declared here.
 8505   effect( USE_DEF dst, USE src, KILL cr );
 8506   format %{ "NEG    $dst\n\t"
 8507             "ADC    $dst,$src" %}
 8508   ins_encode( neg_reg(dst),
 8509               OpcRegReg(0x13,dst,src) );
 8510   ins_pipe( ialu_reg_reg_long );
 8511 %}
 8512 
 8513 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
        // Pointer to boolean (Conv2B on a pointer input).  Expanded into a
        // register copy followed by the NEG/ADC pair of cp2b.
 8514   match(Set dst (Conv2B src));
 8515 
 8516   expand %{
 8517     movP_nocopy(dst,src);
 8518     cp2b(dst,src,cr);
 8519   %}
 8520 %}
 8521 
 8522 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
        // Materialises CmpLTMask: $dst = ($p < $q, signed) ? -1 : 0.
        // $dst is zeroed first, the low byte is set to 0/1 from the comparison,
        // and the final NEG turns 1 into an all-ones mask.  $p and $q come from
        // ncxRegI so that zeroing $dst cannot destroy an input before the CMP.
 8523   match(Set dst (CmpLTMask p q));
 8524   effect(KILL cr);
 8525   ins_cost(400);
 8526 
 8527   // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
 8528   format %{ "XOR    $dst,$dst\n\t"
 8529             "CMP    $p,$q\n\t"
 8530             "SETlt  $dst\n\t"
 8531             "NEG    $dst" %}
 8532   ins_encode %{
 8533     Register Rp = $p$$Register;
 8534     Register Rq = $q$$Register;
 8535     Register Rd = $dst$$Register;
          // The sequence is branch-free, so no Label is needed here.
 8537     __ xorl(Rd, Rd);
 8538     __ cmpl(Rp, Rq);
 8539     __ setb(Assembler::less, Rd);
 8540     __ negl(Rd);
 8541   %}
 8542 
 8543   ins_pipe(pipe_slow);
 8544 %}
 8545 
 8546 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
        // CmpLTMask against the constant zero: the mask is -1 when $dst is
        // negative and 0 otherwise, which is exactly the sign bit replicated by
        // an arithmetic right shift of 31.  Done in place on $dst.
 8547   match(Set dst (CmpLTMask dst zero));
 8548   effect(DEF dst, KILL cr);
 8549   ins_cost(100);
 8550 
 8551   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8552   ins_encode %{
 8553   __ sarl($dst$$Register, 31);
 8554   %}
 8555   ins_pipe(ialu_reg);
 8556 %}
 8557 
 8558 /* Conditional add: p = (p - q) + ((p < q) ? y : 0).
         A short forward branch is used instead of materialising the mask,
         because saving a register is judged better than avoiding a branch. */
 8559 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8560   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8561   effect(KILL cr);
 8562   ins_cost(400);
 8563   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8564             "JGE    done\n\t"
 8565             "ADD    $p,$y\n"
 8566             "done:  " %}
 8567   ins_encode %{
 8571     Label Lskip_add;
          // p -= q; the flags of this subtraction drive the branch below.
 8572     __ subl($p$$Register, $q$$Register);
          // Original p >= q (signed): the mask would be zero, so add nothing.
 8573     __ jccb(Assembler::greaterEqual, Lskip_add);
 8574     __ addl($p$$Register, $y$$Register);
 8575     __ bind(Lskip_add);
 8576   %}
 8577 
 8578   ins_pipe(pipe_cmplt);
 8579 %}
 8580 
 8581 /* Conditional clear: y = (p < q) ? y : 0.
         A short forward branch is used instead of materialising the mask,
         because saving a register is judged better than avoiding a branch. */
 8582 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8583   match(Set y (AndI (CmpLTMask p q) y));
 8584   effect(KILL cr);
 8585 
 8586   ins_cost(300);
 8587 
 8588   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8589             "JLT      done\n\t"
 8590             "XORL     $y, $y\n"
 8591             "done:  " %}
 8592   ins_encode %{
 8596     Label Lkeep_y;
 8597     __ cmpl($p$$Register, $q$$Register);
          // p < q (signed): the mask is all ones, so y is left untouched.
 8598     __ jccb(Assembler::less, Lkeep_y);
          // Otherwise the mask is zero and so is the result.
 8599     __ xorl($y$$Register, $y$$Register);
 8600     __ bind(Lkeep_y);
 8601   %}
 8602 
 8603   ins_pipe(pipe_cmplt);
 8604 %}
 8605 
 8606 /* If I enable this, I encourage spilling in the inner loop of compress.
 8607 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8608   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8609 */
 8610 //----------Overflow Math Instructions-----------------------------------------
 8611 
 8612 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8613 %{
        // Overflow check for int addition: the result of the node is the flags
        // register.  A real ADD is performed, so $op1 is overwritten (USE_KILL)
        // and only the flags produced by the addition are kept.
 8614   match(Set cr (OverflowAddI op1 op2));
 8615   effect(DEF cr, USE_KILL op1, USE op2);
 8616 
 8617   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8618 
 8619   ins_encode %{
 8620     __ addl($op1$$Register, $op2$$Register);
 8621   %}
 8622   ins_pipe(ialu_reg_reg);
 8623 %}
 8624 
 8625 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8626 %{
        // Overflow check for int addition with a constant second operand.
        // As in the register form, the ADD overwrites $op1 (USE_KILL) and the
        // flags are the only result.
 8627   match(Set cr (OverflowAddI op1 op2));
 8628   effect(DEF cr, USE_KILL op1, USE op2);
 8629 
 8630   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8631 
 8632   ins_encode %{
 8633     __ addl($op1$$Register, $op2$$constant);
 8634   %}
 8635   ins_pipe(ialu_reg_reg);
 8636 %}
 8637 
 8638 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8639 %{
        // Overflow check for int subtraction.  CMP is used, so neither operand is
        // modified and no USE_KILL effect is declared.
 8640   match(Set cr (OverflowSubI op1 op2));
 8641 
 8642   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8643   ins_encode %{
 8644     __ cmpl($op1$$Register, $op2$$Register);
 8645   %}
 8646   ins_pipe(ialu_reg_reg);
 8647 %}
 8648 
 8649 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8650 %{
        // Overflow check for int subtraction of a constant, again via CMP so that
        // $op1 is left unchanged.
 8651   match(Set cr (OverflowSubI op1 op2));
 8652 
 8653   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8654   ins_encode %{
 8655     __ cmpl($op1$$Register, $op2$$constant);
 8656   %}
 8657   ins_pipe(ialu_reg_reg);
 8658 %}
 8659 
 8660 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8661 %{
        // Overflow check for int negation, recognised as OverflowSubI with a zero
        // first operand.  A real NEG is performed, so $op2 is overwritten
        // (USE_KILL) and only the flags are kept.
 8662   match(Set cr (OverflowSubI zero op2));
 8663   effect(DEF cr, USE_KILL op2);
 8664 
 8665   format %{ "NEG    $op2\t# overflow check int" %}
 8666   ins_encode %{
 8667     __ negl($op2$$Register);
 8668   %}
 8669   ins_pipe(ialu_reg_reg);
 8670 %}
 8671 
 8672 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8673 %{
        // Overflow check for int multiplication.  The two-operand IMUL overwrites
        // $op1 (USE_KILL); the flags it sets are the only result.
 8674   match(Set cr (OverflowMulI op1 op2));
 8675   effect(DEF cr, USE_KILL op1, USE op2);
 8676 
 8677   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8678   ins_encode %{
 8679     __ imull($op1$$Register, $op2$$Register);
 8680   %}
 8681   ins_pipe(ialu_reg_reg_alu0);
 8682 %}
 8683 
 8684 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8685 %{
        // Overflow check for int multiplication by a constant.  The three-operand
        // IMUL writes the product into the scratch register $tmp, so $op1 is only
        // read (USE) and survives the check.
 8686   match(Set cr (OverflowMulI op1 op2));
 8687   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8688 
 8689   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8690   ins_encode %{
 8691     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8692   %}
 8693   ins_pipe(ialu_reg_reg_alu0);
 8694 %}
 8695 
 8696 // Integer Absolute Instructions
 8697 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8698 %{
        // Branch-free integer absolute value.  $tmp receives the sign mask of
        // $src (0 or -1); then $dst = ($src ^ mask) - mask.  $dst is declared
        // TEMP as well because $src is read again after $tmp has been written.
 8699   match(Set dst (AbsI src));
 8700   effect(TEMP dst, TEMP tmp, KILL cr);
 8701   format %{ "movl $tmp, $src\n\t"
 8702             "sarl $tmp, 31\n\t"
 8703             "movl $dst, $src\n\t"
 8704             "xorl $dst, $tmp\n\t"
 8705             "subl $dst, $tmp\n"
 8706           %}
 8707   ins_encode %{
          // mask = src >> 31 (arithmetic): all ones when src is negative.
 8708     __ movl($tmp$$Register, $src$$Register);
 8709     __ sarl($tmp$$Register, 31);
          // dst = (src ^ mask) - mask
 8710     __ movl($dst$$Register, $src$$Register);
 8711     __ xorl($dst$$Register, $tmp$$Register);
 8712     __ subl($dst$$Register, $tmp$$Register);
 8713   %}
 8714 
 8715   ins_pipe(ialu_reg_reg);
 8716 %}
 8717 
 8718 //----------Long Instructions------------------------------------------------
 8719 // Add Long Register with Register
      // 64-bit add on a register pair: ADD on the low halves followed by ADC on
      // the high halves so the carry propagates (opcodes 0x03 and 0x13).
      // The flags are killed.
 8720 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8721   match(Set dst (AddL dst src));
 8722   effect(KILL cr);
 8723   ins_cost(200);
 8724   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8725             "ADC    $dst.hi,$src.hi" %}
 8726   opcode(0x03, 0x13);
 8727   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8728   ins_pipe( ialu_reg_reg_long );
 8729 %}
 8730 
 8731 // Add Long Register with Immediate
      // 64-bit add of a constant: ADD of the constant's low word to the low
      // half, then ADC of its high word to the high half (81 /0 and 81 /2).
      // The flags are killed.
 8732 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8733   match(Set dst (AddL dst src));
 8734   effect(KILL cr);
 8735   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8736             "ADC    $dst.hi,$src.hi" %}
 8737   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8738   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8739   ins_pipe( ialu_reg_long );
 8740 %}
 8741 
 8742 // Add Long Register with Memory
      // Folds the LoadL into the add: the low word is read from $mem and the
      // high word from $mem+4 (see format). Costed at 125, below the
      // register-register form (200).
 8743 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8744   match(Set dst (AddL dst (LoadL mem)));
 8745   effect(KILL cr);
 8746   ins_cost(125);
 8747   format %{ "ADD    $dst.lo,$mem\n\t"
 8748             "ADC    $dst.hi,$mem+4" %}
 8749   opcode(0x03, 0x13);
        // Both memory-operand instructions are emitted between one
        // SetInstMark / ClearInstMark pair.
 8750   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 8751   ins_pipe( ialu_reg_long_mem );
 8752 %}
 8753 
 8754 // Subtract Long Register with Register.
      // Mirrors addL_eReg: SUB on the low halves followed by SBB on the high
      // halves so the borrow from the low word is included. Flags are killed.
 8755 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8756   match(Set dst (SubL dst src));
 8757   effect(KILL cr);
 8758   ins_cost(200);
 8759   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8760             "SBB    $dst.hi,$src.hi" %}
 8761   opcode(0x2B, 0x1B);
 8762   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8763   ins_pipe( ialu_reg_reg_long );
 8764 %}
 8765 
 8766 // Subtract Long Register with Immediate
      // Long immediate (immL) variant: SUB on the low word, SBB on the high
      // word, using the same Long_OpcSErm_Lo / Long_OpcSErm_Hi encoders as the
      // other long-immediate rules in this section.
 8767 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8768   match(Set dst (SubL dst src));
 8769   effect(KILL cr);
 8770   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8771             "SBB    $dst.hi,$src.hi" %}
 8772   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8773   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8774   ins_pipe( ialu_reg_long );
 8775 %}
 8776 
 8777 // Subtract Long Register with Memory
      // Folds the LoadL into the subtract: low word from $mem, high word from
      // $mem+4 (see format). Cost 125, below the register-register form (200).
 8778 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8779   match(Set dst (SubL dst (LoadL mem)));
 8780   effect(KILL cr);
 8781   ins_cost(125);
 8782   format %{ "SUB    $dst.lo,$mem\n\t"
 8783             "SBB    $dst.hi,$mem+4" %}
 8784   opcode(0x2B, 0x1B);
 8785   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 8786   ins_pipe( ialu_reg_long_mem );
 8787 %}
 8788 
 8789 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
        // Long negate: matches (0 - dst) with the zero supplied as an immL0
        // operand. The format shows NEG hi, NEG lo, then SBB hi,0 to account for
        // the borrow from the low word. The bytes come from the neg_long
        // enc_class, which is defined outside this chunk.
 8790   match(Set dst (SubL zero dst));
 8791   effect(KILL cr);
 8792   ins_cost(300);
 8793   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8794   ins_encode( neg_long(dst) );
 8795   ins_pipe( ialu_reg_reg_long );
 8796 %}
 8797 
 8798 // And Long Register with Register
      // Bitwise AND has no carry between halves, so the same opcode byte is
      // listed for both the low and the high word. Flags are killed.
 8799 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8800   match(Set dst (AndL dst src));
 8801   effect(KILL cr);
 8802   format %{ "AND    $dst.lo,$src.lo\n\t"
 8803             "AND    $dst.hi,$src.hi" %}
 8804   opcode(0x23,0x23);
 8805   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8806   ins_pipe( ialu_reg_reg_long );
 8807 %}
 8808 
 8809 // And Long Register with Immediate
      // Long immediate (immL) variant: AND is applied to the low and the high
      // word separately, each with the matching half of the constant.
 8810 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8811   match(Set dst (AndL dst src));
 8812   effect(KILL cr);
 8813   format %{ "AND    $dst.lo,$src.lo\n\t"
 8814             "AND    $dst.hi,$src.hi" %}
 8815   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8816   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8817   ins_pipe( ialu_reg_long );
 8818 %}
 8819 
 8820 // And Long Register with Memory
      // Folds the LoadL into the AND: low word from $mem, high word from $mem+4
      // (see format). Cost 125.
 8821 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8822   match(Set dst (AndL dst (LoadL mem)));
 8823   effect(KILL cr);
 8824   ins_cost(125);
 8825   format %{ "AND    $dst.lo,$mem\n\t"
 8826             "AND    $dst.hi,$mem+4" %}
 8827   opcode(0x23, 0x23);
 8828   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 8829   ins_pipe( ialu_reg_long_mem );
 8830 %}
 8831 
 8832 // BMI1 instructions
 8833 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
        // BMI1 and-not on a long: matches (src1 XOR -1) AND src2 and emits one
        // ANDNL per 32-bit half. Only available when UseBMI1Instructions is set.
        // dst is declared TEMP and the flags register is killed.
 8834   match(Set dst (AndL (XorL src1 minus_1) src2));
 8835   predicate(UseBMI1Instructions);
 8836   effect(KILL cr, TEMP dst);
 8837
 8838   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8839             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 8840          %}
 8841
 8842   ins_encode %{
 8843     Register Rdst = $dst$$Register;
 8844     Register Rsrc1 = $src1$$Register;
 8845     Register Rsrc2 = $src2$$Register;
          // Low halves first, then the high halves via HIGH_FROM_LOW.
 8846     __ andnl(Rdst, Rsrc1, Rsrc2);
 8847     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 8848   %}
 8849   ins_pipe(ialu_reg_reg_long);
 8850 %}
 8851 
 8852 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
        // Memory form of the BMI1 long and-not: the second operand is a folded
        // LoadL. The high word is addressed by rebuilding the operand address
        // with the displacement increased by 4.
 8853   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 8854   predicate(UseBMI1Instructions);
 8855   effect(KILL cr, TEMP dst);
 8856
 8857   ins_cost(125);
 8858   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 8859             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 8860          %}
 8861
 8862   ins_encode %{
 8863     Register Rdst = $dst$$Register;
 8864     Register Rsrc1 = $src1$$Register;
          // Address of the high 32 bits: same base/index/scale, disp + 4.
 8865     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 8866
 8867     __ andnl(Rdst, Rsrc1, $src2$$Address);
 8868     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 8869   %}
 8870   ins_pipe(ialu_reg_mem);
 8871 %}
 8872 
 8873 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
        // BMI1 BLSI on a long: matches (0 - src) AND src. The high result word
        // is preset to 0 and BLSIL is applied to the low word. If that result is
        // non-zero (JNZ) the sequence is done; otherwise BLSIL is applied to the
        // high word as well.
        // NOTE(review): dst is TEMP, presumably so that writing dst.hi first
        // cannot clobber src - confirm against the register allocator rules.
 8874   match(Set dst (AndL (SubL imm_zero src) src));
 8875   predicate(UseBMI1Instructions);
 8876   effect(KILL cr, TEMP dst);
 8877
 8878   format %{ "MOVL   $dst.hi, 0\n\t"
 8879             "BLSIL  $dst.lo, $src.lo\n\t"
 8880             "JNZ    done\n\t"
 8881             "BLSIL  $dst.hi, $src.hi\n"
 8882             "done:"
 8883          %}
 8884
 8885   ins_encode %{
 8886     Label done;
 8887     Register Rdst = $dst$$Register;
 8888     Register Rsrc = $src$$Register;
 8889     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8890     __ blsil(Rdst, Rsrc);
          // Short branch: skip the high-word step when the low result is non-zero.
 8891     __ jccb(Assembler::notZero, done);
 8892     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 8893     __ bind(done);
 8894   %}
 8895   ins_pipe(ialu_reg);
 8896 %}
 8897 
 8898 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
        // Memory form of the long BLSI rule: both uses of the source in the match
        // tree are the same folded LoadL. Same control flow as the register
        // form; the high word is read from the operand address with disp + 4.
 8899   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 8900   predicate(UseBMI1Instructions);
 8901   effect(KILL cr, TEMP dst);
 8902
 8903   ins_cost(125);
 8904   format %{ "MOVL   $dst.hi, 0\n\t"
 8905             "BLSIL  $dst.lo, $src\n\t"
 8906             "JNZ    done\n\t"
 8907             "BLSIL  $dst.hi, $src+4\n"
 8908             "done:"
 8909          %}
 8910
 8911   ins_encode %{
 8912     Label done;
 8913     Register Rdst = $dst$$Register;
          // Address of the high 32 bits: same base/index/scale, disp + 4.
 8914     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 8915
 8916     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8917     __ blsil(Rdst, $src$$Address);
 8918     __ jccb(Assembler::notZero, done);
 8919     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 8920     __ bind(done);
 8921   %}
 8922   ins_pipe(ialu_reg_mem);
 8923 %}
 8924 
 8925 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 8926 %{
        // BMI1 BLSMSK on a long: matches (src + -1) XOR src. The high result
        // word is preset to 0 and BLSMSKL is applied to the low word. The high
        // word is only processed when the carry flag is set afterwards (JNC
        // skips it).
        // NOTE(review): this relies on BLSMSK setting CF exactly when its source
        // is zero - confirm against the Intel SDM.
 8927   match(Set dst (XorL (AddL src minus_1) src));
 8928   predicate(UseBMI1Instructions);
 8929   effect(KILL cr, TEMP dst);
 8930
 8931   format %{ "MOVL    $dst.hi, 0\n\t"
 8932             "BLSMSKL $dst.lo, $src.lo\n\t"
 8933             "JNC     done\n\t"
 8934             "BLSMSKL $dst.hi, $src.hi\n"
 8935             "done:"
 8936          %}
 8937
 8938   ins_encode %{
 8939     Label done;
 8940     Register Rdst = $dst$$Register;
 8941     Register Rsrc = $src$$Register;
 8942     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8943     __ blsmskl(Rdst, Rsrc);
          // Short branch: carry clear means the high word keeps its preset 0.
 8944     __ jccb(Assembler::carryClear, done);
 8945     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 8946     __ bind(done);
 8947   %}
 8948
 8949   ins_pipe(ialu_reg);
 8950 %}
 8951 
 8952 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 8953 %{
        // Memory form of the long BLSMSK rule: both uses of the source in the
        // match tree are the same folded LoadL. Same control flow as the
        // register form; the high word is read from the operand address with
        // disp + 4.
 8954   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 8955   predicate(UseBMI1Instructions);
 8956   effect(KILL cr, TEMP dst);
 8957
 8958   ins_cost(125);
 8959   format %{ "MOVL    $dst.hi, 0\n\t"
 8960             "BLSMSKL $dst.lo, $src\n\t"
 8961             "JNC     done\n\t"
 8962             "BLSMSKL $dst.hi, $src+4\n"
 8963             "done:"
 8964          %}
 8965
 8966   ins_encode %{
 8967     Label done;
 8968     Register Rdst = $dst$$Register;
          // Address of the high 32 bits: same base/index/scale, disp + 4.
 8969     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 8970
 8971     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8972     __ blsmskl(Rdst, $src$$Address);
 8973     __ jccb(Assembler::carryClear, done);
 8974     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 8975     __ bind(done);
 8976   %}
 8977
 8978   ins_pipe(ialu_reg_mem);
 8979 %}
 8980 
 8981 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 8982 %{
        // BMI1 BLSR on a long: matches (src + -1) AND src. Unlike the BLSI and
        // BLSMSK rules, the high result word is preset to a copy of src.hi
        // rather than 0. BLSRL is applied to the low word; the high word is only
        // processed when the carry flag is set afterwards (JNC skips it).
        // NOTE(review): this relies on BLSR setting CF exactly when its source
        // is zero - confirm against the Intel SDM.
 8983   match(Set dst (AndL (AddL src minus_1) src) );
 8984   predicate(UseBMI1Instructions);
 8985   effect(KILL cr, TEMP dst);
 8986
 8987   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 8988             "BLSRL  $dst.lo, $src.lo\n\t"
 8989             "JNC    done\n\t"
 8990             "BLSRL  $dst.hi, $src.hi\n"
 8991             "done:"
 8992   %}
 8993
 8994   ins_encode %{
 8995     Label done;
 8996     Register Rdst = $dst$$Register;
 8997     Register Rsrc = $src$$Register;
 8998     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 8999     __ blsrl(Rdst, Rsrc);
          // Short branch: carry clear means the copied high word is already final.
 9000     __ jccb(Assembler::carryClear, done);
 9001     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9002     __ bind(done);
 9003   %}
 9004
 9005   ins_pipe(ialu_reg);
 9006 %}
 9007 
 9008 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9009 %{
        // Memory form of the long BLSR rule: both uses of the source in the match
        // tree are the same folded LoadL. The high result word is first loaded
        // from the operand address with disp + 4, then the same control flow as
        // the register form follows.
 9010   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9011   predicate(UseBMI1Instructions);
 9012   effect(KILL cr, TEMP dst);
 9013
 9014   ins_cost(125);
 9015   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9016             "BLSRL  $dst.lo, $src\n\t"
 9017             "JNC    done\n\t"
 9018             "BLSRL  $dst.hi, $src+4\n"
 9019             "done:"
 9020   %}
 9021
 9022   ins_encode %{
 9023     Label done;
 9024     Register Rdst = $dst$$Register;
          // Address of the high 32 bits: same base/index/scale, disp + 4.
 9025     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9026     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9027     __ blsrl(Rdst, $src$$Address);
 9028     __ jccb(Assembler::carryClear, done);
 9029     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9030     __ bind(done);
 9031   %}
 9032
 9033   ins_pipe(ialu_reg_mem);
 9034 %}
 9035 
 9036 // Or Long Register with Register
      // Bitwise OR applied independently to the low and high words; the same
      // opcode byte is listed for both halves. Flags are killed.
 9037 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9038   match(Set dst (OrL dst src));
 9039   effect(KILL cr);
 9040   format %{ "OR     $dst.lo,$src.lo\n\t"
 9041             "OR     $dst.hi,$src.hi" %}
 9042   opcode(0x0B,0x0B);
 9043   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9044   ins_pipe( ialu_reg_reg_long );
 9045 %}
 9046 
 9047 // Or Long Register with Immediate
      // Long immediate (immL) variant: OR is applied to the low and the high
      // word separately, each with the matching half of the constant.
 9048 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9049   match(Set dst (OrL dst src));
 9050   effect(KILL cr);
 9051   format %{ "OR     $dst.lo,$src.lo\n\t"
 9052             "OR     $dst.hi,$src.hi" %}
 9053   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9054   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9055   ins_pipe( ialu_reg_long );
 9056 %}
 9057 
 9058 // Or Long Register with Memory
      // Folds the LoadL into the OR: low word from $mem, high word from $mem+4
      // (see format). Cost 125.
 9059 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9060   match(Set dst (OrL dst (LoadL mem)));
 9061   effect(KILL cr);
 9062   ins_cost(125);
 9063   format %{ "OR     $dst.lo,$mem\n\t"
 9064             "OR     $dst.hi,$mem+4" %}
 9065   opcode(0x0B,0x0B);
 9066   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 9067   ins_pipe( ialu_reg_long_mem );
 9068 %}
 9069 
 9070 // Xor Long Register with Register
      // Bitwise XOR applied independently to the low and high words; the same
      // opcode byte is listed for both halves. Flags are killed.
 9071 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9072   match(Set dst (XorL dst src));
 9073   effect(KILL cr);
 9074   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9075             "XOR    $dst.hi,$src.hi" %}
 9076   opcode(0x33,0x33);
 9077   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9078   ins_pipe( ialu_reg_reg_long );
 9079 %}
 9080 
 9081 // Xor Long Register with Immediate -1
      // XOR with all ones is emitted as NOT on each half. Unlike the other XorL
      // rules in this section, no flags register operand or KILL effect is
      // declared - presumably because NOT leaves the flags unchanged (confirm
      // against the Intel SDM).
 9082 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9083   match(Set dst (XorL dst imm));
 9084   format %{ "NOT    $dst.lo\n\t"
 9085             "NOT    $dst.hi" %}
 9086   ins_encode %{
 9087      __ notl($dst$$Register);
 9088      __ notl(HIGH_FROM_LOW($dst$$Register));
 9089   %}
 9090   ins_pipe( ialu_reg_long );
 9091 %}
 9092 
 9093 // Xor Long Register with Immediate
      // General long immediate (immL) variant: XOR is applied to the low and the
      // high word separately, each with the matching half of the constant.
 9094 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9095   match(Set dst (XorL dst src));
 9096   effect(KILL cr);
 9097   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9098             "XOR    $dst.hi,$src.hi" %}
 9099   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9100   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9101   ins_pipe( ialu_reg_long );
 9102 %}
 9103 
 9104 // Xor Long Register with Memory
      // Folds the LoadL into the XOR: low word from $mem, high word from $mem+4
      // (see format). Cost 125.
 9105 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9106   match(Set dst (XorL dst (LoadL mem)));
 9107   effect(KILL cr);
 9108   ins_cost(125);
 9109   format %{ "XOR    $dst.lo,$mem\n\t"
 9110             "XOR    $dst.hi,$mem+4" %}
 9111   opcode(0x33,0x33);
 9112   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 9113   ins_pipe( ialu_reg_long_mem );
 9114 %}
 9115 
 9116 // Shift Left Long by 1
      // Only selected when UseNewLongLShift is set. The shift is done by adding
      // the value to itself: ADD doubles the low word and ADC doubles the high
      // word plus the carry. Cost 100, below the generic 1-31 rule (200).
 9117 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9118   predicate(UseNewLongLShift);
 9119   match(Set dst (LShiftL dst cnt));
 9120   effect(KILL cr);
 9121   ins_cost(100);
 9122   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9123             "ADC    $dst.hi,$dst.hi" %}
 9124   ins_encode %{
 9125     __ addl($dst$$Register,$dst$$Register);
 9126     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9127   %}
 9128   ins_pipe( ialu_reg_long );
 9129 %}
 9130 
 9131 // Shift Left Long by 2
      // Only selected when UseNewLongLShift is set. Emits the ADD/ADC doubling
      // pair twice, i.e. two consecutive shifts by one.
 9132 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9133   predicate(UseNewLongLShift);
 9134   match(Set dst (LShiftL dst cnt));
 9135   effect(KILL cr);
 9136   ins_cost(100);
 9137   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9138             "ADC    $dst.hi,$dst.hi\n\t"
 9139             "ADD    $dst.lo,$dst.lo\n\t"
 9140             "ADC    $dst.hi,$dst.hi" %}
 9141   ins_encode %{
 9142     __ addl($dst$$Register,$dst$$Register);
 9143     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9144     __ addl($dst$$Register,$dst$$Register);
 9145     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9146   %}
 9147   ins_pipe( ialu_reg_long );
 9148 %}
 9149 
 9150 // Shift Left Long by 3
      // Only selected when UseNewLongLShift is set. Emits the ADD/ADC doubling
      // pair three times, i.e. three consecutive shifts by one.
 9151 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9152   predicate(UseNewLongLShift);
 9153   match(Set dst (LShiftL dst cnt));
 9154   effect(KILL cr);
 9155   ins_cost(100);
 9156   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9157             "ADC    $dst.hi,$dst.hi\n\t"
 9158             "ADD    $dst.lo,$dst.lo\n\t"
 9159             "ADC    $dst.hi,$dst.hi\n\t"
 9160             "ADD    $dst.lo,$dst.lo\n\t"
 9161             "ADC    $dst.hi,$dst.hi" %}
 9162   ins_encode %{
 9163     __ addl($dst$$Register,$dst$$Register);
 9164     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9165     __ addl($dst$$Register,$dst$$Register);
 9166     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9167     __ addl($dst$$Register,$dst$$Register);
 9168     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9169   %}
 9170   ins_pipe( ialu_reg_long );
 9171 %}
 9172 
 9173 // Shift Left Long by 1-31
      // Constant shift below 32: SHLD moves bits from the low word into the
      // high word, then SHL shifts the low word (see format). The encoding is
      // shared with the right-shift rules through move_long_small_shift and is
      // selected by the opcode() values.
 9174 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9175   match(Set dst (LShiftL dst cnt));
 9176   effect(KILL cr);
 9177   ins_cost(200);
 9178   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9179             "SHL    $dst.lo,$cnt" %}
 9180   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9181   ins_encode( move_long_small_shift(dst,cnt) );
 9182   ins_pipe( ialu_reg_long );
 9183 %}
 9184 
 9185 // Shift Left Long by 32-63
      // Constant shift of 32 or more: the low word is moved into the high word,
      // shifted left by cnt-32, and the low word is cleared (see format).
 9186 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9187   match(Set dst (LShiftL dst cnt));
 9188   effect(KILL cr);
 9189   ins_cost(300);
 9190   format %{ "MOV    $dst.hi,$dst.lo\n"
 9191           "\tSHL    $dst.hi,$cnt-32\n"
 9192           "\tXOR    $dst.lo,$dst.lo" %}
 9193   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9194   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9195   ins_pipe( ialu_reg_long );
 9196 %}
 9197 
 9198 // Shift Left Long by variable
      // The shift count must be in the eCXRegI operand class. As the format
      // shows, bit 5 of the count (value 32) is tested first: when set, the low
      // word is moved to the high word and the low word is cleared; the
      // SHLD/SHL pair then runs in either case. A fixed size of 17 bytes is
      // declared, so the shift_left_long enc_class must emit exactly that.
 9199 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9200   match(Set dst (LShiftL dst shift));
 9201   effect(KILL cr);
 9202   ins_cost(500+200);
 9203   size(17);
 9204   format %{ "TEST   $shift,32\n\t"
 9205             "JEQ,s  small\n\t"
 9206             "MOV    $dst.hi,$dst.lo\n\t"
 9207             "XOR    $dst.lo,$dst.lo\n"
 9208     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9209             "SHL    $dst.lo,$shift" %}
 9210   ins_encode( shift_left_long( dst, shift ) );
 9211   ins_pipe( pipe_slow );
 9212 %}
 9213 
 9214 // Shift Right Long by 1-31
      // Unsigned (logical) right shift, matching URShiftL, by a constant below
      // 32: SHRD moves bits from the high word into the low word, then SHR
      // shifts the high word (see format).
 9215 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9216   match(Set dst (URShiftL dst cnt));
 9217   effect(KILL cr);
 9218   ins_cost(200);
 9219   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9220             "SHR    $dst.hi,$cnt" %}
 9221   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9222   ins_encode( move_long_small_shift(dst,cnt) );
 9223   ins_pipe( ialu_reg_long );
 9224 %}
 9225 
 9226 // Shift Right Long by 32-63
      // Unsigned right shift (URShiftL) by a constant of 32 or more: the high
      // word is moved into the low word, shifted right by cnt-32, and the high
      // word is cleared (see format).
 9227 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9228   match(Set dst (URShiftL dst cnt));
 9229   effect(KILL cr);
 9230   ins_cost(300);
 9231   format %{ "MOV    $dst.lo,$dst.hi\n"
 9232           "\tSHR    $dst.lo,$cnt-32\n"
 9233           "\tXOR    $dst.hi,$dst.hi" %}
 9234   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9235   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9236   ins_pipe( ialu_reg_long );
 9237 %}
 9238 
 9239 // Shift Right Long by variable
      // Unsigned right shift (URShiftL) with the count in the eCXRegI operand
      // class. Bit 5 of the count (value 32) is tested first: when set, the
      // high word is moved to the low word and the high word is cleared; the
      // SHRD/SHR pair then runs in either case. Fixed size of 17 bytes.
 9240 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9241   match(Set dst (URShiftL dst shift));
 9242   effect(KILL cr);
 9243   ins_cost(600);
 9244   size(17);
 9245   format %{ "TEST   $shift,32\n\t"
 9246             "JEQ,s  small\n\t"
 9247             "MOV    $dst.lo,$dst.hi\n\t"
 9248             "XOR    $dst.hi,$dst.hi\n"
 9249     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9250             "SHR    $dst.hi,$shift" %}
 9251   ins_encode( shift_right_long( dst, shift ) );
 9252   ins_pipe( pipe_slow );
 9253 %}
 9254 
 9255 // Shift Right arithmetic Long by 1-31
      // Signed right shift, matching RShiftL, by a constant below 32: SHRD
      // moves bits from the high word into the low word, then SAR shifts the
      // high word (see format). It differs from the unsigned rule only in the
      // second opcode() value (0x7 instead of 0x5).
 9256 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9257   match(Set dst (RShiftL dst cnt));
 9258   effect(KILL cr);
 9259   ins_cost(200);
 9260   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9261             "SAR    $dst.hi,$cnt" %}
 9262   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9263   ins_encode( move_long_small_shift(dst,cnt) );
 9264   ins_pipe( ialu_reg_long );
 9265 %}
 9266 
 9267 // Shift Right arithmetic Long by 32-63
      // Signed right shift (RShiftL) by a constant of 32 or more: the high word
      // is moved into the low word and shifted by cnt-32 with SAR; the high
      // word is then shifted by 31 with SAR instead of being cleared (see
      // format). Uses move_long_big_shift_sign rather than the _clr encoder.
 9268 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9269   match(Set dst (RShiftL dst cnt));
 9270   effect(KILL cr);
 9271   ins_cost(300);
 9272   format %{ "MOV    $dst.lo,$dst.hi\n"
 9273           "\tSAR    $dst.lo,$cnt-32\n"
 9274           "\tSAR    $dst.hi,31" %}
 9275   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9276   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9277   ins_pipe( ialu_reg_long );
 9278 %}
 9279 
 9280 // Shift Right arithmetic Long by variable
      // Signed right shift (RShiftL) with the count in the eCXRegI operand
      // class. Bit 5 of the count (value 32) is tested first: when set, the
      // high word is moved to the low word and the high word is shifted by 31
      // with SAR; the SHRD/SAR pair then runs in either case. The declared size
      // is 18 bytes, one more than the unsigned variable-shift rule.
 9281 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9282   match(Set dst (RShiftL dst shift));
 9283   effect(KILL cr);
 9284   ins_cost(600);
 9285   size(18);
 9286   format %{ "TEST   $shift,32\n\t"
 9287             "JEQ,s  small\n\t"
 9288             "MOV    $dst.lo,$dst.hi\n\t"
 9289             "SAR    $dst.hi,31\n"
 9290     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9291             "SAR    $dst.hi,$shift" %}
 9292   ins_encode( shift_right_arith_long( dst, shift ) );
 9293   ins_pipe( pipe_slow );
 9294 %}
 9295 
 9296 
 9297 //----------Double Instructions------------------------------------------------
 9298 // Double Math
 9299 
 9300 // Compare & branch
 9301 
 9302 // P6 version of float compare, sets condition codes in EFLAGS
      // x87 double compare (CmpD) producing an eFlagsRegU result. Selected only
      // when cmov is supported and UseSSE <= 1. After FUCOMIP the format shows a
      // fixup for the unordered (NaN) case that goes through AH and SAHF, which
      // is why the eAXRegI operand is declared killed. The fixup itself lives
      // in the cmpF_P6_fixup enc_class outside this chunk.
 9303 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9304   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9305   match(Set cr (CmpD src1 src2));
 9306   effect(KILL rax);
 9307   ins_cost(150);
 9308   format %{ "FLD    $src1\n\t"
 9309             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9310             "JNP    exit\n\t"
 9311             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9312             "SAHF\n"
 9313      "exit:\tNOP               // avoid branch to branch" %}
 9314   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9315   ins_encode( Push_Reg_DPR(src1),
 9316               OpcP, RegOpc(src2),
 9317               cmpF_P6_fixup );
 9318   ins_pipe( pipe_slow );
 9319 %}
 9320 
 9321 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
        // Variant of cmpDPR_cc_P6 whose result is an eFlagsRegUCF: the same
        // FLD + FUCOMIP pair is emitted, but without the NaN fixup and therefore
        // without an EAX kill.
 9322   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9323   match(Set cr (CmpD src1 src2));
 9324   ins_cost(150);
 9325   format %{ "FLD    $src1\n\t"
 9326             "FUCOMIP ST,$src2  // P6 instruction" %}
 9327   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9328   ins_encode( Push_Reg_DPR(src1),
 9329               OpcP, RegOpc(src2));
 9330   ins_pipe( pipe_slow );
 9331 %}
 9332 
 9333 // Compare & branch
      // x87 double compare (CmpD) for UseSSE <= 1 without the cmov requirement
      // of the P6 rules above; costed higher (200 vs 150). The FPU status word
      // is stored to AX, the unordered case is patched to look like "less
      // than", and SAHF copies AH into the flags (see format). EAX is killed.
 9334 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9335   predicate(UseSSE<=1);
 9336   match(Set cr (CmpD src1 src2));
 9337   effect(KILL rax);
 9338   ins_cost(200);
 9339   format %{ "FLD    $src1\n\t"
 9340             "FCOMp  $src2\n\t"
 9341             "FNSTSW AX\n\t"
 9342             "TEST   AX,0x400\n\t"
 9343             "JZ,s   flags\n\t"
 9344             "MOV    AH,1\t# unordered treat as LT\n"
 9345     "flags:\tSAHF" %}
 9346   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9347   ins_encode( Push_Reg_DPR(src1),
 9348               OpcP, RegOpc(src2),
 9349               fpu_flags);
 9350   ins_pipe( pipe_slow );
 9351 %}
 9352 
 9353 // Compare vs zero into -1,0,1
      // Three-way x87 compare (CmpD3) of src1 against the constant zero operand
      // (immDPR0); the integer result goes to dst. Flags and EAX are killed.
 9354 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9355   predicate(UseSSE<=1);
 9356   match(Set dst (CmpD3 src1 zero));
 9357   effect(KILL cr, KILL rax);
 9358   ins_cost(280);
 9359   format %{ "FTSTD  $dst,$src1" %}
 9360   opcode(0xE4, 0xD9);
        // OpcS is listed before OpcP, so the second opcode() value is presumably
        // emitted first (bytes D9 E4) - confirm against the enc_class definitions.
 9361   ins_encode( Push_Reg_DPR(src1),
 9362               OpcS, OpcP, PopFPU,
 9363               CmpF_Result(dst));
 9364   ins_pipe( pipe_slow );
 9365 %}
 9366 
 9367 // Compare into -1,0,1
      // Three-way x87 compare (CmpD3) of two FPU registers; the integer result
      // is produced in dst by the CmpF_Result enc_class (defined outside this
      // chunk). Flags and EAX are killed.
 9368 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9369   predicate(UseSSE<=1);
 9370   match(Set dst (CmpD3 src1 src2));
 9371   effect(KILL cr, KILL rax);
 9372   ins_cost(300);
 9373   format %{ "FCMPD  $dst,$src1,$src2" %}
 9374   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9375   ins_encode( Push_Reg_DPR(src1),
 9376               OpcP, RegOpc(src2),
 9377               CmpF_Result(dst));
 9378   ins_pipe( pipe_slow );
 9379 %}
 9380 
 9381 // float compare and set condition codes in EFLAGS by XMM regs
      // SSE2 double compare (CmpD, UseSSE >= 2) producing an eFlagsRegU result.
      // UCOMISD is followed by emit_cmpfp_fixup; per the format, that fixup
      // rewrites the flags through PUSHF/AND/POPF when parity signals a NaN.
 9382 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9383   predicate(UseSSE>=2);
 9384   match(Set cr (CmpD src1 src2));
 9385   ins_cost(145);
 9386   format %{ "UCOMISD $src1,$src2\n\t"
 9387             "JNP,s   exit\n\t"
 9388             "PUSHF\t# saw NaN, set CF\n\t"
 9389             "AND     [rsp], #0xffffff2b\n\t"
 9390             "POPF\n"
 9391     "exit:" %}
 9392   ins_encode %{
 9393     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9394     emit_cmpfp_fixup(masm);
 9395   %}
 9396   ins_pipe( pipe_slow );
 9397 %}
 9398 
 9399 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
        // Variant of cmpD_cc whose result is an eFlagsRegUCF: a bare UCOMISD with
        // no NaN fixup, costed lower (100 vs 145).
 9400   predicate(UseSSE>=2);
 9401   match(Set cr (CmpD src1 src2));
 9402   ins_cost(100);
 9403   format %{ "UCOMISD $src1,$src2" %}
 9404   ins_encode %{
 9405     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9406   %}
 9407   ins_pipe( pipe_slow );
 9408 %}
 9409 
 9410 // float compare and set condition codes in EFLAGS by XMM regs
      // Memory form of cmpD_cc: the second operand is a folded LoadD, so
      // UCOMISD reads it directly from memory. The same emit_cmpfp_fixup call
      // follows the compare.
 9411 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9412   predicate(UseSSE>=2);
 9413   match(Set cr (CmpD src1 (LoadD src2)));
 9414   ins_cost(145);
 9415   format %{ "UCOMISD $src1,$src2\n\t"
 9416             "JNP,s   exit\n\t"
 9417             "PUSHF\t# saw NaN, set CF\n\t"
 9418             "AND     [rsp], #0xffffff2b\n\t"
 9419             "POPF\n"
 9420     "exit:" %}
 9421   ins_encode %{
 9422     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9423     emit_cmpfp_fixup(masm);
 9424   %}
 9425   ins_pipe( pipe_slow );
 9426 %}
 9427 
 9428 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
        // Memory form of cmpD_ccCF: a bare UCOMISD against a folded LoadD, with
        // an eFlagsRegUCF result and no NaN fixup.
 9429   predicate(UseSSE>=2);
 9430   match(Set cr (CmpD src1 (LoadD src2)));
 9431   ins_cost(100);
 9432   format %{ "UCOMISD $src1,$src2" %}
 9433   ins_encode %{
 9434     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9435   %}
 9436   ins_pipe( pipe_slow );
 9437 %}
 9438 
 9439 // Compare into -1,0,1 in XMM
      // Three-way SSE2 compare (CmpD3, UseSSE >= 2). UCOMISD sets the flags and
      // emit_cmpfp3 turns them into the integer result in dst. Per the format,
      // dst starts at -1 and is kept for the parity (unordered) and below
      // cases; otherwise SETNE/MOVZB produce 0 or 1. dst uses the xRegI operand
      // class and the flags are killed.
 9440 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9441   predicate(UseSSE>=2);
 9442   match(Set dst (CmpD3 src1 src2));
 9443   effect(KILL cr);
 9444   ins_cost(255);
 9445   format %{ "UCOMISD $src1, $src2\n\t"
 9446             "MOV     $dst, #-1\n\t"
 9447             "JP,s    done\n\t"
 9448             "JB,s    done\n\t"
 9449             "SETNE   $dst\n\t"
 9450             "MOVZB   $dst, $dst\n"
 9451     "done:" %}
 9452   ins_encode %{
 9453     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9454     emit_cmpfp3(masm, $dst$$Register);
 9455   %}
 9456   ins_pipe( pipe_slow );
 9457 %}
 9458 
 9459 // Compare into -1,0,1 in XMM and memory
      // Memory form of cmpD_reg: the second operand is a folded LoadD read
      // directly by UCOMISD. Costed slightly higher (275 vs 255).
 9460 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9461   predicate(UseSSE>=2);
 9462   match(Set dst (CmpD3 src1 (LoadD src2)));
 9463   effect(KILL cr);
 9464   ins_cost(275);
 9465   format %{ "UCOMISD $src1, $src2\n\t"
 9466             "MOV     $dst, #-1\n\t"
 9467             "JP,s    done\n\t"
 9468             "JB,s    done\n\t"
 9469             "SETNE   $dst\n\t"
 9470             "MOVZB   $dst, $dst\n"
 9471     "done:" %}
 9472   ins_encode %{
 9473     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9474     emit_cmpfp3(masm, $dst$$Register);
 9475   %}
 9476   ins_pipe( pipe_slow );
 9477 %}
 9478 
 9479 
 9480 instruct subDPR_reg(regDPR dst, regDPR src) %{
        // x87 double subtract (SubD) for UseSSE <= 1, updating dst in place: src
        // is pushed onto the FPU stack and a popping subtract combines it into
        // dst (see format).
 9481   predicate (UseSSE <=1);
 9482   match(Set dst (SubD dst src));
 9483
 9484   format %{ "FLD    $src\n\t"
 9485             "DSUBp  $dst,ST" %}
 9486   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9487   ins_cost(150);
 9488   ins_encode( Push_Reg_DPR(src),
 9489               OpcP, RegOpc(dst) );
 9490   ins_pipe( fpu_reg_reg );
 9491 %}
 9492 
 9493 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
        // Matches RoundDouble(SubD src1 src2): the difference is computed on the
        // FPU stack and stored with a popping store to the stack slot dst. The
        // format tags that store as the step that performs the double rounding.
 9494   predicate (UseSSE <=1);
 9495   match(Set dst (RoundDouble (SubD src1 src2)));
 9496   ins_cost(250);
 9497
 9498   format %{ "FLD    $src2\n\t"
 9499             "DSUB   ST,$src1\n\t"
 9500             "FSTP_D $dst\t# D-round" %}
 9501   opcode(0xD8, 0x5);
 9502   ins_encode( Push_Reg_DPR(src2),
 9503               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9504   ins_pipe( fpu_mem_reg_reg );
 9505 %}
 9506 
 9507 
 9508 instruct subDPR_reg_mem(regDPR dst, memory src) %{
        // Memory form of subDPR_reg: the folded LoadD is pushed onto the FPU stack
        // using the tertiary opcode with reg-opcode field 0, then the same
        // popping subtract into dst is emitted.
 9509   predicate (UseSSE <=1);
 9510   match(Set dst (SubD dst (LoadD src)));
 9511   ins_cost(150);
 9512
 9513   format %{ "FLD    $src\n\t"
 9514             "DSUBp  $dst,ST" %}
 9515   opcode(0xDE, 0x5, 0xDD); /* DE E8+i */  /* LoadD  DD /0 */
 9516   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
 9517               OpcP, RegOpc(dst), ClearInstMark );
 9518   ins_pipe( fpu_reg_mem );
 9519 %}
 9520 
 9521 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
        // x87 absolute value (AbsD) for UseSSE <= 1. Both operands use the regDPR1
        // class, so the single FABS needs no explicit register operand.
        // OpcS is listed before OpcP, so the second opcode() value is presumably
        // emitted first (bytes D9 E1) - confirm against the enc_class definitions.
 9522   predicate (UseSSE<=1);
 9523   match(Set dst (AbsD src));
 9524   ins_cost(100);
 9525   format %{ "FABS" %}
 9526   opcode(0xE1, 0xD9);
 9527   ins_encode( OpcS, OpcP );
 9528   ins_pipe( fpu_reg_reg );
 9529 %}
 9530 
 9531 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
        // x87 negate (NegD) for UseSSE <= 1. Both operands use the regDPR1 class,
        // so the single FCHS needs no explicit register operand.
        // OpcS is listed before OpcP, so the second opcode() value is presumably
        // emitted first (bytes D9 E0) - confirm against the enc_class definitions.
 9532   predicate(UseSSE<=1);
 9533   match(Set dst (NegD src));
 9534   ins_cost(100);
 9535   format %{ "FCHS" %}
 9536   opcode(0xE0, 0xD9);
 9537   ins_encode( OpcS, OpcP );
 9538   ins_pipe( fpu_reg_reg );
 9539 %}
 9540 
 9541 instruct addDPR_reg(regDPR dst, regDPR src) %{
        // x87 double add (AddD) for UseSSE <= 1, updating dst in place: src is
        // pushed onto the FPU stack and added into dst. A fixed encoded size of
        // 4 bytes is declared.
        // NOTE(review): the format prints "DADD" while the sibling rules that
        // share primary opcode 0xDE print a popping mnemonic (DADDp/DSUBp/DMULp)
        // - presumably only a listing inconsistency; confirm.
 9542   predicate(UseSSE<=1);
 9543   match(Set dst (AddD dst src));
 9544   format %{ "FLD    $src\n\t"
 9545             "DADD   $dst,ST" %}
 9546   size(4);
 9547   ins_cost(150);
 9548   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9549   ins_encode( Push_Reg_DPR(src),
 9550               OpcP, RegOpc(dst) );
 9551   ins_pipe( fpu_reg_reg );
 9552 %}
 9553 
 9554 
 9555 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
        // Matches RoundDouble(AddD src1 src2): the sum is computed on the FPU
        // stack and stored with a popping store to the stack slot dst. The
        // format tags that store as the step that performs the double rounding.
 9556   predicate(UseSSE<=1);
 9557   match(Set dst (RoundDouble (AddD src1 src2)));
 9558   ins_cost(250);
 9559
 9560   format %{ "FLD    $src2\n\t"
 9561             "DADD   ST,$src1\n\t"
 9562             "FSTP_D $dst\t# D-round" %}
 9563   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9564   ins_encode( Push_Reg_DPR(src2),
 9565               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9566   ins_pipe( fpu_mem_reg_reg );
 9567 %}
 9568 
 9569 
 9570 instruct addDPR_reg_mem(regDPR dst, memory src) %{
        // Memory form of addDPR_reg: the folded LoadD is pushed onto the FPU stack
        // using the tertiary opcode with reg-opcode field 0, then a popping add
        // into dst is emitted.
 9571   predicate(UseSSE<=1);
 9572   match(Set dst (AddD dst (LoadD src)));
 9573   ins_cost(150);
 9574
 9575   format %{ "FLD    $src\n\t"
 9576             "DADDp  $dst,ST" %}
 9577   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9578   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
 9579               OpcP, RegOpc(dst), ClearInstMark );
 9580   ins_pipe( fpu_reg_mem );
 9581 %}
 9582 
 9583 // add-to-memory
      // Read-modify-write rule: matches a StoreD back to the same address of
      // RoundDouble(AddD (LoadD dst) src). The value is loaded from memory, src
      // is added on the FPU stack, and the result is stored back to dst.
      // NOTE(review): the format prints "FST_D" while the store is encoded with
      // reg-opcode field 0x03; other rules in this section print FSTP_D for
      // their popping stores - confirm which mnemonic is intended.
 9584 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9585   predicate(UseSSE<=1);
 9586   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9587   ins_cost(150);
 9588
 9589   format %{ "FLD_D  $dst\n\t"
 9590             "DADD   ST,$src\n\t"
 9591             "FST_D  $dst" %}
 9592   opcode(0xDD, 0x0);
        // The encoding spells out its opcode bytes explicitly instead of using
        // OpcP/OpcS, and marks the load+add and the store as two separate
        // SetInstMark / ClearInstMark regions.
 9593   ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst),
 9594               Opcode(0xD8), RegOpc(src), ClearInstMark,
 9595               SetInstMark,
 9596               Opcode(0xDD), RMopc_Mem(0x03,dst),
 9597               ClearInstMark);
 9598   ins_pipe( fpu_reg_mem );
 9599 %}
 9600 
 9601 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
        // Adds the constant operand immDPR1 to dst: FLD1 pushes the constant and a
        // popping add folds it into dst, so no constant-table load is needed.
        // Cost 125, below the general constant rule addDPR_reg_imm (200).
 9602   predicate(UseSSE<=1);
 9603   match(Set dst (AddD dst con));
 9604   ins_cost(125);
 9605   format %{ "FLD1\n\t"
 9606             "DADDp  $dst,ST" %}
 9607   ins_encode %{
 9608     __ fld1();
 9609     __ faddp($dst$$reg);
 9610   %}
 9611   ins_pipe(fpu_reg);
 9612 %}
 9613 
 9614 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
        // Adds a general double constant to dst by loading it from the constant
        // table and doing a popping add. The predicate inspects the constant
        // leaf (_kids[1]) and rejects 0.0 and 1.0; the 1.0 case is handled by
        // addDPR_reg_imm1.
 9615   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9616   match(Set dst (AddD dst con));
 9617   ins_cost(200);
 9618   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9619             "DADDp  $dst,ST" %}
 9620   ins_encode %{
 9621     __ fld_d($constantaddress($con));
 9622     __ faddp($dst$$reg);
 9623   %}
 9624   ins_pipe(fpu_reg_mem);
 9625 %}
 9626 
 9627 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9628   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9629   match(Set dst (RoundDouble (AddD src con)));
 9630   ins_cost(200);
 9631   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9632             "DADD   ST,$src\n\t"
 9633             "FSTP_D $dst\t# D-round" %}
 9634   ins_encode %{
 9635     __ fld_d($constantaddress($con));
 9636     __ fadd($src$$reg);
 9637     __ fstp_d(Address(rsp, $dst$$disp));
 9638   %}
 9639   ins_pipe(fpu_mem_reg_con);
 9640 %}
 9641 
 9642 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9643   predicate(UseSSE<=1);
 9644   match(Set dst (MulD dst src));
 9645   format %{ "FLD    $src\n\t"
 9646             "DMULp  $dst,ST" %}
 9647   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9648   ins_cost(150);
 9649   ins_encode( Push_Reg_DPR(src),
 9650               OpcP, RegOpc(dst) );
 9651   ins_pipe( fpu_reg_reg );
 9652 %}
 9653 
 9654 // Strict FP instruction biases argument before multiply then
 9655 // biases result to avoid double rounding of subnormals.
 9656 //
 9657 // scale arg1 by multiplying arg1 by 2^(-15360)
 9658 // load arg2
 9659 // multiply scaled arg1 by arg2
 9660 // rescale product by 2^(15360)
 9661 //
 9662 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9663   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9664   match(Set dst (MulD dst src));
 9665   ins_cost(1);   // Select this instruction for all FP double multiplies
 9666 
 9667   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9668             "DMULp  $dst,ST\n\t"
 9669             "FLD    $src\n\t"
 9670             "DMULp  $dst,ST\n\t"
 9671             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9672             "DMULp  $dst,ST\n\t" %}
 9673   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9674   ins_encode( strictfp_bias1(dst),
 9675               Push_Reg_DPR(src),
 9676               OpcP, RegOpc(dst),
 9677               strictfp_bias2(dst) );
 9678   ins_pipe( fpu_reg_reg );
 9679 %}
 9680 
 9681 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9682   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9683   match(Set dst (MulD dst con));
 9684   ins_cost(200);
 9685   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9686             "DMULp  $dst,ST" %}
 9687   ins_encode %{
 9688     __ fld_d($constantaddress($con));
 9689     __ fmulp($dst$$reg);
 9690   %}
 9691   ins_pipe(fpu_reg_mem);
 9692 %}
 9693 
 9694 
 9695 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9696   predicate( UseSSE<=1 );
 9697   match(Set dst (MulD dst (LoadD src)));
 9698   ins_cost(200);
 9699   format %{ "FLD_D  $src\n\t"
 9700             "DMULp  $dst,ST" %}
 9701   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9702   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
 9703               OpcP, RegOpc(dst), ClearInstMark );
 9704   ins_pipe( fpu_reg_mem );
 9705 %}
 9706 
 9707 //
 9708 // Cisc-alternate to reg-reg multiply
 9709 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9710   predicate( UseSSE<=1 );
 9711   match(Set dst (MulD src (LoadD mem)));
 9712   ins_cost(250);
 9713   format %{ "FLD_D  $mem\n\t"
 9714             "DMUL   ST,$src\n\t"
 9715             "FSTP_D $dst" %}
 9716   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9717   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem),
 9718               OpcReg_FPR(src),
 9719               Pop_Reg_DPR(dst), ClearInstMark );
 9720   ins_pipe( fpu_reg_reg_mem );
 9721 %}
 9722 
 9723 
 9724 // MACRO3 -- addDPR a mulDPR
 9725 // This instruction is a '2-address' instruction in that the result goes
 9726 // back to src2.  This eliminates a move from the macro; possibly the
 9727 // register allocator will have to add it back (and maybe not).
 9728 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9729   predicate( UseSSE<=1 );
 9730   match(Set src2 (AddD (MulD src0 src1) src2));
 9731   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9732             "DMUL   ST,$src1\n\t"
 9733             "DADDp  $src2,ST" %}
 9734   ins_cost(250);
 9735   opcode(0xDD); /* LoadD DD /0 */
 9736   ins_encode( Push_Reg_FPR(src0),
 9737               FMul_ST_reg(src1),
 9738               FAddP_reg_ST(src2) );
 9739   ins_pipe( fpu_reg_reg_reg );
 9740 %}
 9741 
 9742 
 9743 // MACRO3 -- subDPR a mulDPR
 9744 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9745   predicate( UseSSE<=1 );
 9746   match(Set src2 (SubD (MulD src0 src1) src2));
 9747   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9748             "DMUL   ST,$src1\n\t"
 9749             "DSUBRp $src2,ST" %}
 9750   ins_cost(250);
 9751   ins_encode( Push_Reg_FPR(src0),
 9752               FMul_ST_reg(src1),
 9753               Opcode(0xDE), Opc_plus(0xE0,src2));
 9754   ins_pipe( fpu_reg_reg_reg );
 9755 %}
 9756 
 9757 
 9758 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9759   predicate( UseSSE<=1 );
 9760   match(Set dst (DivD dst src));
 9761 
 9762   format %{ "FLD    $src\n\t"
 9763             "FDIVp  $dst,ST" %}
 9764   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9765   ins_cost(150);
 9766   ins_encode( Push_Reg_DPR(src),
 9767               OpcP, RegOpc(dst) );
 9768   ins_pipe( fpu_reg_reg );
 9769 %}
 9770 
 9771 // Strict FP instruction biases argument before division then
 9772 // biases result, to avoid double rounding of subnormals.
 9773 //
 9774 // scale dividend by multiplying dividend by 2^(-15360)
 9775 // load divisor
 9776 // divide scaled dividend by divisor
 9777 // rescale quotient by 2^(15360)
 9778 //
 9779 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9780   predicate (UseSSE<=1);
 9781   match(Set dst (DivD dst src));
 9782   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9783   ins_cost(01);
 9784 
 9785   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9786             "DMULp  $dst,ST\n\t"
 9787             "FLD    $src\n\t"
 9788             "FDIVp  $dst,ST\n\t"
 9789             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9790             "DMULp  $dst,ST\n\t" %}
 9791   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9792   ins_encode( strictfp_bias1(dst),
 9793               Push_Reg_DPR(src),
 9794               OpcP, RegOpc(dst),
 9795               strictfp_bias2(dst) );
 9796   ins_pipe( fpu_reg_reg );
 9797 %}
 9798 
 9799 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9800   predicate(UseSSE<=1);
 9801   match(Set dst (ModD dst src));
 9802   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9803 
 9804   format %{ "DMOD   $dst,$src" %}
 9805   ins_cost(250);
 9806   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9807               emitModDPR(),
 9808               Push_Result_Mod_DPR(src),
 9809               Pop_Reg_DPR(dst));
 9810   ins_pipe( pipe_slow );
 9811 %}
 9812 
 9813 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9814   predicate(UseSSE>=2);
 9815   match(Set dst (ModD src0 src1));
 9816   effect(KILL rax, KILL cr);
 9817 
 9818   format %{ "SUB    ESP,8\t # DMOD\n"
 9819           "\tMOVSD  [ESP+0],$src1\n"
 9820           "\tFLD_D  [ESP+0]\n"
 9821           "\tMOVSD  [ESP+0],$src0\n"
 9822           "\tFLD_D  [ESP+0]\n"
 9823      "loop:\tFPREM\n"
 9824           "\tFWAIT\n"
 9825           "\tFNSTSW AX\n"
 9826           "\tSAHF\n"
 9827           "\tJP     loop\n"
 9828           "\tFSTP_D [ESP+0]\n"
 9829           "\tMOVSD  $dst,[ESP+0]\n"
 9830           "\tADD    ESP,8\n"
 9831           "\tFSTP   ST0\t # Restore FPU Stack"
 9832     %}
 9833   ins_cost(250);
 9834   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9835   ins_pipe( pipe_slow );
 9836 %}
 9837 
 9838 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9839   predicate (UseSSE<=1);
 9840   match(Set dst(AtanD dst src));
 9841   format %{ "DATA   $dst,$src" %}
 9842   opcode(0xD9, 0xF3);
 9843   ins_encode( Push_Reg_DPR(src),
 9844               OpcP, OpcS, RegOpc(dst) );
 9845   ins_pipe( pipe_slow );
 9846 %}
 9847 
 9848 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
 9849   predicate (UseSSE>=2);
 9850   match(Set dst(AtanD dst src));
 9851   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
 9852   format %{ "DATA   $dst,$src" %}
 9853   opcode(0xD9, 0xF3);
 9854   ins_encode( Push_SrcD(src),
 9855               OpcP, OpcS, Push_ResultD(dst) );
 9856   ins_pipe( pipe_slow );
 9857 %}
 9858 
 9859 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
 9860   predicate (UseSSE<=1);
 9861   match(Set dst (SqrtD src));
 9862   format %{ "DSQRT  $dst,$src" %}
 9863   opcode(0xFA, 0xD9);
 9864   ins_encode( Push_Reg_DPR(src),
 9865               OpcS, OpcP, Pop_Reg_DPR(dst) );
 9866   ins_pipe( pipe_slow );
 9867 %}
 9868 
 9869 //-------------Float Instructions-------------------------------
 9870 // Float Math
 9871 
 9872 // Code for float compare:
 9873 //     fcompp();
 9874 //     fwait(); fnstsw_ax();
 9875 //     sahf();
 9876 //     movl(dst, unordered_result);
 9877 //     jcc(Assembler::parity, exit);
 9878 //     movl(dst, less_result);
 9879 //     jcc(Assembler::below, exit);
 9880 //     movl(dst, equal_result);
 9881 //     jcc(Assembler::equal, exit);
 9882 //     movl(dst, greater_result);
 9883 //   exit:
 9884 
 9885 // P6 version of float compare, sets condition codes in EFLAGS
 9886 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
 9887   predicate(VM_Version::supports_cmov() && UseSSE == 0);
 9888   match(Set cr (CmpF src1 src2));
 9889   effect(KILL rax);
 9890   ins_cost(150);
 9891   format %{ "FLD    $src1\n\t"
 9892             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9893             "JNP    exit\n\t"
 9894             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
 9895             "SAHF\n"
 9896      "exit:\tNOP               // avoid branch to branch" %}
 9897   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9898   ins_encode( Push_Reg_DPR(src1),
 9899               OpcP, RegOpc(src2),
 9900               cmpF_P6_fixup );
 9901   ins_pipe( pipe_slow );
 9902 %}
 9903 
 9904 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
 9905   predicate(VM_Version::supports_cmov() && UseSSE == 0);
 9906   match(Set cr (CmpF src1 src2));
 9907   ins_cost(100);
 9908   format %{ "FLD    $src1\n\t"
 9909             "FUCOMIP ST,$src2  // P6 instruction" %}
 9910   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9911   ins_encode( Push_Reg_DPR(src1),
 9912               OpcP, RegOpc(src2));
 9913   ins_pipe( pipe_slow );
 9914 %}
 9915 
 9916 
 9917 // Compare & branch
 9918 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
 9919   predicate(UseSSE == 0);
 9920   match(Set cr (CmpF src1 src2));
 9921   effect(KILL rax);
 9922   ins_cost(200);
 9923   format %{ "FLD    $src1\n\t"
 9924             "FCOMp  $src2\n\t"
 9925             "FNSTSW AX\n\t"
 9926             "TEST   AX,0x400\n\t"
 9927             "JZ,s   flags\n\t"
 9928             "MOV    AH,1\t# unordered treat as LT\n"
 9929     "flags:\tSAHF" %}
 9930   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9931   ins_encode( Push_Reg_DPR(src1),
 9932               OpcP, RegOpc(src2),
 9933               fpu_flags);
 9934   ins_pipe( pipe_slow );
 9935 %}
 9936 
 9937 // Compare vs zero into -1,0,1
 9938 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9939   predicate(UseSSE == 0);
 9940   match(Set dst (CmpF3 src1 zero));
 9941   effect(KILL cr, KILL rax);
 9942   ins_cost(280);
 9943   format %{ "FTSTF  $dst,$src1" %}
 9944   opcode(0xE4, 0xD9);
 9945   ins_encode( Push_Reg_DPR(src1),
 9946               OpcS, OpcP, PopFPU,
 9947               CmpF_Result(dst));
 9948   ins_pipe( pipe_slow );
 9949 %}
 9950 
 9951 // Compare into -1,0,1
 9952 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
 9953   predicate(UseSSE == 0);
 9954   match(Set dst (CmpF3 src1 src2));
 9955   effect(KILL cr, KILL rax);
 9956   ins_cost(300);
 9957   format %{ "FCMPF  $dst,$src1,$src2" %}
 9958   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9959   ins_encode( Push_Reg_DPR(src1),
 9960               OpcP, RegOpc(src2),
 9961               CmpF_Result(dst));
 9962   ins_pipe( pipe_slow );
 9963 %}
 9964 
 9965 // float compare and set condition codes in EFLAGS by XMM regs
 9966 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
 9967   predicate(UseSSE>=1);
 9968   match(Set cr (CmpF src1 src2));
 9969   ins_cost(145);
 9970   format %{ "UCOMISS $src1,$src2\n\t"
 9971             "JNP,s   exit\n\t"
 9972             "PUSHF\t# saw NaN, set CF\n\t"
 9973             "AND     [rsp], #0xffffff2b\n\t"
 9974             "POPF\n"
 9975     "exit:" %}
 9976   ins_encode %{
 9977     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
 9978     emit_cmpfp_fixup(masm);
 9979   %}
 9980   ins_pipe( pipe_slow );
 9981 %}
 9982 
 9983 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
 9984   predicate(UseSSE>=1);
 9985   match(Set cr (CmpF src1 src2));
 9986   ins_cost(100);
 9987   format %{ "UCOMISS $src1,$src2" %}
 9988   ins_encode %{
 9989     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
 9990   %}
 9991   ins_pipe( pipe_slow );
 9992 %}
 9993 
 9994 // float compare and set condition codes in EFLAGS by XMM regs
 9995 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
 9996   predicate(UseSSE>=1);
 9997   match(Set cr (CmpF src1 (LoadF src2)));
 9998   ins_cost(165);
 9999   format %{ "UCOMISS $src1,$src2\n\t"
10000             "JNP,s   exit\n\t"
10001             "PUSHF\t# saw NaN, set CF\n\t"
10002             "AND     [rsp], #0xffffff2b\n\t"
10003             "POPF\n"
10004     "exit:" %}
10005   ins_encode %{
10006     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10007     emit_cmpfp_fixup(masm);
10008   %}
10009   ins_pipe( pipe_slow );
10010 %}
10011 
// Float compare of an XMM register against a float loaded from memory,
// producing an eFlagsRegUCF result; selected when UseSSE >= 1.
// Memory-operand form of cmpF_ccCF: a bare UCOMISS with no NaN fixup.
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE >= 1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}
10022 
// Three-way float compare of two XMM registers, selected when UseSSE >= 1:
//   dst = CmpF3(src1, src2), i.e. -1, 0 or 1
// Per the format text, dst is preset to -1 and kept for the parity
// (unordered) and below cases; otherwise it becomes 0 or 1.  EFLAGS is killed.
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE >= 1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV     $dst, #-1\n\t"
            "JP,s    done\n\t"
            "JB,s    done\n\t"
            "SETNE   $dst\n\t"
            "MOVZB   $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10042 
// Three-way float compare of an XMM register against a float loaded from
// memory, selected when UseSSE >= 1:
//   dst = CmpF3(src1, LoadF(src2)), i.e. -1, 0 or 1
// Memory-operand form of cmpF_reg.  EFLAGS is killed.
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE >= 1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV     $dst, #-1\n\t"
            "JP,s    done\n\t"
            "JB,s    done\n\t"
            "SETNE   $dst\n\t"
            "MOVZB   $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10062 
// Float subtract that spills its result to obtain 24-bit precision:
//   dst = src1 - src2, with dst a float stack slot
// Selected when UseSSE == 0 and the compilation asks for 24-bit instructions.
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE == 0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));
  format %{ "FSUB   $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4);  // D8 E0+i or D8 /4 mod==0x3 ;; result in TOS
  ins_encode(Push_Reg_FPR(src1),
             OpcReg_FPR(src2),
             Pop_Mem_FPR(dst));
  ins_pipe(fpu_mem_reg_reg);
%}
//
// Float subtract that does not round to 24 bits:
//   dst = dst - src
// Selected when UseSSE == 0 and 24-bit instructions are not requested.
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE == 0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));
  format %{ "FSUB   $dst,$src" %}
  opcode(0xDE, 0x5);  // DE E8+i or DE /5
  ins_encode(Push_Reg_FPR(src),
             OpcP, RegOpc(dst));
  ins_pipe(fpu_reg_reg);
%}
10087 
// Float add that spills its result to obtain 24-bit precision:
//   dst = src1 + src2, with dst a float stack slot
// Selected when UseSSE == 0 and the compilation asks for 24-bit instructions.
// Note that the encoding pushes src2 and adds src1 to it.
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE == 0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));
  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0);  // D8 C0+i
  ins_encode(Push_Reg_FPR(src2),
             OpcReg_FPR(src1),
             Pop_Mem_FPR(dst));
  ins_pipe(fpu_mem_reg_reg);
%}
//
// Float add that does not round to 24 bits:
//   dst = dst + src
// Selected when UseSSE == 0 and 24-bit instructions are not requested.
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE == 0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));
  format %{ "FLD    $src\n\t"
            "FADDp  $dst,ST" %}
  opcode(0xDE, 0x0);  // DE C0+i or DE /0
  ins_encode(Push_Reg_FPR(src),
             OpcP, RegOpc(dst));
  ins_pipe(fpu_reg_reg);
%}
10113 
// Float absolute value, selected when UseSSE == 0:
//   dst = AbsF(src)
// Both operands use the regFPR1 class, and the encoding is just the two
// opcode bytes (OpcS then OpcP) with no explicit register operand.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE == 0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode(OpcS, OpcP);
  ins_pipe(fpu_reg_reg);
%}
10123 
// Float negate, selected when UseSSE == 0:
//   dst = NegF(src)
// Both operands use the regFPR1 class, and the encoding is just the two
// opcode bytes (OpcS then OpcP) with no explicit register operand.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE == 0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode(OpcS, OpcP);
  ins_pipe(fpu_reg_reg);
%}
10133 
// Cisc-alternate to addFPR_reg that spills to obtain 24-bit precision:
//   dst = src1 + LoadF(src2), with dst a float stack slot
// Selected when UseSSE == 0 and the compilation asks for 24-bit instructions.
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE == 0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));
  format %{ "FLD    $src2\n\t"
            "FADD   ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9);  // add: D8 C0+i; tertiary is LoadF, D9 /0
  ins_encode(SetInstMark,
             Opcode(tertiary), RMopc_Mem(0x00, src2),
             OpcReg_FPR(src1),
             Pop_Mem_FPR(dst),
             ClearInstMark);
  ins_pipe(fpu_mem_reg_mem);
%}
//
// Cisc-alternate to addFPR_reg that does not round to 24 bits:
//   dst = dst + LoadF(src)
// Selected when UseSSE == 0 and 24-bit instructions are not requested.
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE == 0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));
  format %{ "FADD   $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9);  // add: DE C0+i or DE /0; tertiary is LoadF, D9 /0
  ins_encode(SetInstMark,
             Opcode(tertiary), RMopc_Mem(0x00, src),
             OpcP, RegOpc(dst),
             ClearInstMark);
  ins_pipe(fpu_reg_mem);
%}
10162 
// Following two instructions for _222_mpegaudio
// Float add of a memory operand and a register, spilled to obtain 24-bit
// precision:
//   dst = src1 + src2, with src1 in memory and dst a float stack slot
// Selected when UseSSE == 0 and the compilation asks for 24-bit instructions.
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE == 0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));
  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9);  // add: D8 C0+i; tertiary is LoadF, D9 /0
  ins_encode(SetInstMark,
             Opcode(tertiary), RMopc_Mem(0x00, src1),
             OpcReg_FPR(src2),
             Pop_Mem_FPR(dst),
             ClearInstMark);
  ins_pipe(fpu_mem_reg_mem);
%}
10176 
// Cisc-spill variant of the memory-memory float add, spilled to obtain
// 24-bit precision:
//   dst = src1 + LoadF(src2), with dst a float stack slot
// src2 is loaded first and src1 is then added from memory.
// Selected when UseSSE == 0 and the compilation asks for 24-bit instructions.
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE == 0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));
  format %{ "FADD   $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9);  // add: D8 /0; tertiary is LoadF, D9 /0
  ins_encode(SetInstMark,
             Opcode(tertiary), RMopc_Mem(0x00, src2),
             OpcP, RMopc_Mem(secondary, src1),
             Pop_Mem_FPR(dst),
             ClearInstMark);
  ins_pipe(fpu_mem_mem_mem);
%}
10191 
// Memory-memory float add, spilled to obtain 24-bit precision:
//   dst = src1 + src2, with both sources in memory and dst a float stack slot
// src2 is loaded first and src1 is then added from memory.
// Selected when UseSSE == 0 and the compilation asks for 24-bit instructions.
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE == 0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));
  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9);  // add: D8 /0; tertiary is LoadF, D9 /0
  ins_encode(SetInstMark,
             Opcode(tertiary), RMopc_Mem(0x00, src2),
             OpcP, RMopc_Mem(secondary, src1),
             Pop_Mem_FPR(dst),
             ClearInstMark);
  ins_pipe(fpu_mem_mem_mem);
%}
10205 
10206 
// Float add of a constant-table immediate, spilled to obtain 24-bit
// precision:
//   dst = src + con, with dst a float stack slot
// Selected when UseSSE == 0 and the compilation asks for 24-bit instructions.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE == 0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);                // FLD ST(i-1)
    __ fadd_s($constantaddress($con));      // add the constant from memory
    __ fstp_s(Address(rsp, $dst$$disp));    // store to the stack slot and pop
  %}
  ins_pipe( fpu_mem_reg_con );
%}
//
// Float add of a constant-table immediate that does not round to 24 bits:
//   dst = src + con
// Selected when UseSSE == 0 and 24-bit instructions are not requested.
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE == 0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);              // FLD ST(i-1)
    __ fadd_s($constantaddress($con));    // add the constant from memory
    __ fstp_d($dst$$reg);                 // pop the sum into the dst register
  %}
  ins_pipe( fpu_reg_reg_con );
%}
10236 
// Float multiply that spills its result to obtain 24-bit precision:
//   dst = src1 * src2, with dst a float stack slot
// Selected when UseSSE == 0 and the compilation asks for 24-bit instructions.
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE == 0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));
  format %{ "FLD    $src1\n\t"
            "FMUL   $src2\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1);  // D8 C8+i or D8 /1 ;; result in TOS
  ins_encode(Push_Reg_FPR(src1),
             OpcReg_FPR(src2),
             Pop_Mem_FPR(dst));
  ins_pipe(fpu_mem_reg_reg);
%}
//
// Three-operand float multiply that does not round to 24 bits:
//   dst = src1 * src2
// Selected when UseSSE == 0 and 24-bit instructions are not requested.
// Note that the encoding pushes src2 and multiplies by src1, the reverse of
// the order printed by the format.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE == 0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));
  format %{ "FLD    $src1\n\t"
            "FMUL   $src2\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1);  // D8 C8+i
  ins_encode(Push_Reg_FPR(src2),
             OpcReg_FPR(src1),
             Pop_Reg_FPR(dst));
  ins_pipe(fpu_reg_reg_reg);
%}
10266 
10267 
// Cisc-alternate to the reg-reg float multiply, spilled to obtain 24-bit
// precision:
//   dst = src1 * LoadF(src2), with dst a float stack slot
// Selected when UseSSE == 0 and the compilation asks for 24-bit instructions.
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE == 0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));
  format %{ "FLD_S  $src2\n\t"
            "FMUL   $src1\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9);  // multiply: D8 C8+i or D8 /1; tertiary is LoadF, D9 /0
  ins_encode(SetInstMark,
             Opcode(tertiary), RMopc_Mem(0x00, src2),
             OpcReg_FPR(src1),
             Pop_Mem_FPR(dst),
             ClearInstMark);
  ins_pipe(fpu_mem_reg_mem);
%}
//
// Cisc-alternate to the reg-reg float multiply that does not round to
// 24 bits:
//   dst = src1 * LoadF(src2)
// Selected when UseSSE == 0 and 24-bit instructions are not requested.
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE == 0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));
  format %{ "FMUL   $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9);  // multiply: D8 C8+i; tertiary is LoadF, D9 /0
  ins_encode(SetInstMark,
             Opcode(tertiary), RMopc_Mem(0x00, src2),
             OpcReg_FPR(src1),
             Pop_Reg_FPR(dst),
             ClearInstMark);
  ins_pipe(fpu_reg_reg_mem);
%}
10297 
// Memory-memory float multiply, spilled to obtain 24-bit precision:
//   dst = src1 * src2, with both sources in memory and dst a float stack slot
// src2 is loaded first and then multiplied by src1 from memory.
// Selected when UseSSE == 0 and the compilation asks for 24-bit instructions.
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE == 0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));
  format %{ "FMUL   $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9);  // multiply: D8 /1; tertiary is LoadF, D9 /0
  ins_encode(SetInstMark,
             Opcode(tertiary), RMopc_Mem(0x00, src2),
             OpcP, RMopc_Mem(secondary, src1),
             Pop_Mem_FPR(dst),
             ClearInstMark);
  ins_pipe(fpu_mem_mem_mem);
%}
10311 
// Float multiply by a constant-table immediate, spilled to obtain 24-bit
// precision:
//   dst = src * con, with dst a float stack slot
// Selected when UseSSE == 0 and the compilation asks for 24-bit instructions.
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE == 0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));
  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);                // FLD ST(i-1)
    __ fmul_s($constantaddress($con));      // multiply by the constant from memory
    __ fstp_s(Address(rsp, $dst$$disp));    // store to the stack slot and pop
  %}
  ins_pipe( fpu_mem_reg_con );
%}
//
// Float multiply by a constant-table immediate that does not round to
// 24 bits:
//   dst = src * con
// Selected when UseSSE == 0 and 24-bit instructions are not requested.
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE == 0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));
  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);              // FLD ST(i-1)
    __ fmul_s($constantaddress($con));    // multiply by the constant from memory
    __ fstp_d($dst$$reg);                 // pop the product into the dst register
  %}
  ins_pipe( fpu_reg_reg_con );
%}
10343 
10344 
//
// MACRO1 -- subsume an unshared load into mulFPR:
//   dst = LoadF(mem1) * src
// This instruction does not round to 24 bits.  Selected when UseSSE == 0 and
// 24-bit instructions are not requested.
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE == 0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));
  format %{ "FLD    $mem1    ===MACRO1===\n\t"
            "FMUL   ST,$src\n\t"
            "FSTP   $dst" %}
  opcode(0xD8, 0x1, 0xD9);  // multiply: D8 C8+i or D8 /1; tertiary is LoadF, D9 /0
  ins_encode(SetInstMark,
             Opcode(tertiary), RMopc_Mem(0x00, mem1),
             OpcReg_FPR(src),
             Pop_Reg_FPR(dst),
             ClearInstMark);
  ins_pipe(fpu_reg_reg_mem);
%}
//
// MACRO2 -- addFPR of a mulFPR which subsumed an unshared load:
//   dst = (LoadF(mem1) * src1) + src2
// This instruction does not round to 24 bits.  Selected when UseSSE == 0 and
// 24-bit instructions are not requested.
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE == 0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);
  format %{ "FLD    $mem1     ===MACRO2===\n\t"
            "FMUL   ST,$src1  subsume mulFPR left load\n\t"
            "FADD   ST,$src2\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);  // LoadF D9 /0
  ins_encode(SetInstMark,
             OpcP, RMopc_Mem(0x00, mem1),
             FMul_ST_reg(src1),
             FAdd_ST_reg(src2),
             Pop_Reg_FPR(dst),
             ClearInstMark);
  ins_pipe(fpu_reg_mem_reg_reg);
%}
10380 
// MACRO3 -- addFPR of a mulFPR:
//   src2 = (src0 * src1) + src2
// This instruction does not round to 24 bits.  It is a '2-address'
// instruction: the result is written back to src2.  That removes a move from
// the macro; the register allocator may or may not have to add it back.
// Selected when UseSSE == 0 and 24-bit instructions are not requested.
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE == 0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));
  format %{ "FLD    $src0     ===MACRO3===\n\t"
            "FMUL   ST,$src1\n\t"
            "FADDP  $src2,ST" %}
  opcode(0xD9);  // LoadF D9 /0
  ins_encode(Push_Reg_FPR(src0),
             FMul_ST_reg(src1),
             FAddP_reg_ST(src2));
  ins_pipe(fpu_reg_reg_reg);
%}
10399 
// MACRO4 -- divFPR of a subFPR:
//   dst = (src2 - src1) / src3
// This instruction does not round to 24 bits.  Selected when UseSSE == 0 and
// 24-bit instructions are not requested.
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE == 0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));
  format %{ "FLD    $src2   ===MACRO4===\n\t"
            "FSUB   ST,$src1\n\t"
            "FDIV   ST,$src3\n\t"
            "FSTP  $dst" %}
  opcode(0xDE, 0x7);  // DE F8+i or DE /7
  ins_encode(Push_Reg_FPR(src2),
             subFPR_divFPR_encode(src1, src3),
             Pop_Reg_FPR(dst));
  ins_pipe(fpu_reg_reg_reg_reg);
%}
10416 
// Float divide that spills its result to obtain 24-bit precision:
//   dst = src1 / src2, with dst a float stack slot
// Selected when UseSSE == 0 and the compilation asks for 24-bit instructions.
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE == 0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));
  format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6);  // D8 F0+i or D8 /6
  ins_encode(Push_Reg_FPR(src1),
             OpcReg_FPR(src2),
             Pop_Mem_FPR(dst));
  ins_pipe(fpu_mem_reg_reg);
%}
//
// Float divide that does not round to 24 bits:
//   dst = dst / src
// Selected when UseSSE == 0 and 24-bit instructions are not requested.
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE == 0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));
  format %{ "FDIV   $dst,$src" %}
  opcode(0xDE, 0x7);  // DE F8+i or DE /7
  ins_encode(Push_Reg_FPR(src),
             OpcP, RegOpc(dst));
  ins_pipe(fpu_reg_reg);
%}
10441 
10442 
// Float remainder that spills its result to obtain 24-bit precision:
//   dst = src1 % src2, with dst a float stack slot
// Reuses the double remainder encodings and only differs in the final float
// store.  Selected when UseSSE == 0 and the compilation asks for 24-bit
// instructions.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr);  // emitModDPR() uses EAX and EFLAGS
  format %{ "FMOD   $dst,$src1,$src2" %}
  ins_encode(Push_Reg_Mod_DPR(src1, src2),
             emitModDPR(),
             Push_Result_Mod_DPR(src2),
             Pop_Mem_FPR(dst));
  ins_pipe(pipe_slow);
%}
10456 //
10457 // This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr)
%{
  // Two-operand ModF (dst = dst mod src) for UseSSE == 0 when 24-bit
  // instructions are not selected; the result stays in an FPU register.
  predicate(UseSSE == 0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD   $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe(pipe_slow);
%}
10470 
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr)
%{
  // ModF on XMM float operands (UseSSE >= 1).  Per the format text, both
  // operands pass through a 4-byte stack temporary onto the x87 stack,
  // FPREM is looped, and the result returns through the same temporary.
  // EAX and EFLAGS are killed.
  predicate(UseSSE >= 1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  ins_cost(250);
  format %{ "SUB    ESP,4\t # FMOD\n"
          "\tMOVSS  [ESP+0],$src1\n"
          "\tFLD_S  [ESP+0]\n"
          "\tMOVSS  [ESP+0],$src0\n"
          "\tFLD_S  [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS  $dst,[ESP+0]\n"
          "\tADD    ESP,4\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_encode(Push_ModF_encoding(src0, src1),
             emitModDPR(),
             Push_ResultF(dst,0x4),
             PopFPU);
  ins_pipe(pipe_slow);
%}
10494 
10495 
10496 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
10498 
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src)
%{
  // RoundFloat for UseSSE == 0: stores the FPU register value to a float
  // stack slot (shown as FST_S in the format).
  predicate(UseSSE == 0);
  match(Set dst (RoundFloat src));
  ins_cost(125);

  format %{ "FST_S  $dst,$src\t# F-round" %}
  ins_encode(Pop_Mem_Reg_FPR(dst, src));
  ins_pipe(fpu_mem_reg);
%}
10507 
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src)
%{
  // RoundDouble for UseSSE <= 1: stores the FPU register value to a double
  // stack slot (shown as FST_D in the format).
  predicate(UseSSE <= 1);
  match(Set dst (RoundDouble src));
  ins_cost(125);

  format %{ "FST_D  $dst,$src\t# D-round" %}
  ins_encode(Pop_Mem_Reg_DPR(dst, src));
  ins_pipe(fpu_mem_reg);
%}
10516 
// Force rounding to 24-bit precision and 8-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src)
%{
  // ConvD2F for UseSSE == 0.  Has no encoding of its own: it expands to
  // roundFloat_mem_reg on the same operands.
  predicate(UseSSE == 0);
  match(Set dst (ConvD2F src));

  format %{ "FST_S  $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst, src);
  %}
%}
10526 
// Force rounding to 24-bit precision and 8-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr)
%{
  // ConvD2F for UseSSE == 1: the double is in an FPU register and the float
  // result goes to an XMM register via a 4-byte stack temporary.
  predicate(UseSSE == 1);
  match(Set dst (ConvD2F src));
  effect(KILL cr);

  format %{ "SUB    ESP,4\n\t"
            "FST_S  [ESP],$src\t# F-round\n\t"
            "MOVSS  $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    Address scratch(rsp, 0);
    __ subptr(rsp, 4);
    if ($src$$reg == FPR1L_enc) {
      // Source has the FPR1L encoding: store it without popping.
      __ fst_s(scratch);
    } else {
      // Otherwise load the source register, then store and pop.
      __ fld_s($src$$reg-1);
      __ fstp_s(scratch);
    }
    __ movflt($dst$$XMMRegister, scratch);
    __ addptr(rsp, 4);
  %}
  ins_pipe(pipe_slow);
%}
10549 
10550 // Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src)
%{
  // ConvD2F for UseSSE >= 2: a single CVTSD2SS between XMM registers.
  predicate(UseSSE >= 2);
  match(Set dst (ConvD2F src));

  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10560 
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src)
%{
  // ConvF2D for UseSSE == 0, FPU register to FPU register.
  predicate(UseSSE == 0);
  match(Set dst (ConvF2D src));

  format %{ "FST_S  $dst,$src\t# D-round" %}
  ins_encode(Pop_Reg_Reg_DPR(dst, src));
  ins_pipe(fpu_reg_reg);
%}
10568 
instruct convFPR2D_reg(stackSlotD dst, regFPR src)
%{
  // ConvF2D for UseSSE == 1 with the float in an FPU register.  Has no
  // encoding of its own: it expands to roundDouble_mem_reg.
  predicate(UseSSE == 1);
  match(Set dst (ConvF2D src));

  format %{ "FST_D  $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst, src);
  %}
%}
10577 
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  // ConvF2D for UseSSE == 1: the float is in an XMM register and the double
  // result goes to an FPU register.  The value is passed through a 4-byte
  // stack temporary; EFLAGS is killed.
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  // The MOVSS line previously lacked the comma between its operands, which
  // made the listing inconsistent with every other MOVSS format here.
  format %{ "SUB    ESP,4\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "ADD    ESP,4\n\t"
            "FSTP   $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}
10596 
instruct convF2D_reg(regD dst, regF src)
%{
  // ConvF2D for UseSSE >= 2: a single CVTSS2SD between XMM registers.
  predicate(UseSSE >= 2);
  match(Set dst (ConvF2D src));

  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
10606 
10607 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr)
%{
  // ConvD2I for UseSSE <= 1 with the double in an FPU register.  The result
  // is fixed to EAX; EDX (tmp) and EFLAGS are killed.  Per the format text,
  // a result of 0x80000000 takes the d2i_wrapper slow path.
  predicate(UseSSE <= 1);
  match(Set dst (ConvD2I src));
  effect(KILL tmp, KILL cr);

  format %{ "FLD    $src\t# Convert double to int \n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,4\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "CMP    EAX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "FLD_D  $src\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  ins_encode(Push_Reg_DPR(src), DPR2I_encoding(src));
  ins_pipe(pipe_slow);
%}
10626 
10627 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg(eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr)
%{
  // ConvD2I for UseSSE >= 2 with the double in an XMM register.  The result
  // is fixed to EAX; EDX (tmp) and EFLAGS are killed.
  predicate(UseSSE >= 2);
  match(Set dst (ConvD2I src));
  effect(KILL tmp, KILL cr);

  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP    $dst,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP, 8\n\t"
            "MOVSD  [ESP], $src\n\t"
            "FLD_D  [ESP]\n\t"
            "ADD    ESP, 8\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label done;
    Address spill(rsp, 0);

    // Fast path: truncating convert; any result other than 0x80000000 is final.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, done);

    // Slow path: move the value onto the x87 stack and call the wrapper stub.
    __ subptr(rsp, 8);
    __ movdbl(spill, $src$$XMMRegister);
    __ fld_d(spill);
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();

    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
10656 
instruct convDPR2L_reg_reg(eADXRegL dst, regDPR src, eFlagsReg cr)
%{
  // ConvD2L for UseSSE <= 1 with the double in an FPU register.  The long
  // result uses the eADXRegL operand class; EFLAGS is killed.  Per the
  // format text, a result of EDX == 0x80000000 with EAX == 0 takes the
  // d2l_wrapper slow path.
  predicate(UseSSE <= 1);
  match(Set dst (ConvD2L src));
  effect(KILL cr);

  format %{ "FLD    $src\t# Convert double to long\n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,8\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode(Push_Reg_DPR(src), DPR2L_encoding(src));
  ins_pipe(pipe_slow);
%}
10678 
10679 // XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg(eADXRegL dst, regD src, eFlagsReg cr)
%{
  // ConvD2L for UseSSE >= 2.  The XMM value is spilled to the stack, loaded
  // onto the x87 stack and stored as a 64-bit integer under the truncating
  // control word.  EFLAGS is killed.
  predicate(UseSSE >= 2);
  match(Set dst (ConvD2L src));
  effect(KILL cr);

  format %{ "SUB    ESP,8\t# Convert double to long\n\t"
            "MOVSD  [ESP],$src\n\t"
            "FLD_D  [ESP]\n\t"
            "FLDCW  trunc mode\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP,8\n\t"
            "MOVSD  [ESP],$src\n\t"
            "FLD_D  [ESP]\n\t"
            "ADD    ESP,8\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label done;
    Address spill(rsp, 0);

    // Move the double onto the x87 stack and store it as a truncated long.
    __ subptr(rsp, 8);
    __ movdbl(spill, $src$$XMMRegister);
    __ fld_d(spill);
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(spill);

    // Put back the control word that matches the current compilation mode.
    if (!Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    }

    // Pop the two result halves; this also releases the 8 bytes reserved above.
    __ pop(rax);
    __ pop(rdx);

    // Only the pattern EDX == 0x80000000 with EAX == 0 needs the slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, done);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, done);

    // Slow path: reload the value onto the x87 stack and call the wrapper stub.
    __ subptr(rsp, 8);
    __ movdbl(spill, $src$$XMMRegister);
    __ fld_d(spill);
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();

    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
10732 
// Convert a float to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr)
%{
  // ConvF2I for UseSSE == 0 with the float in an FPU register.  The result
  // is fixed to EAX; EDX (tmp) and EFLAGS are killed.
  predicate(UseSSE == 0);
  match(Set dst (ConvF2I src));
  effect(KILL tmp, KILL cr);

  format %{ "FLD    $src\t# Convert float to int \n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,4\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "CMP    EAX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  // The double-to-int encoding class is reused here for the float case.
  ins_encode(Push_Reg_FPR(src), DPR2I_encoding(src));
  ins_pipe(pipe_slow);
%}
10758 
10759 // Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr)
%{
  // ConvF2I for UseSSE >= 1 with the float in an XMM register.  The result
  // is fixed to EAX; EDX (tmp) and EFLAGS are killed.
  predicate(UseSSE >= 1);
  match(Set dst (ConvF2I src));
  effect(KILL tmp, KILL cr);

  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP    $dst,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP, 4\n\t"
            "MOVSS  [ESP], $src\n\t"
            "FLD    [ESP]\n\t"
            "ADD    ESP, 4\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label done;
    Address spill(rsp, 0);

    // Fast path: truncating convert; any result other than 0x80000000 is final.
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, done);

    // Slow path: move the value onto the x87 stack and call the wrapper stub.
    __ subptr(rsp, 4);
    __ movflt(spill, $src$$XMMRegister);
    __ fld_s(spill);
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();

    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
10788 
instruct convFPR2L_reg_reg(eADXRegL dst, regFPR src, eFlagsReg cr)
%{
  // ConvF2L for UseSSE == 0 with the float in an FPU register.  The long
  // result uses the eADXRegL operand class; EFLAGS is killed.
  predicate(UseSSE == 0);
  match(Set dst (ConvF2L src));
  effect(KILL cr);

  format %{ "FLD    $src\t# Convert float to long\n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,8\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  // The double-to-long encoding class is reused here for the float case.
  ins_encode(Push_Reg_FPR(src), DPR2L_encoding(src));
  ins_pipe(pipe_slow);
%}
10811 
10812 // XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg(eADXRegL dst, regF src, eFlagsReg cr)
%{
  // ConvF2L for UseSSE >= 1.  The XMM value is spilled to the stack, loaded
  // onto the x87 stack and stored as a 64-bit integer under the truncating
  // control word.  EFLAGS is killed.
  predicate(UseSSE >= 1);
  match(Set dst (ConvF2L src));
  effect(KILL cr);

  format %{ "SUB    ESP,8\t# Convert float to long\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "FLDCW  trunc mode\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP,4\t# Convert float to long\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "ADD    ESP,4\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label done;
    Address spill(rsp, 0);

    // Reserve 8 bytes: the float goes in, the 64-bit integer comes back out.
    __ subptr(rsp, 8);
    __ movflt(spill, $src$$XMMRegister);
    __ fld_s(spill);
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(spill);

    // Put back the control word that matches the current compilation mode.
    if (!Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    }

    // Pop the two result halves; this also releases the 8 bytes reserved above.
    __ pop(rax);
    __ pop(rdx);

    // Only the pattern EDX == 0x80000000 with EAX == 0 needs the slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, done);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, done);

    // Slow path: reload the value onto the x87 stack and call the wrapper stub.
    __ subptr(rsp, 4);
    __ movflt(spill, $src$$XMMRegister);
    __ fld_s(spill);
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();

    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
10865 
instruct convI2DPR_reg(regDPR dst, stackSlotI src)
%{
  // ConvI2D for UseSSE <= 1: the int comes from a stack slot and the double
  // result goes to an FPU register (FILD then FSTP in the format).
  predicate(UseSSE <= 1);
  match(Set dst (ConvI2D src));

  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe(fpu_reg_mem);
%}
10875 
instruct convI2D_reg(regD dst, rRegI src)
%{
  // ConvI2D for UseSSE >= 2 when UseXmmI2D is off: a single CVTSI2SD from a
  // general register.
  predicate(UseSSE >= 2 && !UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10885 
instruct convI2D_mem(regD dst, memory mem)
%{
  // ConvI2D of a loaded int for UseSSE >= 2: the LoadI is folded into the
  // memory operand of CVTSI2SD.
  predicate(UseSSE >= 2);
  match(Set dst (ConvI2D (LoadI mem)));

  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow);
%}
10895 
instruct convXI2D_reg(regD dst, rRegI src) %{
  // ConvI2D for UseSSE >= 2 when UseXmmI2D is on: the int is moved into the
  // XMM register with MOVD and converted in place with CVTDQ2PD.
  predicate(UseSSE >= 2 && UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "MOVD  $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    // Move first, then convert within the destination register.
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
10909 
instruct convI2DPR_mem(regDPR dst, memory mem)
%{
  // ConvI2D of a loaded int for UseSSE <= 1 when 24-bit instructions are
  // not selected: FILD straight from memory, result to an FPU register.
  predicate(UseSSE <= 1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));

  format %{ "FILD   $mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode(SetInstMark,
             OpcP, RMopc_Mem(0x00,mem),
             Pop_Reg_DPR(dst),
             ClearInstMark);
  ins_pipe(fpu_reg_mem);
%}
10920 
10921 // Convert a byte to a float; no rounding step needed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src)
%{
  // ConvI2F for UseSSE == 0, restricted by the predicate to inputs that are
  // an AndI with the constant 255.
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));

  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe(fpu_reg_mem);
%}
10932 
10933 // In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src)
%{
  // ConvI2F for UseSSE == 0 when 24-bit instructions are selected: int
  // stack slot in, float stack slot out (FILD then FSTP_S in the format).
  predicate(UseSSE == 0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);

  format %{ "FILD   $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Mem_FPR(dst));
  ins_pipe(fpu_mem_mem);
%}
10945 
10946 // In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem)
%{
  // ConvI2F of a loaded int for UseSSE == 0 when 24-bit instructions are
  // selected: FILD straight from memory, result to a float stack slot.
  predicate(UseSSE == 0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);

  format %{ "FILD   $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode(SetInstMark,
             OpcP, RMopc_Mem(0x00,mem),
             Pop_Mem_FPR(dst),
             ClearInstMark);
  ins_pipe(fpu_mem_mem);
%}
10958 
10959 // This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src)
%{
  // ConvI2F for UseSSE == 0 when 24-bit instructions are not selected: int
  // stack slot in, FPU register out.
  predicate(UseSSE == 0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));

  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe(fpu_reg_mem);
%}
10970 
10971 // This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem)
%{
  // ConvI2F of a loaded int for UseSSE == 0 when 24-bit instructions are
  // not selected: FILD straight from memory, result to an FPU register.
  predicate(UseSSE == 0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));

  format %{ "FILD   $mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode(SetInstMark,
             OpcP, RMopc_Mem(0x00,mem),
             Pop_Reg_FPR(dst),
             ClearInstMark);
  ins_pipe(fpu_reg_mem);
%}
10982 
10983 // Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src)
%{
  // ConvI2F via CVTSI2SS.  Selected when UseSSE is 1, or when UseSSE >= 2
  // and UseXmmI2F is off.
  predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
  match(Set dst (ConvI2F src));

  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10993 
10994  instruct convXI2F_reg(regF dst, rRegI src)
10995 %{
10996   predicate( UseSSE>=2 && UseXmmI2F );
10997   match(Set dst (ConvI2F src));
10998 
10999   format %{ "MOVD  $dst,$src\n\t"
11000             "CVTDQ2PS $dst,$dst\t# i2f" %}
11001   ins_encode %{
11002     __ movdl($dst$$XMMRegister, $src$$Register);
11003     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11004   %}
11005   ins_pipe(pipe_slow); // XXX
11006 %}
11007 
instruct convI2L_reg(eRegL dst, rRegI src, eFlagsReg cr)
%{
  // Sign-extending ConvI2L.  Per the format text, the int is copied into
  // both halves of the long and the high half is arithmetically shifted
  // right by 31.  EFLAGS is killed.
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);

  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src\n\t"
            "SAR    $dst.hi,31" %}
  ins_encode(convert_int_long(dst, src));
  ins_pipe(ialu_reg_reg_long);
%}
11018 
11019 // Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags)
%{
  // Matches (ConvI2L src) AND-ed with an immL_32bits mask: the low half is
  // copied and the high half is cleared with XOR.  EFLAGS is killed.
  match(Set dst (AndL (ConvI2L src) mask));
  effect(KILL flags);
  ins_cost(250);

  format %{ "MOV    $dst.lo,$src\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst, src), OpcP, RegReg_Hi2(dst, dst));
  ins_pipe(ialu_reg_reg_long);
%}
11030 
11031 // Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  // Matches a long AND-ed with an immL_32bits mask: the low half is copied
  // and the high half is cleared with XOR.  EFLAGS is killed.
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  // The last format line no longer ends in "\n\t"; the old terminator left a
  // dangling blank continuation line in listings and did not match the
  // sibling convI2L_reg_zex format.
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
11042 
instruct convL2DPR_reg(stackSlotD dst, eRegL src, eFlagsReg cr)
%{
  // ConvL2D for UseSSE <= 1.  Per the format text, both halves of the long
  // are pushed, loaded with FILD, and the result is stored to a double
  // stack slot.  EFLAGS is killed.
  predicate(UseSSE <= 1);
  match(Set dst (ConvL2D src));
  effect(KILL cr);

  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe(pipe_slow);
%}
11056 
instruct convL2D_reg(regD dst, eRegL src, eFlagsReg cr)
%{
  // ConvL2D for UseSSE >= 2.  Per the format text, the long is pushed,
  // converted on the x87 stack, stored back to the same stack bytes and
  // loaded into the XMM destination.  EFLAGS is killed.
  predicate(UseSSE >= 2);
  match(Set dst (ConvL2D src));
  effect(KILL cr);

  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe(pipe_slow);
%}
11071 
instruct convL2F_reg(regF dst, eRegL src, eFlagsReg cr)
%{
  // ConvL2F for UseSSE >= 1.  Per the format text, the long is pushed,
  // converted on the x87 stack, stored back as a float and loaded into the
  // XMM destination.  EFLAGS is killed.
  predicate(UseSSE >= 1);
  match(Set dst (ConvL2F src));
  effect(KILL cr);

  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe(pipe_slow);
%}
11086 
instruct convL2FPR_reg(stackSlotF dst, eRegL src, eFlagsReg cr)
%{
  // ConvL2F with the result in a float stack slot.  This rule carries no
  // predicate, so it is not restricted by UseSSE.  EFLAGS is killed.
  match(Set dst (ConvL2F src));
  effect(KILL cr);

  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe(pipe_slow);
%}
11099 
instruct convL2I_reg(rRegI dst, eRegL src)
%{
  // ConvL2I: per the format text, copies the low half of the long.
  match(Set dst (ConvL2I src));
  effect(DEF dst, USE src);

  format %{ "MOV    $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst, src));
  ins_pipe(ialu_reg_reg);
%}
11107 
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src)
%{
  // MoveF2I from a float stack slot into a general register; a plain
  // 32-bit load relative to rsp.
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(100);

  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    Address src_slot(rsp, $src$$disp);
    __ movl($dst$$Register, src_slot);
  %}
  ins_pipe(ialu_reg_mem);
%}
11118 
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src)
%{
  // MoveF2I for UseSSE == 0: the FPU register value is stored to an int
  // stack slot (FST_S in the format).
  predicate(UseSSE == 0);
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode(Pop_Mem_Reg_FPR(dst, src));
  ins_pipe(fpu_mem_reg);
%}
11129 
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src)
%{
  // MoveF2I for UseSSE >= 1: the XMM float is stored to an int stack slot.
  predicate(UseSSE >= 1);
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(95);

  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    Address dst_slot(rsp, $dst$$disp);
    __ movflt(dst_slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
11142 
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src)
%{
  // MoveF2I for UseSSE >= 2: a direct MOVD from the XMM register to a
  // general register.
  predicate(UseSSE >= 2);
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);

  format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
11154 
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src)
%{
  // MoveI2F from a general register into a float stack slot; a plain
  // 32-bit store relative to rsp.
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);

  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    Address dst_slot(rsp, $dst$$disp);
    __ movl(dst_slot, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
11166 
11167 
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src)
%{
  // MoveI2F for UseSSE == 0: the int stack slot is loaded as a 32-bit real
  // (FLD_S) and stored to an FPU register.
  predicate(UseSSE == 0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode(SetInstMark,
             OpcP, RMopc_Mem_no_oop(0x00,src),
             Pop_Reg_FPR(dst),
             ClearInstMark);
  ins_pipe(fpu_reg_mem);
%}
11181 
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src)
%{
  // MoveI2F for UseSSE >= 1: the int stack slot is loaded into an XMM
  // register.
  predicate(UseSSE >= 1);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(95);

  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    Address src_slot(rsp, $src$$disp);
    __ movflt($dst$$XMMRegister, src_slot);
  %}
  ins_pipe(pipe_slow);
%}
11194 
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src)
%{
  // MoveI2F for UseSSE >= 2: a direct MOVD from a general register to the
  // XMM register.
  predicate(UseSSE >= 2);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(85);

  format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}
11207 
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src)
%{
  // MoveD2L from a double stack slot into a long register pair.  Per the
  // format text, two 32-bit loads: low half from the slot, high half from
  // the slot plus 4.
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(250);

  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode(SetInstMark,
             OpcP, RegMem(dst,src),
             OpcS, RegMem_Hi(dst,src),
             ClearInstMark);
  ins_pipe(ialu_mem_long_reg);
%}
11219 
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src)
%{
  // MoveD2L for UseSSE <= 1: the FPU register value is stored to a long
  // stack slot (FST_D in the format).
  predicate(UseSSE <= 1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode(Pop_Mem_Reg_DPR(dst, src));
  ins_pipe(fpu_mem_reg);
%}
11230 
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src)
%{
  // MoveD2L for UseSSE >= 2: the XMM double is stored to a long stack slot.
  predicate(UseSSE >= 2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);

  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    Address dst_slot(rsp, $dst$$disp);
    __ movdbl(dst_slot, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
11242 
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp)
%{
  // MoveD2L for UseSSE >= 2, XMM register to long register pair.  The low
  // half is moved directly; the high half goes through the XMM temporary
  // after a PSHUFLW with immediate 0x4E.
  predicate(UseSSE >= 2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);

  format %{ "MOVD   $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    Register lo_half = $dst$$Register;
    Register hi_half = HIGH_FROM_LOW(lo_half);

    __ movdl(lo_half, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(hi_half, $tmp$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
11258 
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src)
%{
  // MoveL2D from a long register pair into a double stack slot.  Per the
  // format text, two 32-bit stores: low half to the slot, high half to the
  // slot plus 4.
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(200);

  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode(SetInstMark,
             OpcP, RegMem(src, dst),
             OpcS, RegMem_Hi(src, dst),
             ClearInstMark);
  ins_pipe(ialu_mem_long_reg);
%}
11270 
11271 
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src)
%{
  // MoveL2D for UseSSE <= 1: the long stack slot is loaded as a 64-bit real
  // (FLD_D) and stored to an FPU register.
  predicate(UseSSE <= 1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode(SetInstMark,
             OpcP, RMopc_Mem_no_oop(0x00,src),
             Pop_Reg_DPR(dst),
             ClearInstMark);
  ins_pipe(fpu_reg_mem);
%}
11285 
11286 
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src)
%{
  // MoveL2D for UseSSE >= 2 when UseXmmLoadAndClearUpper is on: the long
  // stack slot is loaded into an XMM register (MOVSD in the format).
  predicate(UseSSE >= 2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(95);

  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    Address src_slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, src_slot);
  %}
  ins_pipe(pipe_slow);
%}
11299 
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src)
%{
  // MoveL2D for UseSSE >= 2 when UseXmmLoadAndClearUpper is off.  Same
  // movdbl call as the rule selected when the flag is on; only the predicate
  // and the format mnemonic (MOVLPD) differ.
  predicate(UseSSE >= 2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(95);

  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    Address src_slot(rsp, $src$$disp);
    __ movdbl($dst$$XMMRegister, src_slot);
  %}
  ins_pipe(pipe_slow);
%}
11312 
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp)
%{
  // MoveL2D for UseSSE >= 2, long register pair to XMM register.  Each half
  // is moved with MOVD (the high half into the XMM temporary) and the two
  // are combined with PUNPCKLDQ.  Note dst is declared TEMP, not DEF.
  predicate(UseSSE >= 2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);

  format %{ "MOVD   $dst,$src.lo\n\t"
            "MOVD   $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    Register lo_half = $src$$Register;
    Register hi_half = HIGH_FROM_LOW(lo_half);

    __ movdl($dst$$XMMRegister, lo_half);
    __ movdl($tmp$$XMMRegister, hi_half);
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
11328 
11329 //----------------------------- CompressBits/ExpandBits ------------------------
11330 
// 64-bit CompressBits on a 32-bit target, built from two 32-bit PEXT
// operations whose results are then stitched together.
// All long operands live in fixed register pairs; rtmp is a scratch GPR and
// xtmp is an XMM register used to park the scratch value across a clobber.
instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  match(Set dst (CompressBits src mask));
  predicate(n->bottom_type()->isa_long());
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    const Register    dst_lo  = $dst$$Register;
    const Register    dst_hi  = HIGH_FROM_LOW(dst_lo);
    const Register    src_lo  = $src$$Register;
    const Register    src_hi  = HIGH_FROM_LOW(src_lo);
    const Register    mask_lo = $mask$$Register;
    const Register    mask_hi = HIGH_FROM_LOW(mask_lo);
    const Register    count   = $rtmp$$Register;
    const XMMRegister parked  = $xtmp$$XMMRegister;
    Label done, partial_result;

    // Extract the lower and upper 32 bits of the source independently into the
    // destination pair. The upper result must afterwards be laid out directly
    // behind the lower result.
    __ pextl(dst_lo, src_lo, mask_lo);
    __ pextl(dst_hi, src_hi, mask_hi);
    __ popcntl(count, mask_lo);
    // A fully set lower mask (32 bits) means the halves are already contiguous.
    __ cmpl(count, 32);
    __ jccb(Assembler::equal, done);
    // No spare GPR on this target, so the bit count is parked in an XMM register.
    __ movdl(parked, count);
    // Shift the upper result left by the lower mask bit count and merge it
    // into the lower destination register.
    __ shlxl(count, dst_hi, count);
    __ orl(dst_lo, count);
    __ movdl(count, parked);
    // With a lower mask bit count of zero the whole upper result has been
    // copied into the lower register, so the upper register becomes zero.
    __ cmpl(count, 0);
    __ jccb(Assembler::greater, partial_result);
    __ movl(dst_hi, 0);
    __ jmp(done);
    __ bind(partial_result);
    // Otherwise shift out of the upper register the (32 - count) bits that
    // were already merged into the lower register.
    __ subl(count, 32);
    __ negl(count);
    __ shrxl(dst_hi, dst_hi, count);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}
11371 
// 64-bit ExpandBits on a 32-bit target, built from 32-bit PDEP operations.
// All long operands live in fixed register pairs; rtmp is a scratch GPR and
// xtmp is an XMM register used as a spill slot. The low mask register is
// overwritten during the sequence and restored from xtmp before the end.
instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  match(Set dst (ExpandBits src mask));
  predicate(n->bottom_type()->isa_long());
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    const Register    dst_lo  = $dst$$Register;
    const Register    dst_hi  = HIGH_FROM_LOW(dst_lo);
    const Register    src_lo  = $src$$Register;
    const Register    src_hi  = HIGH_FROM_LOW(src_lo);
    const Register    mask_lo = $mask$$Register;
    const Register    mask_hi = HIGH_FROM_LOW(mask_lo);
    const Register    count   = $rtmp$$Register;
    const XMMRegister parked  = $xtmp$$XMMRegister;
    Label done, mask_clipping;

    // The deposit reads source bits sequentially from the LSB and places them
    // at the positions of the set mask bits, so the number of source bits
    // consumed equals the combined set-bit count of the mask register pair.
    __ pdepl(dst_lo, src_lo, mask_lo);
    __ pdepl(dst_hi, src_hi, mask_hi);
    __ popcntl(count, mask_lo);
    // A fully set lower mask (32 bits) consumes the whole lower source
    // register, so none of its bits feed the upper destination.
    __ cmpl(count, 32);
    __ jccb(Assembler::equal, done);
    // No spare GPR on this target, so the bit count is parked in an XMM register.
    __ movdl(parked, count);
    // Drop the lower source bits that the lower mask already consumed.
    __ shrxl(count, src_lo, count);
    // Deposit the remaining lower source bits under the upper mask.
    __ pdepl(dst_hi, count, mask_hi);
    __ movdl(count, parked);
    // count becomes 32 - popcount(lower mask): the number of lower source
    // bits that were just deposited through the upper mask.
    __ subl(count, 32);
    __ negl(count);
    // Save the lower mask and work on a copy of the upper mask in its place.
    __ movdl(parked, mask_lo);
    __ movl(mask_lo, mask_hi);
    // Clear, one lowest set bit per iteration, the upper mask bits already
    // used for the lower source register.
    __ bind(mask_clipping);
    __ blsrl(mask_lo, mask_lo);
    __ decrementl(count, 1);
    __ jccb(Assembler::greater, mask_clipping);
    // Deposit the upper source register under the remaining upper mask bits.
    __ pdepl(count, src_hi, mask_lo);
    // Merge both partial results into the upper destination register.
    __ orl(dst_hi, count);
    // Restore the lower mask register.
    __ movdl(mask_lo, parked);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}
11418 
11419 // =======================================================================
11420 // Fast clearing of an array
// ClearArray with a non-constant length, non-large node, UseAVX <= 2.
// cnt and base are consumed and clobbered; zero (EAX) and the flags are killed.
// The format below is a descriptive listing only; the code is generated by
// the clear_mem macro-assembler call.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  match(Set dummy (ClearArray cnt base));
  predicate((UseAVX <= 2) && !((ClearArrayNode*)n)->is_large());
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
    $$emit$$"JG     LARGE\n\t"
    $$emit$$"SHL    ECX, 1\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JS     DONE\t# Zero length\n\t"
    $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JGE    LOOP\n\t"
    $$emit$$"JMP    DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV     RDI,RAX\n\t"
       $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD     0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB     0x8,RCX\n\t"
       $$emit$$"JGE     L_loop\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JL      L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD     0x20,RAX\n\t"
       $$emit$$"SUB     0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JLE     L_end\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
       $$emit$$"ADD     0x8,RAX\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"JGE     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    // The boolean is false for the non-large variants and true for the large
    // ones; no opmask register is supplied on this path.
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11480 
// ClearArray with a non-constant length, non-large node, UseAVX > 2.
// Same shape as the non-AVX512 variant, plus an opmask temporary (ktmp) that
// is handed to clear_mem. cnt and base are consumed and clobbered; zero (EAX)
// and the flags are killed.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  match(Set dummy (ClearArray cnt base));
  predicate((UseAVX > 2) && !((ClearArrayNode*)n)->is_large());
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  ins_cost(125);

  format %{ $$template
    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
    $$emit$$"JG     LARGE\n\t"
    $$emit$$"SHL    ECX, 1\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JS     DONE\t# Zero length\n\t"
    $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JGE    LOOP\n\t"
    $$emit$$"JMP    DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV     RDI,RAX\n\t"
       $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD     0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB     0x8,RCX\n\t"
       $$emit$$"JGE     L_loop\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JL      L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD     0x20,RAX\n\t"
       $$emit$$"SUB     0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JLE     L_end\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
       $$emit$$"ADD     0x8,RAX\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"JGE     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    const KRegister   kmask    = $ktmp$$KRegister;
    // The boolean is false for the non-large variants and true for the large ones.
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, false, kmask);
  %}
  ins_pipe( pipe_slow );
%}
11541 
// ClearArray with a non-constant length, large node, UseAVX <= 2.
// cnt and base are consumed and clobbered; zero (EAX) and the flags are killed.
// The format below is a descriptive listing only; the code is generated by
// the clear_mem macro-assembler call.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  match(Set dummy (ClearArray cnt base));
  predicate(((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
       $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD     0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB     0x8,RCX\n\t"
       $$emit$$"JGE     L_loop\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JL      L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD     0x20,RAX\n\t"
       $$emit$$"SUB     0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JLE     L_end\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
       $$emit$$"ADD     0x8,RAX\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"JGE     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    // The boolean is true for the large variants and false for the non-large
    // ones; no opmask register is supplied on this path.
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11591 
// ClearArray with a non-constant length, large node, UseAVX > 2.
// Same shape as the non-AVX512 large variant, plus an opmask temporary (ktmp)
// that is handed to clear_mem. cnt and base are consumed and clobbered; zero
// (EAX) and the flags are killed.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  match(Set dummy (ClearArray cnt base));
  predicate(((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
       $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD     0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB     0x8,RCX\n\t"
       $$emit$$"JGE     L_loop\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JL      L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD     0x20,RAX\n\t"
       $$emit$$"SUB     0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JLE     L_end\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
       $$emit$$"ADD     0x8,RAX\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"JGE     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    cnt_reg  = $cnt$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    const KRegister   kmask    = $ktmp$$KRegister;
    // The boolean is true for the large variants and false for the non-large ones.
    __ clear_mem(base_reg, cnt_reg, zero_reg, xmm_tmp, true, kmask);
  %}
  ins_pipe( pipe_slow );
%}
11641 
// ClearArray with a compile-time constant length for non-large nodes.
// Requires AVX512VL and MaxVectorSize >= 32. Unlike the non-constant forms,
// base is only read (any pointer register) and the count is an immediate.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  ins_cost(100);

  format %{ "clear_mem_imm $base , $cnt  \n\t" %}
  ins_encode %{
    const Register    base_reg = $base$$Register;
    const Register    zero_reg = $zero$$Register;
    const XMMRegister xmm_tmp  = $tmp$$XMMRegister;
    const KRegister   kmask    = $ktmp$$KRegister;
    // The immediate count selects the constant-length overload of clear_mem.
    __ clear_mem(base_reg, $cnt$$constant, zero_reg, xmm_tmp, kmask);
  %}
  ins_pipe(pipe_slow);
%}
11655 
// StrComp intrinsic for the LL encoding on targets without AVX512VL+BW.
// Both string pointers and both counts are consumed and clobbered; the result
// is produced in EAX and tmp1 is an XMM scratch register.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    s1   = $str1$$Register;
    const Register    s2   = $str2$$Register;
    const Register    len1 = $cnt1$$Register;
    const Register    len2 = $cnt2$$Register;
    const Register    res  = $result$$Register;
    const XMMRegister vec  = $tmp1$$XMMRegister;
    // No opmask register on the non-AVX512 path.
    __ string_compare(s1, s2, len1, len2, res, vec, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11670 
// StrComp intrinsic for the LL encoding on targets with AVX512VL+BW.
// Identical to the non-EVEX form except that an opmask temporary (ktmp) is
// allocated and passed to string_compare.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    s1    = $str1$$Register;
    const Register    s2    = $str2$$Register;
    const Register    len1  = $cnt1$$Register;
    const Register    len2  = $cnt2$$Register;
    const Register    res   = $result$$Register;
    const XMMRegister vec   = $tmp1$$XMMRegister;
    const KRegister   kmask = $ktmp$$KRegister;
    __ string_compare(s1, s2, len1, len2, res, vec, StrIntrinsicNode::LL, kmask);
  %}
  ins_pipe( pipe_slow );
%}
11685 
// StrComp intrinsic for the UU encoding on targets without AVX512VL+BW.
// Both string pointers and both counts are consumed and clobbered; the result
// is produced in EAX and tmp1 is an XMM scratch register.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    s1   = $str1$$Register;
    const Register    s2   = $str2$$Register;
    const Register    len1 = $cnt1$$Register;
    const Register    len2 = $cnt2$$Register;
    const Register    res  = $result$$Register;
    const XMMRegister vec  = $tmp1$$XMMRegister;
    // No opmask register on the non-AVX512 path.
    __ string_compare(s1, s2, len1, len2, res, vec, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11700 
// StrComp intrinsic for the UU encoding on targets with AVX512VL+BW.
// Identical to the non-EVEX form except that an opmask temporary (ktmp) is
// allocated and passed to string_compare.
instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    s1    = $str1$$Register;
    const Register    s2    = $str2$$Register;
    const Register    len1  = $cnt1$$Register;
    const Register    len2  = $cnt2$$Register;
    const Register    res   = $result$$Register;
    const XMMRegister vec   = $tmp1$$XMMRegister;
    const KRegister   kmask = $ktmp$$KRegister;
    __ string_compare(s1, s2, len1, len2, res, vec, StrIntrinsicNode::UU, kmask);
  %}
  ins_pipe( pipe_slow );
%}
11715 
// StrComp intrinsic for the LU encoding on targets without AVX512VL+BW.
// Both string pointers and both counts are consumed and clobbered; the result
// is produced in EAX and tmp1 is an XMM scratch register.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    s1   = $str1$$Register;
    const Register    s2   = $str2$$Register;
    const Register    len1 = $cnt1$$Register;
    const Register    len2 = $cnt2$$Register;
    const Register    res  = $result$$Register;
    const XMMRegister vec  = $tmp1$$XMMRegister;
    // No opmask register on the non-AVX512 path.
    __ string_compare(s1, s2, len1, len2, res, vec, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11730 
// StrComp intrinsic for the LU encoding on targets with AVX512VL+BW.
// Identical to the non-EVEX form except that an opmask temporary (ktmp) is
// allocated and passed to string_compare.
instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    s1    = $str1$$Register;
    const Register    s2    = $str2$$Register;
    const Register    len1  = $cnt1$$Register;
    const Register    len2  = $cnt2$$Register;
    const Register    res   = $result$$Register;
    const XMMRegister vec   = $tmp1$$XMMRegister;
    const KRegister   kmask = $ktmp$$KRegister;
    __ string_compare(s1, s2, len1, len2, res, vec, StrIntrinsicNode::LU, kmask);
  %}
  ins_pipe( pipe_slow );
%}
11745 
// StrComp intrinsic for the UL encoding on targets without AVX512VL+BW.
// Compared with the other encodings the operand register classes are
// exchanged (str1 in ESI, str2 in EDI, cnt1 in EDX, cnt2 in ECX), and the
// second string and count are passed to string_compare first.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    s1   = $str1$$Register;
    const Register    s2   = $str2$$Register;
    const Register    len1 = $cnt1$$Register;
    const Register    len2 = $cnt2$$Register;
    const Register    res  = $result$$Register;
    const XMMRegister vec  = $tmp1$$XMMRegister;
    // Swapped argument order is intentional for UL: second string first.
    __ string_compare(s2, s1, len2, len1, res, vec, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11760 
// StrComp intrinsic for the UL encoding on targets with AVX512VL+BW.
// As in the non-EVEX UL form, the operand register classes are exchanged and
// the second string and count are passed to string_compare first; an opmask
// temporary (ktmp) is additionally supplied.
instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    const Register    s1    = $str1$$Register;
    const Register    s2    = $str2$$Register;
    const Register    len1  = $cnt1$$Register;
    const Register    len2  = $cnt2$$Register;
    const Register    res   = $result$$Register;
    const XMMRegister vec   = $tmp1$$XMMRegister;
    const KRegister   kmask = $ktmp$$KRegister;
    // Swapped argument order is intentional for UL: second string first.
    __ string_compare(s2, s1, len2, len1, res, vec, StrIntrinsicNode::UL, kmask);
  %}
  ins_pipe( pipe_slow );
%}
11775 
// Fast StrEquals intrinsic for targets without AVX512VL+BW.
// Both string pointers and the count are consumed and clobbered; EBX (tmp3)
// and the flags are killed, and two XMM registers serve as scratch.
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  predicate(!VM_Version::supports_avx512vlbw());
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    const Register    s1      = $str1$$Register;
    const Register    s2      = $str2$$Register;
    const Register    len     = $cnt$$Register;
    const Register    res     = $result$$Register;
    const Register    scratch = $tmp3$$Register;
    const XMMRegister vec_a   = $tmp1$$XMMRegister;
    const XMMRegister vec_b   = $tmp2$$XMMRegister;
    // The shared arrays_equals helper is used with its leading flag set to
    // false; no opmask register on the non-AVX512 path.
    __ arrays_equals(false, s1, s2, len, res, scratch,
                     vec_a, vec_b, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}
11792 
// Fast StrEquals intrinsic for targets with AVX512VL+BW.
// Identical to the non-EVEX form except that an opmask temporary (ktmp) is
// allocated and passed to arrays_equals.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  predicate(VM_Version::supports_avx512vlbw());
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    const Register    s1      = $str1$$Register;
    const Register    s2      = $str2$$Register;
    const Register    len     = $cnt$$Register;
    const Register    res     = $result$$Register;
    const Register    scratch = $tmp3$$Register;
    const XMMRegister vec_a   = $tmp1$$XMMRegister;
    const XMMRegister vec_b   = $tmp2$$XMMRegister;
    const KRegister   kmask   = $ktmp$$KRegister;
    // The shared arrays_equals helper is used with its leading flag set to false.
    __ arrays_equals(false, s1, s2, len, res, scratch,
                     vec_a, vec_b, false /* char */, kmask);
  %}

  ins_pipe( pipe_slow );
%}
11808 
11809 
// StrIndexOf with a compile-time constant substring length, LL encoding.
// Requires UseSSE42Intrinsics. EAX (cnt2) and ECX (tmp) are not inputs here
// but are killed by the generated code.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int         needle_len = (int)$int_cnt2$$constant;
    const Register    s1         = $str1$$Register;
    const Register    s2         = $str2$$Register;
    const Register    len1       = $cnt1$$Register;
    const Register    len2       = $cnt2$$Register;
    const Register    res        = $result$$Register;
    const Register    scratch    = $tmp$$Register;
    const XMMRegister vec        = $vec1$$XMMRegister;

    if (needle_len < 16) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(s1, s2, len1, len2, needle_len, res,
                        vec, scratch, StrIntrinsicNode::LL);
    } else {
      // Constant substrings with size >= 16 elements do not need to be
      // loaded through stack.
      __ string_indexofC8(s1, s2, len1, len2, needle_len, res,
                          vec, scratch, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}
11837 
// StrIndexOf with a compile-time constant substring length, UU encoding.
// Requires UseSSE42Intrinsics. EAX (cnt2) and ECX (tmp) are not inputs here
// but are killed by the generated code.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int         needle_len = (int)$int_cnt2$$constant;
    const Register    s1         = $str1$$Register;
    const Register    s2         = $str2$$Register;
    const Register    len1       = $cnt1$$Register;
    const Register    len2       = $cnt2$$Register;
    const Register    res        = $result$$Register;
    const Register    scratch    = $tmp$$Register;
    const XMMRegister vec        = $vec1$$XMMRegister;

    if (needle_len < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(s1, s2, len1, len2, needle_len, res,
                        vec, scratch, StrIntrinsicNode::UU);
    } else {
      // Constant substrings with size >= 8 elements do not need to be
      // loaded through stack.
      __ string_indexofC8(s1, s2, len1, len2, needle_len, res,
                          vec, scratch, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}
11865 
// StrIndexOf with a compile-time constant substring length, UL encoding.
// Requires UseSSE42Intrinsics. EAX (cnt2) and ECX (tmp) are not inputs here
// but are killed by the generated code.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    const int         needle_len = (int)$int_cnt2$$constant;
    const Register    s1         = $str1$$Register;
    const Register    s2         = $str2$$Register;
    const Register    len1       = $cnt1$$Register;
    const Register    len2       = $cnt2$$Register;
    const Register    res        = $result$$Register;
    const Register    scratch    = $tmp$$Register;
    const XMMRegister vec        = $vec1$$XMMRegister;

    if (needle_len < 8) {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof(s1, s2, len1, len2, needle_len, res,
                        vec, scratch, StrIntrinsicNode::UL);
    } else {
      // Constant substrings with size >= 8 elements do not need to be
      // loaded through stack.
      __ string_indexofC8(s1, s2, len1, len2, needle_len, res,
                          vec, scratch, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}
11893 
// StrIndexOf with a substring length only known at run time, LL encoding.
// Requires UseSSE42Intrinsics. All four inputs are consumed and clobbered;
// ECX (tmp) and the flags are killed.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    const Register    s1      = $str1$$Register;
    const Register    s2      = $str2$$Register;
    const Register    len1    = $cnt1$$Register;
    const Register    len2    = $cnt2$$Register;
    const Register    res     = $result$$Register;
    const Register    scratch = $tmp$$Register;
    const XMMRegister vec     = $vec1$$XMMRegister;
    // -1 is passed where the constant-length forms pass the immediate length.
    __ string_indexof(s1, s2, len1, len2, (-1), res,
                      vec, scratch, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
11909 
// StrIndexOf with a substring length only known at run time, UU encoding.
// Requires UseSSE42Intrinsics. All four inputs are consumed and clobbered;
// ECX (tmp) and the flags are killed.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    const Register    s1      = $str1$$Register;
    const Register    s2      = $str2$$Register;
    const Register    len1    = $cnt1$$Register;
    const Register    len2    = $cnt2$$Register;
    const Register    res     = $result$$Register;
    const Register    scratch = $tmp$$Register;
    const XMMRegister vec     = $vec1$$XMMRegister;
    // -1 is passed where the constant-length forms pass the immediate length.
    __ string_indexof(s1, s2, len1, len2, (-1), res,
                      vec, scratch, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
11925 
11926 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11927                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
        // Substring search (StrIndexOf) selected when UseSSE42Intrinsics is on and
        // the node's encoding is StrIntrinsicNode::UL (mixed encoding; the format
        // string still prints "char[]"). All four inputs are USE_KILL, tmp and
        // flags are KILL, vec1 is an XMM scratch.
11928   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11929   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11930   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11931 
11932   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11933   ins_encode %{
          // (-1) is passed in the constant-count slot; presumably "count not a
          // compile-time constant" -- confirm against the string_indexof helper.
11934     __ string_indexof($str1$$Register, $str2$$Register,
11935                       $cnt1$$Register, $cnt2$$Register,
11936                       (-1), $result$$Register,
11937                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11938   %}
11939   ins_pipe( pipe_slow );
11940 %}
11941 
11942 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11943                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
        // Single-character search (StrIndexOfChar) selected when UseSSE42Intrinsics
        // is on and the node's encoding is StrIntrinsicNode::U (the format string
        // labels it StringUTF16). str1, cnt1 and ch are consumed and destroyed;
        // three XMM scratch registers are reserved. Note tmp is declared TEMP here,
        // whereas the substring rules (string_indexof*) declare it KILL.
11944   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
11945   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11946   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11947   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11948   ins_encode %{
11949     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11950                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11951   %}
11952   ins_pipe( pipe_slow );
11953 %}
11954 
11955 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11956                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
        // Single-character search (StrIndexOfChar) selected when UseSSE42Intrinsics
        // is on and the node's encoding is StrIntrinsicNode::L (the format string
        // labels it StringLatin1). Same operands and effects as string_indexof_char,
        // but the encoding calls the stringL_indexof_char helper instead.
11957   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
11958   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11959   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11960   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11961   ins_encode %{
11962     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11963                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11964   %}
11965   ins_pipe( pipe_slow );
11966 %}
11967 
11968 
11969 // fast array equals
11970 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11971                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11972 %{
        // AryEq for the LL encoding (format string: byte[]) on CPUs where
        // VM_Version::supports_avx512vlbw() is false. Both array pointers are
        // consumed and destroyed; tmp3, tmp4 and the flags are clobbered; tmp1/tmp2
        // are XMM scratch registers. knoreg is passed for the mask-register argument
        // that the _evex sibling fills with a real kReg.
11973   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11974   match(Set result (AryEq ary1 ary2));
11975   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11976   //ins_cost(300);
11977 
11978   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11979   ins_encode %{
          // The trailing `false` is the char flag (see the inline annotation):
          // elements are treated as bytes, not chars.
11980     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11981                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11982                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11983   %}
11984   ins_pipe( pipe_slow );
11985 %}
11986 
11987 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11988                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11989 %{
        // AryEq for the LL encoding (format string: byte[]) on CPUs where
        // VM_Version::supports_avx512vlbw() is true. Identical to array_equalsB
        // except that a mask register (ktmp, TEMP) is reserved and handed to
        // arrays_equals in place of knoreg.
11990   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11991   match(Set result (AryEq ary1 ary2));
11992   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11993   //ins_cost(300);
11994 
11995   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11996   ins_encode %{
11997     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11998                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11999                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12000   %}
12001   ins_pipe( pipe_slow );
12002 %}
12003 
12004 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12005                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12006 %{
        // AryEq for the UU encoding (format string: char[]) on CPUs where
        // VM_Version::supports_avx512vlbw() is false. Same operands and effects as
        // array_equalsB; the only encoding difference is the char flag (true) passed
        // to arrays_equals. knoreg is passed for the mask-register argument.
12007   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12008   match(Set result (AryEq ary1 ary2));
12009   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12010   //ins_cost(300);
12011 
12012   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12013   ins_encode %{
12014     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12015                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12016                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12017   %}
12018   ins_pipe( pipe_slow );
12019 %}
12020 
12021 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12022                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12023 %{
        // AryEq for the UU encoding (format string: char[]) on CPUs where
        // VM_Version::supports_avx512vlbw() is true. Identical to array_equalsC
        // except that a mask register (ktmp, TEMP) is reserved and handed to
        // arrays_equals in place of knoreg.
12024   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12025   match(Set result (AryEq ary1 ary2));
12026   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12027   //ins_cost(300);
12028 
12029   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12030   ins_encode %{
12031     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12032                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12033                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12034   %}
12035   ins_pipe( pipe_slow );
12036 %}
12037 
12038 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12039                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12040 %{
        // CountPositives over a byte[] (per the format string) for CPUs lacking
        // either AVX-512 VL+BW or BMI2; the predicate is the exact complement of
        // count_positives_evex's. ary1 and len are consumed and destroyed, tmp3 and
        // the flags are clobbered, tmp1/tmp2 are XMM scratch. Both mask-register
        // arguments of the helper are passed as knoreg.
12041   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12042   match(Set result (CountPositives ary1 len));
12043   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12044 
12045   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12046   ins_encode %{
12047     __ count_positives($ary1$$Register, $len$$Register,
12048                        $result$$Register, $tmp3$$Register,
12049                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12050   %}
12051   ins_pipe( pipe_slow );
12052 %}
12053 
12054 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12055                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12056 %{
        // CountPositives over a byte[] (per the format string) for CPUs that
        // support both AVX-512 VL+BW and BMI2. Same as count_positives, but two
        // mask registers (ktmp1/ktmp2, TEMP) are reserved and passed to the helper
        // instead of knoreg.
12057   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12058   match(Set result (CountPositives ary1 len));
12059   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12060 
12061   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12062   ins_encode %{
12063     __ count_positives($ary1$$Register, $len$$Register,
12064                        $result$$Register, $tmp3$$Register,
12065                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12066   %}
12067   ins_pipe( pipe_slow );
12068 %}
12069 
12070 
12071 // fast char[] to byte[] compression
12072 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12073                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // StrCompressedCopy (char[] -> byte[] compression, per the comment above)
        // for CPUs lacking either AVX-512 VL+BW or BMI2. src, dst and len are
        // consumed and destroyed; tmp5 and the flags are clobbered; four XMM scratch
        // registers are reserved. Both mask-register arguments are knoreg.
        // NOTE(review): the format string lists the killed registers with 64-bit
        // names (RAX, RCX, RDX) and omits src/dst although they are USE_KILL.
12074   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12075   match(Set result (StrCompressedCopy src (Binary dst len)));
12076   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12077 
12078   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12079   ins_encode %{
12080     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12081                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12082                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12083                            knoreg, knoreg);
12084   %}
12085   ins_pipe( pipe_slow );
12086 %}
12087 
12088 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12089                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // StrCompressedCopy for CPUs that support both AVX-512 VL+BW and BMI2.
        // Same operands and effects as string_compress, plus two mask registers
        // (ktmp1/ktmp2, TEMP) that are passed to char_array_compress instead of
        // knoreg.
12090   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12091   match(Set result (StrCompressedCopy src (Binary dst len)));
12092   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12093 
12094   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12095   ins_encode %{
12096     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12097                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12098                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12099                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12100   %}
12101   ins_pipe( pipe_slow );
12102 %}
12103 
12104 // fast byte[] to char[] inflation
12105 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12106                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
        // StrInflatedCopy (byte[] -> char[] inflation, per the comment above) for
        // CPUs lacking either AVX-512 VL+BW or BMI2. The match result is bound to a
        // Universe operand named dummy -- presumably because the node produces no
        // register value; confirm. src, dst and len are consumed and destroyed;
        // tmp1 (XMM) and tmp2 are scratch; flags are clobbered. knoreg is passed
        // for the mask-register argument.
12107   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12108   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12109   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12110 
12111   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12112   ins_encode %{
12113     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12114                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12115   %}
12116   ins_pipe( pipe_slow );
12117 %}
12118 
12119 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12120                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
        // StrInflatedCopy for CPUs that support both AVX-512 VL+BW and BMI2.
        // Same as string_inflate, but a mask register (ktmp, TEMP) is reserved and
        // passed to byte_array_inflate instead of knoreg.
12121   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12122   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12123   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12124 
12125   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12126   ins_encode %{
12127     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12128                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12129   %}
12130   ins_pipe( pipe_slow );
12131 %}
12132 
12133 // encode char[] to byte[] in ISO_8859_1
12134 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12135                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12136                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // EncodeISOArray when the node is not flagged is_ascii() (ISO_8859_1
        // encoding, per the comment above). src, dst and len are consumed and
        // destroyed; tmp5 and the flags are clobbered; four XMM scratch registers
        // are reserved. The final `false` argument is the only difference from
        // encode_ascii_array's call to the same helper.
12137   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12138   match(Set result (EncodeISOArray src (Binary dst len)));
12139   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12140 
12141   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12142   ins_encode %{
12143     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12144                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12145                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12146   %}
12147   ins_pipe( pipe_slow );
12148 %}
12149 
12150 // encode char[] to byte[] in ASCII
12151 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12152                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12153                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // EncodeISOArray when the node is flagged is_ascii(). Shares the
        // encode_iso_array helper with the ISO rule and selects the ASCII behaviour
        // by passing `true` as the final argument. Operands and effects are the
        // same as encode_iso_array.
12154   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12155   match(Set result (EncodeISOArray src (Binary dst len)));
12156   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12157 
12158   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12159   ins_encode %{
12160     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12161                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12162                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12163   %}
12164   ins_pipe( pipe_slow );
12165 %}
12166 
12167 //----------Control Flow Instructions------------------------------------------
12168 // Signed compare Instructions
12169 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
        // Signed 32-bit register/register compare: matches CmpI and defines the
        // flags operand cr from op1 and op2. Emitted as opcode 0x3B with a
        // register-register operand byte (RegReg).
12170   match(Set cr (CmpI op1 op2));
12171   effect( DEF cr, USE op1, USE op2 );
12172   format %{ "CMP    $op1,$op2" %}
12173   opcode(0x3B);  /* Opcode 3B /r */
12174   ins_encode( OpcP, RegReg( op1, op2) );
12175   ins_pipe( ialu_cr_reg_reg );
12176 %}
12177 
12178 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
        // Signed 32-bit compare of a register against an integer constant.
        // Uses the 0x81 /7 opcode pair; the OpcSErm + Con8or32 encode classes
        // presumably pick the sign-extended 8-bit immediate form when the constant
        // fits -- confirm in the encode class definitions.
12179   match(Set cr (CmpI op1 op2));
12180   effect( DEF cr, USE op1 );
12181   format %{ "CMP    $op1,$op2" %}
12182   opcode(0x81,0x07);  /* Opcode 81 /7 */
12183   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12184   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12185   ins_pipe( ialu_cr_reg_imm );
12186 %}
12187 
12188 // Cisc-spilled version of compI_eReg
12189 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
        // Signed 32-bit compare of a register against an int loaded from memory;
        // the LoadI is folded into the CMP's memory operand. Costed at 500.
        // The encoding is bracketed by SetInstMark/ClearInstMark, as are the other
        // memory-operand rules in this section.
12190   match(Set cr (CmpI op1 (LoadI op2)));
12191 
12192   format %{ "CMP    $op1,$op2" %}
12193   ins_cost(500);
12194   opcode(0x3B);  /* Opcode 3B /r */
12195   ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12196   ins_pipe( ialu_cr_reg_mem );
12197 %}
12198 
12199 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
        // Compare of an int register against the constant-zero operand (immI_0),
        // emitted as TEST src,src (opcode 0x85) instead of a CMP with an immediate.
12200   match(Set cr (CmpI src zero));
12201   effect( DEF cr, USE src );
12202 
12203   format %{ "TEST   $src,$src" %}
12204   opcode(0x85);
12205   ins_encode( OpcP, RegReg( src, src ) );
12206   ins_pipe( ialu_cr_reg_imm );
12207 %}
12208 
12209 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
        // Folds "(src & con) compared with zero" into a single TEST src,con:
        // opcode 0xF7 with sub-opcode 0x00, followed by the constant emitted via
        // Con32. The AND result itself is not materialized.
12210   match(Set cr (CmpI (AndI src con) zero));
12211 
12212   format %{ "TEST   $src,$con" %}
12213   opcode(0xF7,0x00);
12214   ins_encode( OpcP, RegOpc(src), Con32(con) );
12215   ins_pipe( ialu_cr_reg_imm );
12216 %}
12217 
12218 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
        // Folds "(src & mem) compared with zero" into TEST src,mem (opcode 0x85
        // with a register/memory operand).
        // NOTE(review): the match rule names the memory operand directly inside
        // AndI, whereas compI_eReg_mem wraps its memory operand in LoadI -- confirm
        // this form is intentional and actually matches.
12219   match(Set cr (CmpI (AndI src mem) zero));
12220 
12221   format %{ "TEST   $src,$mem" %}
12222   opcode(0x85);
12223   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
12224   ins_pipe( ialu_cr_reg_mem );
12225 %}
12226 
12227 // Unsigned compare Instructions; really, same as signed except they
12228 // produce an eFlagsRegU instead of eFlagsReg.
12229 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
        // Unsigned 32-bit register/register compare: matches CmpU and produces an
        // eFlagsRegU. The emitted bytes are the same as compI_eReg (opcode 0x3B,
        // RegReg); only the flags operand class differs.
12230   match(Set cr (CmpU op1 op2));
12231 
12232   format %{ "CMPu   $op1,$op2" %}
12233   opcode(0x3B);  /* Opcode 3B /r */
12234   ins_encode( OpcP, RegReg( op1, op2) );
12235   ins_pipe( ialu_cr_reg_reg );
12236 %}
12237 
12238 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
        // Unsigned 32-bit compare of a register against an integer constant,
        // producing an eFlagsRegU. Same 0x81 /7 encoding (OpcSErm + Con8or32) as
        // compI_eReg_imm.
12239   match(Set cr (CmpU op1 op2));
12240 
12241   format %{ "CMPu   $op1,$op2" %}
12242   opcode(0x81,0x07);  /* Opcode 81 /7 */
12243   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12244   ins_pipe( ialu_cr_reg_imm );
12245 %}
12246 
12247 // Cisc-spilled version of compU_eReg
12248 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
        // Unsigned 32-bit compare of a register against an int loaded from memory
        // (LoadI folded into the CMP's memory operand), producing an eFlagsRegU.
        // Costed at 500; same 0x3B RegMem encoding as compI_eReg_mem.
12249   match(Set cr (CmpU op1 (LoadI op2)));
12250 
12251   format %{ "CMPu   $op1,$op2" %}
12252   ins_cost(500);
12253   opcode(0x3B);  /* Opcode 3B /r */
12254   ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12255   ins_pipe( ialu_cr_reg_mem );
12256 %}
12257 
12258 // // Cisc-spilled version of cmpU_eReg
12259 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12260 //  match(Set cr (CmpU (LoadI op1) op2));
12261 //
12262 //  format %{ "CMPu   $op1,$op2" %}
12263 //  ins_cost(500);
12264 //  opcode(0x39);  /* Opcode 39 /r */
12265 //  ins_encode( OpcP, RegMem( op1, op2) );
12266 //%}
12267 
12268 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
        // Unsigned compare of an int register against the constant-zero operand,
        // emitted as TEST src,src (opcode 0x85) and producing an eFlagsRegU.
12269   match(Set cr (CmpU src zero));
12270 
12271   format %{ "TESTu  $src,$src" %}
12272   opcode(0x85);
12273   ins_encode( OpcP, RegReg( src, src ) );
12274   ins_pipe( ialu_cr_reg_imm );
12275 %}
12276 
12277 // Unsigned pointer compare Instructions
12278 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
        // Pointer register/register compare: matches CmpP and produces unsigned
        // flags (eFlagsRegU). Encoded with opcode 0x3B and a RegReg operand byte,
        // like the integer compares.
12279   match(Set cr (CmpP op1 op2));
12280 
12281   format %{ "CMPu   $op1,$op2" %}
12282   opcode(0x3B);  /* Opcode 3B /r */
12283   ins_encode( OpcP, RegReg( op1, op2) );
12284   ins_pipe( ialu_cr_reg_reg );
12285 %}
12286 
12287 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
        // Pointer compare of a register against a pointer constant (immP),
        // producing unsigned flags. Same 0x81 /7 encoding as the integer immediate
        // compares, but bracketed by SetInstMark/ClearInstMark -- presumably
        // because a pointer constant may carry relocation info; confirm.
12288   match(Set cr (CmpP op1 op2));
12289 
12290   format %{ "CMPu   $op1,$op2" %}
12291   opcode(0x81,0x07);  /* Opcode 81 /7 */
12292   ins_encode( SetInstMark, OpcSErm( op1, op2 ), Con8or32( op2 ), ClearInstMark );
12293   ins_pipe( ialu_cr_reg_imm );
12294 %}
12295 
12296 // Cisc-spilled version of compP_eReg
12297 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
        // Pointer compare of a register against a pointer loaded from memory
        // (LoadP folded into the CMP's memory operand), producing unsigned flags.
        // Costed at 500. compP_mem_eReg below matches the same tree under a
        // predicate and without this explicit cost.
12298   match(Set cr (CmpP op1 (LoadP op2)));
12299 
12300   format %{ "CMPu   $op1,$op2" %}
12301   ins_cost(500);
12302   opcode(0x3B);  /* Opcode 3B /r */
12303   ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12304   ins_pipe( ialu_cr_reg_mem );
12305 %}
12306 
12307 // // Cisc-spilled version of cmpP_eReg
12308 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12309 //  match(Set cr (CmpP (LoadP op1) op2));
12310 //
12311 //  format %{ "CMPu   $op1,$op2" %}
12312 //  ins_cost(500);
12313 //  opcode(0x39);  /* Opcode 39 /r */
12314 //  ins_encode( OpcP, RegMem( op1, op2) );
12315 //%}
12316 
12317 // Compare raw pointer (used in out-of-heap check).
12318 // Only works because non-oop pointers must be raw pointers
12319 // and raw pointers have no anti-dependencies.
12320 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
        // Same match tree and encoding as compP_eReg_mem, but restricted by the
        // predicate and carrying no explicit ins_cost. The predicate walks
        // n->in(2)->in(2) -- presumably the LoadP and then its address input;
        // confirm the input indices -- and requires that its bottom type needs no
        // relocation (relocInfo::none).
12321   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12322   match(Set cr (CmpP op1 (LoadP op2)));
12323 
12324   format %{ "CMPu   $op1,$op2" %}
12325   opcode(0x3B);  /* Opcode 3B /r */
12326   ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12327   ins_pipe( ialu_cr_reg_mem );
12328 %}
12329 
12330 //
12331 // This will generate a signed flags result. This should be ok
12332 // since any compare to a zero should be eq/neq.
12333 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
        // Null check of a pointer register: CmpP against the null-pointer constant
        // operand (immP0) is emitted as TEST src,src (opcode 0x85). The result is
        // typed as signed flags (eFlagsReg), unlike the other CmpP rules.
12334   match(Set cr (CmpP src zero));
12335 
12336   format %{ "TEST   $src,$src" %}
12337   opcode(0x85);
12338   ins_encode( OpcP, RegReg( src, src ) );
12339   ins_pipe( ialu_cr_reg_imm );
12340 %}
12341 
12342 // Cisc-spilled version of testP_reg
12343 // This will generate a signed flags result. This should be ok
12344 // since any compare to a zero should be eq/neq.
12345 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
        // Null check of a pointer held in memory: the LoadP is folded into a
        // TEST [op],0xFFFFFFFF (opcode 0xF7 with sub-opcode 0x00 and a 32-bit
        // all-ones mask), so the flags reflect whether the stored pointer is zero.
        // Costed at 500.
        // NOTE(review): `zero` is declared immI_0 although it is matched as the
        // second input of CmpP; testP_reg uses immP0 for the same role -- confirm
        // which operand class is intended.
12346   match(Set cr (CmpP (LoadP op) zero));
12347 
12348   format %{ "TEST   $op,0xFFFFFFFF" %}
12349   ins_cost(500);
12350   opcode(0xF7);               /* Opcode F7 /0 */
12351   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark );
12352   ins_pipe( ialu_cr_reg_imm );
12353 %}
12354 
12355 // Yanked all unsigned pointer compare operations.
12356 // Pointer compares are done with CmpP which is already unsigned.
12357 
12358 //----------Max and Min--------------------------------------------------------
12359 // Min Instructions
12360 ////
12361 //   *** Min and Max using the conditional move are slower than the
12362 //   *** branch version on a Pentium III.
12363 // // Conditional move for min
12364 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12365 //  effect( USE_DEF op2, USE op1, USE cr );
12366 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12367 //  opcode(0x4C,0x0F);
12368 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12369 //  ins_pipe( pipe_cmov_reg );
12370 //%}
12371 //
12372 //// Min Register with Register (P6 version)
12373 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12374 //  predicate(VM_Version::supports_cmov() );
12375 //  match(Set op2 (MinI op1 op2));
12376 //  ins_cost(200);
12377 //  expand %{
12378 //    eFlagsReg cr;
12379 //    compI_eReg(cr,op1,op2);
12380 //    cmovI_reg_lt(op2,op1,cr);
12381 //  %}
12382 //%}
12383 
12384 // Min Register with Register (generic version)
12385 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
        // Integer minimum: dst = MinI(dst, src), computed in place in dst with the
        // flags clobbered. The instruction sequence comes from the min_enc encode
        // class, which is defined outside this section; per the commented-out
        // notes above it is the branch-based (non-CMOV) form.
        // NOTE(review): opcode(0xCC) looks like a placeholder (0xCC is the INT3
        // byte); presumably min_enc does not emit it -- confirm.
12386   match(Set dst (MinI dst src));
12387   effect(KILL flags);
12388   ins_cost(300);
12389 
12390   format %{ "MIN    $dst,$src" %}
12391   opcode(0xCC);
12392   ins_encode( min_enc(dst,src) );
12393   ins_pipe( pipe_slow );
12394 %}
12395 
12396 // Max Register with Register
12397 //   *** Min and Max using the conditional move are slower than the
12398 //   *** branch version on a Pentium III.
12399 // // Conditional move for max
12400 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12401 //  effect( USE_DEF op2, USE op1, USE cr );
12402 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12403 //  opcode(0x4F,0x0F);
12404 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12405 //  ins_pipe( pipe_cmov_reg );
12406 //%}
12407 //
12408 // // Max Register with Register (P6 version)
12409 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12410 //  predicate(VM_Version::supports_cmov() );
12411 //  match(Set op2 (MaxI op1 op2));
12412 //  ins_cost(200);
12413 //  expand %{
12414 //    eFlagsReg cr;
12415 //    compI_eReg(cr,op1,op2);
12416 //    cmovI_reg_gt(op2,op1,cr);
12417 //  %}
12418 //%}
12419 
12420 // Max Register with Register (generic version)
12421 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
        // Integer maximum: dst = MaxI(dst, src), computed in place in dst with the
        // flags clobbered. The instruction sequence comes from the max_enc encode
        // class, which is defined outside this section.
        // NOTE(review): opcode(0xCC) looks like a placeholder (0xCC is the INT3
        // byte); presumably max_enc does not emit it -- confirm.
12422   match(Set dst (MaxI dst src));
12423   effect(KILL flags);
12424   ins_cost(300);
12425 
12426   format %{ "MAX    $dst,$src" %}
12427   opcode(0xCC);
12428   ins_encode( max_enc(dst,src) );
12429   ins_pipe( pipe_slow );
12430 %}
12431 
12432 // ============================================================================
12433 // Counted Loop limit node which represents exact final iterator value.
12434 // Note: the resulting value should fit into integer range since
12435 // counted loops have limit check on overflow.
12436 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
        // Computes the exact final iterator value of a counted loop, as spelled out
        // in the format string:
        //   limit = init + stride * ((limit - init + stride - 1) / stride)
        // limit is both input and result (eAXRegI); limit_hi (eDXRegI) holds the
        // upper half of the 64-bit intermediate; init and tmp come from the
        // nadxRegI class so they do not collide with that register pair. stride is
        // a compile-time constant and must not be +1 or -1 (asserted below).
        // The subtraction and rounding are done in 64 bits so the intermediate
        // cannot overflow; only the low 32 bits of the final sum are kept.
12437   match(Set limit (LoopLimit (Binary init limit) stride));
12438   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12439   ins_cost(300);
12440 
12441   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12442   ins_encode %{
12443     int strd = (int)$stride$$constant;
12444     assert(strd != 1 && strd != -1, "sanity");
12446     // Convert limit to long (EAX:EDX)
12447     __ cdql();
12448     // Convert init to long (init:tmp)
12449     __ movl($tmp$$Register, $init$$Register);
12450     __ sarl($tmp$$Register, 31);
12451     // $limit - $init
12452     __ subl($limit$$Register, $init$$Register);
12453     __ sbbl($limit_hi$$Register, $tmp$$Register);
12454     // + ($stride - 1)
12455     if (strd > 0) {
12456       __ addl($limit$$Register, (strd - 1));
12457       __ adcl($limit_hi$$Register, 0);
12458       __ movl($tmp$$Register, strd);
12459     } else {
            // Negative stride: round towards the stride's direction by adding
            // (stride + 1) with a sign-extended high word of -1, then negate the
            // 64-bit value so the division below works on a positive divisor.
12460       __ addl($limit$$Register, (strd + 1));
12461       __ adcl($limit_hi$$Register, -1);
12462       __ lneg($limit_hi$$Register, $limit$$Register);
12463       __ movl($tmp$$Register, -strd);
12464     }
12465     // signed division: (EAX:EDX) / pos_stride
12466     __ idivl($tmp$$Register);
12467     if (strd < 0) {
12468       // restore sign
12469       __ negl($tmp$$Register);
12470     }
12471     // (EAX) * stride
12472     __ mull($tmp$$Register);
12473     // + init (ignore upper bits)
12474     __ addl($limit$$Register, $init$$Register);
12475   %}
12476   ins_pipe( pipe_slow );
12477 %}
12478 
12479 // ============================================================================
12480 // Branch Instructions
12481 // Jump Table
12482 instruct jumpXtnd(rRegI switch_val) %{
        // Jump-table dispatch for a Jump node: performs an indirect jump through
        // the table located at $constantaddress, indexed by switch_val with scale
        // times_1 (so switch_val is used as a byte offset into the table).
12483   match(Jump switch_val);
12484   ins_cost(350);
12485   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12486   ins_encode %{
12487     // Jump to Address(table_base + switch_reg)
12488     Address index(noreg, $switch_val$$Register, Address::times_1);
12489     __ jump(ArrayAddress($constantaddress, index), noreg);
12490   %}
12491   ins_pipe(pipe_jmp);
12492 %}
12493 
12494 // Jump Direct - Label defines a relative address from JMP+1
12495 instruct jmpDir(label labl) %{
        // Unconditional branch for a Goto node. The encoding always requests the
        // long form (second argument false) and the rule declares a fixed size of
        // 5 bytes; jmpDir_short is the 2-byte replacement.
12496   match(Goto);
12497   effect(USE labl);
12498 
12499   ins_cost(300);
12500   format %{ "JMP    $labl" %}
12501   size(5);
12502   ins_encode %{
12503     Label* L = $labl$$label;
12504     __ jmp(*L, false); // Always long jump
12505   %}
12506   ins_pipe( pipe_jmp );
12507 %}
12508 
12509 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12510 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
        // Conditional branch for an If node on signed flags (eFlagsReg). The
        // condition code comes from the cmpOp operand; the long Jcc form is always
        // emitted, with a declared fixed size of 6 bytes.
12511   match(If cop cr);
12512   effect(USE labl);
12513 
12514   ins_cost(300);
12515   format %{ "J$cop    $labl" %}
12516   size(6);
12517   ins_encode %{
12518     Label* L = $labl$$label;
12519     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12520   %}
12521   ins_pipe( pipe_jcc );
12522 %}
12523 
12524 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12525 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
        // Conditional branch that closes a counted loop (CountedLoopEnd) on signed
        // flags. Encoding is identical to jmpCon: long Jcc, declared size 6 bytes.
12526   match(CountedLoopEnd cop cr);
12527   effect(USE labl);
12528 
12529   ins_cost(300);
12530   format %{ "J$cop    $labl\t# Loop end" %}
12531   size(6);
12532   ins_encode %{
12533     Label* L = $labl$$label;
12534     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12535   %}
12536   ins_pipe( pipe_jcc );
12537 %}
12538 
12539 // Jump Direct Conditional - using unsigned comparison
12540 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
        // Conditional branch for an If node on unsigned flags (eFlagsRegU) with an
        // unsigned condition operand (cmpOpU). Long Jcc, declared size 6 bytes.
12541   match(If cop cmp);
12542   effect(USE labl);
12543 
12544   ins_cost(300);
12545   format %{ "J$cop,u  $labl" %}
12546   size(6);
12547   ins_encode %{
12548     Label* L = $labl$$label;
12549     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12550   %}
12551   ins_pipe(pipe_jcc);
12552 %}
12553 
12554 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // Conditional branch for an If node whose flags are of class eFlagsRegUCF
        // and whose condition is a cmpOpUCF -- presumably the flags produced by
        // unordered floating-point compares; confirm in the operand definitions.
        // A single long Jcc (size 6) at cost 200, cheaper than jmpConU's 300.
12555   match(If cop cmp);
12556   effect(USE labl);
12557 
12558   ins_cost(200);
12559   format %{ "J$cop,u  $labl" %}
12560   size(6);
12561   ins_encode %{
12562     Label* L = $labl$$label;
12563     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12564   %}
12565   ins_pipe(pipe_jcc);
12566 %}
12567 
12568 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
        // Conditional branch for eq/ne conditions (cmpOpUCF2) on eFlagsRegUCF that
        // must also consult the parity flag, so two jumps are emitted:
        //   notEqual: branch to labl if parity is set OR the result is not-equal;
        //   equal:    skip over the branch if parity is set, otherwise branch to
        //             labl on equal.
        // Any other condition code is a bug (ShouldNotReachHere). Unlike the
        // single-jump rules, no size() is declared here.
12569   match(If cop cmp);
12570   effect(USE labl);
12571 
12572   ins_cost(200);
12573   format %{ $$template
12574     if ($cop$$cmpcode == Assembler::notEqual) {
12575       $$emit$$"JP,u   $labl\n\t"
12576       $$emit$$"J$cop,u   $labl"
12577     } else {
12578       $$emit$$"JP,u   done\n\t"
12579       $$emit$$"J$cop,u   $labl\n\t"
12580       $$emit$$"done:"
12581     }
12582   %}
12583   ins_encode %{
12584     Label* l = $labl$$label;
12585     if ($cop$$cmpcode == Assembler::notEqual) {
12586       __ jcc(Assembler::parity, *l, false);
12587       __ jcc(Assembler::notEqual, *l, false);
12588     } else if ($cop$$cmpcode == Assembler::equal) {
            // `done` is a local label bound right after the second jump, so the
            // parity skip uses the short (jccb) form.
12589       Label done;
12590       __ jccb(Assembler::parity, done);
12591       __ jcc(Assembler::equal, *l, false);
12592       __ bind(done);
12593     } else {
12594        ShouldNotReachHere();
12595     }
12596   %}
12597   ins_pipe(pipe_jcc);
12598 %}
12599 
12600 // ============================================================================
12601 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12602 // array for an instance of the superklass.  Set a hidden internal cache on a
12603 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12604 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12605 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
        // Slow-path subtype check producing a pointer-sized result: per the format
        // string, it scans sub's secondary-supers array for super with REPNE SCASD,
        // updates the secondary-super cache on a hit, and leaves result zero on a
        // hit / non-zero on a miss. rcx (the scan counter) and the flags are
        // clobbered. The code itself comes from enc_PartialSubtypeCheck, which is
        // defined outside this section; opcode(0x1) selects the variant that
        // zeroes the result register on a hit.
12606   match(Set result (PartialSubtypeCheck sub super));
12607   effect( KILL rcx, KILL cr );
12608 
12609   ins_cost(1100);  // slightly larger than the next version
12610   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12611             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12612             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12613             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12614             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12615             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12616             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12617      "miss:\t" %}
12618 
12619   opcode(0x1); // Force a XOR of EDI
12620   ins_encode( enc_PartialSubtypeCheck() );
12621   ins_pipe( pipe_slow );
12622 %}
12623 
12624 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
        // Fused form of "PartialSubtypeCheck compared against null": the rule
        // defines only the flags (Z on a hit, NZ on a miss, per the format string),
        // so the result register is just scratch and is declared KILL along with
        // rcx. Shares enc_PartialSubtypeCheck with partialSubtypeCheck; opcode(0x0)
        // selects the variant that skips zeroing the result register. Costed at
        // 1000, below partialSubtypeCheck's 1100.
12625   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12626   effect( KILL rcx, KILL result );
12627 
12628   ins_cost(1000);
12629   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12630             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12631             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12632             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12633             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12634             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12635      "miss:\t" %}
12636 
12637   opcode(0x0);  // No need to XOR EDI
12638   ins_encode( enc_PartialSubtypeCheck() );
12639   ins_pipe( pipe_slow );
12640 %}
12641 
12642 // ============================================================================
12643 // Branch Instructions -- short offset versions
12644 //
12645 // These instructions are used to replace jumps of a long offset (the default
12646 // match) with jumps of a shorter offset.  These instructions are all tagged
12647 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12648 // match rules in general matching.  Instead, the ADLC generates a conversion
12649 // method in the MachNode which can be used to do in-place replacement of the
12650 // long variant with the shorter variant.  The compiler will determine if a
12651 // branch can be taken by the is_short_branch_offset() predicate in the machine
12652 // specific code section of the file.
12653 
12654 // Jump Direct - Label defines a relative address from JMP+1
12655 instruct jmpDir_short(label labl) %{
12656   match(Goto);
12657   effect(USE labl);
12658 
12659   ins_cost(300);
12660   format %{ "JMP,s  $labl" %}
12661   size(2);
12662   ins_encode %{
12663     Label* L = $labl$$label;
12664     __ jmpb(*L);
12665   %}
12666   ins_pipe( pipe_jmp );
12667   ins_short_branch(1);
12668 %}
12669 
12670 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12671 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12672   match(If cop cr);
12673   effect(USE labl);
12674 
12675   ins_cost(300);
12676   format %{ "J$cop,s  $labl" %}
12677   size(2);
12678   ins_encode %{
12679     Label* L = $labl$$label;
12680     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12681   %}
12682   ins_pipe( pipe_jcc );
12683   ins_short_branch(1);
12684 %}
12685 
12686 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12687 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12688   match(CountedLoopEnd cop cr);
12689   effect(USE labl);
12690 
12691   ins_cost(300);
12692   format %{ "J$cop,s  $labl\t# Loop end" %}
12693   size(2);
12694   ins_encode %{
12695     Label* L = $labl$$label;
12696     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12697   %}
12698   ins_pipe( pipe_jcc );
12699   ins_short_branch(1);
12700 %}
12701 
12702 // Jump Direct Conditional - using unsigned comparison
12703 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12704   match(If cop cmp);
12705   effect(USE labl);
12706 
12707   ins_cost(300);
12708   format %{ "J$cop,us $labl" %}
12709   size(2);
12710   ins_encode %{
12711     Label* L = $labl$$label;
12712     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12713   %}
12714   ins_pipe( pipe_jcc );
12715   ins_short_branch(1);
12716 %}
12717 
12718 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12719   match(If cop cmp);
12720   effect(USE labl);
12721 
12722   ins_cost(300);
12723   format %{ "J$cop,us $labl" %}
12724   size(2);
12725   ins_encode %{
12726     Label* L = $labl$$label;
12727     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12728   %}
12729   ins_pipe( pipe_jcc );
12730   ins_short_branch(1);
12731 %}
12732 
12733 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12734   match(If cop cmp);
12735   effect(USE labl);
12736 
12737   ins_cost(300);
12738   format %{ $$template
12739     if ($cop$$cmpcode == Assembler::notEqual) {
12740       $$emit$$"JP,u,s   $labl\n\t"
12741       $$emit$$"J$cop,u,s   $labl"
12742     } else {
12743       $$emit$$"JP,u,s   done\n\t"
12744       $$emit$$"J$cop,u,s  $labl\n\t"
12745       $$emit$$"done:"
12746     }
12747   %}
12748   size(4);
12749   ins_encode %{
12750     Label* l = $labl$$label;
12751     if ($cop$$cmpcode == Assembler::notEqual) {
12752       __ jccb(Assembler::parity, *l);
12753       __ jccb(Assembler::notEqual, *l);
12754     } else if ($cop$$cmpcode == Assembler::equal) {
12755       Label done;
12756       __ jccb(Assembler::parity, done);
12757       __ jccb(Assembler::equal, *l);
12758       __ bind(done);
12759     } else {
12760        ShouldNotReachHere();
12761     }
12762   %}
12763   ins_pipe(pipe_jcc);
12764   ins_short_branch(1);
12765 %}
12766 
12767 // ============================================================================
12768 // Long Compare
12769 //
12770 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12771 // is tricky.  The flavor of compare used depends on whether we are testing
12772 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12773 // The GE test is the negated LT test.  The LE test can be had by commuting
12774 // the operands (yielding a GE test) and then negating; negate again for the
12775 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12776 // NE test is negated from that.
12777 
12778 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12779 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12780 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12781 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12782 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12783 // foo match ends up with the wrong leaf.  One fix is to not match both
12784 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12785 // both forms beat the trinary form of long-compare and both are very useful
12786 // on Intel which has so few registers.
12787 
12788 // Manifest a CmpL result in an integer register.  Very painful.
12789 // This is the test to avoid.
12790 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12791   match(Set dst (CmpL3 src1 src2));
12792   effect( KILL flags );
12793   ins_cost(1000);
12794   format %{ "XOR    $dst,$dst\n\t"
12795             "CMP    $src1.hi,$src2.hi\n\t"
12796             "JLT,s  m_one\n\t"
12797             "JGT,s  p_one\n\t"
12798             "CMP    $src1.lo,$src2.lo\n\t"
12799             "JB,s   m_one\n\t"
12800             "JEQ,s  done\n"
12801     "p_one:\tINC    $dst\n\t"
12802             "JMP,s  done\n"
12803     "m_one:\tDEC    $dst\n"
12804      "done:" %}
12805   ins_encode %{
12806     Label p_one, m_one, done;
12807     __ xorptr($dst$$Register, $dst$$Register);
12808     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12809     __ jccb(Assembler::less,    m_one);
12810     __ jccb(Assembler::greater, p_one);
12811     __ cmpl($src1$$Register, $src2$$Register);
12812     __ jccb(Assembler::below,   m_one);
12813     __ jccb(Assembler::equal,   done);
12814     __ bind(p_one);
12815     __ incrementl($dst$$Register);
12816     __ jmpb(done);
12817     __ bind(m_one);
12818     __ decrementl($dst$$Register);
12819     __ bind(done);
12820   %}
12821   ins_pipe( pipe_slow );
12822 %}
12823 
12824 //======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Zero form: only the high word is examined, by TESTing it against itself
// (opcode 0x85 encoded with RegReg_Hi2(src, src)).  The result lands in the
// flagsReg_long_LTGE operand class.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST   $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}
12836 
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Register-register form.  Per the format text: CMP the low halves, copy
// src1.hi into the TEMP register tmp, then SBB tmp with src2.hi so the final
// flags reflect the full 64-bit subtraction.  Encoded by
// long_cmp_flags2(src1, src2, tmp).
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV    $tmp,$src1.hi\n\t"
            "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
12850 
// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
// The predicate accepts only If nodes whose Bool test is BoolTest::lt or
// BoolTest::ge; the flags must come from the flagsReg_long_LTGE class.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}
12861 
12862 //======
// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Zero form of the unsigned compare: same encoding as the signed zero form
// (TEST of the high word against itself, opcode 0x85 with RegReg_Hi2), but
// the result is placed in the flagsReg_ulong_LTGE operand class.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST   $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}
12874 
// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Register-register form of the unsigned compare.  Uses the same
// CMP lo / MOV tmp,hi / SBB tmp,hi sequence (long_cmp_flags2) as the signed
// variant; tmp is a TEMP.  The result goes to flagsReg_ulong_LTGE.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV    $tmp,$src1.hi\n\t"
            "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
12888 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
// Takes the unsigned condition operand (cmpOpU) and flagsReg_ulong_LTGE
// flags; the predicate accepts only BoolTest::lt or BoolTest::ge.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}
12899 
// Compare 2 longs and CMOVE longs.
// Register source, LT/GE flavor.  Requires VM_Version::supports_cmov() and a
// Bool test of lt or ge (read from the node at _kids[0]->_kids[0], i.e. the
// cmp slot of the first Binary).  dst is both an input and the result.
// The 64-bit value is moved as two halves: enc_cmov(cmp) + RegReg_Lo2 for
// the low words, then enc_cmov(cmp) + RegReg_Hi2 for the high words.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
12911 
// Compare 2 longs and CMOVE a long loaded from memory (LT/GE flavor).
// Matches a CMoveL whose second value is (LoadL src), so the load is folded
// into the conditional moves.  Requires CMOV support and a Bool test of lt
// or ge.  Two CMOVs are encoded (RegMem for one half, RegMem_Hi for the
// other), bracketed by SetInstMark / ClearInstMark.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}
12922 
// Compare 2 unsigned longs and CMOVE longs (register source, LT/GE flavor).
// Same match shape and predicate as cmovLL_reg_LTGE, but with the unsigned
// operands cmpOpU / flagsReg_ulong_LTGE; it simply expands to
// cmovLL_reg_LTGE.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}
12931 
// Compare 2 unsigned longs and CMOVE a long loaded from memory (LT/GE
// flavor).  Unsigned-operand wrapper (cmpOpU / flagsReg_ulong_LTGE) that
// expands to cmovLL_mem_LTGE.
instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}
12940 
// Compare 2 longs and CMOVE ints.
// Register source, LT/GE flavor: the flags come from a long compare
// (flagsReg_long_LTGE) while the moved value is a 32-bit int, so a single
// CMOV (enc_cmov + RegReg) suffices.  Requires CMOV support and a Bool test
// of lt or ge.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
12951 
// Compare 2 longs and CMOVE an int loaded from memory (LT/GE flavor).
// Folds (LoadI src) into the CMOV; encoded as enc_cmov + RegMem bracketed by
// SetInstMark / ClearInstMark.  Requires CMOV support and a Bool test of lt
// or ge.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}
12961 
// Compare 2 unsigned longs and CMOVE ints (register source, LT/GE flavor).
// Unsigned-operand wrapper (cmpOpU / flagsReg_ulong_LTGE) that expands to
// cmovII_reg_LTGE.
instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}
12970 
// Compare 2 unsigned longs and CMOVE an int loaded from memory (LT/GE
// flavor).  Unsigned-operand wrapper (cmpOpU / flagsReg_ulong_LTGE) that
// expands to cmovII_mem_LTGE.
instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}
12979 
// Compare 2 longs and CMOVE ptrs.
// LT/GE flavor: flags from a long compare (flagsReg_long_LTGE), value moved
// between pointer registers with a single CMOV (enc_cmov + RegReg).
// Requires CMOV support and a Bool test of lt or ge.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
12990 
// Compare 2 unsigned longs and CMOVE ptrs.
// LT/GE flavor.  Unsigned-operand wrapper (cmpOpU / flagsReg_ulong_LTGE)
// that expands to cmovPP_reg_LTGE.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}
13000 
// Compare 2 longs and CMOVE doubles
// LT/GE flavor for regDPR operands, selected when UseSSE<=1.  The Bool test
// must be lt or ge.  Expands to fcmovDPR_regS.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13010 
// Compare 2 longs and CMOVE doubles
// LT/GE flavor for regD operands, selected when UseSSE>=2.  The Bool test
// must be lt or ge.  Expands to fcmovD_regS.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13020 
// Compare 2 longs and CMOVE floats
// LT/GE flavor for regFPR operands, selected when UseSSE==0.  The Bool test
// must be lt or ge.  Expands to fcmovFPR_regS.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13029 
// Compare 2 longs and CMOVE floats
// LT/GE flavor for regF operands, selected when UseSSE>=1.  The Bool test
// must be lt or ge.  Expands to fcmovF_regS.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13038 
13039 //======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Zero form.  Per the format text: copy the low word into the TEMP register
// tmp and OR in the high word, so the flags from the OR tell whether the
// whole long is zero.  Encoded by long_cmp_flags0(src, tmp).
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV    $tmp,$src.lo\n\t"
            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}
13050 
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Register-register form.  Per the format text: CMP the low halves and, only
// if they are equal, CMP the high halves; a low-half mismatch jumps straight
// to the end with the flags already NE.  No temporary is needed.  Encoded by
// long_cmp_flags1(src1, src2).
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s  skip\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
     "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13062 
// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
// The predicate accepts only If nodes whose Bool test is BoolTest::eq or
// BoolTest::ne; the flags must come from the flagsReg_long_EQNE class.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}
13073 
13074 //======
// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
// Zero form of the unsigned compare.  Same MOV tmp,lo / OR tmp,hi sequence
// (long_cmp_flags0) as the signed variant; tmp is a TEMP.  The result goes
// to flagsReg_ulong_EQNE.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV    $tmp,$src.lo\n\t"
            "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
13085 
// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
// Register-register form of the unsigned compare.  Same CMP lo / JNE skip /
// CMP hi sequence (long_cmp_flags1) as the signed variant.  The result goes
// to flagsReg_ulong_EQNE.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s  skip\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
     "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}
13097 
// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
// Takes the unsigned condition operand (cmpOpU) and flagsReg_ulong_EQNE
// flags; the predicate accepts only BoolTest::eq or BoolTest::ne.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}
13108 
// Compare 2 longs and CMOVE longs.
// Register source, EQ/NE flavor.  Requires VM_Version::supports_cmov() and a
// Bool test of eq or ne.  dst is both an input and the result.  The 64-bit
// value is moved as two halves: enc_cmov(cmp) + RegReg_Lo2, then
// enc_cmov(cmp) + RegReg_Hi2.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13120 
// Compare 2 longs and CMOVE a long loaded from memory (EQ/NE flavor).
// Folds (LoadL src) into the two CMOVs (RegMem for one half, RegMem_Hi for
// the other), bracketed by SetInstMark / ClearInstMark.  Requires CMOV
// support and a Bool test of eq or ne.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}
13131 
// Compare 2 longs and CMOVE ints.
// Register source, EQ/NE flavor: flags from a long compare
// (flagsReg_long_EQNE), int value moved with a single CMOV (enc_cmov +
// RegReg).  Requires CMOV support and a Bool test of eq or ne.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13142 
// Compare 2 longs and CMOVE an int loaded from memory (EQ/NE flavor).
// Folds (LoadI src) into the CMOV; encoded as enc_cmov + RegMem bracketed by
// SetInstMark / ClearInstMark.  Requires CMOV support and a Bool test of eq
// or ne.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}
13152 
// Compare 2 unsigned longs and CMOVE ints (register source, EQ/NE flavor).
// Unsigned-operand wrapper (cmpOpU / flagsReg_ulong_EQNE) that expands to
// cmovII_reg_EQNE.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}
13161 
// Compare 2 unsigned longs and CMOVE an int loaded from memory (EQ/NE
// flavor).  Unsigned-operand wrapper (cmpOpU / flagsReg_ulong_EQNE) that
// expands to cmovII_mem_EQNE.
instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}
13170 
// Compare 2 longs and CMOVE ptrs.
// EQ/NE flavor: flags from a long compare (flagsReg_long_EQNE), value moved
// between pointer registers with a single CMOV (enc_cmov + RegReg).
// Requires CMOV support and a Bool test of eq or ne.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13181 
// Compare 2 unsigned longs and CMOVE ptrs.
// EQ/NE flavor.  Unsigned-operand wrapper (cmpOpU / flagsReg_ulong_EQNE)
// that expands to cmovPP_reg_EQNE.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}
13191 
// Compare 2 longs and CMOVE doubles
// EQ/NE flavor for regDPR operands, selected when UseSSE<=1.  The Bool test
// must be eq or ne.  Expands to fcmovDPR_regS.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13201 
// Compare 2 longs and CMOVE doubles
// EQ/NE flavor for regD operands, selected when UseSSE>=2.  The Bool test
// must be eq or ne.  Expands to fcmovD_regS.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13211 
// Compare 2 longs and CMOVE floats
// EQ/NE flavor for regFPR operands, selected when UseSSE==0.  The Bool test
// must be eq or ne.  Expands to fcmovFPR_regS.
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13220 
// Compare 2 longs and CMOVE floats
// EQ/NE flavor for regF operands, selected when UseSSE>=1.  The Bool test
// must be eq or ne.  Expands to fcmovF_regS.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13229 
13230 //======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
// Zero form.  Per the format text: tmp (a TEMP) is zeroed, then CMP/SBB
// against the low and high halves of src yields the flags of 0 - src, i.e.
// the operands are swapped relative to (src cmp 0).  The flags therefore
// have to be consumed with a commuted test.  Encoded by
// long_cmp_flags3(src, tmp).
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}
13243 
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
// Note the argument order passed to the encoding: long_cmp_flags2 receives
// (src2, src1, tmp), the reverse of the match order; tmp is a TEMP.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13257 
// Long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
// The predicate accepts only If nodes whose Bool test is BoolTest::gt or
// BoolTest::le.  The condition operand is cmpOp_commute because the
// flagsReg_long_LEGT flags are produced with swapped operands.
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}
13269 
13270 //======
// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
// Zero form of the unsigned compare.  Same XOR tmp / CMP tmp,lo / SBB tmp,hi
// sequence (long_cmp_flags3) as the signed variant; tmp is a TEMP.  The
// result goes to flagsReg_ulong_LEGT and must be used with a commuted test.
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
13283 
// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
// Note the argument order passed to the encoding: long_cmp_flags2 receives
// (src2, src1, tmp), the reverse of the match order; tmp is a TEMP.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
13297 
// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
// The predicate accepts only If nodes whose Bool test is BoolTest::gt or
// BoolTest::le.  The condition operand is cmpOpU_commute because the
// flagsReg_ulong_LEGT flags are produced with swapped operands.
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}
13309 
// Compare 2 longs and CMOVE longs.
// Register source, LE/GT flavor.  Uses the commuted condition operand
// (cmpOp_commute) with flagsReg_long_LEGT flags.  Requires
// VM_Version::supports_cmov() and a Bool test of le or gt.  The 64-bit value
// is moved as two halves: enc_cmov(cmp) + RegReg_Lo2, then enc_cmov(cmp) +
// RegReg_Hi2.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13321 
// Compare 2 longs and CMOVE a long loaded from memory (LE/GT flavor).
// Uses the commuted condition operand (cmpOp_commute) with
// flagsReg_long_LEGT flags and folds (LoadL src) into the two CMOVs (RegMem
// for one half, RegMem_Hi for the other), bracketed by SetInstMark /
// ClearInstMark.  Requires CMOV support and a Bool test of le or gt.
// NOTE(review): the second format line prints "$src.hi+4" whereas the
// LTGE and EQNE memory variants print "$src.hi"; only the debug text
// differs, the encoding is the same -- confirm which spelling is intended.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}
13332 
13333 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
        // Unsigned-compare counterpart of cmovLL_reg_LEGT: same match rule and
        // predicate, but takes cmpOpU_commute / flagsReg_ulong_LEGT operands and
        // simply expands to cmovLL_reg_LEGT.
13334   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13335   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13336   ins_cost(400);
13337   expand %{
13338     cmovLL_reg_LEGT(cmp, flags, dst, src);
13339   %}
13340 %}
13341 
13342 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
        // Unsigned-compare counterpart of cmovLL_mem_LEGT: same match rule and
        // predicate, but takes cmpOpU_commute / flagsReg_ulong_LEGT operands and
        // simply expands to cmovLL_mem_LEGT.
13343   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13344   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13345   ins_cost(500);
13346   expand %{
13347     cmovLL_mem_LEGT(cmp, flags, dst, src);
13348   %}
13349 %}
13350 
13351 // Compare 2 longs and CMOVE ints.
      // Register-register form: a single CMOV of $src into $dst, driven by the
      // flags of a long compare. Matches only when VM_Version::supports_cmov()
      // holds and the Bool test is le or gt.
13352 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13353   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13354   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13355   ins_cost(200);
13356   format %{ "CMOV$cmp $dst,$src" %}
13357   opcode(0x0F,0x40);
13358   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13359   ins_pipe( pipe_cmov_reg );
13360 %}
13361 
13362 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
        // Memory-source form of the int CMOVE on long-compare flags: the second
        // input is a LoadI folded into the CMOV. Same predicate as the register
        // form (CMOV supported, Bool test le or gt).
13363   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13364   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13365   ins_cost(250);
13366   format %{ "CMOV$cmp $dst,$src" %}
13367   opcode(0x0F,0x40);
        // Single CMOV with a memory operand, bracketed by SetInstMark / ClearInstMark.
13368   ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
13369   ins_pipe( pipe_cmov_mem );
13370 %}
13371 
13372 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
        // Unsigned-compare counterpart of cmovII_reg_LEGT: same match rule and
        // predicate, but takes cmpOpU_commute / flagsReg_ulong_LEGT operands and
        // simply expands to cmovII_reg_LEGT.
13373   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13374   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13375   ins_cost(200);
13376   expand %{
13377     cmovII_reg_LEGT(cmp, flags, dst, src);
13378   %}
13379 %}
13380 
13381 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
        // Unsigned-compare counterpart of cmovII_mem_LEGT: same match rule and
        // predicate, but takes cmpOpU_commute / flagsReg_ulong_LEGT operands and
        // simply expands to cmovII_mem_LEGT.
13382   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13383   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13384   ins_cost(250);
13385   expand %{
13386     cmovII_mem_LEGT(cmp, flags, dst, src);
13387   %}
13388 %}
13389 
13390 // Compare 2 longs and CMOVE ptrs.
      // Register-register form: a single CMOV of $src into $dst, driven by the
      // flags of a long compare. Matches only when VM_Version::supports_cmov()
      // holds and the Bool test is le or gt.
13391 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13392   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13393   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13394   ins_cost(200);
13395   format %{ "CMOV$cmp $dst,$src" %}
13396   opcode(0x0F,0x40);
13397   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13398   ins_pipe( pipe_cmov_reg );
13399 %}
13400 
13401 // Compare 2 unsigned longs and CMOVE ptrs.
      // Same match rule and predicate as cmovPP_reg_LEGT, but with
      // cmpOpU_commute / flagsReg_ulong_LEGT operands; expands to cmovPP_reg_LEGT.
13402 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13403   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13404   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13405   ins_cost(200);
13406   expand %{
13407     cmovPP_reg_LEGT(cmp,flags,dst,src);
13408   %}
13409 %}
13410 
13411 // Compare 2 longs and CMOVE doubles
      // regDPR variant, selected when UseSSE<=1 and the Bool test is le or gt.
      // Has no encoding of its own; expands to fcmovDPR_regS.
13412 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13413   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13414   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13415   ins_cost(200);
13416   expand %{
13417     fcmovDPR_regS(cmp,flags,dst,src);
13418   %}
13419 %}
13420 
13421 // Compare 2 longs and CMOVE doubles
      // regD variant, selected when UseSSE>=2 and the Bool test is le or gt.
      // Has no encoding of its own; expands to fcmovD_regS.
13422 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13423   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13424   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13425   ins_cost(200);
13426   expand %{
13427     fcmovD_regS(cmp,flags,dst,src);
13428   %}
13429 %}
13430 
13431 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
        // Compare 2 longs and CMOVE floats: regFPR variant, selected when
        // UseSSE==0 and the Bool test is le or gt. Expands to fcmovFPR_regS.
13432   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13433   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13434   ins_cost(200);
13435   expand %{
13436     fcmovFPR_regS(cmp,flags,dst,src);
13437   %}
13438 %}
13439 
13440 
13441 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
        // Compare 2 longs and CMOVE floats: regF variant, selected when
        // UseSSE>=1 and the Bool test is le or gt. Expands to fcmovF_regS.
13442   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13443   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13444   ins_cost(200);
13445   expand %{
13446     fcmovF_regS(cmp,flags,dst,src);
13447   %}
13448 %}
13449 
13450 
13451 // ============================================================================
13452 // Procedure Call/Return Instructions
13453 // Call Java Static Instruction
13454 // Note: If this code changes, the corresponding ret_addr_offset() and
13455 //       compute_padding() functions will have to be adjusted.
      // Encoding sequence: pre_call_resets, Java_Static_Call, call_epilog,
      // post_call_FPU. The instruction requests ins_alignment(4).
13456 instruct CallStaticJavaDirect(method meth) %{
13457   match(CallStaticJava);
13458   effect(USE meth);
13459
13460   ins_cost(300);
13461   format %{ "CALL,static " %}
13462   opcode(0xE8); /* E8 cd */
13463   ins_encode( pre_call_resets,
13464               Java_Static_Call( meth ),
13465               call_epilog,
13466               post_call_FPU );
13467   ins_pipe( pipe_slow );
13468   ins_alignment(4);
13469 %}
13470 
13471 // Call Java Dynamic Instruction
13472 // Note: If this code changes, the corresponding ret_addr_offset() and
13473 //       compute_padding() functions will have to be adjusted.
      // Encoding sequence: pre_call_resets, Java_Dynamic_Call, call_epilog,
      // post_call_FPU. Per the format string a "MOV EAX,(oop)-1" precedes the
      // call. The instruction requests ins_alignment(4).
13474 instruct CallDynamicJavaDirect(method meth) %{
13475   match(CallDynamicJava);
13476   effect(USE meth);
13477
13478   ins_cost(300);
13479   format %{ "MOV    EAX,(oop)-1\n\t"
13480             "CALL,dynamic" %}
13481   opcode(0xE8); /* E8 cd */
13482   ins_encode( pre_call_resets,
13483               Java_Dynamic_Call( meth ),
13484               call_epilog,
13485               post_call_FPU );
13486   ins_pipe( pipe_slow );
13487   ins_alignment(4);
13488 %}
13489 
13490 // Call Runtime Instruction
      // Matches CallRuntime. Unlike the Java call instructions above, the encoding
      // inserts FFree_Float_Stack_All before the call, has no call_epilog, and no
      // ins_alignment is requested.
13491 instruct CallRuntimeDirect(method meth) %{
13492   match(CallRuntime );
13493   effect(USE meth);
13494
13495   ins_cost(300);
13496   format %{ "CALL,runtime " %}
13497   opcode(0xE8); /* E8 cd */
13498   // Use FFREEs to clear entries in float stack
13499   ins_encode( pre_call_resets,
13500               FFree_Float_Stack_All,
13501               Java_To_Runtime( meth ),
13502               post_call_FPU );
13503   ins_pipe( pipe_slow );
13504 %}
13505 
13506 // Call runtime without safepoint
      // Matches CallLeaf. Same encoding sequence as CallRuntimeDirect, with an
      // additional Verify_FPU_For_Leaf step between the call and post_call_FPU.
13507 instruct CallLeafDirect(method meth) %{
13508   match(CallLeaf);
13509   effect(USE meth);
13510
13511   ins_cost(300);
13512   format %{ "CALL_LEAF,runtime " %}
13513   opcode(0xE8); /* E8 cd */
13514   ins_encode( pre_call_resets,
13515               FFree_Float_Stack_All,
13516               Java_To_Runtime( meth ),
13517               Verify_FPU_For_Leaf, post_call_FPU );
13518   ins_pipe( pipe_slow );
13519 %}
13520 
13521 instruct CallLeafNoFPDirect(method meth) %{
        // Matches CallLeafNoFP. The encoding is only pre_call_resets followed by
        // Java_To_Runtime: none of the FPU-related steps used by CallLeafDirect
        // (FFree_Float_Stack_All, Verify_FPU_For_Leaf, post_call_FPU) are emitted.
13522   match(CallLeafNoFP);
13523   effect(USE meth);
13524
13525   ins_cost(300);
13526   format %{ "CALL_LEAF_NOFP,runtime " %}
13527   opcode(0xE8); /* E8 cd */
13528   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13529   ins_pipe( pipe_slow );
13530 %}
13531 
13532 
13533 // Return Instruction
13534 // Remove the return address & jump to it.
      // Takes no operands; the encoding is just the primary opcode byte 0xC3.
13535 instruct Ret() %{
13536   match(Return);
13537   format %{ "RET" %}
13538   opcode(0xC3);
13539   ins_encode(OpcP);
13540   ins_pipe( pipe_jmp );
13541 %}
13542 
13543 // Tail Call; Jump from runtime stub to Java code.
13544 // Also known as an 'interprocedural jump'.
13545 // Target of jump will eventually return to caller.
13546 // TailJump below removes the return address.
13547 // Don't use ebp for 'jump_target' because a MachEpilogNode has already been
13548 // emitted just above the TailCall which has reset ebp to the caller state.
      // method_ptr is constrained to the eBXRegP operand class (the format notes
      // "EBX holds method"); it is matched but does not appear in the encoding.
13549 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13550   match(TailCall jump_target method_ptr);
13551   ins_cost(300);
13552   format %{ "JMP    $jump_target \t# EBX holds method" %}
13553   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13554   ins_encode( OpcP, RegOpc(jump_target) );
13555   ins_pipe( pipe_jmp );
13556 %}
13557 
13558 
13559 // Tail Jump; remove the return address; jump to target.
13560 // TailCall above leaves the return address around.
      // ex_oop is constrained to the eAXRegP operand class; it is matched but does
      // not appear in the encoding. As with TailCalljmpInd, jump_target excludes EBP.
13561 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13562   match( TailJump jump_target ex_oop );
13563   ins_cost(300);
13564   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13565             "JMP    $jump_target " %}
13566   opcode(0xFF, 0x4);  /* Opcode FF /4 */
        // enc_pop_rdx discards the return address (the POP EDX in the format),
        // then the same FF /4 indirect jump as TailCalljmpInd is emitted.
13567   ins_encode( enc_pop_rdx,
13568               OpcP, RegOpc(jump_target) );
13569   ins_pipe( pipe_jmp );
13570 %}
13571 
13572 // Forward exception.
      // Matches ForwardException and emits a jump to the address returned by
      // StubRoutines::forward_exception_entry(). Takes no operands.
13573 instruct ForwardExceptionjmp()
13574 %{
13575   match(ForwardException);
13576
13577   format %{ "JMP    forward_exception_stub" %}
13578   ins_encode %{
          // noreg is passed as the second argument of jump().
13579     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg);
13580   %}
13581   ins_pipe(pipe_jmp);
13582 %}
13583 
13584 // Create exception oop: created by stack-crawling runtime code.
13585 // Created exception is now available to this handler, and is setup
13586 // just prior to jumping to this handler.  No code emitted.
      // The result operand is constrained to eAXRegP, so matching CreateEx only
      // tells the allocator where the oop lives.
13587 instruct CreateException( eAXRegP ex_oop )
13588 %{
13589   match(Set ex_oop (CreateEx));
13590
        // Zero-size instruction with an empty encoding.
13591   size(0);
13592   // use the following format syntax
13593   format %{ "# exception oop is in EAX; no code emitted" %}
13594   ins_encode();
13595   ins_pipe( empty );
13596 %}
13597 
13598 
13599 // Rethrow exception:
13600 // The exception oop will come in the first argument position.
13601 // Then JUMP (not call) to the rethrow stub code.
      // Takes no operands; the jump itself is produced by the enc_rethrow
      // encoding class.
13602 instruct RethrowException()
13603 %{
13604   match(Rethrow);
13605
13606   // use the following format syntax
13607   format %{ "JMP    rethrow_stub" %}
13608   ins_encode(enc_rethrow);
13609   ins_pipe( pipe_jmp );
13610 %}
13611 
13612 // inlined locking and unlocking
13613 
13614 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
        // Inlined FastLock for every locking mode except LM_LIGHTWEIGHT; the
        // result is delivered in the flags register cr.
        // box is USE_KILL; tmp, scr and thread are TEMPs.
        // NOTE(review): the format string lists $box,$tmp,$scr as killed but does
        // not mention $thread, which is also a TEMP.
13615   predicate(LockingMode != LM_LIGHTWEIGHT);
13616   match(Set cr (FastLock object box));
13617   effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
13618   ins_cost(300);
13619   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13620   ins_encode %{
          // Fill the thread TEMP via get_thread() before handing it to fast_lock().
13621     __ get_thread($thread$$Register);
13622     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13623                  $scr$$Register, noreg, noreg, $thread$$Register, nullptr);
13624   %}
13625   ins_pipe(pipe_slow);
13626 %}
13627 
13628 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
        // Inlined FastUnlock for every locking mode except LM_LIGHTWEIGHT; the
        // result is delivered in the flags register cr.
        // box (constrained to eAXRegP) is USE_KILL; tmp is a TEMP.
13629   predicate(LockingMode != LM_LIGHTWEIGHT);
13630   match(Set cr (FastUnlock object box));
13631   effect(TEMP tmp, USE_KILL box);
13632   ins_cost(300);
13633   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13634   ins_encode %{
13635     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register);
13636   %}
13637   ins_pipe(pipe_slow);
13638 %}
13639 
13640 instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
        // Inlined FastLock used only when LockingMode == LM_LIGHTWEIGHT; the
        // result is delivered in the flags register cr.
        // box is USE_KILL; eax_reg, tmp and thread are TEMPs.
        // NOTE(review): the format string does not mention $thread, which is
        // also a TEMP.
13641   predicate(LockingMode == LM_LIGHTWEIGHT);
13642   match(Set cr (FastLock object box));
13643   effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
13644   ins_cost(300);
13645   format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
13646   ins_encode %{
          // Fill the thread TEMP via get_thread() before the lightweight lock call.
13647     __ get_thread($thread$$Register);
13648     __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
13649   %}
13650   ins_pipe(pipe_slow);
13651 %}
13652 
13653 instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
        // Inlined FastUnlock used only when LockingMode == LM_LIGHTWEIGHT; the
        // result is delivered in the flags register cr.
        // The second FastUnlock input is matched as eax_reg (USE_KILL); tmp and
        // thread are TEMPs.
        // NOTE(review): the format string does not mention $thread, which is
        // also a TEMP.
13654   predicate(LockingMode == LM_LIGHTWEIGHT);
13655   match(Set cr (FastUnlock object eax_reg));
13656   effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
13657   ins_cost(300);
13658   format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
13659   ins_encode %{
          // Fill the thread TEMP via get_thread() before the lightweight unlock call.
13660     __ get_thread($thread$$Register);
13661     __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
13662   %}
13663   ins_pipe(pipe_slow);
13664 %}
13665 
13666 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
        // MaskAll from a long source into a kReg, selected when
        // Matcher::vector_length(n) <= 32.
        // NOTE(review): the instruct name says "LT32" while the predicate is
        // "<= 32" and the format string prints "LE32"; the predicate is what is
        // actually enforced.
13667   predicate(Matcher::vector_length(n) <= 32);
13668   match(Set dst (MaskAll src));
13669   format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
13670   ins_encode %{
          // The vector length of this node is passed on as the mask length.
13671     int mask_len = Matcher::vector_length(this);
13672     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13673   %}
13674   ins_pipe( pipe_slow );
13675 %}
13676 
13677 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
        // MaskAll from a long source into a kReg, selected when
        // Matcher::vector_length(n) > 32. Needs an extra kReg TEMP (ktmp) and
        // goes through vector_maskall_operation32.
13678   predicate(Matcher::vector_length(n) > 32);
13679   match(Set dst (MaskAll src));
13680   effect(TEMP ktmp);
13681   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13682   ins_encode %{
          // The vector length of this node is passed on as the mask length.
13683     int mask_len = Matcher::vector_length(this);
13684     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13685   %}
13686   ins_pipe( pipe_slow );
13687 %}
13688 
13689 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
        // MaskAll from an int source (rRegI) into a kReg, selected when
        // Matcher::vector_length(n) > 32. Same encoding as mask_all_evexL_GT32;
        // only the operand class of src differs.
13690   predicate(Matcher::vector_length(n) > 32);
13691   match(Set dst (MaskAll src));
13692   effect(TEMP ktmp);
13693   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13694   ins_encode %{
          // The vector length of this node is passed on as the mask length.
13695     int mask_len = Matcher::vector_length(this);
13696     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13697   %}
13698   ins_pipe( pipe_slow );
13699 %}
13700 
13701 // ============================================================================
13702 // Safepoint Instruction
      // Matches SafePoint with the poll address held in $poll (any eRegP except
      // EBP). A poll_type relocation is recorded and the poll itself is emitted
      // as TEST EAX,[$poll]; the flags register is killed (KILL cr).
13703 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13704   match(SafePoint poll);
13705   effect(KILL cr, USE poll);
13706
13707   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13708   ins_cost(125);
13709   // EBP would need size(3)
13710   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13711   ins_encode %{
          // Record the poll relocation before emitting the instruction.
13712     __ set_inst_mark();
13713     __ relocate(relocInfo::poll_type);
13714     __ clear_inst_mark();
          // Remember where the instruction starts so its first byte can be checked.
13715     address pre_pc = __ pc();
13716     __ testl(rax, Address($poll$$Register, 0));
          // The emitted instruction must begin with opcode byte 0x85.
13718     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13719   %}
13720   ins_pipe(ialu_reg_mem);
13721 %}
13722 
13723 
13724 // ============================================================================
13725 // This name is KNOWN by the ADLC and cannot be changed.
13726 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13727 // for this guy.
      // Matches ThreadLocal: loads the current thread pointer into $dst via
      // get_thread(). The flags register is declared killed (KILL cr).
13728 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13729   match(Set dst (ThreadLocal));
13730   effect(DEF dst, KILL cr);
13731
13732   format %{ "MOV    $dst, Thread::current()" %}
13733   ins_encode %{
13734     Register dstReg = as_Register($dst$$reg);
13735     __ get_thread(dstReg);
13736   %}
13737   ins_pipe( ialu_reg_fat );
13738 %}
13739 
13740 
13741 
13742 //----------PEEPHOLE RULES-----------------------------------------------------
13743 // These must follow all instruction definitions as they use the names
13744 // defined in the instructions definitions.
13745 //
13746 // peepmatch ( root_instr_name [preceding_instruction]* );
13747 //
13748 // peepconstraint (
13749 //   instruction_number.operand_name relational_op instruction_number.operand_name
13750 //   [, ...] );
13751 // // instruction numbers are zero-based using left to right order in peepmatch
13752 //
13753 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13754 // // provide an instruction_number.operand_name for each operand that appears
13755 // // in the replacement instruction's match rule
13756 //
13757 // ---------VM FLAGS---------------------------------------------------------
13758 //
13759 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13760 //
13761 // Each peephole rule is given an identifying number starting with zero and
13762 // increasing by one in the order seen by the parser.  An individual peephole
13763 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13764 // on the command-line.
13765 //
13766 // ---------CURRENT LIMITATIONS----------------------------------------------
13767 //
13768 // Only match adjacent instructions in same basic block
13769 // Only equality constraints
13770 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13771 // Only one replacement instruction
13772 //
13773 // ---------EXAMPLE----------------------------------------------------------
13774 //
13775 // // pertinent parts of existing instructions in architecture description
13776 // instruct movI(rRegI dst, rRegI src) %{
13777 //   match(Set dst (CopyI src));
13778 // %}
13779 //
13780 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13781 //   match(Set dst (AddI dst src));
13782 //   effect(KILL cr);
13783 // %}
13784 //
13785 // // Change (inc mov) to lea
13786 // peephole %{
13787 //   // increment preceded by register-register move
13788 //   peepmatch ( incI_eReg movI );
13789 //   // require that the destination register of the increment
13790 //   // match the destination register of the move
13791 //   peepconstraint ( 0.dst == 1.dst );
13792 //   // construct a replacement instruction that sets
13793 //   // the destination to ( move's source register + one )
13794 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13795 // %}
13796 //
13797 // Implementation no longer uses movX instructions since
13798 // machine-independent system no longer uses CopyX nodes.
13799 //
13800 // peephole %{
13801 //   peepmatch ( incI_eReg movI );
13802 //   peepconstraint ( 0.dst == 1.dst );
13803 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13804 // %}
13805 //
13806 // peephole %{
13807 //   peepmatch ( decI_eReg movI );
13808 //   peepconstraint ( 0.dst == 1.dst );
13809 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13810 // %}
13811 //
13812 // peephole %{
13813 //   peepmatch ( addI_eReg_imm movI );
13814 //   peepconstraint ( 0.dst == 1.dst );
13815 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13816 // %}
13817 //
13818 // peephole %{
13819 //   peepmatch ( addP_eReg_imm movP );
13820 //   peepconstraint ( 0.dst == 1.dst );
13821 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13822 // %}
13823 
13824 // // Change load of spilled value to only a spill
13825 // instruct storeI(memory mem, rRegI src) %{
13826 //   match(Set mem (StoreI mem src));
13827 // %}
13828 //
13829 // instruct loadI(rRegI dst, memory mem) %{
13830 //   match(Set dst (LoadI mem));
13831 // %}
13832 //
      // Instruction 0 is the root loadI and instruction 1 is the storeI that
      // directly precedes it. When the store's source register is the load's
      // destination and both use the same memory operand, the pair is replaced
      // by the storeI alone.
13833 peephole %{
13834   peepmatch ( loadI storeI );
13835   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13836   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13837 %}
13838 
13839 //----------SMARTSPILL RULES---------------------------------------------------
13840 // These must follow all instruction definitions as they use the names
13841 // defined in the instructions definitions.