1 //
    2 // Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
   64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   81 // Ok so here's the trick FPR1 is really st(0) except in the midst
   82 // of emission of assembly for a machnode. During the emission the fpu stack
   83 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
   84 // the stack will not have this element so FPR1 == st(0) from the
   85 // oopMap viewpoint. This same weirdness with numbering causes
   86 // instruction encoding to have to play games with the register
   87 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
   88 // where it does flt->flt moves to see an example
   89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_no_ebp and int_reg_with_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_no_ebp and ncx_reg_with_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_no_ebp and nabx_reg_with_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI)
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_no_ebp and long_reg_with_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Class of integer register pairs that aligns with calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  220 // Not AX or DX (and neither EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_no_ebp and nadx_reg_with_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp.
// Declaration only: takes a Node* and returns a bool.
extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
// Short aliases for the Assembler's imm_operand / disp32_operand constants.
#define   RELOC_IMM32    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Assembler shorthand: "__ foo()" expands to "masm->foo()", so it can only be
// used where a variable named 'masm' is in scope.
#define __ masm->

// How to find the high register of a Long pair, given the low register.
// With the encodings from the register definitions (EAX=0/EDX=2, ECX=1/EBX=3,
// EBP=5/EDI=7) the high half of each pair is the low half's encoding plus 2.
#define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
#define   HIGH_FROM_LOW_ENC(x) ((x)+2)
  260 
  261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  262 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  263 // fast versions of NegF/NegD and AbsF/AbsD.
  264 
// Empty definition: performs no work.
void reg_mask_init() {}
  266 
  267 // Note: 'double' and 'long long' have 32-bits alignment on x86.
  268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  269   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  270   // of 128-bits operands for SSE instructions.
  271   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  272   // Store the value to a 128-bits operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
  277 
// Buffer for 128-bits masks used by SSE instructions.
// One extra 128-bit slot is reserved because double_quadword() rounds every
// address down to a 16-byte boundary.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pointer designates the 16-byte aligned slot at or below the given pool
// element; both 64-bit halves of a slot hold the same pattern:
//   *_signmask_pool: every bit set except the sign bit of each float/double lane
//   *_signflip_pool: only the sign bit of each float/double lane set
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
  299 
  300 // !!!!! Special hack to get all type of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned to
  323 // ensure that it does not span a cache line so that it can be patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned to
  331 // ensure that it does not span a cache line so that it can be patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
  338 
  339 // EMIT_RM()
  340 void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   __ emit_int8(c);
  343 }
  344 
  345 // EMIT_CC()
  346 void emit_cc(C2_MacroAssembler *masm, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   __ emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(C2_MacroAssembler *masm, int code) {
  353   __ emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) {
  358   __ relocate(__ inst_mark() + offset, reloc);
  359   emit_opcode(masm, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(C2_MacroAssembler *masm, int d8) {
  364   __ emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(C2_MacroAssembler *masm, int d16) {
  369   __ emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(C2_MacroAssembler *masm, int d32) {
  374   __ emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   __ relocate(__ inst_mark(), reloc, format);
  381   __ emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   __ relocate(__ inst_mark(), rspec, format);
  393   __ emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) {
  398   emit_opcode( masm, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( masm, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
  402     emit_d8 (masm, disp);     // Displacement  // R/M byte
  403   } else {
  404     emit_rm( masm, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
  406     emit_d32(masm, disp);     // Displacement  // R/M byte
  407   }
  408 }
  409 
  410    // rRegI ereg, memory mem) %{    // emit_reg_mem
  411 void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  412   // There is no index & no scale, use form without SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(masm, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(masm, 0x1, reg_encoding, base);
  423         emit_d8(masm, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(masm, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(masm, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (masm, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(masm, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(masm, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (masm, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(masm, 0x0, reg_encoding, 0x4);
  450       emit_rm(masm, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(masm, 0x1, reg_encoding, 0x4);
  456         emit_rm(masm, scale, index, base);
  457         emit_d8(masm, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(masm, 0x2, reg_encoding, 0x4);
  462           emit_rm(masm, scale, index, 0x04);
  463         } else {
  464           emit_rm(masm, 0x2, reg_encoding, 0x4);
  465           emit_rm(masm, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(masm, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (masm, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
  476 
  477 
  478 void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( masm, 0x8B );
  483     emit_rm(masm, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
  486 
  487 void emit_cmpfp_fixup(MacroAssembler* masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss instructions set ZF,PF,CF flags and
  493   // zero OF,AF,SF for NaN values.
  494   // Fixup flags by zeroing ZF,PF so that compare of NaN
  495   // values returns 'less than' result (CF is set).
  496   // Leave the rest of flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
  506 
  507 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
  516 
  517 
  518 //=============================================================================
// The output register mask of MachConstantBaseNode is the empty mask.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612 
  613   int framesize = C->output()->frame_size_in_bytes();
  614   int bangsize = C->output()->bang_size_in_bytes();
  615 
  616   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != nullptr);
  617 
  618   C->output()->set_frame_complete(__ offset());
  619 
  620   if (C->has_mach_constant_base_node()) {
  621     // NOTE: We set the table base offset here because users might be
  622     // emitted before MachConstantBaseNode.
  623     ConstantTable& constant_table = C->output()->constant_table();
  624     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  625   }
  626 }
  627 
  628 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  629   return MachNode::size(ra_); // too many variables; just compute it the hard way
  630 }
  631 
  632 int MachPrologNode::reloc() const {
  633   return 0; // a large enough number
  634 }
  635 
  636 //=============================================================================
  637 #ifndef PRODUCT
  638 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  639   Compile *C = ra_->C;
  640   int framesize = C->output()->frame_size_in_bytes();
  641   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  642   // Remove two words for return addr and rbp,
  643   framesize -= 2*wordSize;
  644 
  645   if (C->max_vector_size() > 16) {
  646     st->print("VZEROUPPER");
  647     st->cr(); st->print("\t");
  648   }
  649   if (C->in_24_bit_fp_mode()) {
  650     st->print("FLDCW  standard control word");
  651     st->cr(); st->print("\t");
  652   }
  653   if (framesize) {
  654     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  655     st->cr(); st->print("\t");
  656   }
  657   st->print_cr("POPL   EBP"); st->print("\t");
  658   if (do_polling() && C->is_method_compilation()) {
  659     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  660               "JA      #safepoint_stub\t"
  661               "# Safepoint: poll for GC");
  662   }
  663 }
  664 #endif
  665 
  666 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  667   Compile *C = ra_->C;
  668 
  669   if (C->max_vector_size() > 16) {
  670     // Clear upper bits of YMM registers when current compiled code uses
  671     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  672     __ vzeroupper();
  673   }
  674   // If method set FPU control word, restore to standard control word
  675   if (C->in_24_bit_fp_mode()) {
  676     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  677   }
  678 
  679   int framesize = C->output()->frame_size_in_bytes();
  680   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  681   // Remove two words for return addr and rbp,
  682   framesize -= 2*wordSize;
  683 
  684   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  685 
  686   if (framesize >= 128) {
  687     emit_opcode(masm, 0x81); // add  SP, #framesize
  688     emit_rm(masm, 0x3, 0x00, ESP_enc);
  689     emit_d32(masm, framesize);
  690   } else if (framesize) {
  691     emit_opcode(masm, 0x83); // add  SP, #framesize
  692     emit_rm(masm, 0x3, 0x00, ESP_enc);
  693     emit_d8(masm, framesize);
  694   }
  695 
  696   emit_opcode(masm, 0x58 | EBP_enc);
  697 
  698   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  699     __ reserved_stack_check();
  700   }
  701 
  702   if (do_polling() && C->is_method_compilation()) {
  703     Register thread = as_Register(EBX_enc);
  704     __ get_thread(thread);
  705     Label dummy_label;
  706     Label* code_stub = &dummy_label;
  707     if (!C->output()->in_scratch_emit_size()) {
  708       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  709       C->output()->add_stub(stub);
  710       code_stub = &stub->entry();
  711     }
  712     __ set_inst_mark();
  713     __ relocate(relocInfo::poll_return_type);
  714     __ clear_inst_mark();
  715     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  716   }
  717 }
  718 
// Reports the epilog's size by delegating to the generic MachNode::size();
// no hand-computed byte count is kept for this node.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
  723 
// Relocation count reported for the epilog; this implementation always
// reports 0.
// NOTE(review): the inline remark "a large enough number" does not match a
// return value of 0 -- confirm whether 0 is the intended estimate.
int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}
  727 
// The epilog has no pipeline description of its own; it returns the generic
// MachNode pipeline class.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
  731 
  732 //=============================================================================
  733 
  734 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  735 static enum RC rc_class( OptoReg::Name reg ) {
  736 
  737   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  738   if (OptoReg::is_stack(reg)) return rc_stack;
  739 
  740   VMReg r = OptoReg::as_VMReg(reg);
  741   if (r->is_Register()) return rc_int;
  742   if (r->is_FloatRegister()) {
  743     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  744     return rc_float;
  745   }
  746   if (r->is_KRegister()) return rc_kreg;
  747   assert(r->is_XMMRegister(), "must be");
  748   return rc_xmm;
  749 }
  750 
  751 static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg,
  752                         int opcode, const char *op_str, int size, outputStream* st ) {
  753   if( masm ) {
  754     masm->set_inst_mark();
  755     emit_opcode  (masm, opcode );
  756     encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  757     masm->clear_inst_mark();
  758 #ifndef PRODUCT
  759   } else if( !do_size ) {
  760     if( size != 0 ) st->print("\n\t");
  761     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  762       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  763       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  764     } else { // FLD, FST, PUSH, POP
  765       st->print("%s [ESP + #%d]",op_str,offset);
  766     }
  767 #endif
  768   }
  769   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  770   return size+3+offset_size;
  771 }
  772 
  773 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  774 static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load,
  775                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  776   int in_size_in_bits = Assembler::EVEX_32bit;
  777   int evex_encoding = 0;
  778   if (reg_lo+1 == reg_hi) {
  779     in_size_in_bits = Assembler::EVEX_64bit;
  780     evex_encoding = Assembler::VEX_W;
  781   }
  782   if (masm) {
  783     // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
  784     //                          it maps more cases to single byte displacement
  785     __ set_managed();
  786     if (reg_lo+1 == reg_hi) { // double move?
  787       if (is_load) {
  788         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  789       } else {
  790         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  791       }
  792     } else {
  793       if (is_load) {
  794         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  795       } else {
  796         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  797       }
  798     }
  799 #ifndef PRODUCT
  800   } else if (!do_size) {
  801     if (size != 0) st->print("\n\t");
  802     if (reg_lo+1 == reg_hi) { // double move?
  803       if (is_load) st->print("%s %s,[ESP + #%d]",
  804                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  805                               Matcher::regName[reg_lo], offset);
  806       else         st->print("MOVSD  [ESP + #%d],%s",
  807                               offset, Matcher::regName[reg_lo]);
  808     } else {
  809       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  810                               Matcher::regName[reg_lo], offset);
  811       else         st->print("MOVSS  [ESP + #%d],%s",
  812                               offset, Matcher::regName[reg_lo]);
  813     }
  814 #endif
  815   }
  816   bool is_single_byte = false;
  817   if ((UseAVX > 2) && (offset != 0)) {
  818     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  819   }
  820   int offset_size = 0;
  821   if (UseAVX > 2 ) {
  822     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  823   } else {
  824     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  825   }
  826   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  827   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  828   return size+5+offset_size;
  829 }
  830 
  831 
  832 static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
  833                             int src_hi, int dst_hi, int size, outputStream* st ) {
  834   if (masm) {
  835     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  836     __ set_managed();
  837     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  838       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  839                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  840     } else {
  841       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  842                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  843     }
  844 #ifndef PRODUCT
  845   } else if (!do_size) {
  846     if (size != 0) st->print("\n\t");
  847     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
  848       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  849         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  850       } else {
  851         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  852       }
  853     } else {
  854       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  855         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  856       } else {
  857         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  858       }
  859     }
  860 #endif
  861   }
  862   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  863   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  864   int sz = (UseAVX > 2) ? 6 : 4;
  865   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  866       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  867   return size + sz;
  868 }
  869 
  870 static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
  871                             int src_hi, int dst_hi, int size, outputStream* st ) {
  872   // 32-bit
  873   if (masm) {
  874     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  875     __ set_managed();
  876     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  877              as_Register(Matcher::_regEncode[src_lo]));
  878 #ifndef PRODUCT
  879   } else if (!do_size) {
  880     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  881 #endif
  882   }
  883   return (UseAVX> 2) ? 6 : 4;
  884 }
  885 
  886 
  887 static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
  888                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  889   // 32-bit
  890   if (masm) {
  891     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  892     __ set_managed();
  893     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  894              as_XMMRegister(Matcher::_regEncode[src_lo]));
  895 #ifndef PRODUCT
  896   } else if (!do_size) {
  897     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  898 #endif
  899   }
  900   return (UseAVX> 2) ? 6 : 4;
  901 }
  902 
  903 static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) {
  904   if( masm ) {
  905     emit_opcode(masm, 0x8B );
  906     emit_rm    (masm, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  907 #ifndef PRODUCT
  908   } else if( !do_size ) {
  909     if( size != 0 ) st->print("\n\t");
  910     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  911 #endif
  912   }
  913   return size+2;
  914 }
  915 
  916 static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  917                                  int offset, int size, outputStream* st ) {
  918   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  919     if( masm ) {
  920       emit_opcode( masm, 0xD9 );  // FLD (i.e., push it)
  921       emit_d8( masm, 0xC0-1+Matcher::_regEncode[src_lo] );
  922 #ifndef PRODUCT
  923     } else if( !do_size ) {
  924       if( size != 0 ) st->print("\n\t");
  925       st->print("FLD    %s",Matcher::regName[src_lo]);
  926 #endif
  927     }
  928     size += 2;
  929   }
  930 
  931   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  932   const char *op_str;
  933   int op;
  934   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  935     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  936     op = 0xDD;
  937   } else {                   // 32-bit store
  938     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  939     op = 0xD9;
  940     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  941   }
  942 
  943   return impl_helper(masm,do_size,false,offset,st_op,op,op_str,size, st);
  944 }
  945 
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
// Only the declarations appear here.  MachSpillCopyNode::implementation()
// calls vec_mov_helper for xmm -> xmm vector copies and vec_spill_helper for
// copies between an xmm register and a stack slot (is_load selects the
// stack -> register direction).
static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);
  952 
  953 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
  954                                      int dst_offset, uint ireg, outputStream* st) {
  955   if (masm) {
  956     switch (ireg) {
  957     case Op_VecS:
  958       __ pushl(Address(rsp, src_offset));
  959       __ popl (Address(rsp, dst_offset));
  960       break;
  961     case Op_VecD:
  962       __ pushl(Address(rsp, src_offset));
  963       __ popl (Address(rsp, dst_offset));
  964       __ pushl(Address(rsp, src_offset+4));
  965       __ popl (Address(rsp, dst_offset+4));
  966       break;
  967     case Op_VecX:
  968       __ movdqu(Address(rsp, -16), xmm0);
  969       __ movdqu(xmm0, Address(rsp, src_offset));
  970       __ movdqu(Address(rsp, dst_offset), xmm0);
  971       __ movdqu(xmm0, Address(rsp, -16));
  972       break;
  973     case Op_VecY:
  974       __ vmovdqu(Address(rsp, -32), xmm0);
  975       __ vmovdqu(xmm0, Address(rsp, src_offset));
  976       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  977       __ vmovdqu(xmm0, Address(rsp, -32));
  978       break;
  979     case Op_VecZ:
  980       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  981       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  982       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  983       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  984       break;
  985     default:
  986       ShouldNotReachHere();
  987     }
  988 #ifndef PRODUCT
  989   } else {
  990     switch (ireg) {
  991     case Op_VecS:
  992       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  993                 "popl    [rsp + #%d]",
  994                 src_offset, dst_offset);
  995       break;
  996     case Op_VecD:
  997       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
  998                 "popq    [rsp + #%d]\n\t"
  999                 "pushl   [rsp + #%d]\n\t"
 1000                 "popq    [rsp + #%d]",
 1001                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1002       break;
 1003      case Op_VecX:
 1004       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1005                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1006                 "movdqu  [rsp + #%d], xmm0\n\t"
 1007                 "movdqu  xmm0, [rsp - #16]",
 1008                 src_offset, dst_offset);
 1009       break;
 1010     case Op_VecY:
 1011       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1012                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1013                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1014                 "vmovdqu xmm0, [rsp - #32]",
 1015                 src_offset, dst_offset);
 1016       break;
 1017     case Op_VecZ:
 1018       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1019                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1020                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1021                 "vmovdqu xmm0, [rsp - #64]",
 1022                 src_offset, dst_offset);
 1023       break;
 1024     default:
 1025       ShouldNotReachHere();
 1026     }
 1027 #endif
 1028   }
 1029 }
 1030 
// Shared worker behind MachSpillCopyNode::format() and ::emit().
//
// Moves the value of in(1) into this node's assigned location.  The source
// and destination are each described by a first/second allocator register;
// every one of them is classified with rc_class() and the matching copy
// sequence is selected from that classification.
//
//   masm     assembler to emit into; when null nothing is emitted and, in
//            non-product builds, the sequence is printed to st instead
//   ra_      register allocator used to look up registers and stack offsets
//   do_size  passed through to the helpers, where it suppresses printing
//   st       output stream for the textual form
//
// Returns the byte-size estimate accumulated by the helpers for the scalar
// integer / x87 / xmm paths.  The vector and opmask (kreg) paths return 0.
uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // Vector values (vector masks excluded) are handled entirely by the vec_*
  // helpers; this path always returns 0.
  if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  // NOTE(review): ESI_num / EAX_num are passed only for their encodings,
  // presumably the opcode-extension bits of PUSH (0xFF) and POP (0x8F) --
  // confirm against the register definitions.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the second word first so that writing dst_first does not
      // overwrite src_second before it has been read.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // The three integer cases below only move the first word and fall through;
  // a remaining second word is handled at the end of this function.
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( masm ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (masm, 0xD9 );           // FLD    ST(i)
        emit_d8      (masm, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
        emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
     } else {
        emit_opcode  (masm, 0xDD );           // FST    ST(i)
        emit_d8      (masm, 0xD0+Matcher::_regEncode[dst_first]-1 );
     }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
#endif
    }
    // Two 2-byte instructions (FLD + FSTP) or a single 2-byte FST.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( masm ) {
      // Load from the stack slot onto the FP stack, then pop it into the
      // destination register.
      masm->set_inst_mark();
      emit_opcode  (masm, op );
      encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
      emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
      masm->clear_inst_mark();
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  // The value travels through an 8-byte temporary carved out at the top of
  // the stack: reserve it, store the x87 value there, load it into the xmm
  // register, then release the temporary.
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( masm ) {
      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(masm, 0x1, ESP_enc, 0x04);
      emit_rm(masm, 0x0, 0x04, ESP_enc);
      emit_d8(masm,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st);

    if( masm ) {
      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(masm, 0x1, ESP_enc, 0x04);
      emit_rm(masm, 0x0, 0x04, ESP_enc);
      emit_d8(masm,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.
  // All opmask paths require aligned, adjacent register pairs and return 0.
  // Their non-product printing is not gated on do_size.
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(src_first);
    if (masm != nullptr) {
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
    } else {
      st->print("KMOV    %s, [ESP + %d]", Matcher::regName[dst_first], offset);
#endif
    }
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(dst_first);
    if (masm != nullptr) {
      __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV    [ESP + %d], %s", offset, Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  // Opmask <-> general register copies are not implemented here.
  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    if (masm != nullptr) {
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV    %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  // Only the fall-through integer cases reach this point, and each of them
  // added to size.
  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(masm,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}
 1315 
#ifndef PRODUCT
// Print the spill copy to st.  Runs implementation() with a null assembler,
// so nothing is emitted and only the textual form is produced.
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( nullptr, ra_, false, st );
}
#endif
 1321 
// Emit the spill copy into masm.  No output stream is supplied because
// implementation() only prints when the assembler is null.
void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  implementation( masm, ra_, false, nullptr );
}
 1325 
// Size of the spill copy, obtained from the generic MachNode::size() rather
// than from the value returned by implementation().
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
 1329 
 1330 
 1331 //=============================================================================
 1332 #ifndef PRODUCT
 1333 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1334   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1335   int reg = ra_->get_reg_first(this);
 1336   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1337 }
 1338 #endif
 1339 
 1340 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1341   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1342   int reg = ra_->get_encode(this);
 1343   if( offset >= 128 ) {
 1344     emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
 1345     emit_rm(masm, 0x2, reg, 0x04);
 1346     emit_rm(masm, 0x0, 0x04, ESP_enc);
 1347     emit_d32(masm, offset);
 1348   }
 1349   else {
 1350     emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
 1351     emit_rm(masm, 0x1, reg, 0x04);
 1352     emit_rm(masm, 0x0, 0x04, ESP_enc);
 1353     emit_d8(masm, offset);
 1354   }
 1355 }
 1356 
 1357 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1358   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1359   if( offset >= 128 ) {
 1360     return 7;
 1361   }
 1362   else {
 1363     return 4;
 1364   }
 1365 }
 1366 
 1367 //=============================================================================
 1368 #ifndef PRODUCT
 1369 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1370   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1371   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1372   st->print_cr("\tNOP");
 1373   st->print_cr("\tNOP");
 1374   if( !OptoBreakpoint )
 1375     st->print_cr("\tNOP");
 1376 }
 1377 #endif
 1378 
// Emit the unverified entry point by delegating to the macro assembler's
// ic_check(), passing CodeEntryAlignment.
void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  __ ic_check(CodeEntryAlignment);
}
 1382 
// Reports the size by delegating to the generic MachNode::size(); no
// hand-computed byte count is kept for this node.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
 1387 
 1388 
 1389 //=============================================================================
 1390 
// Vector calling convention not supported.
// Always answers false for this platform description.
bool Matcher::supports_vector_calling_convention() {
  return false;
}
 1395 
// Not implemented here: supports_vector_calling_convention() in this file
// answers false, and this function calls Unimplemented().  The return
// statement after it only satisfies the function signature.
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}
 1400 
 1401 // Is this branch offset short enough that a short branch can be used?
 1402 //
 1403 // NOTE: If the platform does not provide any short branch variants, then
 1404 //       this method should return false for offset 0.
 1405 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1406   // The passed offset is relative to address of the branch.
 1407   // On 86 a branch displacement is calculated relative to address
 1408   // of a next instruction.
 1409   offset -= br_size;
 1410 
 1411   // the short version of jmpConUCF2 contains multiple branches,
 1412   // making the reach slightly less
 1413   if (rule == jmpConUCF2_rule)
 1414     return (-126 <= offset && offset <= 125);
 1415   return (-128 <= offset && offset <= 127);
 1416 }
 1417 
 1418 // Return whether or not this register is ever used as an argument.  This
 1419 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1420 // Registers not mentioned will be killed by the VM call in the trampoline, and
 1421 // arguments in those registers not be available to the callee.
 1422 bool Matcher::can_be_java_arg( int reg ) {
 1423   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1424   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1425   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1426   return false;
 1427 }
 1428 
// A register is treated as a spillable argument exactly when
// can_be_java_arg() accepts it.
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}
 1432 
 1433 uint Matcher::int_pressure_limit()
 1434 {
 1435   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1436 }
 1437 
 1438 uint Matcher::float_pressure_limit()
 1439 {
 1440   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1441 }
 1442 
 1443 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1444   // Use hardware integer DIV instruction when
 1445   // it is faster than a code which use multiply.
 1446   // Only when constant divisor fits into 32 bit
 1447   // (min_jint is excluded to get only correct
 1448   // positive 32 bit values from negative).
 1449   return VM_Version::has_fast_idiv() &&
 1450          (divisor == (int)divisor && divisor != min_jint);
 1451 }
 1452 
// Register for DIVI projection of divmodI: the quotient is taken from EAX.
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}
 1457 
// Register for MODI projection of divmodI: the remainder is taken from EDX.
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}
 1462 
// Register for DIVL projection of divmodL.
// Must never be called in this description: the body starts with
// ShouldNotReachHere(), and the empty RegMask only gives the function a
// return statement.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
 1468 
// Register for MODL projection of divmodL.
// Must never be called in this description: the body starts with
// ShouldNotReachHere(), and the empty RegMask only gives the function a
// return statement.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
 1474 
// Register mask used to save SP around a method handle invoke.
// This description returns the NO_REG mask.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}
 1478 
 1479 // Returns true if the high 32 bits of the value is known to be zero.
 1480 bool is_operand_hi32_zero(Node* n) {
 1481   int opc = n->Opcode();
 1482   if (opc == Op_AndL) {
 1483     Node* o2 = n->in(2);
 1484     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1485       return true;
 1486     }
 1487   }
 1488   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1489     return true;
 1490   }
 1491   return false;
 1492 }
 1493 
 1494 %}
 1495 
 1496 //----------ENCODING BLOCK-----------------------------------------------------
 1497 // This block specifies the encoding classes used by the compiler to output
 1498 // byte streams.  Encoding classes generate functions which are called by
 1499 // Machine Instruction Nodes in order to generate the bit encoding of the
 1500 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1503 // operand to generate a function which returns its register number when
 1504 // queried.   CONST_INTER causes an operand to generate a function which
 1505 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1506 // operand to generate four functions which return the Base Register, the
 1507 // Index Register, the Scale Value, and the Offset Value of the operand when
 1508 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
 1510 // associated with each basic boolean condition for a conditional instruction.
 1511 // Instructions specify two basic values for encoding.  They use the
 1512 // ins_encode keyword to specify their encoding class (which must be one of
 1513 // the class names specified in the encoding block), and they use the
 1514 // opcode keyword to specify, in order, their primary, secondary, and
 1515 // tertiary opcode.  Only the opcode sections which a particular instruction
 1516 // needs for encoding need to be specified.
 1517 encode %{
 1518   // Build emit functions for each basic byte or larger field in the intel
 1519   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1520   // code in the enc_class source block.  Emit functions will live in the
 1521   // main source block for now.  In future, we can generalize this by
 1522   // adding a syntax that specifies the sizes of fields in an order,
 1523   // so that the adlc can build the emit functions automagically
 1524 
  // Set instruction mark in MacroAssembler. This is used only in
  // instructions that emit bytes directly to the CodeBuffer wrapped
  // in the MacroAssembler. Should go away once all "instruct" are
  // patched to emit bytes only using methods in MacroAssembler.
  enc_class SetInstMark %{
    __ set_inst_mark();
  %}
 1532 
  // Clear the instruction mark in the MacroAssembler again; the
  // counterpart of SetInstMark.
  enc_class ClearInstMark %{
    __ clear_inst_mark();
  %}
 1536 
  // Emit primary opcode (the first value given by the instruction's
  // opcode keyword).
  enc_class OpcP %{
    emit_opcode(masm, $primary);
  %}
 1541 
  // Emit secondary opcode (the second value given by the instruction's
  // opcode keyword).
  enc_class OpcS %{
    emit_opcode(masm, $secondary);
  %}
 1546 
  // Emit opcode directly: the byte comes from the constant operand d8
  // instead of the instruction's opcode keyword.
  enc_class Opcode(immI d8) %{
    emit_opcode(masm, $d8$$constant);
  %}
 1551 
  // Emit the 0x66 prefix byte (the x86 operand-size override, used by the
  // 16-bit forms in this file).
  enc_class SizePrefix %{
    emit_opcode(masm,0x66);
  %}
 1555 
  // Emit a register-direct ModRM byte (mod field 0x3) with dst in the reg
  // field and src in the r/m field. No opcode is emitted here.
  // NOTE(review): this encode block defines RegReg a second time, with the
  // same body, just before RegReg_Lo further down; one copy looks redundant.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}
 1559 
  // Emit the opcode given as a constant operand, followed by a
  // register-direct ModRM byte (mod field 0x3) for dst and src.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(masm,$opcode$$constant);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}
 1564 
  // Load zero into a 32-bit register with the 5-byte MOV r32,imm32 form
  // (opcode 0xB8 plus the register number, then a 32-bit zero).
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
    emit_d32   ( masm, 0x0  );             //                         imm32==0x0
  %}
 1569 
  // Emits the min_int / -1 guard and the CDQ that precede an integer divide.
  // The IDIV itself is NOT emitted here; the user of this rule must emit a
  // 2-byte IDIV directly afterwards, because the "je done" displacement
  // below is hand-counted to skip CDQ (1 byte) plus that IDIV (2 bytes).
  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : eax: dividend                          min_int
    //         reg: divisor                           -1
    //
    // output: eax: quotient  (= eax idiv reg)        min_int
    //         edx: remainder (= eax irem reg)        0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,-1        (imm8 0FFh, sign-extended)
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        ecx
    //                  done:
    //
    // NOTE(review): the divisor compare is hard-wired to ECX (83 F9 FF), so
    // users of this rule presumably pin the divisor to ECX -- confirm.
    emit_opcode(masm,0x81); emit_d8(masm,0xF8);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x80);                     // cmp eax,80000000h
    emit_opcode(masm,0x0F); emit_d8(masm,0x85);
    emit_opcode(masm,0x0B); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // jne normal_case
    emit_opcode(masm,0x33); emit_d8(masm,0xD2);                     // xor edx,edx
    emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF); // cmp ecx,-1
    emit_opcode(masm,0x0F); emit_d8(masm,0x84);
    emit_opcode(masm,0x03); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // je done
    // normal_case:
    emit_opcode(masm,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // done:
  %}
 1610 
  // Dense encoding for older common ops: a single byte formed by adding
  // the register number to the opcode constant.
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(masm, $opcode$$constant + $reg$$reg);
  %}
 1615 
 1616 
 1617   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
 1618   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1619     // Check for 8-bit immediate, and set sign extend bit in opcode
 1620     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1621       emit_opcode(masm, $primary | 0x02);
 1622     }
 1623     else {                          // If 32-bit immediate
 1624       emit_opcode(masm, $primary);
 1625     }
 1626   %}
 1627 
 1628   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1629     // Emit primary opcode and set sign-extend bit
 1630     // Check for 8-bit immediate, and set sign extend bit in opcode
 1631     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1632       emit_opcode(masm, $primary | 0x02);    }
 1633     else {                          // If 32-bit immediate
 1634       emit_opcode(masm, $primary);
 1635     }
 1636     // Emit r/m byte with secondary opcode, after primary opcode.
 1637     emit_rm(masm, 0x3, $secondary, $dst$$reg);
 1638   %}
 1639 
 1640   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1641     // Check for 8-bit immediate, and set sign extend bit in opcode
 1642     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1643       $$$emit8$imm$$constant;
 1644     }
 1645     else {                          // If 32-bit immediate
 1646       // Output immediate
 1647       $$$emit32$imm$$constant;
 1648     }
 1649   %}
 1650 
 1651   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1652     // Emit primary opcode and set sign-extend bit
 1653     // Check for 8-bit immediate, and set sign extend bit in opcode
 1654     int con = (int)$imm$$constant; // Throw away top bits
 1655     emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1656     // Emit r/m byte with secondary opcode, after primary opcode.
 1657     emit_rm(masm, 0x3, $secondary, $dst$$reg);
 1658     if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
 1659     else                               emit_d32(masm,con);
 1660   %}
 1661 
 1662   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1663     // Emit primary opcode and set sign-extend bit
 1664     // Check for 8-bit immediate, and set sign extend bit in opcode
 1665     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1666     emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1667     // Emit r/m byte with tertiary opcode, after primary opcode.
 1668     emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1669     if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
 1670     else                               emit_d32(masm,con);
 1671   %}
 1672 
  // Emit the secondary opcode together with the register number through
  // emit_cc (defined elsewhere in this file).
  // NOTE(review): presumably emit_cc folds the register into the opcode
  // byte, as BSWAP requires -- confirm against emit_cc.
  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(masm, $secondary, $dst$$reg );
  %}
 1676 
 1677   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1678     int destlo = $dst$$reg;
 1679     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1680     // bswap lo
 1681     emit_opcode(masm, 0x0F);
 1682     emit_cc(masm, 0xC8, destlo);
 1683     // bswap hi
 1684     emit_opcode(masm, 0x0F);
 1685     emit_cc(masm, 0xC8, desthi);
 1686     // xchg lo and hi
 1687     emit_opcode(masm, 0x87);
 1688     emit_rm(masm, 0x3, destlo, desthi);
 1689   %}
 1690 
  // Emit a register-direct ModRM byte whose reg field carries the
  // secondary opcode and whose r/m field names the operand register.
  // The opcode byte itself is emitted separately.
  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(masm, 0x3, $secondary, $div$$reg );
  %}
 1694 
  // Emit the two opcode bytes of a CMOV: the primary opcode byte, then the
  // secondary opcode combined with the condition code through emit_cc
  // (defined elsewhere in this file).
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(masm, $secondary, $cop$$cmpcode);
  %}
 1699 
 1700   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1701     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1702     emit_d8(masm, op >> 8 );
 1703     emit_d8(masm, op & 255);
 1704   %}
 1705 
  // emulate a CMOV with a conditional branch around a MOV
  // Only the branch is emitted here: a short conditional jump (0x70 base)
  // followed by an 8-bit displacement given by brOffs. The MOV that is
  // skipped must be emitted by the instruction using this rule.
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    // (flipping the low bit of the condition code negates the condition)
    emit_cc( masm, 0x70, ($cop$$cmpcode^1) );
    emit_d8( masm, $brOffs$$constant );
  %}
 1712 
  // Emit the slow-path subtype check with fixed registers: ESI holds the
  // sub class, EAX the super class, ECX is killed and EDI is the result.
  // A failed check branches to the local label miss, which is bound at the
  // very end. When the primary opcode value is nonzero, EDI is cleared on
  // the fall-through (non-miss) path only.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    // NB: Callers may assume that, when $result is a valid register,
    // check_klass_subtype_slow_path sets it to a nonzero value.
     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     nullptr, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      // Reached only when the check did not branch to miss.
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}
 1730 
 1731   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1732     int start = __ offset();
 1733     if (UseSSE >= 2) {
 1734       if (VerifyFPU) {
 1735         __ verify_FPU(0, "must be empty in SSE2+ mode");
 1736       }
 1737     } else {
 1738       // External c_calling_convention expects the FPU stack to be 'clean'.
 1739       // Compiled code leaves it dirty.  Do cleanup now.
 1740       __ empty_FPU_stack();
 1741     }
 1742     if (sizeof_FFree_Float_Stack_All == -1) {
 1743       sizeof_FFree_Float_Stack_All = __ offset() - start;
 1744     } else {
 1745       assert(__ offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1746     }
 1747   %}
 1748 
  // When VerifyFPU is set, emit an FPU stack check after a runtime leaf
  // call. Emits nothing otherwise.
  // NOTE(review): the meaning of the -3 depth argument is defined by
  // verify_FPU, which is not visible here -- confirm before changing it.
  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      __ verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}
 1754 
  // Emit a direct CALL into the runtime and, when SSE2 or better is in
  // use, deal with a float or double result that the C callee left on the
  // x87 stack: drop it when unused, otherwise move it into XMM0 through a
  // temporary stack slot.
  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    __ set_inst_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    // The displacement is relative to the end of the 4-byte field that is
    // about to be emitted, hence the extra -4.
    emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();

    if (UseSSE >= 2) {
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // Spill the x87 result to a 4-byte stack slot and reload it into
        // XMM0. LEA adjusts ESP in both directions.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp,  4));
      } else if (rt == T_DOUBLE) {
        // Same as above with an 8-byte slot for a double.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp,  8));
      }
    }
  %}
 1787 
  // State resets emitted ahead of a call: restore the standard FPU control
  // word if this compilation runs in 24-bit FP mode, then issue vzeroupper.
  // The emitted size must agree with pre_call_resets_size() (defined
  // elsewhere), which the closing assert checks in debug builds.
  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = __ offset());
    if (ra_->C->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
    debug_only(int off1 = __ offset());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}
 1800 
  // After a call, reload the 24-bit FPU control word when the current
  // compilation runs in 24-bit FP mode. Mirrors the restore of the
  // standard control word done in pre_call_resets.
  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    }
  %}
 1807 
  // Emit a static (or optimized virtual) Java call.
  // Without a resolved _method the call is emitted as a plain runtime call.
  // With one, the call carries a static or opt-virtual relocation and
  // either shares or emits a to-interpreter stub. Every path clears the
  // instruction mark set at the top before it finishes.
  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    __ set_inst_mark();
    $$$emit8$primary;

    if (!_method) {
      // Displacement is relative to the end of the 4-byte field (the -4).
      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
      __ clear_inst_mark();
      __ post_call_nop();
    } else {
      int method_index = resolved_method_index(masm);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                     rspec, RELOC_DISP32);
      __ post_call_nop();
      // The mark still points at the start of the call; the stub code
      // below needs it, so it is cleared only afterwards.
      address mark = __ inst_mark();
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        __ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off());
        __ clear_inst_mark();
      } else {
        // Emit stubs for static call.
        address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
        __ clear_inst_mark();
        if (stub == nullptr) {
          // No room for the stub: record the failure and stop emitting.
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
  %}
 1844 
  // Emit an inline-cache call to the target through the MacroAssembler,
  // followed by the post-call nop.
  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
 1849 
  // Emit an indirect CALL through the compiled entry point stored in the
  // Method that EAX points to. The entry offset must fit a signed byte
  // because the 8-bit displacement form (mod field 0x01) is used.
  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
    __ set_inst_mark();
    $$$emit8$primary;
    emit_rm(masm, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(masm, disp);             // Displacement
    __ clear_inst_mark();
    __ post_call_nop();
  %}
 1862 
  // Shift by immediate: primary opcode byte, register-direct ModRM byte
  // with the secondary opcode in its reg field, then the 8-bit count.
  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(masm, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}
 1868 
  // Load a 32-bit immediate with MOV r32,imm32 (0xB8 plus the register
  // number). The opcode is fixed here instead of taken from the
  // instruction's opcode keyword.
  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(masm, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}
 1875 
  // Like LdImmI, except that the base opcode comes from the instruction's
  // primary opcode instead of a fixed 0xB8.
  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(masm, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}
 1882 
 1883   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1884     // Load immediate does not have a zero or sign extended version
 1885     // for 8-bit immediates
 1886     int dst_enc = $dst$$reg;
 1887     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1888     if (src_con == 0) {
 1889       // xor dst, dst
 1890       emit_opcode(masm, 0x33);
 1891       emit_rm(masm, 0x3, dst_enc, dst_enc);
 1892     } else {
 1893       emit_opcode(masm, $primary + dst_enc);
 1894       emit_d32(masm, src_con);
 1895     }
 1896   %}
 1897 
 1898   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1899     // Load immediate does not have a zero or sign extended version
 1900     // for 8-bit immediates
 1901     int dst_enc = $dst$$reg + 2;
 1902     int src_con = ((julong)($src$$constant)) >> 32;
 1903     if (src_con == 0) {
 1904       // xor dst, dst
 1905       emit_opcode(masm, 0x33);
 1906       emit_rm(masm, 0x3, dst_enc, dst_enc);
 1907     } else {
 1908       emit_opcode(masm, $primary + dst_enc);
 1909       emit_d32(masm, src_con);
 1910     }
 1911   %}
 1912 
 1913 
  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  // The work is delegated to encode_Copy (defined elsewhere in this file).
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( masm, $dst$$reg, $src$$reg );
  %}
 1918 
  // Copy into an int register from a long operand. The long operand's own
  // register number is passed, i.e. the low register of the pair.
  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( masm, $dst$$reg, $src$$reg );
  %}
 1922 
  // Emit a register-direct ModRM byte (mod field 0x3) for dst and src.
  // NOTE(review): RegReg is already defined with the same body near the
  // top of this encode block (right after SizePrefix); one copy looks
  // redundant.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}
 1926 
  // Low-word half of a long reg-reg operation: primary opcode byte, then
  // a register-direct ModRM byte for the low registers of both pairs.
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}
 1931 
  // High-word half of a long reg-reg operation: secondary opcode byte,
  // then a register-direct ModRM byte for the high registers of both pairs.
  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}
 1936 
  // ModRM byte only (no opcode) for the low registers of two long pairs.
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}
 1940 
  // ModRM byte only (no opcode) for the high registers of two long pairs.
  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}
 1944 
  // ModRM byte only (no opcode) pairing an int register with the high
  // register of a long pair. Note the parameter order: src comes first.
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}
 1948 
  // Emit the constant operand as a 32-bit immediate.
  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}
 1953 
 1954   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1955     // Output Float immediate bits
 1956     jfloat jf = $src$$constant;
 1957     int    jf_as_bits = jint_cast( jf );
 1958     emit_d32(masm, jf_as_bits);
 1959   %}
 1960 
 1961   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1962     // Output Float immediate bits
 1963     jfloat jf = $src$$constant;
 1964     int    jf_as_bits = jint_cast( jf );
 1965     emit_d32(masm, jf_as_bits);
 1966   %}
 1967 
  // Emit the constant operand as a 16-bit immediate.
  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}
 1972 
  // Emit the constant operand as 32 bits by calling emit_d32 directly.
  // NOTE(review): Con32 goes through the ADLC emit32 form instead, which
  // presumably can attach relocation info -- confirm before merging them.
  enc_class Con_d32(immI src) %{
    emit_d32(masm,$src$$constant);
  %}
 1976 
  // Emit a ModRM byte with mod field 0x00 and r/m field 0x05 (the bare
  // 32-bit displacement form) and the operand register in the reg field,
  // followed by a zero 32-bit displacement.
  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(masm, 0x00, $t1$$reg, 0x05 );
    emit_d32(masm, 0x00);
  %}
 1982 
  // Emit the LOCK prefix byte (0xF0) ahead of the following instruction.
  enc_class lock_prefix( ) %{
    emit_opcode(masm,0xF0);         // [Lock]
  %}
 1986 
  // Cmp-xchg long value.
  // Note: we need to swap ebx and ecx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       ecx as the high order word of the new value to store but
  //       our register encoding uses ebx.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  ebx,ecx   (87 D9)
    emit_opcode(masm,0x87);
    emit_opcode(masm,0xD9);
    // [Lock]
    emit_opcode(masm,0xF0);
    // CMPXCHG8 [Eptr]   (0F C7 /1)
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xC7);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
    // XCHG  ebx,ecx   (87 D9) -- undo the swap
    emit_opcode(masm,0x87);
    emit_opcode(masm,0xD9);
  %}
 2007 
  // Locked 32-bit compare-and-exchange on the memory addressed by the
  // pointer register. The ModRM reg field is fixed at 1, which names ECX
  // as the register supplying the new value.
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(masm,0xF0);

    // CMPXCHG [Eptr]   (0F B1 /r)
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xB1);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  %}
 2017 
  // Locked byte-sized compare-and-exchange; identical to enc_cmpxchg
  // except for the 0xB0 opcode byte that selects the 8-bit form.
  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(masm,0xF0);

    // CMPXCHGB [Eptr]   (0F B0 /r)
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xB0);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  %}
 2027 
  // Locked 16-bit compare-and-exchange: the enc_cmpxchg sequence with a
  // 0x66 operand-size prefix placed between LOCK and the opcode.
  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(masm,0xF0);

    // 16-bit mode
    emit_opcode(masm, 0x66);

    // CMPXCHGW [Eptr]   (0F B1 /r)
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xB1);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  %}
 2040 
  // Turn the current flags into 0 or 1 in the result register: 0 when the
  // flags say "not equal", 1 otherwise. MOV is used for both loads because
  // it leaves the flags untouched. The branch displacement of 5 is the
  // size of the second MOV (1 opcode byte + 4 immediate bytes).
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( masm, 0xB8 + res_encoding);
    emit_d32( masm, 0 );
    // JNE,s  fail
    emit_opcode(masm,0x75);
    emit_d8(masm, 5 );
    // MOV  res,1
    emit_opcode( masm, 0xB8 + res_encoding);
    emit_d32( masm, 1 );
    // fail:
  %}
 2055 
 2056   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2057     int reg_encoding = $ereg$$reg;
 2058     int base  = $mem$$base;
 2059     int index = $mem$$index;
 2060     int scale = $mem$$scale;
 2061     int displace = $mem$$disp;
 2062     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2063     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2064   %}
 2065 
 2066   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2067     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2068     int base  = $mem$$base;
 2069     int index = $mem$$index;
 2070     int scale = $mem$$scale;
 2071     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2072     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2073     encode_RegMem(masm, reg_encoding, base, index, scale, displace, relocInfo::none);
 2074   %}
 2075 
 2076   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2077     int r1, r2;
 2078     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2079     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2080     emit_opcode(masm,0x0F);
 2081     emit_opcode(masm,$tertiary);
 2082     emit_rm(masm, 0x3, r1, r2);
 2083     emit_d8(masm,$cnt$$constant);
 2084     emit_d8(masm,$primary);
 2085     emit_rm(masm, 0x3, $secondary, r1);
 2086     emit_d8(masm,$cnt$$constant);
 2087   %}
 2088 
  // Sign-propagating shift of a long register pair by a constant count of
  // 32..63: copy the high half into the low half, shift the low half by
  // the count less 32 when that is nonzero, then shift the high half by
  // 31 so that it holds nothing but copies of the sign bit.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( masm, 0x8B ); // Move
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(masm,$primary);
      emit_rm(masm, 0x3, $secondary, $dst$$reg);
      emit_d8(masm,$cnt$$constant-32);
    }
    // Shift the high half by 31
    emit_d8(masm,$primary);
    emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8(masm,31);
  %}
 2101 
 2102   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2103     int r1, r2;
 2104     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2105     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2106 
 2107     emit_opcode( masm, 0x8B ); // Move r1,r2
 2108     emit_rm(masm, 0x3, r1, r2);
 2109     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2110       emit_opcode(masm,$primary);
 2111       emit_rm(masm, 0x3, $secondary, r1);
 2112       emit_d8(masm,$cnt$$constant-32);
 2113     }
 2114     emit_opcode(masm,0x33);  // XOR r2,r2
 2115     emit_rm(masm, 0x3, r2, r2);
 2116   %}
 2117 
 2118   // Clone of RegMem but accepts an extra parameter to access each
 2119   // half of a double in memory; it never needs relocation info.
 2120   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2121     emit_opcode(masm,$opcode$$constant);
 2122     int reg_encoding = $rm_reg$$reg;
 2123     int base     = $mem$$base;
 2124     int index    = $mem$$index;
 2125     int scale    = $mem$$scale;
 2126     int displace = $mem$$disp + $disp_for_half$$constant;
 2127     relocInfo::relocType disp_reloc = relocInfo::none;
 2128     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2129   %}
 2130 
 2131   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2132   //
 2133   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2134   // and it never needs relocation information.
 2135   // Frequently used to move data between FPU's Stack Top and memory.
 2136   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2137     int rm_byte_opcode = $rm_opcode$$constant;
 2138     int base     = $mem$$base;
 2139     int index    = $mem$$index;
 2140     int scale    = $mem$$scale;
 2141     int displace = $mem$$disp;
 2142     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2143     encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2144   %}
 2145 
 2146   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2147     int rm_byte_opcode = $rm_opcode$$constant;
 2148     int base     = $mem$$base;
 2149     int index    = $mem$$index;
 2150     int scale    = $mem$$scale;
 2151     int displace = $mem$$disp;
 2152     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2153     encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2154   %}
 2155 
 2156   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2157     int reg_encoding = $dst$$reg;
 2158     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2159     int index        = 0x04;            // 0x04 indicates no index
 2160     int scale        = 0x00;            // 0x00 indicates no scale
 2161     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2162     relocInfo::relocType disp_reloc = relocInfo::none;
 2163     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2164   %}
 2165 
  // dst = min(dst, src) without CMOV: compare, branch over a 2-byte MOV
  // when dst is already the smaller value, otherwise copy src into dst.
  // The branch displacement of 2 is the size of that MOV.
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(masm,0x3B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(masm,0x7C);
    emit_d8(masm,2);
    // move dst,src
    emit_opcode(masm,0x8B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}
 2177 
  // dst = max(dst, src) without CMOV: compare, branch over a 2-byte MOV
  // when dst is already the larger value, otherwise copy src into dst.
  // The branch displacement of 2 is the size of that MOV.
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(masm,0x3B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(masm,0x7F);
    emit_d8(masm,2);
    // move dst,src
    emit_opcode(masm,0x8B);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}
 2189 
 2190   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2191     // If src is FPR1, we can just FST to store it.
 2192     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2193     int reg_encoding = 0x2; // Just store
 2194     int base  = $mem$$base;
 2195     int index = $mem$$index;
 2196     int scale = $mem$$scale;
 2197     int displace = $mem$$disp;
 2198     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2199     if( $src$$reg != FPR1L_enc ) {
 2200       reg_encoding = 0x3;  // Store & pop
 2201       emit_opcode( masm, 0xD9 ); // FLD (i.e., push it)
 2202       emit_d8( masm, 0xC0-1+$src$$reg );
 2203     }
 2204     __ set_inst_mark();       // Mark start of opcode for reloc info in mem operand
 2205     emit_opcode(masm,$primary);
 2206     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2207     __ clear_inst_mark();
 2208   %}
 2209 
  // Negate a register in place: opcode 0xF7 with extension 0x03 in the
  // reg field of a register-direct ModRM byte.
  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(masm,0xF7);
    emit_rm(masm, 0x3, 0x03, $dst$$reg );
  %}
 2215 
  // Set the byte register to 1 when the flags say "less than", else to 0
  // (two-byte opcode 0F 9C plus a register-direct ModRM byte).
  // NOTE(review): the reg field is written as 0x4 here; confirm that the
  // value placed in that field is irrelevant for this instruction.
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0x9C);
    emit_rm( masm, 0x3, 0x4, $dst$$reg );
  %}
 2222 
 2223   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2224     int tmpReg = $tmp$$reg;
 2225 
 2226     // SUB $p,$q
 2227     emit_opcode(masm,0x2B);
 2228     emit_rm(masm, 0x3, $p$$reg, $q$$reg);
 2229     // SBB $tmp,$tmp
 2230     emit_opcode(masm,0x1B);
 2231     emit_rm(masm, 0x3, tmpReg, tmpReg);
 2232     // AND $tmp,$y
 2233     emit_opcode(masm,0x23);
 2234     emit_rm(masm, 0x3, tmpReg, $y$$reg);
 2235     // ADD $p,$tmp
 2236     emit_opcode(masm,0x03);
 2237     emit_rm(masm, 0x3, $p$$reg, tmpReg);
 2238   %}
 2239 
 2240   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2241     // TEST shift,32
 2242     emit_opcode(masm,0xF7);
 2243     emit_rm(masm, 0x3, 0, ECX_enc);
 2244     emit_d32(masm,0x20);
 2245     // JEQ,s small
 2246     emit_opcode(masm, 0x74);
 2247     emit_d8(masm, 0x04);
 2248     // MOV    $dst.hi,$dst.lo
 2249     emit_opcode( masm, 0x8B );
 2250     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2251     // CLR    $dst.lo
 2252     emit_opcode(masm, 0x33);
 2253     emit_rm(masm, 0x3, $dst$$reg, $dst$$reg);
 2254 // small:
 2255     // SHLD   $dst.hi,$dst.lo,$shift
 2256     emit_opcode(masm,0x0F);
 2257     emit_opcode(masm,0xA5);
 2258     emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2259     // SHL    $dst.lo,$shift"
 2260     emit_opcode(masm,0xD3);
 2261     emit_rm(masm, 0x3, 0x4, $dst$$reg );
 2262   %}
 2263 
 2264   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2265     // TEST shift,32
 2266     emit_opcode(masm,0xF7);
 2267     emit_rm(masm, 0x3, 0, ECX_enc);
 2268     emit_d32(masm,0x20);
 2269     // JEQ,s small
 2270     emit_opcode(masm, 0x74);
 2271     emit_d8(masm, 0x04);
 2272     // MOV    $dst.lo,$dst.hi
 2273     emit_opcode( masm, 0x8B );
 2274     emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2275     // CLR    $dst.hi
 2276     emit_opcode(masm, 0x33);
 2277     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2278 // small:
 2279     // SHRD   $dst.lo,$dst.hi,$shift
 2280     emit_opcode(masm,0x0F);
 2281     emit_opcode(masm,0xAD);
 2282     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
 2283     // SHR    $dst.hi,$shift"
 2284     emit_opcode(masm,0xD3);
 2285     emit_rm(masm, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2286   %}
 2287 
 2288   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2289     // TEST shift,32
 2290     emit_opcode(masm,0xF7);
 2291     emit_rm(masm, 0x3, 0, ECX_enc);
 2292     emit_d32(masm,0x20);
 2293     // JEQ,s small
 2294     emit_opcode(masm, 0x74);
 2295     emit_d8(masm, 0x05);
 2296     // MOV    $dst.lo,$dst.hi
 2297     emit_opcode( masm, 0x8B );
 2298     emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2299     // SAR    $dst.hi,31
 2300     emit_opcode(masm, 0xC1);
 2301     emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2302     emit_d8(masm, 0x1F );
 2303 // small:
 2304     // SHRD   $dst.lo,$dst.hi,$shift
 2305     emit_opcode(masm,0x0F);
 2306     emit_opcode(masm,0xAD);
 2307     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
 2308     // SAR    $dst.hi,$shift"
 2309     emit_opcode(masm,0xD3);
 2310     emit_rm(masm, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2311   %}
 2312 
 2313 
 2314   // ----------------- Encodings for floating point unit -----------------
 2315   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2316   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2317     $$$emit8$primary;
 2318     emit_rm(masm, 0x3, $secondary, $src$$reg );
 2319   %}
 2320 
 2321   // Pop argument in FPR0 with FSTP ST(0)
 2322   enc_class PopFPU() %{
 2323     emit_opcode( masm, 0xDD );
 2324     emit_d8( masm, 0xD8 );
 2325   %}
 2326 
 2327   // !!!!! equivalent to Pop_Reg_F
 2328   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2329     emit_opcode( masm, 0xDD );           // FSTP   ST(i)
 2330     emit_d8( masm, 0xD8+$dst$$reg );
 2331   %}
 2332 
 2333   enc_class Push_Reg_DPR( regDPR dst ) %{
 2334     emit_opcode( masm, 0xD9 );
 2335     emit_d8( masm, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2336   %}
 2337 
 2338   enc_class strictfp_bias1( regDPR dst ) %{
 2339     emit_opcode( masm, 0xDB );           // FLD m80real
 2340     emit_opcode( masm, 0x2D );
 2341     emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2342     emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
 2343     emit_opcode( masm, 0xC8+$dst$$reg );
 2344   %}
 2345 
 2346   enc_class strictfp_bias2( regDPR dst ) %{
 2347     emit_opcode( masm, 0xDB );           // FLD m80real
 2348     emit_opcode( masm, 0x2D );
 2349     emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2350     emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
 2351     emit_opcode( masm, 0xC8+$dst$$reg );
 2352   %}
 2353 
 2354   // Special case for moving an integer register to a stack slot.
 2355   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2356     store_to_stackslot( masm, $primary, $src$$reg, $dst$$disp );
 2357   %}
 2358 
 2359   // Special case for moving a register to a stack slot.
 2360   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2361     // Opcode already emitted
 2362     emit_rm( masm, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2363     emit_rm( masm, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2364     emit_d32(masm, $dst$$disp);   // Displacement
 2365   %}
 2366 
 2367   // Push the integer in stackSlot 'src' onto FP-stack
 2368   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2369     store_to_stackslot( masm, $primary, $secondary, $src$$disp );
 2370   %}
 2371 
 2372   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2373   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2374     store_to_stackslot( masm, 0xD9, 0x03, $dst$$disp );
 2375   %}
 2376 
 2377   // Same as Pop_Mem_F except for opcode
 2378   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2379   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2380     store_to_stackslot( masm, 0xDD, 0x03, $dst$$disp );
 2381   %}
 2382 
 2383   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2384     emit_opcode( masm, 0xDD );           // FSTP   ST(i)
 2385     emit_d8( masm, 0xD8+$dst$$reg );
 2386   %}
 2387 
 2388   enc_class Push_Reg_FPR( regFPR dst ) %{
 2389     emit_opcode( masm, 0xD9 );           // FLD    ST(i-1)
 2390     emit_d8( masm, 0xC0-1+$dst$$reg );
 2391   %}
 2392 
 2393   // Push FPU's float to a stack-slot, and pop FPU-stack
 2394   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2395     int pop = 0x02;
 2396     if ($src$$reg != FPR1L_enc) {
 2397       emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
 2398       emit_d8( masm, 0xC0-1+$src$$reg );
 2399       pop = 0x03;
 2400     }
 2401     store_to_stackslot( masm, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2402   %}
 2403 
 2404   // Push FPU's double to a stack-slot, and pop FPU-stack
 2405   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2406     int pop = 0x02;
 2407     if ($src$$reg != FPR1L_enc) {
 2408       emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
 2409       emit_d8( masm, 0xC0-1+$src$$reg );
 2410       pop = 0x03;
 2411     }
 2412     store_to_stackslot( masm, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2413   %}
 2414 
 2415   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2416   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2417     int pop = 0xD0 - 1; // -1 since we skip FLD
 2418     if ($src$$reg != FPR1L_enc) {
 2419       emit_opcode( masm, 0xD9 );         // FLD    ST(src-1)
 2420       emit_d8( masm, 0xC0-1+$src$$reg );
 2421       pop = 0xD8;
 2422     }
 2423     emit_opcode( masm, 0xDD );
 2424     emit_d8( masm, pop+$dst$$reg );      // FST<P> ST(i)
 2425   %}
 2426 
 2427 
 2428   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2429     // load dst in FPR0
 2430     emit_opcode( masm, 0xD9 );
 2431     emit_d8( masm, 0xC0-1+$dst$$reg );
 2432     if ($src$$reg != FPR1L_enc) {
 2433       // fincstp
 2434       emit_opcode (masm, 0xD9);
 2435       emit_opcode (masm, 0xF7);
 2436       // swap src with FPR1:
 2437       // FXCH FPR1 with src
 2438       emit_opcode(masm, 0xD9);
 2439       emit_d8(masm, 0xC8-1+$src$$reg );
 2440       // fdecstp
 2441       emit_opcode (masm, 0xD9);
 2442       emit_opcode (masm, 0xF6);
 2443     }
 2444   %}
 2445 
 2446   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2447     __ subptr(rsp, 8);
 2448     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2449     __ fld_d(Address(rsp, 0));
 2450     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2451     __ fld_d(Address(rsp, 0));
 2452   %}
 2453 
 2454   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2455     __ subptr(rsp, 4);
 2456     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2457     __ fld_s(Address(rsp, 0));
 2458     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2459     __ fld_s(Address(rsp, 0));
 2460   %}
 2461 
 2462   enc_class Push_ResultD(regD dst) %{
 2463     __ fstp_d(Address(rsp, 0));
 2464     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2465     __ addptr(rsp, 8);
 2466   %}
 2467 
 2468   enc_class Push_ResultF(regF dst, immI d8) %{
 2469     __ fstp_s(Address(rsp, 0));
 2470     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2471     __ addptr(rsp, $d8$$constant);
 2472   %}
 2473 
 2474   enc_class Push_SrcD(regD src) %{
 2475     __ subptr(rsp, 8);
 2476     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2477     __ fld_d(Address(rsp, 0));
 2478   %}
 2479 
 2480   enc_class push_stack_temp_qword() %{
 2481     __ subptr(rsp, 8);
 2482   %}
 2483 
 2484   enc_class pop_stack_temp_qword() %{
 2485     __ addptr(rsp, 8);
 2486   %}
 2487 
 2488   enc_class push_xmm_to_fpr1(regD src) %{
 2489     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2490     __ fld_d(Address(rsp, 0));
 2491   %}
 2492 
 2493   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2494     if ($src$$reg != FPR1L_enc) {
 2495       // fincstp
 2496       emit_opcode (masm, 0xD9);
 2497       emit_opcode (masm, 0xF7);
 2498       // FXCH FPR1 with src
 2499       emit_opcode(masm, 0xD9);
 2500       emit_d8(masm, 0xC8-1+$src$$reg );
 2501       // fdecstp
 2502       emit_opcode (masm, 0xD9);
 2503       emit_opcode (masm, 0xF6);
 2504     }
 2505   %}
 2506 
 2507   enc_class fnstsw_sahf_skip_parity() %{
 2508     // fnstsw ax
 2509     emit_opcode( masm, 0xDF );
 2510     emit_opcode( masm, 0xE0 );
 2511     // sahf
 2512     emit_opcode( masm, 0x9E );
 2513     // jnp  ::skip
 2514     emit_opcode( masm, 0x7B );
 2515     emit_opcode( masm, 0x05 );
 2516   %}
 2517 
 2518   enc_class emitModDPR() %{
 2519     // fprem must be iterative
 2520     // :: loop
 2521     // fprem
 2522     emit_opcode( masm, 0xD9 );
 2523     emit_opcode( masm, 0xF8 );
 2524     // wait
 2525     emit_opcode( masm, 0x9b );
 2526     // fnstsw ax
 2527     emit_opcode( masm, 0xDF );
 2528     emit_opcode( masm, 0xE0 );
 2529     // sahf
 2530     emit_opcode( masm, 0x9E );
 2531     // jp  ::loop
 2532     emit_opcode( masm, 0x0F );
 2533     emit_opcode( masm, 0x8A );
 2534     emit_opcode( masm, 0xF4 );
 2535     emit_opcode( masm, 0xFF );
 2536     emit_opcode( masm, 0xFF );
 2537     emit_opcode( masm, 0xFF );
 2538   %}
 2539 
 2540   enc_class fpu_flags() %{
 2541     // fnstsw_ax
 2542     emit_opcode( masm, 0xDF);
 2543     emit_opcode( masm, 0xE0);
 2544     // test ax,0x0400
 2545     emit_opcode( masm, 0x66 );   // operand-size prefix for 16-bit immediate
 2546     emit_opcode( masm, 0xA9 );
 2547     emit_d16   ( masm, 0x0400 );
 2548     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2549     // // test rax,0x0400
 2550     // emit_opcode( masm, 0xA9 );
 2551     // emit_d32   ( masm, 0x00000400 );
 2552     //
 2553     // jz exit (no unordered comparison)
 2554     emit_opcode( masm, 0x74 );
 2555     emit_d8    ( masm, 0x02 );
 2556     // mov ah,1 - treat as LT case (set carry flag)
 2557     emit_opcode( masm, 0xB4 );
 2558     emit_d8    ( masm, 0x01 );
 2559     // sahf
 2560     emit_opcode( masm, 0x9E);
 2561   %}
 2562 
 2563   enc_class cmpF_P6_fixup() %{
 2564     // Fixup the integer flags in case comparison involved a NaN
 2565     //
 2566     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2567     emit_opcode( masm, 0x7B );
 2568     emit_d8    ( masm, 0x03 );
 2569     // MOV AH,1 - treat as LT case (set carry flag)
 2570     emit_opcode( masm, 0xB4 );
 2571     emit_d8    ( masm, 0x01 );
 2572     // SAHF
 2573     emit_opcode( masm, 0x9E);
 2574     // NOP     // target for branch to avoid branch to branch
 2575     emit_opcode( masm, 0x90);
 2576   %}
 2577 
 2578 //     fnstsw_ax();
 2579 //     sahf();
 2580 //     movl(dst, nan_result);
 2581 //     jcc(Assembler::parity, exit);
 2582 //     movl(dst, less_result);
 2583 //     jcc(Assembler::below, exit);
 2584 //     movl(dst, equal_result);
 2585 //     jcc(Assembler::equal, exit);
 2586 //     movl(dst, greater_result);
 2587 
// less_result     = -1;
// greater_result  =  1;
// equal_result    =  0;
// nan_result      = -1;
 2592 
 2593   enc_class CmpF_Result(rRegI dst) %{
 2594     // fnstsw_ax();
 2595     emit_opcode( masm, 0xDF);
 2596     emit_opcode( masm, 0xE0);
 2597     // sahf
 2598     emit_opcode( masm, 0x9E);
 2599     // movl(dst, nan_result);
 2600     emit_opcode( masm, 0xB8 + $dst$$reg);
 2601     emit_d32( masm, -1 );
 2602     // jcc(Assembler::parity, exit);
 2603     emit_opcode( masm, 0x7A );
 2604     emit_d8    ( masm, 0x13 );
 2605     // movl(dst, less_result);
 2606     emit_opcode( masm, 0xB8 + $dst$$reg);
 2607     emit_d32( masm, -1 );
 2608     // jcc(Assembler::below, exit);
 2609     emit_opcode( masm, 0x72 );
 2610     emit_d8    ( masm, 0x0C );
 2611     // movl(dst, equal_result);
 2612     emit_opcode( masm, 0xB8 + $dst$$reg);
 2613     emit_d32( masm, 0 );
 2614     // jcc(Assembler::equal, exit);
 2615     emit_opcode( masm, 0x74 );
 2616     emit_d8    ( masm, 0x05 );
 2617     // movl(dst, greater_result);
 2618     emit_opcode( masm, 0xB8 + $dst$$reg);
 2619     emit_d32( masm, 1 );
 2620   %}
 2621 
 2622 
 2623   // Compare the longs and set flags
 2624   // BROKEN!  Do Not use as-is
 2625   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2626     // CMP    $src1.hi,$src2.hi
 2627     emit_opcode( masm, 0x3B );
 2628     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2629     // JNE,s  done
 2630     emit_opcode(masm,0x75);
 2631     emit_d8(masm, 2 );
 2632     // CMP    $src1.lo,$src2.lo
 2633     emit_opcode( masm, 0x3B );
 2634     emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
 2635 // done:
 2636   %}
 2637 
 2638   enc_class convert_int_long( regL dst, rRegI src ) %{
 2639     // mov $dst.lo,$src
 2640     int dst_encoding = $dst$$reg;
 2641     int src_encoding = $src$$reg;
 2642     encode_Copy( masm, dst_encoding  , src_encoding );
 2643     // mov $dst.hi,$src
 2644     encode_Copy( masm, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2645     // sar $dst.hi,31
 2646     emit_opcode( masm, 0xC1 );
 2647     emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2648     emit_d8(masm, 0x1F );
 2649   %}
 2650 
 2651   enc_class convert_long_double( eRegL src ) %{
 2652     // push $src.hi
 2653     emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2654     // push $src.lo
 2655     emit_opcode(masm, 0x50+$src$$reg  );
 2656     // fild 64-bits at [SP]
 2657     emit_opcode(masm,0xdf);
 2658     emit_d8(masm, 0x6C);
 2659     emit_d8(masm, 0x24);
 2660     emit_d8(masm, 0x00);
 2661     // pop stack
 2662     emit_opcode(masm, 0x83); // add  SP, #8
 2663     emit_rm(masm, 0x3, 0x00, ESP_enc);
 2664     emit_d8(masm, 0x8);
 2665   %}
 2666 
 2667   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2668     // IMUL   EDX:EAX,$src1
 2669     emit_opcode( masm, 0xF7 );
 2670     emit_rm( masm, 0x3, 0x5, $src1$$reg );
 2671     // SAR    EDX,$cnt-32
 2672     int shift_count = ((int)$cnt$$constant) - 32;
 2673     if (shift_count > 0) {
 2674       emit_opcode(masm, 0xC1);
 2675       emit_rm(masm, 0x3, 7, $dst$$reg );
 2676       emit_d8(masm, shift_count);
 2677     }
 2678   %}
 2679 
 2680   // this version doesn't have add sp, 8
 2681   enc_class convert_long_double2( eRegL src ) %{
 2682     // push $src.hi
 2683     emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2684     // push $src.lo
 2685     emit_opcode(masm, 0x50+$src$$reg  );
 2686     // fild 64-bits at [SP]
 2687     emit_opcode(masm,0xdf);
 2688     emit_d8(masm, 0x6C);
 2689     emit_d8(masm, 0x24);
 2690     emit_d8(masm, 0x00);
 2691   %}
 2692 
 2693   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2694     // Basic idea: long = (long)int * (long)int
 2695     // IMUL EDX:EAX, src
 2696     emit_opcode( masm, 0xF7 );
 2697     emit_rm( masm, 0x3, 0x5, $src$$reg);
 2698   %}
 2699 
 2700   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2701     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2702     // MUL EDX:EAX, src
 2703     emit_opcode( masm, 0xF7 );
 2704     emit_rm( masm, 0x3, 0x4, $src$$reg);
 2705   %}
 2706 
 2707   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2708     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2709     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2710     // MOV    $tmp,$src.lo
 2711     encode_Copy( masm, $tmp$$reg, $src$$reg );
 2712     // IMUL   $tmp,EDX
 2713     emit_opcode( masm, 0x0F );
 2714     emit_opcode( masm, 0xAF );
 2715     emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2716     // MOV    EDX,$src.hi
 2717     encode_Copy( masm, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2718     // IMUL   EDX,EAX
 2719     emit_opcode( masm, 0x0F );
 2720     emit_opcode( masm, 0xAF );
 2721     emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2722     // ADD    $tmp,EDX
 2723     emit_opcode( masm, 0x03 );
 2724     emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2725     // MUL   EDX:EAX,$src.lo
 2726     emit_opcode( masm, 0xF7 );
 2727     emit_rm( masm, 0x3, 0x4, $src$$reg );
 2728     // ADD    EDX,ESI
 2729     emit_opcode( masm, 0x03 );
 2730     emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2731   %}
 2732 
 2733   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2734     // Basic idea: lo(result) = lo(src * y_lo)
 2735     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2736     // IMUL   $tmp,EDX,$src
 2737     emit_opcode( masm, 0x6B );
 2738     emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2739     emit_d8( masm, (int)$src$$constant );
 2740     // MOV    EDX,$src
 2741     emit_opcode(masm, 0xB8 + EDX_enc);
 2742     emit_d32( masm, (int)$src$$constant );
 2743     // MUL   EDX:EAX,EDX
 2744     emit_opcode( masm, 0xF7 );
 2745     emit_rm( masm, 0x3, 0x4, EDX_enc );
 2746     // ADD    EDX,ESI
 2747     emit_opcode( masm, 0x03 );
 2748     emit_rm( masm, 0x3, EDX_enc, $tmp$$reg );
 2749   %}
 2750 
 2751   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2752     // PUSH src1.hi
 2753     emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2754     // PUSH src1.lo
 2755     emit_opcode(masm,               0x50+$src1$$reg  );
 2756     // PUSH src2.hi
 2757     emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2758     // PUSH src2.lo
 2759     emit_opcode(masm,               0x50+$src2$$reg  );
 2760     // CALL directly to the runtime
 2761     __ set_inst_mark();
 2762     emit_opcode(masm,0xE8);       // Call into runtime
 2763     emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2764     __ clear_inst_mark();
 2765     __ post_call_nop();
 2766     // Restore stack
 2767     emit_opcode(masm, 0x83); // add  SP, #framesize
 2768     emit_rm(masm, 0x3, 0x00, ESP_enc);
 2769     emit_d8(masm, 4*4);
 2770   %}
 2771 
 2772   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2773     // PUSH src1.hi
 2774     emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2775     // PUSH src1.lo
 2776     emit_opcode(masm,               0x50+$src1$$reg  );
 2777     // PUSH src2.hi
 2778     emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2779     // PUSH src2.lo
 2780     emit_opcode(masm,               0x50+$src2$$reg  );
 2781     // CALL directly to the runtime
 2782     __ set_inst_mark();
 2783     emit_opcode(masm,0xE8);       // Call into runtime
 2784     emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2785     __ clear_inst_mark();
 2786     __ post_call_nop();
 2787     // Restore stack
 2788     emit_opcode(masm, 0x83); // add  SP, #framesize
 2789     emit_rm(masm, 0x3, 0x00, ESP_enc);
 2790     emit_d8(masm, 4*4);
 2791   %}
 2792 
 2793   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2794     // MOV   $tmp,$src.lo
 2795     emit_opcode(masm, 0x8B);
 2796     emit_rm(masm, 0x3, $tmp$$reg, $src$$reg);
 2797     // OR    $tmp,$src.hi
 2798     emit_opcode(masm, 0x0B);
 2799     emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2800   %}
 2801 
 2802   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2803     // CMP    $src1.lo,$src2.lo
 2804     emit_opcode( masm, 0x3B );
 2805     emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
 2806     // JNE,s  skip
 2807     emit_cc(masm, 0x70, 0x5);
 2808     emit_d8(masm,2);
 2809     // CMP    $src1.hi,$src2.hi
 2810     emit_opcode( masm, 0x3B );
 2811     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2812   %}
 2813 
 2814   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2815     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2816     emit_opcode( masm, 0x3B );
 2817     emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
 2818     // MOV    $tmp,$src1.hi
 2819     emit_opcode( masm, 0x8B );
 2820     emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
 2821     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2822     emit_opcode( masm, 0x1B );
 2823     emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2824   %}
 2825 
 2826   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2827     // XOR    $tmp,$tmp
 2828     emit_opcode(masm,0x33);  // XOR
 2829     emit_rm(masm,0x3, $tmp$$reg, $tmp$$reg);
 2830     // CMP    $tmp,$src.lo
 2831     emit_opcode( masm, 0x3B );
 2832     emit_rm(masm, 0x3, $tmp$$reg, $src$$reg );
 2833     // SBB    $tmp,$src.hi
 2834     emit_opcode( masm, 0x1B );
 2835     emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2836   %}
 2837 
 2838  // Sniff, sniff... smells like Gnu Superoptimizer
 2839   enc_class neg_long( eRegL dst ) %{
 2840     emit_opcode(masm,0xF7);    // NEG hi
 2841     emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2842     emit_opcode(masm,0xF7);    // NEG lo
 2843     emit_rm    (masm,0x3, 0x3,               $dst$$reg );
 2844     emit_opcode(masm,0x83);    // SBB hi,0
 2845     emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2846     emit_d8    (masm,0 );
 2847   %}
 2848 
 2849   enc_class enc_pop_rdx() %{
 2850     emit_opcode(masm,0x5A);
 2851   %}
 2852 
 2853   enc_class enc_rethrow() %{
 2854     __ set_inst_mark();
 2855     emit_opcode(masm, 0xE9);        // jmp    entry
 2856     emit_d32_reloc(masm, (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4,
 2857                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2858     __ clear_inst_mark();
 2859     __ post_call_nop();
 2860   %}
 2861 
 2862 
 2863   // Convert a double to an int.  Java semantics require we do complex
 2864   // manglelations in the corner cases.  So we set the rounding mode to
 2865   // 'zero', store the darned double down as an int, and reset the
 2866   // rounding mode to 'nearest'.  The hardware throws an exception which
 2867   // patches up the correct value directly to the stack.
 2868   enc_class DPR2I_encoding( regDPR src ) %{
 2869     // Flip to round-to-zero mode.  We attempted to allow invalid-op
 2870     // exceptions here, so that a NAN or other corner-case value will
 2871     // thrown an exception (but normal values get converted at full speed).
 2872     // However, I2C adapters and other float-stack manglers leave pending
 2873     // invalid-op exceptions hanging.  We would have to clear them before
 2874     // enabling them and that is more expensive than just testing for the
 2875     // invalid value Intel stores down in the corner cases.
 2876     emit_opcode(masm,0xD9);            // FLDCW  trunc
 2877     emit_opcode(masm,0x2D);
 2878     emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2879     // Allocate a word
 2880     emit_opcode(masm,0x83);            // SUB ESP,4
 2881     emit_opcode(masm,0xEC);
 2882     emit_d8(masm,0x04);
 2883     // Encoding assumes a double has been pushed into FPR0.
 2884     // Store down the double as an int, popping the FPU stack
 2885     emit_opcode(masm,0xDB);            // FISTP [ESP]
 2886     emit_opcode(masm,0x1C);
 2887     emit_d8(masm,0x24);
 2888     // Restore the rounding mode; mask the exception
 2889     emit_opcode(masm,0xD9);            // FLDCW   std/24-bit mode
 2890     emit_opcode(masm,0x2D);
 2891     emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
 2892         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2893         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2894 
 2895     // Load the converted int; adjust CPU stack
 2896     emit_opcode(masm,0x58);       // POP EAX
 2897     emit_opcode(masm,0x3D);       // CMP EAX,imm
 2898     emit_d32   (masm,0x80000000); //         0x80000000
 2899     emit_opcode(masm,0x75);       // JNE around_slow_call
 2900     emit_d8    (masm,0x07);       // Size of slow_call
 2901     // Push src onto stack slow-path
 2902     emit_opcode(masm,0xD9 );      // FLD     ST(i)
 2903     emit_d8    (masm,0xC0-1+$src$$reg );
 2904     // CALL directly to the runtime
 2905     __ set_inst_mark();
 2906     emit_opcode(masm,0xE8);       // Call into runtime
 2907     emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2908     __ clear_inst_mark();
 2909     __ post_call_nop();
 2910     // Carry on here...
 2911   %}
 2912 
 2913   enc_class DPR2L_encoding( regDPR src ) %{
 2914     emit_opcode(masm,0xD9);            // FLDCW  trunc
 2915     emit_opcode(masm,0x2D);
 2916     emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2917     // Allocate a word
 2918     emit_opcode(masm,0x83);            // SUB ESP,8
 2919     emit_opcode(masm,0xEC);
 2920     emit_d8(masm,0x08);
 2921     // Encoding assumes a double has been pushed into FPR0.
 2922     // Store down the double as a long, popping the FPU stack
 2923     emit_opcode(masm,0xDF);            // FISTP [ESP]
 2924     emit_opcode(masm,0x3C);
 2925     emit_d8(masm,0x24);
 2926     // Restore the rounding mode; mask the exception
 2927     emit_opcode(masm,0xD9);            // FLDCW   std/24-bit mode
 2928     emit_opcode(masm,0x2D);
 2929     emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
 2930         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2931         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2932 
 2933     // Load the converted int; adjust CPU stack
 2934     emit_opcode(masm,0x58);       // POP EAX
 2935     emit_opcode(masm,0x5A);       // POP EDX
 2936     emit_opcode(masm,0x81);       // CMP EDX,imm
 2937     emit_d8    (masm,0xFA);       // rdx
 2938     emit_d32   (masm,0x80000000); //         0x80000000
 2939     emit_opcode(masm,0x75);       // JNE around_slow_call
 2940     emit_d8    (masm,0x07+4);     // Size of slow_call
 2941     emit_opcode(masm,0x85);       // TEST EAX,EAX
 2942     emit_opcode(masm,0xC0);       // 2/rax,/rax,
 2943     emit_opcode(masm,0x75);       // JNE around_slow_call
 2944     emit_d8    (masm,0x07);       // Size of slow_call
 2945     // Push src onto stack slow-path
 2946     emit_opcode(masm,0xD9 );      // FLD     ST(i)
 2947     emit_d8    (masm,0xC0-1+$src$$reg );
 2948     // CALL directly to the runtime
 2949     __ set_inst_mark();
 2950     emit_opcode(masm,0xE8);       // Call into runtime
 2951     emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2952     __ clear_inst_mark();
 2953     __ post_call_nop();
 2954     // Carry on here...
 2955   %}
 2956 
 2957   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2958     // Operand was loaded from memory into fp ST (stack top)
 2959     // FMUL   ST,$src  /* D8 C8+i */
 2960     emit_opcode(masm, 0xD8);
 2961     emit_opcode(masm, 0xC8 + $src1$$reg);
 2962   %}
 2963 
 2964   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
 2965     // FADDP  ST,src2  /* D8 C0+i */
 2966     emit_opcode(masm, 0xD8);
 2967     emit_opcode(masm, 0xC0 + $src2$$reg);
 2968     //could use FADDP  src2,fpST  /* DE C0+i */
 2969   %}
 2970 
 2971   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 2972     // FADDP  src2,ST  /* DE C0+i */
 2973     emit_opcode(masm, 0xDE);
 2974     emit_opcode(masm, 0xC0 + $src2$$reg);
 2975   %}
 2976 
 2977   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 2978     // Operand has been loaded into fp ST (stack top)
 2979       // FSUB   ST,$src1
 2980       emit_opcode(masm, 0xD8);
 2981       emit_opcode(masm, 0xE0 + $src1$$reg);
 2982 
 2983       // FDIV
 2984       emit_opcode(masm, 0xD8);
 2985       emit_opcode(masm, 0xF0 + $src2$$reg);
 2986   %}
 2987 
 2988   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 2989     // Operand was loaded from memory into fp ST (stack top)
 2990     // FADD   ST,$src  /* D8 C0+i */
 2991     emit_opcode(masm, 0xD8);
 2992     emit_opcode(masm, 0xC0 + $src1$$reg);
 2993 
 2994     // FMUL  ST,src2  /* D8 C*+i */
 2995     emit_opcode(masm, 0xD8);
 2996     emit_opcode(masm, 0xC8 + $src2$$reg);
 2997   %}
 2998 
 2999 
 3000   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3001     // Operand was loaded from memory into fp ST (stack top)
 3002     // FADD   ST,$src  /* D8 C0+i */
 3003     emit_opcode(masm, 0xD8);
 3004     emit_opcode(masm, 0xC0 + $src1$$reg);
 3005 
 3006     // FMULP  src2,ST  /* DE C8+i */
 3007     emit_opcode(masm, 0xDE);
 3008     emit_opcode(masm, 0xC8 + $src2$$reg);
 3009   %}
 3010 
 3011   // Atomically load the volatile long
 3012   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3013     emit_opcode(masm,0xDF);
 3014     int rm_byte_opcode = 0x05;
 3015     int base     = $mem$$base;
 3016     int index    = $mem$$index;
 3017     int scale    = $mem$$scale;
 3018     int displace = $mem$$disp;
 3019     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3020     encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3021     store_to_stackslot( masm, 0x0DF, 0x07, $dst$$disp );
 3022   %}
 3023 
 3024   // Volatile Store Long.  Must be atomic, so move it into
 3025   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3026   // target address before the store (for null-ptr checks)
 3027   // so the memory operand is used twice in the encoding.
        // NOTE(review): only a single use of the memory operand is visible in the
        // body below; the "used twice" remark may be stale -- confirm.
 3028   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
          // First call passes opcode DF, extension /5 and the source stack-slot
          // displacement (the mirror image of the /7 used for the store below).
 3029     store_to_stackslot( masm, 0x0DF, 0x05, $src$$disp );
 3030     __ set_inst_mark();            // Mark start of FIST in case $mem has an oop
 3031     emit_opcode(masm,0xDF);
 3032     int rm_byte_opcode = 0x07;   // ModRM reg-field opcode extension (/7)
 3033     int base     = $mem$$base;
 3034     int index    = $mem$$index;
 3035     int scale    = $mem$$scale;
 3036     int displace = $mem$$disp;
 3037     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3038     encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3039     __ clear_inst_mark();          // Pairs with set_inst_mark() above
 3040   %}
 3041 
 3042 %}
 3043 
 3044 
 3045 //----------FRAME--------------------------------------------------------------
 3046 // Definition of frame structure and management information.
 3047 //
 3048 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3049 //                             |   (to get allocators register number
 3050 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3051 //  r   CALLER     |        |
 3052 //  o     |        +--------+      pad to even-align allocators stack-slot
 3053 //  w     V        |  pad0  |        numbers; owned by CALLER
 3054 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3055 //  h     ^        |   in   |  5
 3056 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3057 //  |     |        |        |  3
 3058 //  |     |        +--------+
 3059 //  V     |        | old out|      Empty on Intel, window on Sparc
 3060 //        |    old |preserve|      Must be even aligned.
 3061 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3062 //        |        |   in   |  3   area for Intel ret address
 3063 //     Owned by    |preserve|      Empty on Sparc.
 3064 //       SELF      +--------+
 3065 //        |        |  pad2  |  2   pad to align old SP
 3066 //        |        +--------+  1
 3067 //        |        | locks  |  0
 3068 //        |        +--------+----> OptoReg::stack0(), even aligned
 3069 //        |        |  pad1  | 11   pad to align new SP
 3070 //        |        +--------+
 3071 //        |        |        | 10
 3072 //        |        | spills |  9   spills
 3073 //        V        |        |  8   (pad0 slot for callee)
 3074 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3075 //        ^        |  out   |  7
 3076 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3077 //     Owned by    +--------+
 3078 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3079 //        |    new |preserve|      Must be even-aligned.
 3080 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3081 //        |        |        |
 3082 //
 3083 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3084 //         known from SELF's arguments and the Java calling convention.
 3085 //         Region 6-7 is determined per call site.
 3086 // Note 2: If the calling convention leaves holes in the incoming argument
 3087 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3088 //         are owned by the CALLEE.  Holes should not be necessary in the
 3089 //         incoming area, as the Java calling convention is completely under
 3090 //         the control of the AD file.  Doubles can be sorted and packed to
 3091 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3092 //         varargs C calling conventions.
 3093 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3094 //         even aligned with pad0 as needed.
 3095 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3096 //         region 6-11 is even aligned; it may be padded out more so that
 3097 //         the region from SP to FP meets the minimum stack alignment.
 3098 
 3099 frame %{
        // Frame description: calling-convention register, stack-slot accounting,
        // stack alignment, return-address slot and return-value locations.
 3100   // This register defines part of the calling convention
 3101   // between compiled code and the interpreter.
 3102   inline_cache_reg(EAX);                // Inline Cache Register
 3103
 3104   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3105   cisc_spilling_operand_name(indOffset32);
 3106
 3107   // Number of stack slots consumed by locking an object
 3108   sync_stack_slots(1);
 3109
 3110   // Compiled code's Frame Pointer
 3111   frame_pointer(ESP);
 3112   // Interpreter stores its frame pointer in a register which is
 3113   // stored to the stack by I2CAdaptors.
 3114   // I2CAdaptors convert from interpreted java to compiled java.
 3115   interpreter_frame_pointer(EBP);
 3116
 3117   // Stack alignment requirement
 3118   // Alignment size in bytes (128-bit -> 16 bytes)
 3119   stack_alignment(StackAlignmentInBytes);
 3120
 3121   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3122   // for calls to C.  Supports the var-args backing area for register parms.
 3123   varargs_C_out_slots_killed(0);
 3124
 3125   // The after-PROLOG location of the return address.  Location of
 3126   // return address specifies a type (REG or STACK) and a number
 3127   // representing the register number (i.e. - use a register name) or
 3128   // stack slot.
 3129   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3130   // Otherwise, it is above the locks and verification slot and alignment word
        // Computed as: (in-preserve slots + fixed slots), rounded up to the stack
        // alignment expressed in slots, minus one.
 3131   return_addr(STACK - 1 +
 3132               align_up((Compile::current()->in_preserve_stack_slots() +
 3133                         Compile::current()->fixed_slots()),
 3134                        stack_alignment_in_slots()));
 3135
 3136   // Location of C & interpreter return values
 3137   c_return_value %{
 3138     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
          // lo[] / hi[] give the low and high halves of the return location,
          // indexed directly by ideal_reg (asserted above to lie in Op_RegI..Op_RegL).
          // NOTE(review): the column order presumably follows the Op_Reg* opcode
          // numbering (..., RegI, RegP, RegF, RegD, RegL) -- confirm against opcodes.
 3139     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3140     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3141
 3142     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3143     // that C functions return float and double results in XMM0.
 3144     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3145       return OptoRegPair(XMM0b_num,XMM0_num);
 3146     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3147       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3148
          // Everything else comes from the tables above.
 3149     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3150   %}
 3151
 3152   // Location of return values
        // Same tables as c_return_value; the only visible difference is that a
        // float result is placed in XMM0 from UseSSE>=1 here (UseSSE>=2 above).
 3153   return_value %{
 3154     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3155     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3156     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3157     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3158       return OptoRegPair(XMM0b_num,XMM0_num);
 3159     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3160       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3161     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3162   %}
 3163
 3164 %}
 3165 
 3166 //----------ATTRIBUTES---------------------------------------------------------
 3167 //----------Operand Attributes-------------------------------------------------
 3168 op_attrib op_cost(0);        // Required cost attribute (default 0; operands override it with op_cost(...))
 3169
 3170 //----------Instruction Attributes---------------------------------------------
 3171 ins_attrib ins_cost(100);       // Required cost attribute (default 100)
 3172 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3173 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3174                                 // non-matching short branch variant of some
 3175                                 // long branch?
 3176 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3177                                 // specifies the alignment that some part of the instruction (not
 3178                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3179                                 // function must be provided for the instruction
 3180 
 3181 //----------OPERANDS-----------------------------------------------------------
 3182 // Operand definitions must precede instruction definitions for correct parsing
 3183 // in the ADLC because operands constitute user defined types which are used in
 3184 // instruction definitions.
 3185 
 3186 //----------Simple Operands----------------------------------------------------
 3187 // Immediate Operands
 3188 // Integer Immediate: matches any ConI node (no predicate).
 3189 operand immI() %{
 3190   match(ConI);
 3191
 3192   op_cost(10);
 3193   format %{ %}
 3194   interface(CONST_INTER);
 3195 %}
 3196 
 3197 // Constant for test vs zero: ConI whose value is exactly 0.
 3198 operand immI_0() %{
 3199   predicate(n->get_int() == 0);
 3200   match(ConI);
 3201
 3202   op_cost(0);
 3203   format %{ %}
 3204   interface(CONST_INTER);
 3205 %}
 3206 
 3207 // Constant for increment: ConI whose value is exactly 1.
 3208 operand immI_1() %{
 3209   predicate(n->get_int() == 1);
 3210   match(ConI);
 3211
 3212   op_cost(0);
 3213   format %{ %}
 3214   interface(CONST_INTER);
 3215 %}
 3216 
 3217 // Constant for decrement: ConI whose value is exactly -1.
 3218 operand immI_M1() %{
 3219   predicate(n->get_int() == -1);
 3220   match(ConI);
 3221
 3222   op_cost(0);
 3223   format %{ %}
 3224   interface(CONST_INTER);
 3225 %}
 3226 
 3227 // Valid scale values for addressing modes: ConI in the range [0, 3].
      // No op_cost is given, so the op_attrib default applies.
 3228 operand immI2() %{
 3229   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3230   match(ConI);
 3231
 3232   format %{ %}
 3233   interface(CONST_INTER);
 3234 %}
 3235 
 3236 operand immI8() %{
        // ConI that fits in a signed 8-bit value: [-128, 127].
 3237   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3238   match(ConI);
 3239
 3240   op_cost(5);
 3241   format %{ %}
 3242   interface(CONST_INTER);
 3243 %}
 3244 
 3245 operand immU8() %{
        // ConI that fits in an unsigned 8-bit value: [0, 255].
 3246   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3247   match(ConI);
 3248
 3249   op_cost(5);
 3250   format %{ %}
 3251   interface(CONST_INTER);
 3252 %}
 3253 
 3254 operand immI16() %{
        // ConI that fits in a signed 16-bit value: [-32768, 32767].
 3255   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3256   match(ConI);
 3257
 3258   op_cost(10);
 3259   format %{ %}
 3260   interface(CONST_INTER);
 3261 %}
 3262 
 3263 // Int Immediate non-negative: ConI with value >= 0 (sign bit clear).
 3264 operand immU31()
 3265 %{
 3266   predicate(n->get_int() >= 0);
 3267   match(ConI);
 3268
 3269   op_cost(0);
 3270   format %{ %}
 3271   interface(CONST_INTER);
 3272 %}
 3273 
 3274 // Constant for long shifts: ConI whose value is exactly 32.
 3275 operand immI_32() %{
 3276   predicate( n->get_int() == 32 );
 3277   match(ConI);
 3278
 3279   op_cost(0);
 3280   format %{ %}
 3281   interface(CONST_INTER);
 3282 %}
 3283 
 3284 operand immI_1_31() %{
        // ConI in the range [1, 31].
 3285   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3286   match(ConI);
 3287
 3288   op_cost(0);
 3289   format %{ %}
 3290   interface(CONST_INTER);
 3291 %}
 3292 
 3293 operand immI_32_63() %{
        // ConI in the range [32, 63].
 3294   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3295   match(ConI);
 3296   op_cost(0);
 3297
 3298   format %{ %}
 3299   interface(CONST_INTER);
 3300 %}
 3301 
 3302 operand immI_2() %{
        // ConI whose value is exactly 2.
 3303   predicate( n->get_int() == 2 );
 3304   match(ConI);
 3305
 3306   op_cost(0);
 3307   format %{ %}
 3308   interface(CONST_INTER);
 3309 %}
 3310 
 3311 operand immI_3() %{
        // ConI whose value is exactly 3.
 3312   predicate( n->get_int() == 3 );
 3313   match(ConI);
 3314
 3315   op_cost(0);
 3316   format %{ %}
 3317   interface(CONST_INTER);
 3318 %}
 3319 
 3320 operand immI_4()
 3321 %{
        // ConI whose value is exactly 4.
 3322   predicate(n->get_int() == 4);
 3323   match(ConI);
 3324
 3325   op_cost(0);
 3326   format %{ %}
 3327   interface(CONST_INTER);
 3328 %}
 3329 
 3330 operand immI_8()
 3331 %{
        // ConI whose value is exactly 8.
 3332   predicate(n->get_int() == 8);
 3333   match(ConI);
 3334
 3335   op_cost(0);
 3336   format %{ %}
 3337   interface(CONST_INTER);
 3338 %}
 3339 
 3340 // Pointer Immediate: matches any ConP node (no predicate).
 3341 operand immP() %{
 3342   match(ConP);
 3343
 3344   op_cost(10);
 3345   format %{ %}
 3346   interface(CONST_INTER);
 3347 %}
 3348 
 3349 // Null Pointer Immediate: ConP whose pointer value is 0.
 3350 operand immP0() %{
 3351   predicate( n->get_ptr() == 0 );
 3352   match(ConP);
 3353   op_cost(0);
 3354
 3355   format %{ %}
 3356   interface(CONST_INTER);
 3357 %}
 3358 
 3359 // Long Immediate: matches any ConL node (no predicate).
 3360 operand immL() %{
 3361   match(ConL);
 3362
 3363   op_cost(20);
 3364   format %{ %}
 3365   interface(CONST_INTER);
 3366 %}
 3367 
 3368 // Long Immediate zero: ConL whose value is exactly 0.
 3369 operand immL0() %{
 3370   predicate( n->get_long() == 0L );
 3371   match(ConL);
 3372   op_cost(0);
 3373
 3374   format %{ %}
 3375   interface(CONST_INTER);
 3376 %}
 3377 
 3378 // Long Immediate minus one: ConL whose value is exactly -1.
 3379 operand immL_M1() %{
 3380   predicate( n->get_long() == -1L );
 3381   match(ConL);
 3382   op_cost(0);
 3383
 3384   format %{ %}
 3385   interface(CONST_INTER);
 3386 %}
 3387 
 3388 // Long immediate from 0 to 127 (inclusive at both ends).
 3389 // Used for a shorter form of long mul by 10.
 3390 operand immL_127() %{
 3391   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3392   match(ConL);
 3393   op_cost(0);
 3394
 3395   format %{ %}
 3396   interface(CONST_INTER);
 3397 %}
 3398 
 3399 // Long Immediate: low 32-bit mask, i.e. ConL equal to 0xFFFFFFFF.
 3400 operand immL_32bits() %{
 3401   predicate(n->get_long() == 0xFFFFFFFFL);
 3402   match(ConL);
 3403   op_cost(0);
 3404
 3405   format %{ %}
 3406   interface(CONST_INTER);
 3407 %}
 3408 
 3409 // Long Immediate representable in 32 bits: ConL that is unchanged by a round trip through (int).
 3410 operand immL32() %{
 3411   predicate(n->get_long() == (int)(n->get_long()));
 3412   match(ConL);
 3413   op_cost(20);
 3414
 3415   format %{ %}
 3416   interface(CONST_INTER);
 3417 %}
 3418 
 3419 // Double Immediate zero; only selected when UseSSE<=1.
 3420 operand immDPR0() %{
 3421   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3422   // bug that generates code such that NaNs compare equal to 0.0
 3423   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3424   match(ConD);
 3425
 3426   op_cost(5);
 3427   format %{ %}
 3428   interface(CONST_INTER);
 3429 %}
 3430 
 3431 // Double Immediate one: ConD equal to 1.0; only selected when UseSSE<=1.
 3432 operand immDPR1() %{
 3433   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3434   match(ConD);
 3435
 3436   op_cost(5);
 3437   format %{ %}
 3438   interface(CONST_INTER);
 3439 %}
 3440 
 3441 // Double Immediate: any ConD; only selected when UseSSE<=1.
 3442 operand immDPR() %{
 3443   predicate(UseSSE<=1);
 3444   match(ConD);
 3445
 3446   op_cost(5);
 3447   format %{ %}
 3448   interface(CONST_INTER);
 3449 %}
 3450 
 3451 operand immD() %{
        // Double Immediate: any ConD; only selected when UseSSE>=2
        // (the complement of immDPR's UseSSE<=1 condition).
 3452   predicate(UseSSE>=2);
 3453   match(ConD);
 3454
 3455   op_cost(5);
 3456   format %{ %}
 3457   interface(CONST_INTER);
 3458 %}
 3459 
 3460 // Double Immediate zero; only selected when UseSSE>=2.
 3461 operand immD0() %{
 3462   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3463   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3464   // compare equal to -0.0.
        // NOTE(review): the predicate compares jlong_cast(value) with 0 instead of
        // using a floating-point ==; presumably jlong_cast reinterprets the bits so
        // that only +0.0 matches -- confirm against jlong_cast's definition.
 3465   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3466   match(ConD);
 3467
 3468   format %{ %}
 3469   interface(CONST_INTER);
 3470 %}
 3471 
 3472 // Float Immediate zero: ConF equal to 0.0F; only selected when UseSSE == 0.
 3473 operand immFPR0() %{
 3474   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3475   match(ConF);
 3476
 3477   op_cost(5);
 3478   format %{ %}
 3479   interface(CONST_INTER);
 3480 %}
 3481 
 3482 // Float Immediate one: ConF equal to 1.0F; only selected when UseSSE == 0.
 3483 operand immFPR1() %{
 3484   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3485   match(ConF);
 3486
 3487   op_cost(5);
 3488   format %{ %}
 3489   interface(CONST_INTER);
 3490 %}
 3491 
 3492 // Float Immediate: any ConF; only selected when UseSSE == 0.
 3493 operand immFPR() %{
 3494   predicate( UseSSE == 0 );
 3495   match(ConF);
 3496
 3497   op_cost(5);
 3498   format %{ %}
 3499   interface(CONST_INTER);
 3500 %}
 3501 
 3502 // Float Immediate: any ConF; only selected when UseSSE >= 1.
 3503 operand immF() %{
 3504   predicate(UseSSE >= 1);
 3505   match(ConF);
 3506
 3507   op_cost(5);
 3508   format %{ %}
 3509   interface(CONST_INTER);
 3510 %}
 3511 
 3512 // Float Immediate zero.  Zero and not -0.0; only selected when UseSSE >= 1.
      // The test is jint_cast(value) == 0 rather than a floating-point compare
      // (presumably a bit-pattern check -- confirm against jint_cast's definition).
 3513 operand immF0() %{
 3514   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3515   match(ConF);
 3516
 3517   op_cost(5);
 3518   format %{ %}
 3519   interface(CONST_INTER);
 3520 %}
 3521 
 3522 // Immediates for special shifts (sign extend)
 3523 
 3524 // ConI whose value is exactly 16 (one of the special shift counts named in the section comment).
 3525 operand immI_16() %{
 3526   predicate( n->get_int() == 16 );
 3527   match(ConI);
 3528
 3529   format %{ %}
 3530   interface(CONST_INTER);
 3531 %}
 3532 
 3533 operand immI_24() %{
        // ConI whose value is exactly 24.
 3534   predicate( n->get_int() == 24 );
 3535   match(ConI);
 3536
 3537   format %{ %}
 3538   interface(CONST_INTER);
 3539 %}
 3540 
 3541 // Constant for byte-wide masking: ConI equal to 255 (0xFF).
 3542 operand immI_255() %{
 3543   predicate( n->get_int() == 255 );
 3544   match(ConI);
 3545
 3546   format %{ %}
 3547   interface(CONST_INTER);
 3548 %}
 3549 
 3550 // Constant for short-wide masking: ConI equal to 65535 (0xFFFF).
 3551 operand immI_65535() %{
 3552   predicate(n->get_int() == 65535);
 3553   match(ConI);
 3554
 3555   format %{ %}
 3556   interface(CONST_INTER);
 3557 %}
 3558 
 3559 operand kReg()
 3560 %{
        // Register operand matching RegVectMask, allocated from the
        // vectmask_reg register class.
 3561   constraint(ALLOC_IN_RC(vectmask_reg));
 3562   match(RegVectMask);
 3563   format %{%}
 3564   interface(REG_INTER);
 3565 %}
 3566 
 3567 // Register Operands
 3568 // Integer Register: allocated from int_reg.  Besides RegI it also accepts
      // every narrower integer-register operand listed in the match rules below.
 3569 operand rRegI() %{
 3570   constraint(ALLOC_IN_RC(int_reg));
 3571   match(RegI);
 3572   match(xRegI);
 3573   match(eAXRegI);
 3574   match(eBXRegI);
 3575   match(eCXRegI);
 3576   match(eDXRegI);
 3577   match(eDIRegI);
 3578   match(eSIRegI);
 3579
 3580   format %{ %}
 3581   interface(REG_INTER);
 3582 %}
 3583 
 3584 // Subset of Integer Register: allocated from int_x_reg; accepts the
      // EAX/EBX/ECX/EDX operands but, unlike rRegI, not the EDI/ESI ones.
 3585 operand xRegI(rRegI reg) %{
 3586   constraint(ALLOC_IN_RC(int_x_reg));
 3587   match(reg);
 3588   match(eAXRegI);
 3589   match(eBXRegI);
 3590   match(eCXRegI);
 3591   match(eDXRegI);
 3592
 3593   format %{ %}
 3594   interface(REG_INTER);
 3595 %}
 3596 
 3597 // Special Registers: integer operand restricted to register class eax_reg.
 3598 operand eAXRegI(xRegI reg) %{
 3599   constraint(ALLOC_IN_RC(eax_reg));
 3600   match(reg);
 3601   match(rRegI);
 3602
 3603   format %{ "EAX" %}
 3604   interface(REG_INTER);
 3605 %}
 3606 
 3607 // Special Registers: integer operand restricted to register class ebx_reg.
 3608 operand eBXRegI(xRegI reg) %{
 3609   constraint(ALLOC_IN_RC(ebx_reg));
 3610   match(reg);
 3611   match(rRegI);
 3612
 3613   format %{ "EBX" %}
 3614   interface(REG_INTER);
 3615 %}
 3616 
 3617 operand eCXRegI(xRegI reg) %{
        // Integer operand restricted to register class ecx_reg.
 3618   constraint(ALLOC_IN_RC(ecx_reg));
 3619   match(reg);
 3620   match(rRegI);
 3621
 3622   format %{ "ECX" %}
 3623   interface(REG_INTER);
 3624 %}
 3625 
 3626 operand eDXRegI(xRegI reg) %{
        // Integer operand restricted to register class edx_reg.
 3627   constraint(ALLOC_IN_RC(edx_reg));
 3628   match(reg);
 3629   match(rRegI);
 3630
 3631   format %{ "EDX" %}
 3632   interface(REG_INTER);
 3633 %}
 3634 
 3635 operand eDIRegI(xRegI reg) %{
        // Integer operand restricted to register class edi_reg.
 3636   constraint(ALLOC_IN_RC(edi_reg));
 3637   match(reg);
 3638   match(rRegI);
 3639
 3640   format %{ "EDI" %}
 3641   interface(REG_INTER);
 3642 %}
 3643 
 3644 operand nadxRegI() %{
        // Integer operand allocated from nadx_reg.  The match list omits the
        // EAX and EDX operands (presumably "not A/D X" -- name-based, confirm
        // against the nadx_reg class definition).
 3645   constraint(ALLOC_IN_RC(nadx_reg));
 3646   match(RegI);
 3647   match(eBXRegI);
 3648   match(eCXRegI);
 3649   match(eSIRegI);
 3650   match(eDIRegI);
 3651
 3652   format %{ %}
 3653   interface(REG_INTER);
 3654 %}
 3655 
 3656 operand ncxRegI() %{
        // Integer operand allocated from ncx_reg.  The match list omits the
        // ECX operand (and EBX as well; presumably ncx_reg excludes them --
        // confirm against the ncx_reg class definition).
 3657   constraint(ALLOC_IN_RC(ncx_reg));
 3658   match(RegI);
 3659   match(eAXRegI);
 3660   match(eDXRegI);
 3661   match(eSIRegI);
 3662   match(eDIRegI);
 3663
 3664   format %{ %}
 3665   interface(REG_INTER);
 3666 %}
 3667 
 3668 // Integer operand restricted to register class esi_reg.
 3669 // Historical note: this operand was used by cmpFastUnlock, but conflicted with 'object' reg.
 3670 operand eSIRegI(xRegI reg) %{
 3671    constraint(ALLOC_IN_RC(esi_reg));
 3672    match(reg);
 3673    match(rRegI);
 3674
 3675    format %{ "ESI" %}
 3676    interface(REG_INTER);
 3677 %}
 3678 
 3679 // Pointer Register: allocated from any_reg; also accepts eRegP and the
      // fixed EAX/EBX/ECX/EDI pointer operands.
 3680 operand anyRegP() %{
 3681   constraint(ALLOC_IN_RC(any_reg));
 3682   match(RegP);
 3683   match(eAXRegP);
 3684   match(eBXRegP);
 3685   match(eCXRegP);
 3686   match(eDIRegP);
 3687   match(eRegP);
 3688
 3689   format %{ %}
 3690   interface(REG_INTER);
 3691 %}
 3692 
 3693 operand eRegP() %{
        // Pointer operand allocated from int_reg; also accepts the fixed
        // EAX/EBX/ECX/EDI pointer operands.
 3694   constraint(ALLOC_IN_RC(int_reg));
 3695   match(RegP);
 3696   match(eAXRegP);
 3697   match(eBXRegP);
 3698   match(eCXRegP);
 3699   match(eDIRegP);
 3700
 3701   format %{ %}
 3702   interface(REG_INTER);
 3703 %}
 3704 
 3705 operand rRegP() %{
        // Pointer operand allocated from int_reg.
        // NOTE(review): the body is identical to eRegP; only the name differs.
 3706   constraint(ALLOC_IN_RC(int_reg));
 3707   match(RegP);
 3708   match(eAXRegP);
 3709   match(eBXRegP);
 3710   match(eCXRegP);
 3711   match(eDIRegP);
 3712
 3713   format %{ %}
 3714   interface(REG_INTER);
 3715 %}
 3716 
 3717 // On windows95, EBP is not safe to use for implicit null tests.
      // Pointer operand allocated from int_reg_no_ebp, with a high op_cost (100).
 3718 operand eRegP_no_EBP() %{
 3719   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3720   match(RegP);
 3721   match(eAXRegP);
 3722   match(eBXRegP);
 3723   match(eCXRegP);
 3724   match(eDIRegP);
 3725
 3726   op_cost(100);
 3727   format %{ %}
 3728   interface(REG_INTER);
 3729 %}
 3730 
 3731 operand pRegP() %{
        // Pointer operand allocated from p_reg; accepts the fixed
        // EBX/EDX/ESI/EDI pointer operands (no EAX or ECX in the match list).
 3732   constraint(ALLOC_IN_RC(p_reg));
 3733   match(RegP);
 3734   match(eBXRegP);
 3735   match(eDXRegP);
 3736   match(eSIRegP);
 3737   match(eDIRegP);
 3738
 3739   format %{ %}
 3740   interface(REG_INTER);
 3741 %}
 3742 
 3743 // Special Registers: pointer operand restricted to register class eax_reg.
 3744 // Return a pointer value
 3745 operand eAXRegP(eRegP reg) %{
 3746   constraint(ALLOC_IN_RC(eax_reg));
 3747   match(reg);
 3748   format %{ "EAX" %}
 3749   interface(REG_INTER);
 3750 %}
 3751 
 3752 // Used in AtomicAdd: pointer operand restricted to register class ebx_reg.
 3753 operand eBXRegP(eRegP reg) %{
 3754   constraint(ALLOC_IN_RC(ebx_reg));
 3755   match(reg);
 3756   format %{ "EBX" %}
 3757   interface(REG_INTER);
 3758 %}
 3759 
 3760 // Tail-call (interprocedural jump) to interpreter: pointer operand restricted to ecx_reg.
 3761 operand eCXRegP(eRegP reg) %{
 3762   constraint(ALLOC_IN_RC(ecx_reg));
 3763   match(reg);
 3764   format %{ "ECX" %}
 3765   interface(REG_INTER);
 3766 %}
 3767 
 3768 operand eDXRegP(eRegP reg) %{
        // Pointer operand restricted to register class edx_reg.
 3769   constraint(ALLOC_IN_RC(edx_reg));
 3770   match(reg);
 3771   format %{ "EDX" %}
 3772   interface(REG_INTER);
 3773 %}
 3774 
 3775 operand eSIRegP(eRegP reg) %{
        // Pointer operand restricted to register class esi_reg.
 3776   constraint(ALLOC_IN_RC(esi_reg));
 3777   match(reg);
 3778   format %{ "ESI" %}
 3779   interface(REG_INTER);
 3780 %}
 3781 
 3782 // Used in rep stosw: pointer operand restricted to register class edi_reg.
 3783 operand eDIRegP(eRegP reg) %{
 3784   constraint(ALLOC_IN_RC(edi_reg));
 3785   match(reg);
 3786   format %{ "EDI" %}
 3787   interface(REG_INTER);
 3788 %}
 3789 
 3790 operand eRegL() %{
        // Long register operand allocated from long_reg; also accepts eADXRegL.
        // (The register-pair operands below print names such as "EDX:EAX".)
 3791   constraint(ALLOC_IN_RC(long_reg));
 3792   match(RegL);
 3793   match(eADXRegL);
 3794
 3795   format %{ %}
 3796   interface(REG_INTER);
 3797 %}
 3798 
 3799 operand eADXRegL( eRegL reg ) %{
        // Long operand restricted to register class eadx_reg (printed as EDX:EAX).
 3800   constraint(ALLOC_IN_RC(eadx_reg));
 3801   match(reg);
 3802
 3803   format %{ "EDX:EAX" %}
 3804   interface(REG_INTER);
 3805 %}
 3806 
 3807 operand eBCXRegL( eRegL reg ) %{
        // Long operand restricted to register class ebcx_reg (printed as EBX:ECX).
 3808   constraint(ALLOC_IN_RC(ebcx_reg));
 3809   match(reg);
 3810
 3811   format %{ "EBX:ECX" %}
 3812   interface(REG_INTER);
 3813 %}
 3814 
 3815 operand eBDPRegL( eRegL reg ) %{
        // Long operand restricted to register class ebpd_reg (printed as EBP:EDI).
        // Note the operand is spelled eBDP while its register class is ebpd.
 3816   constraint(ALLOC_IN_RC(ebpd_reg));
 3817   match(reg);
 3818
 3819   format %{ "EBP:EDI" %}
 3820   interface(REG_INTER);
 3821 %}
 3822 // Special case for integer high multiply: a RegL in eadx_reg that is printed as just "EAX".
 3823 operand eADXRegL_low_only() %{
 3824   constraint(ALLOC_IN_RC(eadx_reg));
 3825   match(RegL);
 3826
 3827   format %{ "EAX" %}
 3828   interface(REG_INTER);
 3829 %}
 3830 
 3831 // Flags register, used as output of compare instructions (RegFlags in int_flags).
 3832 operand rFlagsReg() %{
 3833   constraint(ALLOC_IN_RC(int_flags));
 3834   match(RegFlags);
 3835
 3836   format %{ "EFLAGS" %}
 3837   interface(REG_INTER);
 3838 %}
 3839 
 3840 // Flags register, used as output of compare instructions (same body as rFlagsReg).
 3841 operand eFlagsReg() %{
 3842   constraint(ALLOC_IN_RC(int_flags));
 3843   match(RegFlags);
 3844
 3845   format %{ "EFLAGS" %}
 3846   interface(REG_INTER);
 3847 %}
 3848 
 3849 // Flags register, used as output of FLOATING POINT compare instructions
      // (same register class as eFlagsReg; a distinct operand type printed as EFLAGS_U).
 3850 operand eFlagsRegU() %{
 3851   constraint(ALLOC_IN_RC(int_flags));
 3852   match(RegFlags);
 3853
 3854   format %{ "EFLAGS_U" %}
 3855   interface(REG_INTER);
 3856 %}
 3857 
 3858 operand eFlagsRegUCF() %{
        // Flags operand printed as EFLAGS_U_CF.  Its predicate is the constant
        // false, so presumably it is only ever used where an instruction names it
        // explicitly rather than being selected by the matcher -- confirm.
 3859   constraint(ALLOC_IN_RC(int_flags));
 3860   match(RegFlags);
 3861   predicate(false);
 3862
 3863   format %{ "EFLAGS_U_CF" %}
 3864   interface(REG_INTER);
 3865 %}
 3866 
 3867 // Condition Code Register used by long compare.  Three operand types over the
      // same int_flags class, one per comparison family (LT/GE, EQ/NE, LE/GT);
      // they differ only in name and printed format.
 3868 operand flagsReg_long_LTGE() %{
 3869   constraint(ALLOC_IN_RC(int_flags));
 3870   match(RegFlags);
 3871   format %{ "FLAGS_LTGE" %}
 3872   interface(REG_INTER);
 3873 %}
 3874 operand flagsReg_long_EQNE() %{
 3875   constraint(ALLOC_IN_RC(int_flags));
 3876   match(RegFlags);
 3877   format %{ "FLAGS_EQNE" %}
 3878   interface(REG_INTER);
 3879 %}
 3880 operand flagsReg_long_LEGT() %{
 3881   constraint(ALLOC_IN_RC(int_flags));
 3882   match(RegFlags);
 3883   format %{ "FLAGS_LEGT" %}
 3884   interface(REG_INTER);
 3885 %}
 3886 
 3887 // Condition Code Register used by unsigned long compare.  Unsigned counterparts
      // of the flagsReg_long_* operands: same int_flags class, one operand type per
      // comparison family, differing only in name and printed format.
 3888 operand flagsReg_ulong_LTGE() %{
 3889   constraint(ALLOC_IN_RC(int_flags));
 3890   match(RegFlags);
 3891   format %{ "FLAGS_U_LTGE" %}
 3892   interface(REG_INTER);
 3893 %}
 3894 operand flagsReg_ulong_EQNE() %{
 3895   constraint(ALLOC_IN_RC(int_flags));
 3896   match(RegFlags);
 3897   format %{ "FLAGS_U_EQNE" %}
 3898   interface(REG_INTER);
 3899 %}
 3900 operand flagsReg_ulong_LEGT() %{
 3901   constraint(ALLOC_IN_RC(int_flags));
 3902   match(RegFlags);
 3903   format %{ "FLAGS_U_LEGT" %}
 3904   interface(REG_INTER);
 3905 %}
 3906 
 3907 // Double register operand allocated from fp_dbl_reg; only selected when UseSSE < 2.
      // Also accepts the fixed regDPR1 / regDPR2 operands.
 3908 operand regDPR() %{
 3909   predicate( UseSSE < 2 );
 3910   constraint(ALLOC_IN_RC(fp_dbl_reg));
 3911   match(RegD);
 3912   match(regDPR1);
 3913   match(regDPR2);
 3914   format %{ %}
 3915   interface(REG_INTER);
 3916 %}
 3917 
 3918 operand regDPR1(regDPR reg) %{
        // Double operand restricted to register class fp_dbl_reg0 (printed as FPR1).
 3919   predicate( UseSSE < 2 );
 3920   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 3921   match(reg);
 3922   format %{ "FPR1" %}
 3923   interface(REG_INTER);
 3924 %}
 3925 
 3926 operand regDPR2(regDPR reg) %{
        // Double operand restricted to register class fp_dbl_reg1 (printed as FPR2).
 3927   predicate( UseSSE < 2 );
 3928   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 3929   match(reg);
 3930   format %{ "FPR2" %}
 3931   interface(REG_INTER);
 3932 %}
 3933 
 3934 operand regnotDPR1(regDPR reg) %{
        // Double operand allocated from fp_dbl_notreg0 (presumably every
        // fp_dbl_reg member except the one in fp_dbl_reg0 -- confirm against
        // the register class definition).
 3935   predicate( UseSSE < 2 );
 3936   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 3937   match(reg);
 3938   format %{ %}
 3939   interface(REG_INTER);
 3940 %}
 3941 
 3942 // Float register operand allocated from fp_flt_reg; also accepts regFPR1.
      // NOTE(review): the predicate is UseSSE < 2, whereas the float immediates
      // (immFPR*) require UseSSE == 0 and regF requires UseSSE >= 1 -- confirm
      // that the overlap at UseSSE == 1 is intended.
 3943 operand regFPR() %{
 3944   predicate( UseSSE < 2 );
 3945   constraint(ALLOC_IN_RC(fp_flt_reg));
 3946   match(RegF);
 3947   match(regFPR1);
 3948   format %{ %}
 3949   interface(REG_INTER);
 3950 %}
 3951 
 3952 // Float operand restricted to register class fp_flt_reg0 (printed as FPR1).
 3953 operand regFPR1(regFPR reg) %{
 3954   predicate( UseSSE < 2 );
 3955   constraint(ALLOC_IN_RC(fp_flt_reg0));
 3956   match(reg);
 3957   format %{ "FPR1" %}
 3958   interface(REG_INTER);
 3959 %}
 3960 
 3961 // XMM Float register operand: RegF in float_reg_legacy; only selected when UseSSE>=1.
 3962 operand regF() %{
 3963   predicate( UseSSE>=1 );
 3964   constraint(ALLOC_IN_RC(float_reg_legacy));
 3965   match(RegF);
 3966   format %{ %}
 3967   interface(REG_INTER);
 3968 %}
 3969 
 3970 operand legRegF() %{
        // Float operand in float_reg_legacy.
        // NOTE(review): the body is identical to regF; only the name differs.
 3971   predicate( UseSSE>=1 );
 3972   constraint(ALLOC_IN_RC(float_reg_legacy));
 3973   match(RegF);
 3974   format %{ %}
 3975   interface(REG_INTER);
 3976 %}
 3977 
 3978 // Float register operand allocated from float_reg_vl; unlike regF it has no UseSSE predicate.
 3979 operand vlRegF() %{
 3980    constraint(ALLOC_IN_RC(float_reg_vl));
 3981    match(RegF);
 3982
 3983    format %{ %}
 3984    interface(REG_INTER);
 3985 %}
 3986 
 3987 // XMM Double register operand: RegD in double_reg_legacy; only selected when UseSSE>=2.
 3988 operand regD() %{
 3989   predicate( UseSSE>=2 );
 3990   constraint(ALLOC_IN_RC(double_reg_legacy));
 3991   match(RegD);
 3992   format %{ %}
 3993   interface(REG_INTER);
 3994 %}
 3995 
 3996 // Double register operand in double_reg_legacy (body identical to regD; only the name differs).
 3997 operand legRegD() %{
 3998   predicate( UseSSE>=2 );
 3999   constraint(ALLOC_IN_RC(double_reg_legacy));
 4000   match(RegD);
 4001   format %{ %}
 4002   interface(REG_INTER);
 4003 %}
 4004 
 4005 operand vlRegD() %{
         // Double register operand allocated from double_reg_vl; unlike regD it
         // has no UseSSE predicate.
 4006    constraint(ALLOC_IN_RC(double_reg_vl));
 4007    match(RegD);
 4008
 4009    format %{ %}
 4010    interface(REG_INTER);
 4011 %}
 4012 
 4013 //----------Memory Operands----------------------------------------------------
 4014 // Direct Memory Operand: an absolute address given by a pointer immediate.
 4015 operand direct(immP addr) %{
 4016   match(addr);
 4017
 4018   format %{ "[$addr]" %}
 4019   interface(MEMORY_INTER) %{
 4020     base(0xFFFFFFFF);   // presumably the "no base register" marker -- confirm
 4021     index(0x4);         // presumably the "no index register" encoding -- confirm
 4022     scale(0x0);
 4023     disp($addr);
 4024   %}
 4025 %}
 4026 
 4027 // Indirect Memory Operand: address is the base register alone (zero displacement).
 4028 operand indirect(eRegP reg) %{
 4029   constraint(ALLOC_IN_RC(int_reg));
 4030   match(reg);
 4031
 4032   format %{ "[$reg]" %}
 4033   interface(MEMORY_INTER) %{
 4034     base($reg);
 4035     index(0x4);         // presumably the "no index register" encoding -- confirm
 4036     scale(0x0);
 4037     disp(0x0);
 4038   %}
 4039 %}
 4040 
 4041 // Indirect Memory Plus Short Offset Operand: base register plus an immI8 displacement.
 4042 operand indOffset8(eRegP reg, immI8 off) %{
 4043   match(AddP reg off);
 4044
 4045   format %{ "[$reg + $off]" %}
 4046   interface(MEMORY_INTER) %{
 4047     base($reg);
 4048     index(0x4);         // same constant index value as the other index-less operands
 4049     scale(0x0);
 4050     disp($off);
 4051   %}
 4052 %}
 4053 
 4054 // Indirect Memory Plus Long Offset Operand: base register plus an arbitrary immI displacement.
      // This is the operand named by cisc_spilling_operand_name in the frame section.
 4055 operand indOffset32(eRegP reg, immI off) %{
 4056   match(AddP reg off);
 4057
 4058   format %{ "[$reg + $off]" %}
 4059   interface(MEMORY_INTER) %{
 4060     base($reg);
 4061     index(0x4);         // same constant index value as the other index-less operands
 4062     scale(0x0);
 4063     disp($off);
 4064   %}
 4065 %}
 4066 
 4067 // Indirect Memory Plus Long Offset Operand, reversed roles: the pointer immediate
      // is the AddP base and becomes the displacement, while the integer register
      // supplies the base of the encoded address.
 4068 operand indOffset32X(rRegI reg, immP off) %{
 4069   match(AddP off reg);
 4070
 4071   format %{ "[$reg + $off]" %}
 4072   interface(MEMORY_INTER) %{
 4073     base($reg);
 4074     index(0x4);         // same constant index value as the other index-less operands
 4075     scale(0x0);
 4076     disp($off);
 4077   %}
 4078 %}
 4079 
 4080 // Indirect Memory Plus Index Register Plus Offset Operand (index is unscaled: scale 0).
 4081 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4082   match(AddP (AddP reg ireg) off);
 4083
 4084   op_cost(10);
 4085   format %{"[$reg + $off + $ireg]" %}
 4086   interface(MEMORY_INTER) %{
 4087     base($reg);
 4088     index($ireg);
 4089     scale(0x0);
 4090     disp($off);
 4091   %}
 4092 %}
 4093 
 4094 // Indirect Memory Plus Index Register Operand (no offset: displacement 0, scale 0).
 4095 operand indIndex(eRegP reg, rRegI ireg) %{
 4096   match(AddP reg ireg);
 4097
 4098   op_cost(10);
 4099   format %{"[$reg + $ireg]" %}
 4100   interface(MEMORY_INTER) %{
 4101     base($reg);
 4102     index($ireg);
 4103     scale(0x0);
 4104     disp(0x0);
 4105   %}
 4106 %}
 4107 
 4108 // // -------------------------------------------------------------------------
 4109 // // 486 architecture doesn't support "scale * index + offset" without a base
 4110 // // -------------------------------------------------------------------------
 4111 // // Scaled Memory Operands
 4112 // // Indirect Memory Times Scale Plus Offset Operand
 4113 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4114 //   match(AddP off (LShiftI ireg scale));
 4115 //
 4116 //   op_cost(10);
 4117 //   format %{"[$off + $ireg << $scale]" %}
 4118 //   interface(MEMORY_INTER) %{
 4119 //     base(0x4);
 4120 //     index($ireg);
 4121 //     scale($scale);
 4122 //     disp($off);
 4123 //   %}
 4124 // %}
 4125 
 4126 // Indirect Memory Times Scale Plus Index Register: base plus (index shifted left by
      // an immI2 scale, i.e. a shift count restricted to 0..3).
 4127 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4128   match(AddP reg (LShiftI ireg scale));
 4129
 4130   op_cost(10);
 4131   format %{"[$reg + $ireg << $scale]" %}
 4132   interface(MEMORY_INTER) %{
 4133     base($reg);
 4134     index($ireg);
 4135     scale($scale);
 4136     disp(0x0);
 4137   %}
 4138 %}
 4139 
 4140 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand:
      // base plus (index shifted left by an immI2 scale) plus an immI displacement.
 4141 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4142   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4143
 4144   op_cost(10);
 4145   format %{"[$reg + $off + $ireg << $scale]" %}
 4146   interface(MEMORY_INTER) %{
 4147     base($reg);
 4148     index($ireg);
 4149     scale($scale);
 4150     disp($off);
 4151   %}
 4152 %}
 4153 
 4154 //----------Load Long Memory Operands------------------------------------------
 4155 // The load-long idiom will use its address expression again after loading
 4156 // the first word of the long.  If the load-long destination overlaps with
 4157 // registers used in the addressing expression, the 2nd half will be loaded
 4158 // from a clobbered address.  Fix this by requiring that load-long use
 4159 // address registers that do not overlap with the load-long target.
 4160 
 4161 // load-long support: pointer register operand constrained to register class esi_reg
 4162 operand load_long_RegP() %{
 4163   constraint(ALLOC_IN_RC(esi_reg));
 4164   match(RegP);
 4165   match(eSIRegP);
 4166   op_cost(100);
 4167   format %{  %}
 4168   interface(REG_INTER);
 4169 %}
 4170 
 4171 // Indirect Memory Operand for load-long: a load_long_RegP base register only
 4172 operand load_long_indirect(load_long_RegP reg) %{
 4173   constraint(ALLOC_IN_RC(esi_reg));
 4174   match(reg);
 4175 
 4176   format %{ "[$reg]" %}
 4177   interface(MEMORY_INTER) %{
 4178     base($reg);
 4179     index(0x4);  // No Index
 4180     scale(0x0);  // No Scale
 4181     disp(0x0);   // No displacement
 4182   %}
 4183 %}
 4184 
 4185 // Indirect Memory Plus Offset Operand for load-long: load_long_RegP base plus immI displacement
 4186 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4187   match(AddP reg off);
 4188 
 4189   format %{ "[$reg + $off]" %}
 4190   interface(MEMORY_INTER) %{
 4191     base($reg);
 4192     index(0x4);  // No Index
 4193     scale(0x0);  // No Scale
 4194     disp($off);
 4195   %}
 4196 %}
 4197 
 4198 opclass load_long_memory(load_long_indirect, load_long_indOffset32);  // the two load-long addressing forms defined above
 4199 
 4200 
 4201 //----------Special Memory Operands--------------------------------------------
 4202 // Stack Slot Operand - This operand is used for loading and storing temporary
 4203 //                      values on the stack where a match requires a value to
 4204 //                      flow through memory.
 4205 operand stackSlotP(sRegP reg) %{
 4206   constraint(ALLOC_IN_RC(stack_slots));
 4207   // Stack slot for an sRegP value. No match rule because this operand is only generated in matching
 4208   format %{ "[$reg]" %}
 4209   interface(MEMORY_INTER) %{
 4210     base(0x4);   // ESP
 4211     index(0x4);  // No Index
 4212     scale(0x0);  // No Scale
 4213     disp($reg);  // Stack Offset
 4214   %}
 4215 %}
 4216 
 4217 operand stackSlotI(sRegI reg) %{
 4218   constraint(ALLOC_IN_RC(stack_slots));
 4219   // Stack slot for an sRegI value. No match rule because this operand is only generated in matching
 4220   format %{ "[$reg]" %}
 4221   interface(MEMORY_INTER) %{
 4222     base(0x4);   // ESP
 4223     index(0x4);  // No Index
 4224     scale(0x0);  // No Scale
 4225     disp($reg);  // Stack Offset
 4226   %}
 4227 %}
 4228 
 4229 operand stackSlotF(sRegF reg) %{
 4230   constraint(ALLOC_IN_RC(stack_slots));
 4231   // Stack slot for an sRegF value. No match rule because this operand is only generated in matching
 4232   format %{ "[$reg]" %}
 4233   interface(MEMORY_INTER) %{
 4234     base(0x4);   // ESP
 4235     index(0x4);  // No Index
 4236     scale(0x0);  // No Scale
 4237     disp($reg);  // Stack Offset
 4238   %}
 4239 %}
 4240 
 4241 operand stackSlotD(sRegD reg) %{
 4242   constraint(ALLOC_IN_RC(stack_slots));
 4243   // Stack slot for an sRegD value. No match rule because this operand is only generated in matching
 4244   format %{ "[$reg]" %}
 4245   interface(MEMORY_INTER) %{
 4246     base(0x4);   // ESP
 4247     index(0x4);  // No Index
 4248     scale(0x0);  // No Scale
 4249     disp($reg);  // Stack Offset
 4250   %}
 4251 %}
 4252 
 4253 operand stackSlotL(sRegL reg) %{
 4254   constraint(ALLOC_IN_RC(stack_slots));
 4255   // Stack slot for an sRegL value. No match rule because this operand is only generated in matching
 4256   format %{ "[$reg]" %}
 4257   interface(MEMORY_INTER) %{
 4258     base(0x4);   // ESP
 4259     index(0x4);  // No Index
 4260     scale(0x0);  // No Scale
 4261     disp($reg);  // Stack Offset
 4262   %}
 4263 %}
 4264 
 4265 //----------Conditional Branch Operands----------------------------------------
 4266 // Comparison Op  - This is the operation of the comparison, and is limited to
 4267 //                  the following set of codes:
 4268 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4269 //
 4270 // Other attributes of the comparison, such as unsignedness, are specified
 4271 // by the comparison instruction that sets a condition code flags register.
 4272 // That result is represented by a flags operand whose subtype is appropriate
 4273 // to the unsignedness (etc.) of the comparison.
 4274 //
 4275 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4276 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4277 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4278 
 4279 // Comparison Code: maps each Bool test to a condition encoding and mnemonic (l/ge/le/g forms)
 4280 operand cmpOp() %{
 4281   match(Bool);
 4282 
 4283   format %{ "" %}
 4284   interface(COND_INTER) %{
 4285     equal(0x4, "e");
 4286     not_equal(0x5, "ne");
 4287     less(0xC, "l");
 4288     greater_equal(0xD, "ge");
 4289     less_equal(0xE, "le");
 4290     greater(0xF, "g");
 4291     overflow(0x0, "o");
 4292     no_overflow(0x1, "no");
 4293   %}
 4294 %}
 4295 
 4296 // Comparison Code, unsigned compare (b/nb/be/nbe forms).  Used by FP also, with
 4297 // C2 (unordered) turned into GT or LT already.  The other bits
 4298 // C0 and C3 are turned into Carry & Zero flags.
 4299 operand cmpOpU() %{
 4300   match(Bool);
 4301 
 4302   format %{ "" %}
 4303   interface(COND_INTER) %{
 4304     equal(0x4, "e");
 4305     not_equal(0x5, "ne");
 4306     less(0x2, "b");
 4307     greater_equal(0x3, "nb");
 4308     less_equal(0x6, "be");
 4309     greater(0x7, "nbe");
 4310     overflow(0x0, "o");
 4311     no_overflow(0x1, "no");
 4312   %}
 4313 %}
 4314 
 4315 // Floating comparisons that don't require any fixup for the unordered case (lt, ge, le, gt only)
 4316 operand cmpOpUCF() %{
 4317   match(Bool);
 4318   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4319             n->as_Bool()->_test._test == BoolTest::ge ||
 4320             n->as_Bool()->_test._test == BoolTest::le ||
 4321             n->as_Bool()->_test._test == BoolTest::gt);
 4322   format %{ "" %}
 4323   interface(COND_INTER) %{
 4324     equal(0x4, "e");
 4325     not_equal(0x5, "ne");
 4326     less(0x2, "b");
 4327     greater_equal(0x3, "nb");
 4328     less_equal(0x6, "be");
 4329     greater(0x7, "nbe");
 4330     overflow(0x0, "o");
 4331     no_overflow(0x1, "no");
 4332   %}
 4333 %}
 4334 
 4335 
 4336 // Floating comparisons that can be fixed up with extra conditional jumps (ne and eq only)
 4337 operand cmpOpUCF2() %{
 4338   match(Bool);
 4339   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4340             n->as_Bool()->_test._test == BoolTest::eq);
 4341   format %{ "" %}
 4342   interface(COND_INTER) %{
 4343     equal(0x4, "e");
 4344     not_equal(0x5, "ne");
 4345     less(0x2, "b");
 4346     greater_equal(0x3, "nb");
 4347     less_equal(0x6, "be");
 4348     greater(0x7, "nbe");
 4349     overflow(0x0, "o");
 4350     no_overflow(0x1, "no");
 4351   %}
 4352 %}
 4353 
 4354 // Comparison Code for FP conditional move; the predicate rejects overflow/no_overflow tests
 4355 operand cmpOp_fcmov() %{
 4356   match(Bool);
 4357 
 4358   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4359             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4360   format %{ "" %}
 4361   interface(COND_INTER) %{
 4362     equal        (0x0C8);
 4363     not_equal    (0x1C8);
 4364     less         (0x0C0);
 4365     greater_equal(0x1C0);
 4366     less_equal   (0x0D0);
 4367     greater      (0x1D0);
 4368     overflow(0x0, "o"); // not really supported by the instruction
 4369     no_overflow(0x1, "no"); // not really supported by the instruction
 4370   %}
 4371 %}
 4372 
 4373 // Comparison Code used in long compares; relative to cmpOp, less/greater and less_equal/greater_equal are swapped
 4374 operand cmpOp_commute() %{
 4375   match(Bool);
 4376 
 4377   format %{ "" %}
 4378   interface(COND_INTER) %{
 4379     equal(0x4, "e");
 4380     not_equal(0x5, "ne");
 4381     less(0xF, "g");
 4382     greater_equal(0xE, "le");
 4383     less_equal(0xD, "ge");
 4384     greater(0xC, "l");
 4385     overflow(0x0, "o");
 4386     no_overflow(0x1, "no");
 4387   %}
 4388 %}
 4389 
 4390 // Comparison Code used in unsigned long compares; relative to cmpOpU, less/greater and less_equal/greater_equal are swapped
 4391 operand cmpOpU_commute() %{
 4392   match(Bool);
 4393 
 4394   format %{ "" %}
 4395   interface(COND_INTER) %{
 4396     equal(0x4, "e");
 4397     not_equal(0x5, "ne");
 4398     less(0x7, "nbe");
 4399     greater_equal(0x6, "be");
 4400     less_equal(0x3, "nb");
 4401     greater(0x2, "b");
 4402     overflow(0x0, "o");
 4403     no_overflow(0x1, "no");
 4404   %}
 4405 %}
 4406 
 4407 //----------OPERAND CLASSES----------------------------------------------------
 4408 // Operand Classes are groups of operands that are used to simplify
 4409 // instruction definitions by not requiring the AD writer to specify separate
 4410 // instructions for every form of operand when the instruction accepts
 4411 // multiple operand types with the same basic encoding and format.  The classic
 4412 // case of this is memory operands.
 4413 
 4414 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4415                indIndex, indIndexScale, indIndexScaleOffset);
 4416 
 4417 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4418 // This means some kind of offset is always required and you cannot use
 4419 // an oop as the offset (done when working on static globals).
 4420 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,  // same list as memory, minus indOffset32X
 4421                     indIndex, indIndexScale, indIndexScaleOffset);
 4422 
 4423 
 4424 //----------PIPELINE-----------------------------------------------------------
 4425 // Rules which define the behavior of the target architecture's pipeline.
 4426 pipeline %{
 4427 
 4428 //----------ATTRIBUTES---------------------------------------------------------
 4429 attributes %{
 4430   variable_size_instructions;        // Variable size instructions
 4431   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4432   instruction_unit_size = 1;         // Instruction size unit is 1 byte
 4433   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4434   instruction_fetch_units = 1;       // of 16 bytes
 4435 
 4436   // List of nop instructions
 4437   nops( MachNop );
 4438 %}
 4439 
 4440 //----------RESOURCES----------------------------------------------------------
 4441 // Resources are the functional units available to the machine
 4442 
 4443 // Generic P2/P3 pipeline
 4444 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4445 // 3 instructions decoded per cycle.
 4446 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4447 // 2 ALU ops, only ALU0 handles mul/div instructions.
 4448 resources( D0, D1, D2, DECODE = D0 | D1 | D2,  // DECODE: any of the three decoders
 4449            MS0, MS1, MEM = MS0 | MS1,          // MEM: either memory unit
 4450            BR, FPU,
 4451            ALU0, ALU1, ALU = ALU0 | ALU1 );    // ALU: either ALU
 4452 
 4453 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4454 // Pipeline Description specifies the stages in the machine's pipeline
 4455 
 4456 // Generic P2/P3 pipeline: six stages, S0 through S5
 4457 pipe_desc(S0, S1, S2, S3, S4, S5);
 4458 
 4459 //----------PIPELINE CLASSES---------------------------------------------------
 4460 // Pipeline Classes describe the stages in which input and output are
 4461 // referenced by the hardware pipeline.
 4462 
 4463 // Naming convention: ialu or fpu
 4464 // Then: _reg
 4465 // Then: _reg if there is a 2nd register
 4466 // Then: _long if it's a pair of instructions implementing a long
 4467 // Then: _fat if it requires the big decoder
 4468 //   Or: _mem if it requires the big decoder and a memory unit.
 4469 
 4470 // Integer ALU reg operation: dst is read in S3 and written in S4
 4471 pipe_class ialu_reg(rRegI dst) %{
 4472     single_instruction;
 4473     dst    : S4(write);
 4474     dst    : S3(read);
 4475     DECODE : S0;        // any decoder
 4476     ALU    : S3;        // any alu
 4477 %}
 4478 
 4479 // Long ALU reg operation (two instructions): dst is read in S3 and written in S4
 4480 pipe_class ialu_reg_long(eRegL dst) %{
 4481     instruction_count(2);
 4482     dst    : S4(write);
 4483     dst    : S3(read);
 4484     DECODE : S0(2);     // any 2 decoders
 4485     ALU    : S3(2);     // both alus
 4486 %}
 4487 
 4488 // Integer ALU reg operation using big decoder (D0)
 4489 pipe_class ialu_reg_fat(rRegI dst) %{
 4490     single_instruction;
 4491     dst    : S4(write);
 4492     dst    : S3(read);
 4493     D0     : S0;        // big decoder only
 4494     ALU    : S3;        // any alu
 4495 %}
 4496 
 4497 // Long ALU reg operation using big decoder (D0); two instructions
 4498 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4499     instruction_count(2);
 4500     dst    : S4(write);
 4501     dst    : S3(read);
 4502     D0     : S0(2);     // big decoder only; twice
 4503     ALU    : S3(2);     // any 2 alus
 4504 %}
 4505 
 4506 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4
 4507 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4508     single_instruction;
 4509     dst    : S4(write);
 4510     src    : S3(read);
 4511     DECODE : S0;        // any decoder
 4512     ALU    : S3;        // any alu
 4513 %}
 4514 
 4515 // Long ALU reg-reg operation (two instructions)
 4516 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4517     instruction_count(2);
 4518     dst    : S4(write);
 4519     src    : S3(read);
 4520     DECODE : S0(2);     // any 2 decoders
 4521     ALU    : S3(2);     // both alus
 4522 %}
 4523 
 4524 // Integer ALU reg-reg operation using big decoder (note: src is declared with the memory operand class)
 4525 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4526     single_instruction;
 4527     dst    : S4(write);
 4528     src    : S3(read);
 4529     D0     : S0;        // big decoder only
 4530     ALU    : S3;        // any alu
 4531 %}
 4532 
 4533 // Long ALU reg-reg operation using big decoder (two instructions)
 4534 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4535     instruction_count(2);
 4536     dst    : S4(write);
 4537     src    : S3(read);
 4538     D0     : S0(2);     // big decoder only; twice
 4539     ALU    : S3(2);     // both alus
 4540 %}
 4541 
 4542 // Integer ALU reg-mem operation: mem read in S3, ALU used in S4, dst written in S5
 4543 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4544     single_instruction;
 4545     dst    : S5(write);
 4546     mem    : S3(read);
 4547     D0     : S0;        // big decoder only
 4548     ALU    : S4;        // any alu
 4549     MEM    : S3;        // any mem
 4550 %}
 4551 
 4552 // Long ALU reg-mem operation (two instructions); mem is a load_long_memory operand
 4553 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4554     instruction_count(2);
 4555     dst    : S5(write);
 4556     mem    : S3(read);
 4557     D0     : S0(2);     // big decoder only; twice
 4558     ALU    : S4(2);     // any 2 alus
 4559     MEM    : S3(2);     // both mems
 4560 %}
 4561 
 4562 // Integer mem operation (prefetch): uses a memory unit but no ALU
 4563 pipe_class ialu_mem(memory mem)
 4564 %{
 4565     single_instruction;
 4566     mem    : S3(read);
 4567     D0     : S0;        // big decoder only
 4568     MEM    : S3;        // any mem
 4569 %}
 4570 
 4571 // Integer Store to Memory from a register source
 4572 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4573     single_instruction;
 4574     mem    : S3(read);
 4575     src    : S5(read);
 4576     D0     : S0;        // big decoder only
 4577     ALU    : S4;        // any alu
 4578     MEM    : S3;        // any mem
 4579 %}
 4580 
 4581 // Long Store to Memory (two instructions)
 4582 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4583     instruction_count(2);
 4584     mem    : S3(read);
 4585     src    : S5(read);
 4586     D0     : S0(2);     // big decoder only; twice
 4587     ALU    : S4(2);     // any 2 alus
 4588     MEM    : S3(2);     // Both mems
 4589 %}
 4590 
 4591 // Integer Store to Memory with no register source operand (the _imm variant)
 4592 pipe_class ialu_mem_imm(memory mem) %{
 4593     single_instruction;
 4594     mem    : S3(read);
 4595     D0     : S0;        // big decoder only
 4596     ALU    : S4;        // any alu
 4597     MEM    : S3;        // any mem
 4598 %}
 4599 
 4600 // Integer ALU0 reg-reg operation: restricted to ALU0 and the big decoder
 4601 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4602     single_instruction;
 4603     dst    : S4(write);
 4604     src    : S3(read);
 4605     D0     : S0;        // Big decoder only
 4606     ALU0   : S3;        // only alu0
 4607 %}
 4608 
 4609 // Integer ALU0 reg-mem operation: restricted to ALU0 and the big decoder
 4610 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4611     single_instruction;
 4612     dst    : S5(write);
 4613     mem    : S3(read);
 4614     D0     : S0;        // big decoder only
 4615     ALU0   : S4;        // ALU0 only
 4616     MEM    : S3;        // any mem
 4617 %}
 4618 
 4619 // Integer ALU reg-reg operation that writes the flags register (cr)
 4620 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4621     single_instruction;
 4622     cr     : S4(write);
 4623     src1   : S3(read);
 4624     src2   : S3(read);
 4625     DECODE : S0;        // any decoder
 4626     ALU    : S3;        // any alu
 4627 %}
 4628 
 4629 // Integer ALU reg-imm operation that writes the flags register (cr)
 4630 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4631     single_instruction;
 4632     cr     : S4(write);
 4633     src1   : S3(read);
 4634     DECODE : S0;        // any decoder
 4635     ALU    : S3;        // any alu
 4636 %}
 4637 
 4638 // Integer ALU reg-mem operation that writes the flags register (cr)
 4639 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4640     single_instruction;
 4641     cr     : S4(write);
 4642     src1   : S3(read);
 4643     src2   : S3(read);
 4644     D0     : S0;        // big decoder only
 4645     ALU    : S4;        // any alu
 4646     MEM    : S3;        // any mem
 4647 %}
 4648 
 4649 // Four-instruction sequence reading p, q and y; only decode resources are listed
 4650 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4651     instruction_count(4);
 4652     y      : S4(read);
 4653     q      : S3(read);
 4654     p      : S3(read);
 4655     DECODE : S0(4);     // any decoder
 4656 %}
 4657 
 4658 // Conditional move reg-reg: reads src and the flags (cr), writes dst
 4659 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4660     single_instruction;
 4661     dst    : S4(write);
 4662     src    : S3(read);
 4663     cr     : S3(read);
 4664     DECODE : S0;        // any decoder
 4665 %}
 4666 
 4667 // Conditional move reg-mem: reads a memory src and the flags (cr), writes dst
 4668 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4669     single_instruction;
 4670     dst    : S4(write);
 4671     src    : S3(read);
 4672     cr     : S3(read);
 4673     DECODE : S0;        // any decoder
 4674     MEM    : S3;        // any mem
 4675 %}
 4676 
 4677 // Conditional move reg-reg long: reads src and the flags (cr), writes dst
 4678 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4679     single_instruction;  // NOTE(review): the other long classes use instruction_count(2) and DECODE below is S0(2) - confirm this is intended
 4680     dst    : S4(write);
 4681     src    : S3(read);
 4682     cr     : S3(read);
 4683     DECODE : S0(2);     // any 2 decoders
 4684 %}
 4685 
 4686 // Conditional move double reg-reg: dst is a regDPR1, src is a regDPR
 4687 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4688     single_instruction;
 4689     dst    : S4(write);
 4690     src    : S3(read);
 4691     cr     : S3(read);
 4692     DECODE : S0;        // any decoder
 4693 %}
 4694 
 4695 // Float reg operation: single operand, read in S3 (two instructions)
 4696 pipe_class fpu_reg(regDPR dst) %{
 4697     instruction_count(2);
 4698     dst    : S3(read);
 4699     DECODE : S0(2);     // any 2 decoders
 4700     FPU    : S3;
 4701 %}
 4702 
 4703 // Float reg-reg operation (two instructions)
 4704 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4705     instruction_count(2);
 4706     dst    : S4(write);
 4707     src    : S3(read);
 4708     DECODE : S0(2);     // any 2 decoders
 4709     FPU    : S3;
 4710 %}
 4711 
 4712 // Float reg-reg-reg operation (three instructions)
 4713 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4714     instruction_count(3);
 4715     dst    : S4(write);
 4716     src1   : S3(read);
 4717     src2   : S3(read);
 4718     DECODE : S0(3);     // any 3 decoders
 4719     FPU    : S3(2);
 4720 %}
 4721 
 4722 // Float reg-reg-reg-reg operation (four instructions)
 4723 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4724     instruction_count(4);
 4725     dst    : S4(write);
 4726     src1   : S3(read);
 4727     src2   : S3(read);
 4728     src3   : S3(read);
 4729     DECODE : S0(4);     // any decoder; count of 4 matches instruction_count
 4730     FPU    : S3(2);
 4731 %}
 4732 
 4733 // Float reg-mem-reg-reg operation (four instructions)
 4734 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4735     instruction_count(4);
 4736     dst    : S4(write);
 4737     src1   : S3(read);
 4738     src2   : S3(read);
 4739     src3   : S3(read);
 4740     DECODE : S1(3);     // any 3 decoders
 4741     D0     : S0;        // Big decoder only
 4742     FPU    : S3(2);
 4743     MEM    : S3;        // any mem
 4744 %}
 4745 
 4746 // Float reg-mem operation (two instructions): mem read in S3, dst written in S5
 4747 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4748     instruction_count(2);
 4749     dst    : S5(write);
 4750     mem    : S3(read);
 4751     D0     : S0;        // big decoder only
 4752     DECODE : S1;        // any decoder for FPU POP
 4753     FPU    : S4;
 4754     MEM    : S3;        // any mem
 4755 %}
 4756 
 4757 // Float reg-reg-mem operation (three instructions)
 4758 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4759     instruction_count(3);
 4760     dst    : S5(write);
 4761     src1   : S3(read);
 4762     mem    : S3(read);
 4763     D0     : S0;        // big decoder only
 4764     DECODE : S1(2);     // any decoder for FPU POP
 4765     FPU    : S4;
 4766     MEM    : S3;        // any mem
 4767 %}
 4768 
 4769 // Float mem-reg operation (two instructions): mem read in S3, src read in S5
 4770 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4771     instruction_count(2);
 4772     src    : S5(read);
 4773     mem    : S3(read);
 4774     DECODE : S0;        // any decoder for FPU PUSH
 4775     D0     : S1;        // big decoder only
 4776     FPU    : S4;
 4777     MEM    : S3;        // any mem
 4778 %}
 4779 
 4780 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{  // Float mem-reg-reg operation (three instructions)
 4781     instruction_count(3);
 4782     src1   : S3(read);
 4783     src2   : S3(read);
 4784     mem    : S3(read);
 4785     DECODE : S0(2);     // any decoder for FPU PUSH
 4786     D0     : S1;        // big decoder only
 4787     FPU    : S4;
 4788     MEM    : S3;        // any mem
 4789 %}
 4790 
 4791 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{  // Float mem-reg-mem operation (three instructions)
 4792     instruction_count(3);
 4793     src1   : S3(read);
 4794     src2   : S3(read);
 4795     mem    : S4(read);
 4796     DECODE : S0;        // any decoder for FPU PUSH
 4797     D0     : S0(2);     // big decoder only
 4798     FPU    : S4;
 4799     MEM    : S3(2);     // any mem
 4800 %}
 4801 
 4802 pipe_class fpu_mem_mem(memory dst, memory src1) %{  // Float mem-mem operation (two instructions); no FPU resource is listed
 4803     instruction_count(2);
 4804     src1   : S3(read);
 4805     dst    : S4(read);
 4806     D0     : S0(2);     // big decoder only
 4807     MEM    : S3(2);     // any mem
 4808 %}
 4809 
 4810 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{  // Float mem-mem-mem operation (three instructions)
 4811     instruction_count(3);
 4812     src1   : S3(read);
 4813     src2   : S3(read);
 4814     dst    : S4(read);
 4815     D0     : S0(3);     // big decoder only
 4816     FPU    : S4;
 4817     MEM    : S3(3);     // any mem
 4818 %}
 4819 
 4820 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{  // Float mem-reg-con operation (three instructions); "con" presumably a constant, as in fpu_reg_con
 4821     instruction_count(3);
 4822     src1   : S4(read);
 4823     mem    : S4(read);
 4824     DECODE : S0;        // any decoder for FPU PUSH
 4825     D0     : S0(2);     // big decoder only
 4826     FPU    : S4;
 4827     MEM    : S3(2);     // any mem
 4828 %}
 4829 
 4830 // Float load constant into dst (two instructions)
 4831 pipe_class fpu_reg_con(regDPR dst) %{
 4832     instruction_count(2);
 4833     dst    : S5(write);
 4834     D0     : S0;        // big decoder only for the load
 4835     DECODE : S1;        // any decoder for FPU POP
 4836     FPU    : S4;
 4837     MEM    : S3;        // any mem
 4838 %}
 4839 
 4840 // Float reg-reg operation with a constant load (three instructions)
 4841 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4842     instruction_count(3);
 4843     dst    : S5(write);
 4844     src    : S3(read);
 4845     D0     : S0;        // big decoder only for the load
 4846     DECODE : S1(2);     // any decoder for FPU POP
 4847     FPU    : S4;
 4848     MEM    : S3;        // any mem
 4849 %}
 4850 
 4851 // Unconditional branch: only the branch unit (BR) is listed
 4852 pipe_class pipe_jmp( label labl ) %{
 4853     single_instruction;
 4854     BR   : S3;
 4855 %}
 4856 
 4857 // Conditional branch: reads the flags (cr) in S1, uses the branch unit in S3
 4858 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4859     single_instruction;
 4860     cr    : S1(read);
 4861     BR    : S3;
 4862 %}
 4863 
 4864 // Allocation idiom: one serializing instruction with a fixed latency of 6
 4865 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4866     instruction_count(1); force_serialization;
 4867     fixed_latency(6);
 4868     heap_ptr : S3(read);
 4869     DECODE   : S0(3);
 4870     D0       : S2;
 4871     MEM      : S3;
 4872     ALU      : S3(2);
 4873     dst      : S5(write);
 4874     BR       : S5;
 4875 %}
 4876 
 4877 // Generic big/slow expanded idiom: 10 instructions, multiple bundles, serializing, fixed latency of 100
 4878 pipe_class pipe_slow(  ) %{
 4879     instruction_count(10); multiple_bundles; force_serialization;
 4880     fixed_latency(100);
 4881     D0  : S0(2);
 4882     MEM : S3(2);
 4883 %}
 4884 
 4885 // The real do-nothing guy: zero instructions, no operands, no resources
 4886 pipe_class empty( ) %{
 4887     instruction_count(0);
 4888 %}
 4889 
 4890 // Define the class for the Nop node: MachNop uses the empty pipe class
 4891 define %{
 4892    MachNop = empty;
 4893 %}
 4894 
 4895 %}
 4896 
 4897 //----------INSTRUCTIONS-------------------------------------------------------
 4898 //
 4899 // match      -- States which machine-independent subtree may be replaced
 4900 //               by this instruction.
 4901 // ins_cost   -- The estimated cost of this instruction is used by instruction
 4902 //               selection to identify a minimum cost tree of machine
 4903 //               instructions that matches a tree of machine-independent
 4904 //               instructions.
 4905 // format     -- A string providing the disassembly for this instruction.
 4906 //               The value of an instruction's operand may be inserted
 4907 //               by referring to it with a '$' prefix.
 4908 // opcode     -- Three instruction opcodes may be provided.  These are referred
 4909 //               to within an encode class as $primary, $secondary, and $tertiary
 4910 //               respectively.  The primary opcode is commonly used to
 4911 //               indicate the type of machine instruction, while secondary
 4912 //               and tertiary are often used for prefix options or addressing
 4913 //               modes.
 4914 // ins_encode -- A list of encode classes with parameters. The encode class
 4915 //               name must have been defined in an 'enc_class' specification
 4916 //               in the encode section of the architecture description.
 4917 
 4918 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 4919 // Placeholder float move regF -> legRegF; the encoding is ShouldNotReachHere()
 4920 instruct MoveF2LEG(legRegF dst, regF src) %{
 4921   match(Set dst src);
 4922   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4923   ins_encode %{
 4924     ShouldNotReachHere();
 4925   %}
 4926   ins_pipe( fpu_reg_reg );
 4927 %}
 4928 
 4929 // Placeholder float move legRegF -> regF; the encoding is ShouldNotReachHere()
 4930 instruct MoveLEG2F(regF dst, legRegF src) %{
 4931   match(Set dst src);
 4932   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4933   ins_encode %{
 4934     ShouldNotReachHere();
 4935   %}
 4936   ins_pipe( fpu_reg_reg );
 4937 %}
 4938 
 4939 // Placeholder float move regF -> vlRegF; the encoding is ShouldNotReachHere()
 4940 instruct MoveF2VL(vlRegF dst, regF src) %{
 4941   match(Set dst src);
 4942   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4943   ins_encode %{
 4944     ShouldNotReachHere();
 4945   %}
 4946   ins_pipe( fpu_reg_reg );
 4947 %}
 4948 
 4949 // Placeholder float move vlRegF -> regF; the encoding is ShouldNotReachHere()
 4950 instruct MoveVL2F(regF dst, vlRegF src) %{
 4951   match(Set dst src);
 4952   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4953   ins_encode %{
 4954     ShouldNotReachHere();
 4955   %}
 4956   ins_pipe( fpu_reg_reg );
 4957 %}
 4958 
 4959 
 4960 
 4961 // Placeholder double move regD -> legRegD; the encoding is ShouldNotReachHere()
 4962 instruct MoveD2LEG(legRegD dst, regD src) %{
 4963   match(Set dst src);
 4964   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4965   ins_encode %{
 4966     ShouldNotReachHere();
 4967   %}
 4968   ins_pipe( fpu_reg_reg );
 4969 %}
 4970 
 4971 // Placeholder double move legRegD -> regD; the encoding is ShouldNotReachHere()
 4972 instruct MoveLEG2D(regD dst, legRegD src) %{
 4973   match(Set dst src);
 4974   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4975   ins_encode %{
 4976     ShouldNotReachHere();
 4977   %}
 4978   ins_pipe( fpu_reg_reg );
 4979 %}
 4980 
 4981 // Placeholder double move regD -> vlRegD; the encoding is ShouldNotReachHere()
 4982 instruct MoveD2VL(vlRegD dst, regD src) %{
 4983   match(Set dst src);
 4984   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4985   ins_encode %{
 4986     ShouldNotReachHere();
 4987   %}
 4988   ins_pipe( fpu_reg_reg );
 4989 %}
 4990 
 4991 // Placeholder double move vlRegD -> regD; the encoding is ShouldNotReachHere()
 4992 instruct MoveVL2D(regD dst, vlRegD src) %{
 4993   match(Set dst src);
 4994   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4995   ins_encode %{
 4996     ShouldNotReachHere();
 4997   %}
 4998   ins_pipe( fpu_reg_reg );
 4999 %}
 5000 
 5001 //----------BSWAP-Instruction--------------------------------------------------
 5002 instruct bytes_reverse_int(rRegI dst) %{  // in-place ReverseBytesI: dst is both input and result
 5003   match(Set dst (ReverseBytesI dst));
 5004 
 5005   format %{ "BSWAP  $dst" %}
 5006   opcode(0x0F, 0xC8);  // primary and secondary opcode values
 5007   ins_encode( OpcP, OpcSReg(dst) );
 5008   ins_pipe( ialu_reg );
 5009 %}
 5010 
 5011 instruct bytes_reverse_long(eRegL dst) %{  // in-place ReverseBytesL: per the format, BSWAP each half and then exchange the halves
 5012   match(Set dst (ReverseBytesL dst));
 5013 
 5014   format %{ "BSWAP  $dst.lo\n\t"
 5015             "BSWAP  $dst.hi\n\t"
 5016             "XCHG   $dst.lo $dst.hi" %}
 5017 
 5018   ins_cost(125);
 5019   ins_encode( bswap_long_bytes(dst) );  // encode class not visible in this section
 5020   ins_pipe( ialu_reg_reg);
 5021 %}
 5022 
 5023 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{  // in-place ReverseBytesUS; kills the flags
 5024   match(Set dst (ReverseBytesUS dst));
 5025   effect(KILL cr);
 5026 
 5027   format %{ "BSWAP  $dst\n\t"
 5028             "SHR    $dst,16\n\t" %}
 5029   ins_encode %{
 5030     __ bswapl($dst$$Register);    // reverse all four bytes; the swapped low half lands in the upper 16 bits
 5031     __ shrl($dst$$Register, 16);  // logical shift brings it back down, zero-filling the upper half
 5032   %}
 5033   ins_pipe( ialu_reg );
 5034 %}
 5035 
 5036 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{  // in-place ReverseBytesS; kills the flags
 5037   match(Set dst (ReverseBytesS dst));
 5038   effect(KILL cr);
 5039 
 5040   format %{ "BSWAP  $dst\n\t"
 5041             "SAR    $dst,16\n\t" %}
 5042   ins_encode %{
 5043     __ bswapl($dst$$Register);    // reverse all four bytes; the swapped low half lands in the upper 16 bits
 5044     __ sarl($dst$$Register, 16);  // arithmetic shift brings it back down, sign-extending the result
 5045   %}
 5046   ins_pipe( ialu_reg );
 5047 %}
 5048 
 5049 
 5050 //---------- Zeros Count Instructions ------------------------------------------
 5051 
 5052 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{  // CountLeadingZerosI via a single LZCNT; kills the flags
 5053   predicate(UseCountLeadingZerosInstruction);  // the BSR-based variant below covers the opposite case
 5054   match(Set dst (CountLeadingZerosI src));
 5055   effect(KILL cr);
 5056 
 5057   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5058   ins_encode %{
 5059     __ lzcntl($dst$$Register, $src$$Register);
 5060   %}
 5061   ins_pipe(ialu_reg);
 5062 %}
 5063 
 5064 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{  // CountLeadingZerosI via BSR when LZCNT is not used; kills the flags
 5065   predicate(!UseCountLeadingZerosInstruction);
 5066   match(Set dst (CountLeadingZerosI src));
 5067   effect(KILL cr);
 5068 
 5069   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5070             "JNZ    skip\n\t"
 5071             "MOV    $dst, -1\n"
 5072       "skip:\n\t"
 5073             "NEG    $dst\n\t"
 5074             "ADD    $dst, 31" %}
 5075   ins_encode %{
 5076     Register Rdst = $dst$$Register;
 5077     Register Rsrc = $src$$Register;
 5078     Label skip;
 5079     __ bsrl(Rdst, Rsrc);                // index of the highest set bit; ZF is set when src is zero
 5080     __ jccb(Assembler::notZero, skip);
 5081     __ movl(Rdst, -1);                  // src == 0: use -1 so the arithmetic below yields 32
 5082     __ bind(skip);
 5083     __ negl(Rdst);                      // result = (BitsPerInt - 1) - index
 5084     __ addl(Rdst, BitsPerInt - 1);
 5085   %}
 5086   ins_pipe(ialu_reg);
 5087 %}
 5088 
 5089 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
        // CountLeadingZerosL on a register-pair long using LZCNT: count the high
        // word first; if carry is clear that count is the result, otherwise
        // count the low word and add BitsPerInt.
        // NOTE(review): dst is TEMP, presumably so it cannot share a register
        // with $src, which is read again after dst is written -- confirm.
 5090   predicate(UseCountLeadingZerosInstruction);
 5091   match(Set dst (CountLeadingZerosL src));
 5092   effect(TEMP dst, KILL cr);
 5093
 5094   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5095             "JNC    done\n\t"
 5096             "LZCNT  $dst, $src.lo\n\t"
 5097             "ADD    $dst, 32\n"
 5098       "done:" %}
 5099   ins_encode %{
 5100     Register Rdst = $dst$$Register;
 5101     Register Rsrc = $src$$Register;
 5102     Label done;
 5103     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5104     __ jccb(Assembler::carryClear, done);  // high-word count is final
 5105     __ lzcntl(Rdst, Rsrc);
 5106     __ addl(Rdst, BitsPerInt);
 5107     __ bind(done);
 5108   %}
 5109   ins_pipe(ialu_reg);
 5110 %}
 5111 
 5112 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
        // Fallback for CountLeadingZerosL when UseCountLeadingZerosInstruction
        // is off.  BSR is tried on the high word first; if it leaves ZF set the
        // low word is scanned instead.  The index (high-word index + BitsPerInt,
        // low-word index, or -1 when the second BSR also leaves ZF set) is then
        // negated and biased by BitsPerLong - 1.
 5113   predicate(!UseCountLeadingZerosInstruction);
 5114   match(Set dst (CountLeadingZerosL src));
 5115   effect(TEMP dst, KILL cr);
 5116
 5117   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5118             "JZ     msw_is_zero\n\t"
 5119             "ADD    $dst, 32\n\t"
 5120             "JMP    not_zero\n"
 5121       "msw_is_zero:\n\t"
 5122             "BSR    $dst, $src.lo\n\t"
 5123             "JNZ    not_zero\n\t"
 5124             "MOV    $dst, -1\n"
 5125       "not_zero:\n\t"
 5126             "NEG    $dst\n\t"
 5127             "ADD    $dst, 63" %}
 5128   ins_encode %{
 5129     Register Rdst = $dst$$Register;
 5130     Register Rsrc = $src$$Register;
 5131     Label msw_is_zero;
 5132     Label not_zero;
 5133     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5134     __ jccb(Assembler::zero, msw_is_zero);
 5135     __ addl(Rdst, BitsPerInt);             // index within the full 64-bit value
 5136     __ jmpb(not_zero);
 5137     __ bind(msw_is_zero);
 5138     __ bsrl(Rdst, Rsrc);
 5139     __ jccb(Assembler::notZero, not_zero);
 5140     __ movl(Rdst, -1);                     // both scans left ZF set
 5141     __ bind(not_zero);
 5142     __ negl(Rdst);
 5143     __ addl(Rdst, BitsPerLong - 1);
 5144   %}
 5145   ins_pipe(ialu_reg);
 5146 %}
 5147 
 5148 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
        // CountTrailingZerosI as a single TZCNT; only selected when
        // UseCountTrailingZerosInstruction is set.  cr is declared KILLed.
 5149   predicate(UseCountTrailingZerosInstruction);
 5150   match(Set dst (CountTrailingZerosI src));
 5151   effect(KILL cr);
 5152
 5153   format %{ "TZCNT  $dst, $src\t# count trailing zeros (int)" %}
 5154   ins_encode %{
 5155     __ tzcntl($dst$$Register, $src$$Register);
 5156   %}
 5157   ins_pipe(ialu_reg);
 5158 %}
 5159 
 5160 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Fallback for CountTrailingZerosI when UseCountTrailingZerosInstruction
        // is off: BSF gives the index directly; when it leaves ZF set the result
        // is replaced by BitsPerInt.
 5161   predicate(!UseCountTrailingZerosInstruction);
 5162   match(Set dst (CountTrailingZerosI src));
 5163   effect(KILL cr);
 5164
 5165   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5166             "JNZ    done\n\t"
 5167             "MOV    $dst, 32\n"
 5168       "done:" %}
 5169   ins_encode %{
 5170     Register Rdst = $dst$$Register;
 5171     Label done;
 5172     __ bsfl(Rdst, $src$$Register);
 5173     __ jccb(Assembler::notZero, done);
 5174     __ movl(Rdst, BitsPerInt);
 5175     __ bind(done);
 5176   %}
 5177   ins_pipe(ialu_reg);
 5178 %}
 5179 
 5180 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
        // CountTrailingZerosL on a register-pair long using TZCNT: count the low
        // word first; if carry is clear that count is the result, otherwise
        // count the high word and add BitsPerInt.
        // NOTE(review): dst is TEMP, presumably so it cannot share a register
        // with $src, which is read again after dst is written -- confirm.
 5181   predicate(UseCountTrailingZerosInstruction);
 5182   match(Set dst (CountTrailingZerosL src));
 5183   effect(TEMP dst, KILL cr);
 5184
 5185   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5186             "JNC    done\n\t"
 5187             "TZCNT  $dst, $src.hi\n\t"
 5188             "ADD    $dst, 32\n"
 5189       "done:" %}
 5190   ins_encode %{
 5191     Register Rdst = $dst$$Register;
 5192     Register Rsrc = $src$$Register;
 5193     Label done;
 5194     __ tzcntl(Rdst, Rsrc);
 5195     __ jccb(Assembler::carryClear, done);  // low-word count is final
 5196     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5197     __ addl(Rdst, BitsPerInt);
 5198     __ bind(done);
 5199   %}
 5200   ins_pipe(ialu_reg);
 5201 %}
 5202 
 5203 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
        // Fallback for CountTrailingZerosL when UseCountTrailingZerosInstruction
        // is off.  BSF on the low word is final when ZF is clear.  Otherwise the
        // high word is scanned and BitsPerInt is added; if that scan also leaves
        // ZF set, BitsPerInt is loaded first, giving 2 * BitsPerInt.
 5204   predicate(!UseCountTrailingZerosInstruction);
 5205   match(Set dst (CountTrailingZerosL src));
 5206   effect(TEMP dst, KILL cr);
 5207
 5208   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5209             "JNZ    done\n\t"
 5210             "BSF    $dst, $src.hi\n\t"
 5211             "JNZ    msw_not_zero\n\t"
 5212             "MOV    $dst, 32\n"
 5213       "msw_not_zero:\n\t"
 5214             "ADD    $dst, 32\n"
 5215       "done:" %}
 5216   ins_encode %{
 5217     Register Rdst = $dst$$Register;
 5218     Register Rsrc = $src$$Register;
 5219     Label msw_not_zero;
 5220     Label done;
 5221     __ bsfl(Rdst, Rsrc);
 5222     __ jccb(Assembler::notZero, done);
 5223     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5224     __ jccb(Assembler::notZero, msw_not_zero);
 5225     __ movl(Rdst, BitsPerInt);
 5226     __ bind(msw_not_zero);
 5227     __ addl(Rdst, BitsPerInt);
 5228     __ bind(done);
 5229   %}
 5230   ins_pipe(ialu_reg);
 5231 %}
 5232 
 5233 
 5234 //---------- Population Count Instructions -------------------------------------
 5235 
 5236 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
        // PopCountI, register source: one POPCNT.  Only selected when
        // UsePopCountInstruction is set.  cr is declared KILLed.
 5237   predicate(UsePopCountInstruction);
 5238   match(Set dst (PopCountI src));
 5239   effect(KILL cr);
 5240
 5241   format %{ "POPCNT $dst, $src" %}
 5242   ins_encode %{
 5243     __ popcntl($dst$$Register, $src$$Register);
 5244   %}
 5245   ins_pipe(ialu_reg);
 5246 %}
 5247 
 5248 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
        // PopCountI with the LoadI folded in: POPCNT reads its operand directly
        // from $mem.  Only selected when UsePopCountInstruction is set.
 5249   predicate(UsePopCountInstruction);
 5250   match(Set dst (PopCountI (LoadI mem)));
 5251   effect(KILL cr);
 5252
 5253   format %{ "POPCNT $dst, $mem" %}
 5254   ins_encode %{
 5255     __ popcntl($dst$$Register, $mem$$Address);
 5256   %}
 5257   ins_pipe(ialu_reg);
 5258 %}
 5259 
 5260 // Note: Long.bitCount(long) returns an int.
 5261 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // PopCountL on a register-pair long: POPCNT of the low word into dst,
        // POPCNT of the high word into tmp, then the two counts are added.
        // dst and tmp are TEMPs; cr is declared KILLed.
 5262   predicate(UsePopCountInstruction);
 5263   match(Set dst (PopCountL src));
 5264   effect(KILL cr, TEMP tmp, TEMP dst);
 5265
 5266   format %{ "POPCNT $dst, $src.lo\n\t"
 5267             "POPCNT $tmp, $src.hi\n\t"
 5268             "ADD    $dst, $tmp" %}
 5269   ins_encode %{
 5270     __ popcntl($dst$$Register, $src$$Register);
 5271     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5272     __ addl($dst$$Register, $tmp$$Register);
 5273   %}
 5274   ins_pipe(ialu_reg);
 5275 %}
 5276 
 5277 // Note: Long.bitCount(long) returns an int.
 5278 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
        // PopCountL with the LoadL folded in: the 32-bit halves at $mem and
        // $mem+4 are counted separately (into dst and tmp) and summed.
 5279   predicate(UsePopCountInstruction);
 5280   match(Set dst (PopCountL (LoadL mem)));
 5281   effect(KILL cr, TEMP tmp, TEMP dst);
 5282
 5283   format %{ "POPCNT $dst, $mem\n\t"
 5284             "POPCNT $tmp, $mem+4\n\t"
 5285             "ADD    $dst, $tmp" %}
 5286   ins_encode %{
          // The two lines below are an older, disabled spelling of the same
          // pair of loads; the live code rebuilds both addresses explicitly
          // from the operand's base/index/scale/disp.
 5287     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5288     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5289     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5290     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5291     __ addl($dst$$Register, $tmp$$Register);
 5292   %}
 5293   ins_pipe(ialu_reg);
 5294 %}
 5295 
 5296 
 5297 //----------Load/Store/Move Instructions---------------------------------------
 5298 //----------Load Instructions--------------------------------------------------
 5299 // Load Byte (8bit signed)
 5300 instruct loadB(xRegI dst, memory mem) %{
        // LoadB: sign-extending 8-bit load (movsbl) from $mem into $dst.
 5301   match(Set dst (LoadB mem));
 5302
 5303   ins_cost(125);
 5304   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5305
 5306   ins_encode %{
 5307     __ movsbl($dst$$Register, $mem$$Address);
 5308   %}
 5309
 5310   ins_pipe(ialu_reg_mem);
 5311 %}
 5312 
 5313 // Load Byte (8bit signed) into Long Register
 5314 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
        // ConvI2L(LoadB): sign-extending byte load into the low word, then the
        // high word is produced by copying the low word and shifting it right
        // arithmetically.  A shift of 7 suffices because the byte load already
        // replicated the sign into the upper 25 bits of the low word.
 5315   match(Set dst (ConvI2L (LoadB mem)));
 5316   effect(KILL cr);
 5317
 5318   ins_cost(375);
 5319   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5320             "MOV    $dst.hi,$dst.lo\n\t"
 5321             "SAR    $dst.hi,7" %}
 5322
 5323   ins_encode %{
 5324     __ movsbl($dst$$Register, $mem$$Address);
 5325     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5326     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
 5327   %}
 5328
 5329   ins_pipe(ialu_reg_mem);
 5330 %}
 5331 
 5332 // Load Unsigned Byte (8bit UNsigned)
 5333 instruct loadUB(xRegI dst, memory mem) %{
        // LoadUB: zero-extending 8-bit load (movzbl) from $mem into $dst.
 5334   match(Set dst (LoadUB mem));
 5335
 5336   ins_cost(125);
 5337   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5338
 5339   ins_encode %{
 5340     __ movzbl($dst$$Register, $mem$$Address);
 5341   %}
 5342
 5343   ins_pipe(ialu_reg_mem);
 5344 %}
 5345 
 5346 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5347 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
        // ConvI2L(LoadUB): zero-extending byte load into the low word; the high
        // word is cleared with XOR, which is why cr is declared KILLed.
 5348   match(Set dst (ConvI2L (LoadUB mem)));
 5349   effect(KILL cr);
 5350
 5351   ins_cost(250);
 5352   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5353             "XOR    $dst.hi,$dst.hi" %}
 5354
 5355   ins_encode %{
 5356     Register Rdst = $dst$$Register;
 5357     __ movzbl(Rdst, $mem$$Address);
 5358     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5359   %}
 5360
 5361   ins_pipe(ialu_reg_mem);
 5362 %}
 5363 
 5364 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5365 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
        // ConvI2L(AndI(LoadUB, mask)): zero-extending byte load, high word
        // cleared, then the low word is ANDed with the mask.  The immediate
        // actually emitted is $mask limited to its low 8 bits
        // ($mask$$constant & right_n_bits(8)); the format string's
        // "right_n_bits($mask, 8)" is shorthand for that value.
 5366   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5367   effect(KILL cr);
 5368
 5369   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5370             "XOR    $dst.hi,$dst.hi\n\t"
 5371             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5372   ins_encode %{
 5373     Register Rdst = $dst$$Register;
 5374     __ movzbl(Rdst, $mem$$Address);
 5375     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5376     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5377   %}
 5378   ins_pipe(ialu_reg_mem);
 5379 %}
 5380 
 5381 // Load Short (16bit signed)
 5382 instruct loadS(rRegI dst, memory mem) %{
        // LoadS: sign-extending 16-bit load (movswl) from $mem into $dst.
 5383   match(Set dst (LoadS mem));
 5384
 5385   ins_cost(125);
 5386   format %{ "MOVSX  $dst,$mem\t# short" %}
 5387
 5388   ins_encode %{
 5389     __ movswl($dst$$Register, $mem$$Address);
 5390   %}
 5391
 5392   ins_pipe(ialu_reg_mem);
 5393 %}
 5394 
 5395 // Load Short (16 bit signed) to Byte (8 bit signed)
 5396 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
        // (LoadS << 24) >> 24 keeps only the sign-extended low byte, so the
        // whole pattern is replaced by a sign-extending 8-bit load (movsbl).
        // The format uses MOVSX8, the mnemonic this file prints for byte loads.
 5397   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5398
 5399   ins_cost(125);
 5400   format %{ "MOVSX8 $dst, $mem\t# short -> byte" %}
 5401   ins_encode %{
 5402     __ movsbl($dst$$Register, $mem$$Address);
 5403   %}
 5404   ins_pipe(ialu_reg_mem);
 5405 %}
 5406 
 5407 // Load Short (16bit signed) into Long Register
 5408 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
        // ConvI2L(LoadS): sign-extending 16-bit load into the low word, then the
        // high word is a copy of the low word shifted right arithmetically.
        // A shift of 15 suffices because the load already replicated the sign
        // into the upper 17 bits of the low word.
 5409   match(Set dst (ConvI2L (LoadS mem)));
 5410   effect(KILL cr);
 5411
 5412   ins_cost(375);
 5413   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5414             "MOV    $dst.hi,$dst.lo\n\t"
 5415             "SAR    $dst.hi,15" %}
 5416
 5417   ins_encode %{
 5418     __ movswl($dst$$Register, $mem$$Address);
 5419     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5420     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
 5421   %}
 5422
 5423   ins_pipe(ialu_reg_mem);
 5424 %}
 5425 
 5426 // Load Unsigned Short/Char (16bit unsigned)
 5427 instruct loadUS(rRegI dst, memory mem) %{
        // LoadUS: zero-extending 16-bit load (movzwl) from $mem into $dst.
 5428   match(Set dst (LoadUS mem));
 5429
 5430   ins_cost(125);
 5431   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5432
 5433   ins_encode %{
 5434     __ movzwl($dst$$Register, $mem$$Address);
 5435   %}
 5436
 5437   ins_pipe(ialu_reg_mem);
 5438 %}
 5439 
 5440 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5441 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
        // (LoadUS << 24) >> 24 keeps only the sign-extended low byte, so the
        // whole pattern is replaced by a sign-extending 8-bit load (movsbl).
        // The format uses MOVSX8, the mnemonic this file prints for byte loads.
 5442   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5443
 5444   ins_cost(125);
 5445   format %{ "MOVSX8 $dst, $mem\t# ushort -> byte" %}
 5446   ins_encode %{
 5447     __ movsbl($dst$$Register, $mem$$Address);
 5448   %}
 5449   ins_pipe(ialu_reg_mem);
 5450 %}
 5451 
 5452 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5453 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
        // ConvI2L(LoadUS): zero-extending 16-bit load into the low word; the
        // high word is cleared with XOR, which is why cr is declared KILLed.
 5454   match(Set dst (ConvI2L (LoadUS mem)));
 5455   effect(KILL cr);
 5456
 5457   ins_cost(250);
 5458   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5459             "XOR    $dst.hi,$dst.hi" %}
 5460
 5461   ins_encode %{
 5462     __ movzwl($dst$$Register, $mem$$Address);
 5463     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5464   %}
 5465
 5466   ins_pipe(ialu_reg_mem);
 5467 %}
 5468 
 5469 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5470 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
        // ConvI2L(AndI(LoadUS, 0xFF)): only the low byte survives the mask, so a
        // zero-extending byte load (movzbl) replaces the load + AND; the high
        // word is cleared with XOR.
 5471   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5472   effect(KILL cr);
 5473
 5474   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5475             "XOR    $dst.hi,$dst.hi" %}
 5476   ins_encode %{
 5477     Register Rdst = $dst$$Register;
 5478     __ movzbl(Rdst, $mem$$Address);
 5479     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5480   %}
 5481   ins_pipe(ialu_reg_mem);
 5482 %}
 5483 
 5484 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5485 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
        // ConvI2L(AndI(LoadUS, mask)): zero-extending 16-bit load, high word
        // cleared, then the low word is ANDed with the mask.  The immediate
        // actually emitted is $mask limited to its low 16 bits
        // ($mask$$constant & right_n_bits(16)); the format string's
        // "right_n_bits($mask, 16)" is shorthand for that value.
 5486   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5487   effect(KILL cr);
 5488
 5489   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5490             "XOR    $dst.hi,$dst.hi\n\t"
 5491             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5492   ins_encode %{
 5493     Register Rdst = $dst$$Register;
 5494     __ movzwl(Rdst, $mem$$Address);
 5495     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5496     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5497   %}
 5498   ins_pipe(ialu_reg_mem);
 5499 %}
 5500 
 5501 // Load Integer
 5502 instruct loadI(rRegI dst, memory mem) %{
        // LoadI: plain 32-bit MOV from $mem into $dst.
 5503   match(Set dst (LoadI mem));
 5504
 5505   ins_cost(125);
 5506   format %{ "MOV    $dst,$mem\t# int" %}
 5507
 5508   ins_encode %{
 5509     __ movl($dst$$Register, $mem$$Address);
 5510   %}
 5511
 5512   ins_pipe(ialu_reg_mem);
 5513 %}
 5514 
 5515 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5516 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
        // (LoadI << 24) >> 24 keeps only the sign-extended low byte, so the
        // whole pattern is replaced by a sign-extending 8-bit load (movsbl).
        // The format uses MOVSX8, the mnemonic this file prints for byte loads.
 5517   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5518
 5519   ins_cost(125);
 5520   format %{ "MOVSX8 $dst, $mem\t# int -> byte" %}
 5521   ins_encode %{
 5522     __ movsbl($dst$$Register, $mem$$Address);
 5523   %}
 5524   ins_pipe(ialu_reg_mem);
 5525 %}
 5526 
 5527 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5528 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
        // AndI(LoadI, 0xFF) keeps only the low byte, so the load + AND is
        // replaced by a zero-extending 8-bit load (movzbl).  The format uses
        // MOVZX8, the mnemonic this file prints for unsigned byte loads.
 5529   match(Set dst (AndI (LoadI mem) mask));
 5530
 5531   ins_cost(125);
 5532   format %{ "MOVZX8 $dst, $mem\t# int -> ubyte" %}
 5533   ins_encode %{
 5534     __ movzbl($dst$$Register, $mem$$Address);
 5535   %}
 5536   ins_pipe(ialu_reg_mem);
 5537 %}
 5538 
 5539 // Load Integer (32 bit signed) to Short (16 bit signed)
 5540 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
        // (LoadI << 16) >> 16 keeps only the sign-extended low halfword, so the
        // whole pattern is replaced by a sign-extending 16-bit load (movswl).
 5541   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5542
 5543   ins_cost(125);
 5544   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5545   ins_encode %{
 5546     __ movswl($dst$$Register, $mem$$Address);
 5547   %}
 5548   ins_pipe(ialu_reg_mem);
 5549 %}
 5550 
 5551 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5552 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
        // AndI(LoadI, 0xFFFF) keeps only the low halfword, so the load + AND is
        // replaced by a zero-extending 16-bit load (movzwl).
 5553   match(Set dst (AndI (LoadI mem) mask));
 5554
 5555   ins_cost(125);
 5556   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5557   ins_encode %{
 5558     __ movzwl($dst$$Register, $mem$$Address);
 5559   %}
 5560   ins_pipe(ialu_reg_mem);
 5561 %}
 5562 
 5563 // Load Integer into Long Register
 5564 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
        // ConvI2L(LoadI): 32-bit load into the low word; the high word is a copy
        // of the low word shifted right arithmetically by 31, i.e. filled with
        // the sign bit.
 5565   match(Set dst (ConvI2L (LoadI mem)));
 5566   effect(KILL cr);
 5567
 5568   ins_cost(375);
 5569   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5570             "MOV    $dst.hi,$dst.lo\n\t"
 5571             "SAR    $dst.hi,31" %}
 5572
 5573   ins_encode %{
 5574     __ movl($dst$$Register, $mem$$Address);
 5575     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5576     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5577   %}
 5578
 5579   ins_pipe(ialu_reg_mem);
 5580 %}
 5581 
 5582 // Load Integer with mask 0xFF into Long Register
 5583 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
        // ConvI2L(AndI(LoadI, 0xFF)): only the low byte survives the mask, so a
        // zero-extending byte load (movzbl) replaces the load + AND; the high
        // word is cleared with XOR.
 5584   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5585   effect(KILL cr);
 5586
 5587   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5588             "XOR    $dst.hi,$dst.hi" %}
 5589   ins_encode %{
 5590     Register Rdst = $dst$$Register;
 5591     __ movzbl(Rdst, $mem$$Address);
 5592     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5593   %}
 5594   ins_pipe(ialu_reg_mem);
 5595 %}
 5596 
 5597 // Load Integer with mask 0xFFFF into Long Register
 5598 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
        // ConvI2L(AndI(LoadI, 0xFFFF)): only the low halfword survives the mask,
        // so a zero-extending 16-bit load (movzwl) replaces the load + AND; the
        // high word is cleared with XOR.
 5599   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5600   effect(KILL cr);
 5601
 5602   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5603             "XOR    $dst.hi,$dst.hi" %}
 5604   ins_encode %{
 5605     Register Rdst = $dst$$Register;
 5606     __ movzwl(Rdst, $mem$$Address);
 5607     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5608   %}
 5609   ins_pipe(ialu_reg_mem);
 5610 %}
 5611 
 5612 // Load Integer with 31-bit mask into Long Register
 5613 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
        // ConvI2L(AndI(LoadI, mask)): 32-bit load, high word cleared with XOR,
        // then the low word is ANDed with $mask.
        // NOTE(review): zeroing the high word is only a valid sign extension if
        // the masked value is non-negative; presumably the immU31 operand
        // guarantees a mask with bit 31 clear -- confirm against its definition.
 5614   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5615   effect(KILL cr);
 5616
 5617   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5618             "XOR    $dst.hi,$dst.hi\n\t"
 5619             "AND    $dst.lo,$mask" %}
 5620   ins_encode %{
 5621     Register Rdst = $dst$$Register;
 5622     __ movl(Rdst, $mem$$Address);
 5623     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5624     __ andl(Rdst, $mask$$constant);
 5625   %}
 5626   ins_pipe(ialu_reg_mem);
 5627 %}
 5628 
 5629 // Load Unsigned Integer into Long Register
 5630 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
        // AndL(ConvI2L(LoadI), mask) with an immL_32bits mask: treated as an
        // unsigned int -> long conversion, i.e. a 32-bit load into the low word
        // with the high word cleared by XOR.  No AND is emitted.
 5631   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5632   effect(KILL cr);
 5633
 5634   ins_cost(250);
 5635   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5636             "XOR    $dst.hi,$dst.hi" %}
 5637
 5638   ins_encode %{
 5639     __ movl($dst$$Register, $mem$$Address);
 5640     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5641   %}
 5642
 5643   ins_pipe(ialu_reg_mem);
 5644 %}
 5645 
 5646 // Load Long.  Cannot clobber address while loading, so restrict address
 5647 // register to ESI
 5648 instruct loadL(eRegL dst, load_long_memory mem) %{
        // LoadL without an atomicity requirement (see predicate): two 32-bit
        // MOVs, low word from $mem and high word from $mem+4.  Both addresses
        // are rebuilt from the operand's base/index/scale/disp.  The first MOV
        // writes dst.lo before the second one reads the address again, hence
        // the restricted load_long_memory operand class.
 5649   predicate(!((LoadLNode*)n)->require_atomic_access());
 5650   match(Set dst (LoadL mem));
 5651
 5652   ins_cost(250);
 5653   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5654             "MOV    $dst.hi,$mem+4" %}
 5655
 5656   ins_encode %{
 5657     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5658     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5659     __ movl($dst$$Register, Amemlo);
 5660     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5661   %}
 5662
 5663   ins_pipe(ialu_reg_long_mem);
 5664 %}
 5665 
 5666 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5667 // then store it down to the stack and reload on the int
 5668 // side.
 5669 instruct loadL_volatile(stackSlotL dst, memory mem) %{
        // Atomic LoadL for UseSSE<=1: the result is produced into a stack slot.
        // Per the format string this is a 64-bit FILD followed by FISTP; the
        // actual bytes come from the enc_loadL_volatile encoding class, which
        // is not part of this section.
 5670   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5671   match(Set dst (LoadL mem));
 5672
 5673   ins_cost(200);
 5674   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5675             "FISTp  $dst" %}
 5676   ins_encode(enc_loadL_volatile(mem,dst));
 5677   ins_pipe( fpu_reg_mem );
 5678 %}
 5679 
 5680 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
        // Atomic LoadL for UseSSE>=2 with a stack-slot result: one 64-bit load
        // into the XMM temp, then one 64-bit store to the slot at rsp + $dst$$disp.
 5681   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5682   match(Set dst (LoadL mem));
 5683   effect(TEMP tmp);
 5684   ins_cost(180);
 5685   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5686             "MOVSD  $dst,$tmp" %}
 5687   ins_encode %{
 5688     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5689     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5690   %}
 5691   ins_pipe( pipe_slow );
 5692 %}
 5693 
 5694 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
        // Atomic LoadL for UseSSE>=2 with a register-pair result: one 64-bit
        // load into the XMM temp, MOVD of its low 32 bits into dst.lo, a 32-bit
        // logical right shift of the temp, then MOVD into dst.hi.  Cheaper
        // (ins_cost 160) than the stack-slot variant (180).
 5695   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5696   match(Set dst (LoadL mem));
 5697   effect(TEMP tmp);
 5698   ins_cost(160);
 5699   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5700             "MOVD   $dst.lo,$tmp\n\t"
 5701             "PSRLQ  $tmp,32\n\t"
 5702             "MOVD   $dst.hi,$tmp" %}
 5703   ins_encode %{
 5704     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5705     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5706     __ psrlq($tmp$$XMMRegister, 32);
 5707     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5708   %}
 5709   ins_pipe( pipe_slow );
 5710 %}
 5711 
 5712 // Load Range
 5713 instruct loadRange(rRegI dst, memory mem) %{
        // LoadRange as a MOV reg,mem: primary opcode 0x8B followed by the
        // RegMem operand bytes, emitted between SetInstMark and ClearInstMark.
 5714   match(Set dst (LoadRange mem));
 5715
 5716   ins_cost(125);
 5717   format %{ "MOV    $dst,$mem" %}
 5718   opcode(0x8B);
 5719   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5720   ins_pipe( ialu_reg_mem );
 5721 %}
 5722 
 5723 
 5724 // Load Pointer
 5725 instruct loadP(eRegP dst, memory mem) %{
        // LoadP as a MOV reg,mem: primary opcode 0x8B followed by the RegMem
        // operand bytes, emitted between SetInstMark and ClearInstMark.
 5726   match(Set dst (LoadP mem));
 5727
 5728   ins_cost(125);
 5729   format %{ "MOV    $dst,$mem" %}
 5730   opcode(0x8B);
 5731   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5732   ins_pipe( ialu_reg_mem );
 5733 %}
 5734 
 5735 // Load Klass Pointer
 5736 instruct loadKlass(eRegP dst, memory mem) %{
        // LoadKlass as a MOV reg,mem: same encoding as loadP (opcode 0x8B plus
        // RegMem), but matching the LoadKlass ideal node.
 5737   match(Set dst (LoadKlass mem));
 5738
 5739   ins_cost(125);
 5740   format %{ "MOV    $dst,$mem" %}
 5741   opcode(0x8B);
 5742   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5743   ins_pipe( ialu_reg_mem );
 5744 %}
 5745 
 5746 // Load Double
 5747 instruct loadDPR(regDPR dst, memory mem) %{
        // LoadD for UseSSE<=1: x87 FLD of a 64-bit operand (opcode DD with
        // /0 in the ModRM reg field), then Pop_Reg_DPR stores/pops it into $dst.
 5748   predicate(UseSSE<=1);
 5749   match(Set dst (LoadD mem));
 5750
 5751   ins_cost(150);
 5752   format %{ "FLD_D  ST,$mem\n\t"
 5753             "FSTP   $dst" %}
 5754   opcode(0xDD);               /* DD /0 */
 5755   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
 5756               Pop_Reg_DPR(dst), ClearInstMark );
 5757   ins_pipe( fpu_reg_mem );
 5758 %}
 5759 
 5760 // Load Double to XMM
 5761 instruct loadD(regD dst, memory mem) %{
        // LoadD into an XMM register, selected when UseSSE>=2 and
        // UseXmmLoadAndClearUpper is set; emitted through movdbl.
 5762   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5763   match(Set dst (LoadD mem));
 5764   ins_cost(145);
 5765   format %{ "MOVSD  $dst,$mem" %}
 5766   ins_encode %{
 5767     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5768   %}
 5769   ins_pipe( pipe_slow );
 5770 %}
 5771 
 5772 instruct loadD_partial(regD dst, memory mem) %{
        // LoadD into an XMM register, selected when UseSSE>=2 and
        // UseXmmLoadAndClearUpper is NOT set.
        // NOTE(review): the encoding calls the same movdbl helper as loadD;
        // presumably that helper picks MOVLPD vs MOVSD from the same flag,
        // which is what the differing format strings reflect -- confirm.
 5773   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5774   match(Set dst (LoadD mem));
 5775   ins_cost(145);
 5776   format %{ "MOVLPD $dst,$mem" %}
 5777   ins_encode %{
 5778     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5779   %}
 5780   ins_pipe( pipe_slow );
 5781 %}
 5782 
 5783 // Load to XMM register (single-precision floating point)
 5784 // MOVSS instruction
 5785 instruct loadF(regF dst, memory mem) %{
        // LoadF into an XMM register, selected when UseSSE>=1; emitted through
        // movflt.
 5786   predicate(UseSSE>=1);
 5787   match(Set dst (LoadF mem));
 5788   ins_cost(145);
 5789   format %{ "MOVSS  $dst,$mem" %}
 5790   ins_encode %{
 5791     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5792   %}
 5793   ins_pipe( pipe_slow );
 5794 %}
 5795 
 5796 // Load Float
 5797 instruct loadFPR(regFPR dst, memory mem) %{
        // LoadF for UseSSE==0: x87 FLD of a 32-bit operand (opcode D9 with
        // /0 in the ModRM reg field), then Pop_Reg_FPR stores/pops it into $dst.
 5798   predicate(UseSSE==0);
 5799   match(Set dst (LoadF mem));
 5800
 5801   ins_cost(150);
 5802   format %{ "FLD_S  ST,$mem\n\t"
 5803             "FSTP   $dst" %}
 5804   opcode(0xD9);               /* D9 /0 */
 5805   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
 5806               Pop_Reg_FPR(dst), ClearInstMark );
 5807   ins_pipe( fpu_reg_mem );
 5808 %}
 5809 
 5810 // Load Effective Address
 5811 instruct leaP8(eRegP dst, indOffset8 mem) %{
        // Materializes the address described by an indOffset8 operand into a
        // pointer register with LEA (opcode 0x8D plus RegMem operand bytes).
 5812   match(Set dst mem);
 5813
 5814   ins_cost(110);
 5815   format %{ "LEA    $dst,$mem" %}
 5816   opcode(0x8D);
 5817   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5818   ins_pipe( ialu_reg_reg_fat );
 5819 %}
 5820 
 5821 instruct leaP32(eRegP dst, indOffset32 mem) %{
        // Materializes the address described by an indOffset32 operand into a
        // pointer register with LEA (opcode 0x8D plus RegMem operand bytes).
 5822   match(Set dst mem);
 5823
 5824   ins_cost(110);
 5825   format %{ "LEA    $dst,$mem" %}
 5826   opcode(0x8D);
 5827   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5828   ins_pipe( ialu_reg_reg_fat );
 5829 %}
 5830 
 5831 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
        // Materializes the address described by an indIndexOffset operand into
        // a pointer register with LEA (opcode 0x8D plus RegMem operand bytes).
 5832   match(Set dst mem);
 5833
 5834   ins_cost(110);
 5835   format %{ "LEA    $dst,$mem" %}
 5836   opcode(0x8D);
 5837   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5838   ins_pipe( ialu_reg_reg_fat );
 5839 %}
 5840 
 5841 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
        // Materializes the address described by an indIndexScale operand into
        // a pointer register with LEA (opcode 0x8D plus RegMem operand bytes).
 5842   match(Set dst mem);
 5843
 5844   ins_cost(110);
 5845   format %{ "LEA    $dst,$mem" %}
 5846   opcode(0x8D);
 5847   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5848   ins_pipe( ialu_reg_reg_fat );
 5849 %}
 5850 
 5851 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
        // Materializes the address described by an indIndexScaleOffset operand
        // into a pointer register with LEA (opcode 0x8D plus RegMem operand bytes).
 5852   match(Set dst mem);
 5853
 5854   ins_cost(110);
 5855   format %{ "LEA    $dst,$mem" %}
 5856   opcode(0x8D);
 5857   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5858   ins_pipe( ialu_reg_reg_fat );
 5859 %}
 5860 
 5861 // Load Constant
 5862 instruct loadConI(rRegI dst, immI src) %{
        // Loads an arbitrary int constant into $dst; the bytes are produced by
        // the LdImmI encoding class.  No flags effect is declared.
 5863   match(Set dst src);
 5864
 5865   format %{ "MOV    $dst,$src" %}
 5866   ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark );
 5867   ins_pipe( ialu_reg_fat );
 5868 %}
 5869 
 5870 // Load Constant zero
 5871 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
        // Loads the int constant 0 as XOR $dst,$dst (opcode 0x33 with dst as
        // both RegReg operands).  Unlike loadConI this declares cr KILLed, and
        // it carries an explicit ins_cost of 50.
 5872   match(Set dst src);
 5873   effect(KILL cr);
 5874
 5875   ins_cost(50);
 5876   format %{ "XOR    $dst,$dst" %}
 5877   opcode(0x33);  /* + rd */
 5878   ins_encode( OpcP, RegReg( dst, dst ) );
 5879   ins_pipe( ialu_reg );
 5880 %}
 5881 
 5882 instruct loadConP(eRegP dst, immP src) %{
        // Loads a pointer constant into $dst; the bytes are produced by the
        // LdImmP encoding class, bracketed by SetInstMark/ClearInstMark.
 5883   match(Set dst src);
 5884
 5885   format %{ "MOV    $dst,$src" %}
 5886   opcode(0xB8);  /* + rd */
 5887   ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark );
 5888   ins_pipe( ialu_reg_fat );
 5889 %}
 5890 
 5891 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
        // Loads a long constant into a register pair: LdImmL_Lo fills the low
        // word and LdImmL_Hi the high word.  cr is declared KILLed.
 5892   match(Set dst src);
 5893   effect(KILL cr);
 5894   ins_cost(200);
 5895   format %{ "MOV    $dst.lo,$src.lo\n\t"
 5896             "MOV    $dst.hi,$src.hi" %}
 5897   opcode(0xB8);
 5898   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 5899   ins_pipe( ialu_reg_long_fat );
 5900 %}
 5901 
 5902 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
        // Loads the long constant 0 by XORing each half of the register pair
        // with itself (opcode 0x33 for both halves).  Lower ins_cost (150) than
        // the general loadConL (200).
 5903   match(Set dst src);
 5904   effect(KILL cr);
 5905   ins_cost(150);
 5906   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 5907             "XOR    $dst.hi,$dst.hi" %}
 5908   opcode(0x33,0x33);
 5909   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 5910   ins_pipe( ialu_reg_long );
 5911 %}
 5912 
 5913 // The instruction usage is guarded by predicate in operand immFPR().
 5914 instruct loadConFPR(regFPR dst, immFPR con) %{
        // Loads a float constant onto the x87 stack from the constant table
        // (fld_s of $constantaddress($con)), then stores/pops it into $dst.
 5915   match(Set dst con);
 5916   ins_cost(125);
 5917   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 5918             "FSTP   $dst" %}
 5919   ins_encode %{
 5920     __ fld_s($constantaddress($con));
 5921     __ fstp_d($dst$$reg);
 5922   %}
 5923   ins_pipe(fpu_reg_con);
 5924 %}
 5925 
 5926 // The instruction usage is guarded by predicate in operand immFPR0().
 5927 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
        // Float constant matched by immFPR0: pushed with FLDZ instead of a
        // constant-table load, then stored/popped into $dst.
 5928   match(Set dst con);
 5929   ins_cost(125);
 5930   format %{ "FLDZ   ST\n\t"
 5931             "FSTP   $dst" %}
 5932   ins_encode %{
 5933     __ fldz();
 5934     __ fstp_d($dst$$reg);
 5935   %}
 5936   ins_pipe(fpu_reg_con);
 5937 %}
 5938 
 5939 // The instruction usage is guarded by predicate in operand immFPR1().
 5940 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
        // Float constant matched by immFPR1: pushed with FLD1 instead of a
        // constant-table load, then stored/popped into $dst.
 5941   match(Set dst con);
 5942   ins_cost(125);
 5943   format %{ "FLD1   ST\n\t"
 5944             "FSTP   $dst" %}
 5945   ins_encode %{
 5946     __ fld1();
 5947     __ fstp_d($dst$$reg);
 5948   %}
 5949   ins_pipe(fpu_reg_con);
 5950 %}
 5951 
 5952 // The instruction usage is guarded by predicate in operand immF().
 5953 instruct loadConF(regF dst, immF con) %{
        // Loads a float constant into an XMM register from the constant table
        // via movflt.
 5954   match(Set dst con);
 5955   ins_cost(125);
 5956   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 5957   ins_encode %{
 5958     __ movflt($dst$$XMMRegister, $constantaddress($con));
 5959   %}
 5960   ins_pipe(pipe_slow);
 5961 %}
 5962 
 5963 // The instruction usage is guarded by predicate in operand immF0().
 5964 instruct loadConF0(regF dst, immF0 src) %{
        // Float 0.0 in an XMM register: XORPS of $dst with itself, no memory
        // access.  Lower ins_cost (100) than the constant-table loadConF (125).
 5965   match(Set dst src);
 5966   ins_cost(100);
 5967   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 5968   ins_encode %{
 5969     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 5970   %}
 5971   ins_pipe(pipe_slow);
 5972 %}
 5973 
 5974 // The instruction usage is guarded by predicate in operand immDPR().
 5975 instruct loadConDPR(regDPR dst, immDPR con) %{
        // Loads a double constant onto the x87 stack from the constant table
        // (fld_d of $constantaddress($con)), then stores/pops it into $dst.
 5976   match(Set dst con);
 5977   ins_cost(125);
 5978
 5979   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 5980             "FSTP   $dst" %}
 5981   ins_encode %{
 5982     __ fld_d($constantaddress($con));
 5983     __ fstp_d($dst$$reg);
 5984   %}
 5985   ins_pipe(fpu_reg_con);
 5986 %}
 5987 
 5988 // The instruction usage is guarded by predicate in operand immDPR0().
 5989 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
        // Double constant matched by immDPR0: pushed with FLDZ instead of a
        // constant-table load, then stored/popped into $dst.
 5990   match(Set dst con);
 5991   ins_cost(125);
 5992
 5993   format %{ "FLDZ   ST\n\t"
 5994             "FSTP   $dst" %}
 5995   ins_encode %{
 5996     __ fldz();
 5997     __ fstp_d($dst$$reg);
 5998   %}
 5999   ins_pipe(fpu_reg_con);
 6000 %}
 6001 
 6002 // The instruction usage is guarded by predicate in operand immDPR1().
 6003 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
        // Double constant matched by immDPR1: pushed with FLD1 instead of a
        // constant-table load, then stored/popped into $dst.
 6004   match(Set dst con);
 6005   ins_cost(125);
 6006
 6007   format %{ "FLD1   ST\n\t"
 6008             "FSTP   $dst" %}
 6009   ins_encode %{
 6010     __ fld1();
 6011     __ fstp_d($dst$$reg);
 6012   %}
 6013   ins_pipe(fpu_reg_con);
 6014 %}
 6015 
 6016 // The instruction usage is guarded by predicate in operand immD().
 6017 instruct loadConD(regD dst, immD con) %{
        // Loads a double constant into an XMM register from the constant table
        // via movdbl.
 6018   match(Set dst con);
 6019   ins_cost(125);
 6020   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6021   ins_encode %{
 6022     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6023   %}
 6024   ins_pipe(pipe_slow);
 6025 %}
 6026 
 6027 // The instruction usage is guarded by predicate in operand immD0().
      // Materializes the double constant matched by immD0 by XOR-ing the XMM
      // destination with itself; nothing is read from the constant table.
 6028 instruct loadConD0(regD dst, immD0 src) %{
 6029   match(Set dst src);
 6030   ins_cost(100);
 6031   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6032   ins_encode %{
 6033     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6034   %}
 6035   ins_pipe( pipe_slow );
 6036 %}
 6037 
 6038 // Load Stack Slot
      // Copies an int from a stack slot into a general register using opcode
      // 0x8B (printed as MOV); RegMem presumably emits the operand bytes.
 6039 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6040   match(Set dst src);
 6041   ins_cost(125);
 6042
 6043   format %{ "MOV    $dst,$src" %}
 6044   opcode(0x8B);
 6045   ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
 6046   ins_pipe( ialu_reg_mem );
 6047 %}
 6048 
 6049 instruct loadSSL(eRegL dst, stackSlotL src) %{
        // Loads a long from a stack slot with two 0x8B moves. Per the format
        // string the first handles $src.lo and the second (RegMem_Hi) handles
        // $src.hi, targeting $dst and $dst+4 respectively.
 6050   match(Set dst src);
 6051
 6052   ins_cost(200);
 6053   format %{ "MOV    $dst,$src.lo\n\t"
 6054             "MOV    $dst+4,$src.hi" %}
 6055   opcode(0x8B, 0x8B);
 6056   ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark );
 6057   ins_pipe( ialu_mem_long_reg );
 6058 %}
 6059 
 6060 // Load Stack Slot
      // Pointer variant: copies a pointer from a stack slot into a pointer
      // register with the same 0x8B encoding used for ints.
 6061 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6062   match(Set dst src);
 6063   ins_cost(125);
 6064
 6065   format %{ "MOV    $dst,$src" %}
 6066   opcode(0x8B);
 6067   ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
 6068   ins_pipe( ialu_reg_mem );
 6069 %}
 6070 
 6071 // Load Stack Slot
      // Float variant for FPU registers: FLD_S (D9 /0) pushes the 32-bit value
      // from the stack slot, then Pop_Reg_FPR moves it into $dst (printed as FSTP).
 6072 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6073   match(Set dst src);
 6074   ins_cost(125);
 6075
 6076   format %{ "FLD_S  $src\n\t"
 6077             "FSTP   $dst" %}
 6078   opcode(0xD9);               /* D9 /0, FLD m32real */
 6079   ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
 6080               Pop_Reg_FPR(dst), ClearInstMark );
 6081   ins_pipe( fpu_reg_mem );
 6082 %}
 6083 
 6084 // Load Stack Slot
      // Double variant for FPU registers: FLD_D (DD /0) pushes the 64-bit value
      // from the stack slot, then Pop_Reg_DPR moves it into $dst (printed as FSTP).
 6085 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6086   match(Set dst src);
 6087   ins_cost(125);
 6088
 6089   format %{ "FLD_D  $src\n\t"
 6090             "FSTP   $dst" %}
 6091   opcode(0xDD);               /* DD /0, FLD m64real */
 6092   ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
 6093               Pop_Reg_DPR(dst), ClearInstMark );
 6094   ins_pipe( fpu_reg_mem );
 6095 %}
 6096 
 6097 // Prefetch instructions for allocation.
 6098 // Must be safe to execute with invalid address (cannot fault).
 6099 
 6100 instruct prefetchAlloc0( memory mem ) %{
        // Selected when UseSSE is 0 and AllocatePrefetchInstr is not 3: the
        // PrefetchAllocation node is matched but nothing is emitted (size 0,
        // empty encoding, zero cost).
 6101   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6102   match(PrefetchAllocation mem);
 6103   ins_cost(0);
 6104   size(0);
 6105   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6106   ins_encode();
 6107   ins_pipe(empty);
 6108 %}
 6109 
 6110 instruct prefetchAlloc( memory mem ) %{
        // Selected when AllocatePrefetchInstr is 3, independent of UseSSE:
        // emits PREFETCHW on the allocation address.
 6111   predicate(AllocatePrefetchInstr==3);
 6112   match( PrefetchAllocation mem );
 6113   ins_cost(100);
 6114
 6115   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6116   ins_encode %{
 6117     __ prefetchw($mem$$Address);
 6118   %}
 6119   ins_pipe(ialu_mem);
 6120 %}
 6121 
 6122 instruct prefetchAllocNTA( memory mem ) %{
        // Selected when UseSSE >= 1 and AllocatePrefetchInstr is 0:
        // emits PREFETCHNTA on the allocation address.
 6123   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6124   match(PrefetchAllocation mem);
 6125   ins_cost(100);
 6126
 6127   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6128   ins_encode %{
 6129     __ prefetchnta($mem$$Address);
 6130   %}
 6131   ins_pipe(ialu_mem);
 6132 %}
 6133 
 6134 instruct prefetchAllocT0( memory mem ) %{
        // Selected when UseSSE >= 1 and AllocatePrefetchInstr is 1:
        // emits PREFETCHT0 on the allocation address.
 6135   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6136   match(PrefetchAllocation mem);
 6137   ins_cost(100);
 6138
 6139   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6140   ins_encode %{
 6141     __ prefetcht0($mem$$Address);
 6142   %}
 6143   ins_pipe(ialu_mem);
 6144 %}
 6145 
 6146 instruct prefetchAllocT2( memory mem ) %{
        // Selected when UseSSE >= 1 and AllocatePrefetchInstr is 2:
        // emits PREFETCHT2 on the allocation address.
 6147   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6148   match(PrefetchAllocation mem);
 6149   ins_cost(100);
 6150
 6151   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6152   ins_encode %{
 6153     __ prefetcht2($mem$$Address);
 6154   %}
 6155   ins_pipe(ialu_mem);
 6156 %}
 6157 
 6158 //----------Store Instructions-------------------------------------------------
 6159 
 6160 // Store Byte
      // Stores a byte from a register to memory with opcode 0x88. The source uses
      // the xRegI class rather than rRegI -- presumably the registers that have a
      // byte-addressable low half; confirm against the xRegI operand definition.
 6161 instruct storeB(memory mem, xRegI src) %{
 6162   match(Set mem (StoreB mem src));
 6163
 6164   ins_cost(125);
 6165   format %{ "MOV8   $mem,$src" %}
 6166   opcode(0x88);
 6167   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
 6168   ins_pipe( ialu_mem_reg );
 6169 %}
 6170 
 6171 // Store Char/Short
      // Stores 16 bits from a register to memory. The encode list names OpcS
      // before OpcP, so the secondary opcode 0x66 (the x86 operand-size prefix)
      // is listed ahead of the primary opcode 0x89.
 6172 instruct storeC(memory mem, rRegI src) %{
 6173   match(Set mem (StoreC mem src));
 6174
 6175   ins_cost(125);
 6176   format %{ "MOV16  $mem,$src" %}
 6177   opcode(0x89, 0x66);
 6178   ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark );
 6179   ins_pipe( ialu_mem_reg );
 6180 %}
 6181 
 6182 // Store Integer
      // Stores a 32-bit int from a register to memory with opcode 0x89.
 6183 instruct storeI(memory mem, rRegI src) %{
 6184   match(Set mem (StoreI mem src));
 6185
 6186   ins_cost(125);
 6187   format %{ "MOV    $mem,$src" %}
 6188   opcode(0x89);
 6189   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
 6190   ins_pipe( ialu_mem_reg );
 6191 %}
 6192 
 6193 // Store Long
      // Non-atomic long store, selected only when the StoreL node does not require
      // atomic access. Emitted as two 0x89 moves; per the format string they
      // write $src.lo to $mem and $src.hi to $mem+4.
 6194 instruct storeL(long_memory mem, eRegL src) %{
 6195   predicate(!((StoreLNode*)n)->require_atomic_access());
 6196   match(Set mem (StoreL mem src));
 6197
 6198   ins_cost(200);
 6199   format %{ "MOV    $mem,$src.lo\n\t"
 6200             "MOV    $mem+4,$src.hi" %}
 6201   opcode(0x89, 0x89);
 6202   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark );
 6203   ins_pipe( ialu_mem_long_reg );
 6204 %}
 6205 
 6206 // Store Long to Integer
      // Folds a ConvL2I into the store: a single 32-bit move of $src$$Register
      // (printed as $src.lo) replaces a separate narrowing step. No ins_cost is
      // given, so the default cost applies.
 6207 instruct storeL2I(memory mem, eRegL src) %{
 6208   match(Set mem (StoreI mem (ConvL2I src)));
 6209
 6210   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6211   ins_encode %{
 6212     __ movl($mem$$Address, $src$$Register);
 6213   %}
 6214   ins_pipe(ialu_mem_reg);
 6215 %}
 6216 
 6217 // Volatile Store Long.  Must be atomic, so move it into
 6218 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6219 // target address before the store (for null-ptr checks)
 6220 // so the memory operand is used twice in the encoding.
      // Selected when UseSSE <= 1 and the StoreL node requires atomic access. The
      // probe is the 0x3B compare of EAX against $mem, which is why the flags
      // register is declared killed; the source long comes from a stack slot.
 6221 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6222   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6223   match(Set mem (StoreL mem src));
 6224   effect( KILL cr );
 6225   ins_cost(400);
 6226   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6227             "FILD   $src\n\t"
 6228             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6229   opcode(0x3B);
 6230   ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark);
 6231   ins_pipe( fpu_reg_mem );
 6232 %}
 6233 
 6234 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
        // Atomic long store for UseSSE >= 2 with the source in a stack slot:
        // the 64-bit value is loaded into the XMM temp and written to $mem with
        // one MOVSD. The leading compare only probes the address (see the
        // format string) and is the reason the flags register is killed.
 6235   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6236   match(Set mem (StoreL mem src));
 6237   effect( TEMP tmp, KILL cr );
 6238   ins_cost(380);
 6239   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6240             "MOVSD  $tmp,$src\n\t"
 6241             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6242   ins_encode %{
 6243     __ cmpl(rax, $mem$$Address);
 6244     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6245     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6246   %}
 6247   ins_pipe( pipe_slow );
 6248 %}
 6249 
 6250 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
        // Atomic long store for UseSSE >= 2 with the source in a register pair.
        // The two 32-bit halves are moved into separate XMM temps, interleaved
        // with PUNPCKLDQ into one 64-bit value, and stored with one MOVSD.
        // Cost 360 makes this cheaper than the stack-slot form (380).
 6251   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6252   match(Set mem (StoreL mem src));
 6253   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6254   ins_cost(360);
 6255   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6256             "MOVD   $tmp,$src.lo\n\t"
 6257             "MOVD   $tmp2,$src.hi\n\t"
 6258             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6259             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6260   ins_encode %{
 6261     __ cmpl(rax, $mem$$Address);
 6262     __ movdl($tmp$$XMMRegister, $src$$Register);
 6263     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6264     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6265     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6266   %}
 6267   ins_pipe( pipe_slow );
 6268 %}
 6269 
 6270 // Store Pointer; for storing unknown oops and raw pointers
      // Register-to-memory pointer store with opcode 0x89; the source may be any
      // register in the anyRegP class.
 6271 instruct storeP(memory mem, anyRegP src) %{
 6272   match(Set mem (StoreP mem src));
 6273
 6274   ins_cost(125);
 6275   format %{ "MOV    $mem,$src" %}
 6276   opcode(0x89);
 6277   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
 6278   ins_pipe( ialu_mem_reg );
 6279 %}
 6280 
 6281 // Store Integer Immediate
      // Stores a 32-bit immediate directly to memory (C7 /0 followed by the
      // constant via Con32), avoiding a register for the value.
 6282 instruct storeImmI(memory mem, immI src) %{
 6283   match(Set mem (StoreI mem src));
 6284
 6285   ins_cost(150);
 6286   format %{ "MOV    $mem,$src" %}
 6287   opcode(0xC7);               /* C7 /0 */
 6288   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark);
 6289   ins_pipe( ialu_mem_imm );
 6290 %}
 6291 
 6292 // Store Short/Char Immediate
      // Only used when UseStoreImmI16 is set. Same C7 /0 form as the 32-bit
      // immediate store, but with SizePrefix first and a 16-bit constant (Con16).
 6293 instruct storeImmI16(memory mem, immI16 src) %{
 6294   predicate(UseStoreImmI16);
 6295   match(Set mem (StoreC mem src));
 6296
 6297   ins_cost(150);
 6298   format %{ "MOV16  $mem,$src" %}
 6299   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6300   ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark);
 6301   ins_pipe( ialu_mem_imm );
 6302 %}
 6303 
 6304 // Store Pointer Immediate; null pointers or constant oops that do not
 6305 // need card-mark barriers.
      // Encoded like the int immediate store: C7 /0 followed by a 32-bit constant.
 6306 instruct storeImmP(memory mem, immP src) %{
 6307   match(Set mem (StoreP mem src));
 6308
 6309   ins_cost(150);
 6310   format %{ "MOV    $mem,$src" %}
 6311   opcode(0xC7);               /* C7 /0 */
 6312   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark);
 6313   ins_pipe( ialu_mem_imm );
 6314 %}
 6315 
 6316 // Store Byte Immediate
      // Stores an 8-bit immediate to memory (C6 /0). The constant is emitted
      // through Con8or32; with an immI8 operand that is presumably always the
      // one-byte form -- confirm against the Con8or32 enc_class.
 6317 instruct storeImmB(memory mem, immI8 src) %{
 6318   match(Set mem (StoreB mem src));
 6319
 6320   ins_cost(150);
 6321   format %{ "MOV8   $mem,$src" %}
 6322   opcode(0xC6);               /* C6 /0 */
 6323   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
 6324   ins_pipe( ialu_mem_imm );
 6325 %}
 6326 
 6327 // Store Double
      // FPU-register path, used when UseSSE <= 1: stores a double from a
      // regDPR1 operand to memory with FST_D (DD /2) via enc_FPR_store.
 6328 instruct storeDPR( memory mem, regDPR1 src) %{
 6329   predicate(UseSSE<=1);
 6330   match(Set mem (StoreD mem src));
 6331
 6332   ins_cost(100);
 6333   format %{ "FST_D  $mem,$src" %}
 6334   opcode(0xDD);       /* DD /2 */
 6335   ins_encode( enc_FPR_store(mem,src) );
 6336   ins_pipe( fpu_mem_reg );
 6337 %}
 6338 
 6339 // Store double does rounding on x86
      // Matches a StoreD whose value is a RoundDouble node and emits the same
      // FST_D encoding as the plain double store, so the RoundDouble needs no
      // instruction of its own.
 6340 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6341   predicate(UseSSE<=1);
 6342   match(Set mem (StoreD mem (RoundDouble src)));
 6343
 6344   ins_cost(100);
 6345   format %{ "FST_D  $mem,$src\t# round" %}
 6346   opcode(0xDD);       /* DD /2 */
 6347   ins_encode( enc_FPR_store(mem,src) );
 6348   ins_pipe( fpu_mem_reg );
 6349 %}
 6350 
 6351 // Store XMM register to memory (double-precision floating points)
 6352 // MOVSD instruction
      // Used when UseSSE >= 2; cost 95 is lower than the FPU store's 100.
 6353 instruct storeD(memory mem, regD src) %{
 6354   predicate(UseSSE>=2);
 6355   match(Set mem (StoreD mem src));
 6356   ins_cost(95);
 6357   format %{ "MOVSD  $mem,$src" %}
 6358   ins_encode %{
 6359     __ movdbl($mem$$Address, $src$$XMMRegister);
 6360   %}
 6361   ins_pipe( pipe_slow );
 6362 %}
 6363 
 6364 // Store XMM register to memory (single-precision floating point)
 6365 // MOVSS instruction
      // Used when UseSSE >= 1; cost 95 is lower than the FPU store's 100.
 6366 instruct storeF(memory mem, regF src) %{
 6367   predicate(UseSSE>=1);
 6368   match(Set mem (StoreF mem src));
 6369   ins_cost(95);
 6370   format %{ "MOVSS  $mem,$src" %}
 6371   ins_encode %{
 6372     __ movflt($mem$$Address, $src$$XMMRegister);
 6373   %}
 6374   ins_pipe( pipe_slow );
 6375 %}
 6376 
 6377 
 6378 // Store Float
      // FPU-register path, used only when UseSSE == 0: stores a float from a
      // regFPR1 operand to memory with FST_S (D9 /2) via enc_FPR_store.
 6379 instruct storeFPR( memory mem, regFPR1 src) %{
 6380   predicate(UseSSE==0);
 6381   match(Set mem (StoreF mem src));
 6382
 6383   ins_cost(100);
 6384   format %{ "FST_S  $mem,$src" %}
 6385   opcode(0xD9);       /* D9 /2 */
 6386   ins_encode( enc_FPR_store(mem,src) );
 6387   ins_pipe( fpu_mem_reg );
 6388 %}
 6389 
 6390 // Store Float does rounding on x86
      // Matches a StoreF whose value is a RoundFloat node and emits the same
      // FST_S encoding as the plain float store, so the RoundFloat needs no
      // instruction of its own.
 6391 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6392   predicate(UseSSE==0);
 6393   match(Set mem (StoreF mem (RoundFloat src)));
 6394
 6395   ins_cost(100);
 6396   format %{ "FST_S  $mem,$src\t# round" %}
 6397   opcode(0xD9);       /* D9 /2 */
 6398   ins_encode( enc_FPR_store(mem,src) );
 6399   ins_pipe( fpu_mem_reg );
 6400 %}
 6401 
 6402 // Store Float from a Double source; the float store does the rounding on x86
      // Matches a StoreF whose value is a ConvD2F of a double FPU register
      // (regDPR1) and emits a single FST_S, so the conversion needs no
      // instruction of its own. Used when UseSSE <= 1.
 6403 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6404   predicate(UseSSE<=1);
 6405   match(Set mem (StoreF mem (ConvD2F src)));
 6406
 6407   ins_cost(100);
 6408   format %{ "FST_S  $mem,$src\t# D-round" %}
 6409   opcode(0xD9);       /* D9 /2 */
 6410   ins_encode( enc_FPR_store(mem,src) );
 6411   ins_pipe( fpu_mem_reg );
 6412 %}
 6413 
 6414 // Store immediate Float value (it is faster than store from FPU register)
 6415 // The instruction usage is guarded by predicate in operand immFPR().
      // Uses the integer immediate store form (C7 /0) with the constant emitted
      // by Con32FPR_as_bits; cost 50 undercuts the register stores.
 6416 instruct storeFPR_imm( memory mem, immFPR src) %{
 6417   match(Set mem (StoreF mem src));
 6418
 6419   ins_cost(50);
 6420   format %{ "MOV    $mem,$src\t# store float" %}
 6421   opcode(0xC7);               /* C7 /0 */
 6422   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits(src), ClearInstMark);
 6423   ins_pipe( ialu_mem_imm );
 6424 %}
 6425 
 6426 // Store immediate Float value (it is faster than store from XMM register)
 6427 // The instruction usage is guarded by predicate in operand immF().
      // Uses the integer immediate store form (C7 /0) with the constant emitted
      // by Con32F_as_bits; cost 50 undercuts the register stores.
 6428 instruct storeF_imm( memory mem, immF src) %{
 6429   match(Set mem (StoreF mem src));
 6430
 6431   ins_cost(50);
 6432   format %{ "MOV    $mem,$src\t# store float" %}
 6433   opcode(0xC7);               /* C7 /0 */
 6434   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits(src), ClearInstMark);
 6435   ins_pipe( ialu_mem_imm );
 6436 %}
 6437 
 6438 // Store Integer to stack slot
      // Register-to-stack-slot move with opcode 0x89, emitted through the
      // OpcPRegSS encoding class.
 6439 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6440   match(Set dst src);
 6441
 6442   ins_cost(100);
 6443   format %{ "MOV    $dst,$src" %}
 6444   opcode(0x89);
 6445   ins_encode( OpcPRegSS( dst, src ) );
 6446   ins_pipe( ialu_mem_reg );
 6447 %}
 6448 
 6449 // Store Pointer to stack slot
      // Register-to-stack-slot move with opcode 0x89, emitted through the
      // OpcPRegSS encoding class; identical to the int form except for the
      // operand types (stackSlotP / eRegP).
 6450 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6451   match(Set dst src);
 6452
 6453   ins_cost(100);
 6454   format %{ "MOV    $dst,$src" %}
 6455   opcode(0x89);
 6456   ins_encode( OpcPRegSS( dst, src ) );
 6457   ins_pipe( ialu_mem_reg );
 6458 %}
 6459 
 6460 // Store Long to stack slot
      // Emitted as two 0x89 moves; per the format string they write $src.lo to
      // $dst and $src.hi to $dst+4.
 6461 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6462   match(Set dst src);
 6463
 6464   ins_cost(200);
 6465   format %{ "MOV    $dst,$src.lo\n\t"
 6466             "MOV    $dst+4,$src.hi" %}
 6467   opcode(0x89, 0x89);
 6468   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
 6469   ins_pipe( ialu_mem_long_reg );
 6470 %}
 6471 
 6472 //----------MemBar Instructions-----------------------------------------------
 6473 // Memory barrier flavors
 6474 
 6475 instruct membar_acquire() %{
        // Matches both MemBarAcquire and LoadFence. Nothing is emitted: the
        // encoding is empty and the declared size is 0.
 6476   match(MemBarAcquire);
 6477   match(LoadFence);
 6478   ins_cost(400);
 6479
 6480   size(0);
 6481   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6482   ins_encode();
 6483   ins_pipe(empty);
 6484 %}
 6485 
 6486 instruct membar_acquire_lock() %{
        // Matches MemBarAcquireLock. Nothing is emitted; per the format string
        // the CMPXCHG issued earlier by FastLock makes a separate barrier
        // unnecessary.
 6487   match(MemBarAcquireLock);
 6488   ins_cost(0);
 6489
 6490   size(0);
 6491   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6492   ins_encode( );
 6493   ins_pipe(empty);
 6494 %}
 6495 
 6496 instruct membar_release() %{
        // Matches both MemBarRelease and StoreFence. Nothing is emitted: the
        // encoding is empty and the declared size is 0.
 6497   match(MemBarRelease);
 6498   match(StoreFence);
 6499   ins_cost(400);
 6500
 6501   size(0);
 6502   format %{ "MEMBAR-release ! (empty encoding)" %}
 6503   ins_encode( );
 6504   ins_pipe(empty);
 6505 %}
 6506 
 6507 instruct membar_release_lock() %{
        // Matches MemBarReleaseLock. Nothing is emitted; per the format string
        // the FastUnlock that follows makes a separate barrier unnecessary.
 6508   match(MemBarReleaseLock);
 6509   ins_cost(0);
 6510
 6511   size(0);
 6512   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6513   ins_encode( );
 6514   ins_pipe(empty);
 6515 %}
 6516 
 6517 instruct membar_volatile(eFlagsReg cr) %{
        // The only barrier in this group that emits code: it requests a
        // StoreLoad barrier from the macro assembler. The format string prints
        // it as a locked ADDL on the stack top, and the flags register is
        // declared killed.
 6518   match(MemBarVolatile);
 6519   effect(KILL cr);
 6520   ins_cost(400);
 6521
 6522   format %{
 6523     $$template
 6524     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6525   %}
 6526   ins_encode %{
 6527     __ membar(Assembler::StoreLoad);
 6528   %}
 6529   ins_pipe(pipe_slow);
 6530 %}
 6531 
 6532 instruct unnecessary_membar_volatile() %{
        // Zero-cost, zero-size alternative for MemBarVolatile, eligible only
        // when Matcher::post_store_load_barrier(n) holds. Its cost of 0
        // undercuts the 400 of the emitting membar_volatile rule.
 6533   match(MemBarVolatile);
 6534   predicate(Matcher::post_store_load_barrier(n));
 6535   ins_cost(0);
 6536
 6537   size(0);
 6538   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6539   ins_encode( );
 6540   ins_pipe(empty);
 6541 %}
 6542 
 6543 instruct membar_storestore() %{
        // Matches both MemBarStoreStore and StoreStoreFence. Nothing is
        // emitted: the encoding is empty and the declared size is 0.
 6544   match(MemBarStoreStore);
 6545   match(StoreStoreFence);
 6546   ins_cost(0);
 6547
 6548   size(0);
 6549   format %{ "MEMBAR-storestore (empty encoding)" %}
 6550   ins_encode( );
 6551   ins_pipe(empty);
 6552 %}
 6553 
 6554 //----------Move Instructions--------------------------------------------------
 6555 instruct castX2P(eAXRegP dst, eAXRegI src) %{
        // Integer-to-pointer cast that emits no code. Both operands use the
        // EAX-specific classes (eAXRegP / eAXRegI) -- presumably so the value
        // is already in the destination register; confirm against the operand
        // definitions.
 6556   match(Set dst (CastX2P src));
 6557   format %{ "# X2P  $dst, $src" %}
 6558   ins_encode( /*empty encoding*/ );
 6559   ins_cost(0);
 6560   ins_pipe(empty);
 6561 %}
 6562 
 6563 instruct castP2X(rRegI dst, eRegP src ) %{
        // Pointer-to-integer cast implemented as a register copy (enc_Copy),
        // printed as a MOV.
 6564   match(Set dst (CastP2X src));
 6565   ins_cost(50);
 6566   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6567   ins_encode( enc_Copy( dst, src) );
 6568   ins_pipe( ialu_reg_reg );
 6569 %}
 6570 
 6571 //----------Conditional Move---------------------------------------------------
 6572 // Integer conditional move for CPUs without CMOV (signed compare flags):
      // emulated with a short branch around a plain register move.
 6573 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6574   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6575   predicate(!VM_Version::supports_cmov() );
 6576   ins_cost(200);
 6577   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6578             "MOV    $dst,$src\n"
 6579       "skip:" %}
 6580   ins_encode %{
 6581     // Jump past the move on the opposite condition (low bit of the code flipped).
 6582     Label done;
 6583     __ jccb((Assembler::Condition)($cop$$cmpcode ^ 1), done);
 6584     __ movl($dst$$Register, $src$$Register);
 6585     __ bind(done);
 6586   %}
 6587   ins_pipe(pipe_cmov_reg);
 6588 %}
 6589 
 6590 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
        // Unsigned-flags twin of the branch-around-move emulation used when
        // the CPU has no CMOV.
 6591   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6592   predicate(!VM_Version::supports_cmov() );
 6593   ins_cost(200);
 6594   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6595             "MOV    $dst,$src\n"
 6596       "skip:" %}
 6597   ins_encode %{
 6598     // Jump past the move on the opposite condition (low bit of the code flipped).
 6599     Label done;
 6600     __ jccb((Assembler::Condition)($cop$$cmpcode ^ 1), done);
 6601     __ movl($dst$$Register, $src$$Register);
 6602     __ bind(done);
 6603   %}
 6604   ins_pipe(pipe_cmov_reg);
 6605 %}
 6606 
 6607 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
        // Register-to-register integer CMOV on signed compare flags, used when
        // the CPU supports CMOV. Opcode base is 0x0F 0x40; enc_cmov presumably
        // merges the condition from $cop into it -- confirm against enc_cmov.
 6608   predicate(VM_Version::supports_cmov() );
 6609   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6610   ins_cost(200);
 6611   format %{ "CMOV$cop $dst,$src" %}
 6612   opcode(0x0F,0x40);
 6613   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6614   ins_pipe( pipe_cmov_reg );
 6615 %}
 6616 
 6617 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
        // Unsigned-flags variant of the register-to-register integer CMOV;
        // same encoding, different condition/flags operand classes.
 6618   predicate(VM_Version::supports_cmov() );
 6619   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6620   ins_cost(200);
 6621   format %{ "CMOV$cop $dst,$src" %}
 6622   opcode(0x0F,0x40);
 6623   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6624   ins_pipe( pipe_cmov_reg );
 6625 %}
 6626 
 6627 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
        // Variant for the cmpOpUCF / eFlagsRegUCF operand classes. It has no
        // encoding of its own and expands to cmovI_regU.
 6628   predicate(VM_Version::supports_cmov() );
 6629   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6630   ins_cost(200);
 6631   expand %{
 6632     cmovI_regU(cop, cr, dst, src);
 6633   %}
 6634 %}
 6635 
 6636 // Conditional move
      // Integer CMOV whose source is a memory operand: the LoadI is folded into
      // the instruction (RegMem), at a higher cost (250) than the register form.
 6637 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6638   predicate(VM_Version::supports_cmov() );
 6639   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6640   ins_cost(250);
 6641   format %{ "CMOV$cop $dst,$src" %}
 6642   opcode(0x0F,0x40);
 6643   ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
 6644   ins_pipe( pipe_cmov_mem );
 6645 %}
 6646 
 6647 // Conditional move
      // Unsigned-flags variant of the integer CMOV with a folded LoadI source.
 6648 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6649   predicate(VM_Version::supports_cmov() );
 6650   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6651   ins_cost(250);
 6652   format %{ "CMOV$cop $dst,$src" %}
 6653   opcode(0x0F,0x40);
 6654   ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
 6655   ins_pipe( pipe_cmov_mem );
 6656 %}
 6657 
 6658 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
        // Variant for the cmpOpUCF / eFlagsRegUCF operand classes with a
        // folded LoadI source. It expands to cmovI_memU.
 6659   predicate(VM_Version::supports_cmov() );
 6660   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6661   ins_cost(250);
 6662   expand %{
 6663     cmovI_memU(cop, cr, dst, src);
 6664   %}
 6665 %}
 6666 
 6667 // Conditional move
      // Pointer register-to-register CMOV on signed compare flags, used when the
      // CPU supports CMOV; same 0x0F 0x40 encoding as the integer form.
 6668 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6669   predicate(VM_Version::supports_cmov() );
 6670   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6671   ins_cost(200);
 6672   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6673   opcode(0x0F,0x40);
 6674   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6675   ins_pipe( pipe_cmov_reg );
 6676 %}
 6677 
 6678 // Conditional move (non-P6 version)
 6679 // Note:  a CMoveP is generated for  stubs and native wrappers
 6680 //        regardless of whether we are on a P6, so we
 6681 //        emulate a cmov here
      // There is no predicate, so this rule is always eligible; its cost of 300
      // keeps the real CMOV (200) preferred where that rule applies. The encoding
      // is a conditional branch (enc_cmov_branch) followed by a 0x8B register
      // move; the 0x2 argument is presumably the number of bytes skipped --
      // confirm against enc_cmov_branch.
 6682 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6683   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6684   ins_cost(300);
 6685   format %{ "Jn$cop   skip\n\t"
 6686           "MOV    $dst,$src\t# pointer\n"
 6687       "skip:" %}
 6688   opcode(0x8b);
 6689   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6690   ins_pipe( pipe_cmov_reg );
 6691 %}
 6692 
 6693 // Conditional move
      // Unsigned-flags variant of the pointer register-to-register CMOV.
 6694 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6695   predicate(VM_Version::supports_cmov() );
 6696   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6697   ins_cost(200);
 6698   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6699   opcode(0x0F,0x40);
 6700   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6701   ins_pipe( pipe_cmov_reg );
 6702 %}
 6703 
 6704 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
        // Variant for the cmpOpUCF / eFlagsRegUCF operand classes. It has no
        // encoding of its own and expands to cmovP_regU.
 6705   predicate(VM_Version::supports_cmov() );
 6706   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6707   ins_cost(200);
 6708   expand %{
 6709     cmovP_regU(cop, cr, dst, src);
 6710   %}
 6711 %}
 6712 
 6713 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6714 // correctly meets the two pointer arguments; one is an incoming
 6715 // register but the other is a memory operand.  ALSO appears to
 6716 // be buggy with implicit null checks.
 6717 //
 6718 //// Conditional move
 6719 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6720 //  predicate(VM_Version::supports_cmov() );
 6721 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6722 //  ins_cost(250);
 6723 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6724 //  opcode(0x0F,0x40);
 6725 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6726 //  ins_pipe( pipe_cmov_mem );
 6727 //%}
 6728 //
 6729 //// Conditional move
 6730 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6731 //  predicate(VM_Version::supports_cmov() );
 6732 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6733 //  ins_cost(250);
 6734 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6735 //  opcode(0x0F,0x40);
 6736 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6737 //  ins_pipe( pipe_cmov_mem );
 6738 //%}
 6739 
 6740 // Conditional move
      // Double conditional move between FPU registers on unsigned flags, used
      // when UseSSE <= 1. The destination is the regDPR1 class and the encoding
      // (opcode 0xDA via enc_cmov_dpr) prints as FCMOVcc.
 6741 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6742   predicate(UseSSE<=1);
 6743   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6744   ins_cost(200);
 6745   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6746   opcode(0xDA);
 6747   ins_encode( enc_cmov_dpr(cop,src) );
 6748   ins_pipe( pipe_cmovDPR_reg );
 6749 %}
 6750 
 6751 // Conditional move
      // Float conditional move between FPU registers on unsigned flags, used only
      // when UseSSE == 0. It shares the 0xDA / enc_cmov_dpr encoding and the
      // pipeline class with the double form.
 6752 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6753   predicate(UseSSE==0);
 6754   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6755   ins_cost(200);
 6756   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6757   opcode(0xDA);
 6758   ins_encode( enc_cmov_dpr(cop,src) );
 6759   ins_pipe( pipe_cmovDPR_reg );
 6760 %}
 6761 
 6762 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
      // So the signed double case (UseSSE <= 1) is emulated: a conditional branch
      // (enc_cmov_branch) guards a push of $src followed by a DD /3 store into
      // $dst. The 0x4 argument is presumably the number of bytes skipped --
      // confirm against enc_cmov_branch.
 6763 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6764   predicate(UseSSE<=1);
 6765   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6766   ins_cost(200);
 6767   format %{ "Jn$cop   skip\n\t"
 6768             "MOV    $dst,$src\t# double\n"
 6769       "skip:" %}
 6770   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6771   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6772   ins_pipe( pipe_cmovDPR_reg );
 6773 %}
 6774 
 6775 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
      // So the signed float case (UseSSE == 0) is emulated: a conditional branch
      // (enc_cmov_branch) guards a push of $src followed by a DD /3 store into
      // $dst. The 0x4 argument is presumably the number of bytes skipped --
      // confirm against enc_cmov_branch.
 6776 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6777   predicate(UseSSE==0);
 6778   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6779   ins_cost(200);
 6780   format %{ "Jn$cop    skip\n\t"
 6781             "MOV    $dst,$src\t# float\n"
 6782       "skip:" %}
 6783   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6784   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6785   ins_pipe( pipe_cmovDPR_reg );
 6786 %}
 6787 
 6788 // XMM float conditional move on signed compare flags. SSE has no CMOV for
      // XMM registers, so a short branch skips a MOVSS when the condition fails.
 6789 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6790   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6791   predicate (UseSSE>=1);
 6792   ins_cost(200);
 6793   format %{ "Jn$cop   skip\n\t"
 6794             "MOVSS  $dst,$src\t# float\n"
 6795       "skip:" %}
 6796   ins_encode %{
 6797     // Jump past the move on the opposite condition (low bit of the code flipped).
 6798     Label done;
 6799     __ jccb((Assembler::Condition)($cop$$cmpcode ^ 1), done);
 6800     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6801     __ bind(done);
 6802   %}
 6803   ins_pipe(pipe_slow);
 6804 %}
 6805 
 6806 // No CMOVE with SSE/SSE2
      // XMM double conditional move on signed compare flags (UseSSE >= 2): a short
      // branch on the inverted condition skips a MOVSD when the condition fails.
      // The format string labels the move as a double; it previously said "float".
 6807 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6808   predicate (UseSSE>=2);
 6809   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6810   ins_cost(200);
 6811   format %{ "Jn$cop   skip\n\t"
 6812             "MOVSD  $dst,$src\t# double\n"
 6813       "skip:" %}
 6814   ins_encode %{
 6815     Label skip;
 6816     // Invert sense of branch from sense of CMOV
 6817     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6818     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6819     __ bind(skip);
 6820   %}
 6821   ins_pipe( pipe_slow );
 6822 %}
 6823 
 6824 // Unsigned-flags twin of the XMM float conditional move: a short branch
      // skips a MOVSS when the condition fails.
 6825 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6826   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6827   predicate (UseSSE>=1);
 6828   ins_cost(200);
 6829   format %{ "Jn$cop   skip\n\t"
 6830             "MOVSS  $dst,$src\t# float\n"
 6831       "skip:" %}
 6832   ins_encode %{
 6833     // Jump past the move on the opposite condition (low bit of the code flipped).
 6834     Label done;
 6835     __ jccb((Assembler::Condition)($cop$$cmpcode ^ 1), done);
 6836     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6837     __ bind(done);
 6838   %}
 6839   ins_pipe(pipe_slow);
 6840 %}
 6841 
 6842 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
        // Variant for the cmpOpUCF / eFlagsRegUCF operand classes. It has no
        // encoding of its own and expands to fcmovF_regU.
 6843   predicate (UseSSE>=1);
 6844   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6845   ins_cost(200);
 6846   expand %{
 6847     fcmovF_regU(cop, cr, dst, src);
 6848   %}
 6849 %}
 6850 
 6851 // unsigned version
      // XMM double conditional move on unsigned compare flags (UseSSE >= 2): a
      // short branch on the inverted condition skips a MOVSD when the condition
      // fails. The format string labels the move as a double; it previously said
      // "float".
 6852 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6853   predicate (UseSSE>=2);
 6854   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6855   ins_cost(200);
 6856   format %{ "Jn$cop   skip\n\t"
 6857             "MOVSD  $dst,$src\t# double\n"
 6858       "skip:" %}
 6859   ins_encode %{
 6860     Label skip;
 6861     // Invert sense of branch from sense of CMOV
 6862     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6863     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6864     __ bind(skip);
 6865   %}
 6866   ins_pipe( pipe_slow );
 6867 %}
 6868 
 6869 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
        // Variant for the cmpOpUCF / eFlagsRegUCF operand classes. It has no
        // encoding of its own and expands to fcmovD_regU.
 6870   predicate (UseSSE>=2);
 6871   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6872   ins_cost(200);
 6873   expand %{
 6874     fcmovD_regU(cop, cr, dst, src);
 6875   %}
 6876 %}
 6877 
 6878 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
        // Long conditional move on signed compare flags, used when the CPU
        // supports CMOV. A long occupies a register pair, so two CMOVs with
        // the same condition are emitted: one for the low halves (RegReg_Lo2)
        // and one for the high halves (RegReg_Hi2).
 6879   predicate(VM_Version::supports_cmov() );
 6880   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6881   ins_cost(200);
 6882   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6883             "CMOV$cop $dst.hi,$src.hi" %}
 6884   opcode(0x0F,0x40);
 6885   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 6886   ins_pipe( pipe_cmov_reg_long );
 6887 %}
 6888 
 6889 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
        // Unsigned-flags variant of the long conditional move: two CMOVs with
        // the same condition, one per 32-bit half of the register pair.
 6890   predicate(VM_Version::supports_cmov() );
 6891   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6892   ins_cost(200);
 6893   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6894             "CMOV$cop $dst.hi,$src.hi" %}
 6895   opcode(0x0F,0x40);
 6896   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 6897   ins_pipe( pipe_cmov_reg_long );
 6898 %}
 6899 
 6900 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
        // Variant for the cmpOpUCF / eFlagsRegUCF operand classes. It has no
        // encoding of its own and expands to cmovL_regU.
 6901   predicate(VM_Version::supports_cmov() );
 6902   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6903   ins_cost(200);
 6904   expand %{
 6905     cmovL_regU(cop, cr, dst, src);
 6906   %}
 6907 %}
 6908 
 6909 //----------Arithmetic Instructions--------------------------------------------
 6910 //----------Addition Instructions----------------------------------------------
 6911 
 6912 // Integer Addition Instructions
 6913 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Two-operand integer add: $dst is both an input and the result.
        // Encoded as opcode 0x03 plus one operand byte (declared size 2); the
        // flags register is declared killed.
 6914   match(Set dst (AddI dst src));
 6915   effect(KILL cr);
 6916
 6917   size(2);
 6918   format %{ "ADD    $dst,$src" %}
 6919   opcode(0x03);
 6920   ins_encode( OpcP, RegReg( dst, src) );
 6921   ins_pipe( ialu_reg_reg );
 6922 %}
 6923 
 6924 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
        // Int add of an immediate: dst = dst + src (constant). Kills flags.
        // No size() is declared; the OpcSErm / Con8or32 encoders presumably
        // pick an 8-bit or 32-bit immediate form depending on the constant
        // -- confirm in the encoding block.
 6925   match(Set dst (AddI dst src));
 6926   effect(KILL cr);
 6927 
 6928   format %{ "ADD    $dst,$src" %}
 6929   opcode(0x81, 0x00); /* /0 id */
 6930   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 6931   ins_pipe( ialu_reg );
 6932 %}
 6933 
 6934 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
        // dst = dst + 1 expressed as a one-byte INC. Matched only for the
        // constant operand class immI_1 and only when UseIncDec is enabled.
 6935   predicate(UseIncDec);
 6936   match(Set dst (AddI dst src));
 6937   effect(KILL cr);
 6938 
 6939   size(1);
 6940   format %{ "INC    $dst" %}
        // Base opcode 0x40; Opc_plus presumably adds the register number of
        // dst to it (single-byte INC r32 form) -- confirm in the encoding block.
 6941   opcode(0x40); /*  */
 6942   ins_encode( Opc_plus( primary, dst ) );
 6943   ins_pipe( ialu_reg );
 6944 %}
 6945 
 6946 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
        // Three-operand int add via LEA: dst = src0 + constant. Unlike the ADD
        // rules, dst is not an input and no flags operand is declared or
        // killed.
 6947   match(Set dst (AddI src0 src1));
        // NOTE(review): cost 110 presumably sits just above the default so
        // the plain ADD rules are preferred when applicable -- confirm.
 6948   ins_cost(110);
 6949 
 6950   format %{ "LEA    $dst,[$src0 + $src1]" %}
 6951   opcode(0x8D); /* 0x8D /r */
 6952   ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
 6953   ins_pipe( ialu_reg_reg );
 6954 %}
 6955 
 6956 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
        // Pointer counterpart of leaI_eReg_immI: dst = src0 + constant offset
        // (AddP) via LEA, with no flags operand declared or killed.
 6957   match(Set dst (AddP src0 src1));
 6958   ins_cost(110);
 6959 
 6960   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 6961   opcode(0x8D); /* 0x8D /r */
 6962   ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
 6963   ins_pipe( ialu_reg_reg );
 6964 %}
 6965 
 6966 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
        // dst = dst + (-1) expressed as a one-byte DEC. Matched only for the
        // constant operand class immI_M1 and only when UseIncDec is enabled.
 6967   predicate(UseIncDec);
 6968   match(Set dst (AddI dst src));
 6969   effect(KILL cr);
 6970 
 6971   size(1);
 6972   format %{ "DEC    $dst" %}
        // Base opcode 0x48, combined with dst through Opc_plus (same scheme as
        // incI_eReg, which uses base 0x40).
 6973   opcode(0x48); /*  */
 6974   ins_encode( Opc_plus( primary, dst ) );
 6975   ins_pipe( ialu_reg );
 6976 %}
 6977 
 6978 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
        // Pointer plus int register: dst = dst + src (AddP). Uses the same
        // opcode (0x03) and 2-byte size as addI_eReg; kills flags.
 6979   match(Set dst (AddP dst src));
 6980   effect(KILL cr);
 6981 
 6982   size(2);
 6983   format %{ "ADD    $dst,$src" %}
 6984   opcode(0x03);
 6985   ins_encode( OpcP, RegReg( dst, src) );
 6986   ins_pipe( ialu_reg_reg );
 6987 %}
 6988 
 6989 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
        // Pointer plus immediate: dst = dst + constant (AddP). Same encoding
        // scheme as addI_eReg_imm; kills flags.
 6990   match(Set dst (AddP dst src));
 6991   effect(KILL cr);
 6992 
 6993   format %{ "ADD    $dst,$src" %}
 6994   opcode(0x81,0x00); /* Opcode 81 /0 id */
        // The commented-out line below is an older encoding kept for reference;
        // the active encoding is the OpcSErm / Con8or32 pair.
 6995   // ins_encode( RegImm( dst, src) );
 6996   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 6997   ins_pipe( ialu_reg );
 6998 %}
 6999 
 7000 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
        // Int add with the second input loaded from memory: the LoadI is
        // folded into the ADD as a memory operand. dst = dst + [src]; kills
        // flags.
 7001   match(Set dst (AddI dst (LoadI src)));
 7002   effect(KILL cr);
 7003 
 7004   ins_cost(150);
 7005   format %{ "ADD    $dst,$src" %}
 7006   opcode(0x03);
 7007   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 7008   ins_pipe( ialu_reg_mem );
 7009 %}
 7010 
 7011 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
        // Read-modify-write add on memory: [dst] = [dst] + src. The match rule
        // requires the load and the store to use the same memory operand dst.
 7012   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7013   effect(KILL cr);
 7014 
 7015   ins_cost(150);
 7016   format %{ "ADD    $dst,$src" %}
 7017   opcode(0x01);  /* Opcode 01 /r */
        // RegMem takes the register operand first and the memory operand
        // second, hence (src, dst) here.
 7018   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 7019   ins_pipe( ialu_mem_reg );
 7020 %}
 7021 
 7022 // Add Memory with Immediate
 7023 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
        // Read-modify-write add of a constant to memory: [dst] = [dst] + src.
        // Kills flags.
 7024   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7025   effect(KILL cr);
 7026 
 7027   ins_cost(125);
 7028   format %{ "ADD    $dst,$src" %}
 7029   opcode(0x81);               /* Opcode 81 /0 id */
        // The /0 opcode extension is passed explicitly to RMopc_Mem (0x00)
        // rather than through a secondary opcode() argument.
 7030   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark );
 7031   ins_pipe( ialu_mem_imm );
 7032 %}
 7033 
 7034 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
        // Read-modify-write increment of an int in memory: [dst] = [dst] + 1,
        // emitted as INC (opcode FF /0). Kills flags.
        // NOTE(review): unlike incI_eReg there is no predicate(UseIncDec)
        // here, and the cost equals addI_mem_imm's -- confirm this is intended.
 7035   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7036   effect(KILL cr);
 7037 
 7038   ins_cost(125);
 7039   format %{ "INC    $dst" %}
 7040   opcode(0xFF);               /* Opcode FF /0 */
 7041   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark);
 7042   ins_pipe( ialu_mem_imm );
 7043 %}
 7044 
 7045 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
        // Read-modify-write decrement of an int in memory: [dst] = [dst] + (-1),
        // emitted as DEC (opcode FF /1). Kills flags.
        // NOTE(review): unlike decI_eReg there is no predicate(UseIncDec)
        // here -- confirm this is intended.
 7046   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7047   effect(KILL cr);
 7048 
 7049   ins_cost(125);
 7050   format %{ "DEC    $dst" %}
 7051   opcode(0xFF);               /* Opcode FF /1 */
 7052   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark);
 7053   ins_pipe( ialu_mem_imm );
 7054 %}
 7055 
 7056 
 7057 instruct checkCastPP( eRegP dst ) %{
        // CheckCastPP on a pointer register, in place (dst is input and
        // output). The encoding is empty and the declared size is 0, so no
        // machine code is emitted; the format is only a listing annotation.
 7058   match(Set dst (CheckCastPP dst));
 7059 
 7060   size(0);
 7061   format %{ "#checkcastPP of $dst" %}
 7062   ins_encode( /*empty encoding*/ );
 7063   ins_pipe( empty );
 7064 %}
 7065 
 7066 instruct castPP( eRegP dst ) %{
        // CastPP on a pointer register, in place; empty encoding, so no code
        // is emitted.
        // NOTE(review): unlike castII/castLL/castFF/castDD this rule declares
        // neither ins_cost(0) nor size(0) -- confirm whether that is deliberate.
 7067   match(Set dst (CastPP dst));
 7068   format %{ "#castPP of $dst" %}
 7069   ins_encode( /*empty encoding*/ );
 7070   ins_pipe( empty );
 7071 %}
 7072 
 7073 instruct castII( rRegI dst ) %{
        // CastII on an int register, in place; empty encoding and zero cost,
        // so the node is matched without emitting any code.
 7074   match(Set dst (CastII dst));
 7075   format %{ "#castII of $dst" %}
 7076   ins_encode( /*empty encoding*/ );
 7077   ins_cost(0);
 7078   ins_pipe( empty );
 7079 %}
 7080 
 7081 instruct castLL( eRegL dst ) %{
        // CastLL on a long register pair, in place; empty encoding and zero
        // cost, so no code is emitted.
 7082   match(Set dst (CastLL dst));
 7083   format %{ "#castLL of $dst" %}
 7084   ins_encode( /*empty encoding*/ );
 7085   ins_cost(0);
 7086   ins_pipe( empty );
 7087 %}
 7088 
 7089 instruct castFF( regF dst ) %{
        // CastFF on a regF operand, in place; empty encoding, zero cost.
        // Selected when UseSSE >= 1; castFF_PR covers UseSSE < 1 with regFPR.
 7090   predicate(UseSSE >= 1);
 7091   match(Set dst (CastFF dst));
 7092   format %{ "#castFF of $dst" %}
 7093   ins_encode( /*empty encoding*/ );
 7094   ins_cost(0);
 7095   ins_pipe( empty );
 7096 %}
 7097 
 7098 instruct castDD( regD dst ) %{
        // CastDD on a regD operand, in place; empty encoding, zero cost.
        // Selected when UseSSE >= 2; castDD_PR covers UseSSE < 2 with regDPR.
 7099   predicate(UseSSE >= 2);
 7100   match(Set dst (CastDD dst));
 7101   format %{ "#castDD of $dst" %}
 7102   ins_encode( /*empty encoding*/ );
 7103   ins_cost(0);
 7104   ins_pipe( empty );
 7105 %}
 7106 
 7107 instruct castFF_PR( regFPR dst ) %{
        // CastFF for the regFPR operand class, used when UseSSE < 1 (the
        // complement of castFF's predicate). Empty encoding, zero cost.
 7108   predicate(UseSSE < 1);
 7109   match(Set dst (CastFF dst));
 7110   format %{ "#castFF of $dst" %}
 7111   ins_encode( /*empty encoding*/ );
 7112   ins_cost(0);
 7113   ins_pipe( empty );
 7114 %}
 7115 
 7116 instruct castDD_PR( regDPR dst ) %{
        // CastDD for the regDPR operand class, used when UseSSE < 2 (the
        // complement of castDD's predicate). Empty encoding, zero cost.
 7117   predicate(UseSSE < 2);
 7118   match(Set dst (CastDD dst));
 7119   format %{ "#castDD of $dst" %}
 7120   ins_encode( /*empty encoding*/ );
 7121   ins_cost(0);
 7122   ins_pipe( empty );
 7123 %}
 7124 
 7125 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7126 
 7127 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
        // 64-bit compare-and-swap producing a boolean in res. One rule serves
        // both the strong (CompareAndSwapL) and weak (WeakCompareAndSwapL)
        // ideal nodes. Per the format, the expected value is compared as
        // EDX:EAX; all operands use fixed-register operand classes.
        // res is 0 if the compare failed (JNE taken) and 1 otherwise.
 7128   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7129   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
        // oldval is killed as well as the flags: it does not survive the
        // instruction as a usable value.
 7130   effect(KILL cr, KILL oldval);
 7131   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7132             "MOV    $res,0\n\t"
 7133             "JNE,s  fail\n\t"
 7134             "MOV    $res,1\n"
 7135           "fail:" %}
 7136   ins_encode( enc_cmpxchg8(mem_ptr),
 7137               enc_flags_ne_to_boolean(res) );
 7138   ins_pipe( pipe_cmpxchg );
 7139 %}
 7140 
 7141 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
        // Pointer compare-and-swap producing a boolean in res; serves both the
        // strong and weak ideal nodes. Per the format, the expected value is
        // compared as EAX. res is 0 on failure (JNE taken), 1 on success.
        // Flags and oldval are killed.
 7142   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7143   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7144   effect(KILL cr, KILL oldval);
 7145   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7146             "MOV    $res,0\n\t"
 7147             "JNE,s  fail\n\t"
 7148             "MOV    $res,1\n"
 7149           "fail:" %}
 7150   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7151   ins_pipe( pipe_cmpxchg );
 7152 %}
 7153 
 7154 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
        // Byte compare-and-swap producing a boolean in res; serves both the
        // strong and weak ideal nodes. Uses the byte-sized encoder
        // enc_cmpxchgb. res is 0 on failure (JNE taken), 1 on success.
        // Flags and oldval are killed.
 7155   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7156   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7157   effect(KILL cr, KILL oldval);
 7158   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7159             "MOV    $res,0\n\t"
 7160             "JNE,s  fail\n\t"
 7161             "MOV    $res,1\n"
 7162           "fail:" %}
 7163   ins_encode( enc_cmpxchgb(mem_ptr),
 7164               enc_flags_ne_to_boolean(res) );
 7165   ins_pipe( pipe_cmpxchg );
 7166 %}
 7167 
 7168 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
        // Short (16-bit) compare-and-swap producing a boolean in res; serves
        // both the strong and weak ideal nodes. Uses the word-sized encoder
        // enc_cmpxchgw. res is 0 on failure (JNE taken), 1 on success.
        // Flags and oldval are killed.
 7169   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7170   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7171   effect(KILL cr, KILL oldval);
 7172   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7173             "MOV    $res,0\n\t"
 7174             "JNE,s  fail\n\t"
 7175             "MOV    $res,1\n"
 7176           "fail:" %}
 7177   ins_encode( enc_cmpxchgw(mem_ptr),
 7178               enc_flags_ne_to_boolean(res) );
 7179   ins_pipe( pipe_cmpxchg );
 7180 %}
 7181 
 7182 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Int compare-and-swap producing a boolean in res; serves both the
        // strong and weak ideal nodes. Shares enc_cmpxchg with the pointer
        // variant. res is 0 on failure (JNE taken), 1 on success.
        // Flags and oldval are killed.
 7183   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7184   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7185   effect(KILL cr, KILL oldval);
 7186   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7187             "MOV    $res,0\n\t"
 7188             "JNE,s  fail\n\t"
 7189             "MOV    $res,1\n"
 7190           "fail:" %}
 7191   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7192   ins_pipe( pipe_cmpxchg );
 7193 %}
 7194 
 7195 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
        // 64-bit compare-and-exchange. Unlike compareAndSwapL there is no
        // boolean result: the rule's result operand is oldval itself, so the
        // value comes back in the register pair that supplied the expected
        // value. Only the flags are killed.
 7196   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7197   effect(KILL cr);
 7198   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7199   ins_encode( enc_cmpxchg8(mem_ptr) );
 7200   ins_pipe( pipe_cmpxchg );
 7201 %}
 7202 
 7203 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
        // Pointer compare-and-exchange: the result operand is oldval itself
        // (no separate boolean). Only the flags are killed.
 7204   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7205   effect(KILL cr);
 7206   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7207   ins_encode( enc_cmpxchg(mem_ptr) );
 7208   ins_pipe( pipe_cmpxchg );
 7209 %}
 7210 
 7211 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Byte compare-and-exchange: the result operand is oldval itself.
        // Uses the byte-sized encoder enc_cmpxchgb. Only the flags are killed.
 7212   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7213   effect(KILL cr);
 7214   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7215   ins_encode( enc_cmpxchgb(mem_ptr) );
 7216   ins_pipe( pipe_cmpxchg );
 7217 %}
 7218 
 7219 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Short (16-bit) compare-and-exchange: the result operand is oldval
        // itself. Uses the word-sized encoder enc_cmpxchgw. Only the flags
        // are killed.
 7220   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7221   effect(KILL cr);
 7222   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7223   ins_encode( enc_cmpxchgw(mem_ptr) );
 7224   ins_pipe( pipe_cmpxchg );
 7225 %}
 7226 
 7227 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Int compare-and-exchange: the result operand is oldval itself.
        // Shares enc_cmpxchg with the pointer variant. Only the flags are
        // killed.
 7228   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7229   effect(KILL cr);
 7230   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7231   ins_encode( enc_cmpxchg(mem_ptr) );
 7232   ins_pipe( pipe_cmpxchg );
 7233 %}
 7234 
 7235 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // Atomic byte add of a constant when the fetched value is not needed
        // (predicate: result_not_used()). The result is bound to a Universe
        // placeholder operand and the code is a LOCK-prefixed ADDB instead of
        // XADD. Kills flags.
 7236   predicate(n->as_LoadStore()->result_not_used());
 7237   match(Set dummy (GetAndAddB mem add));
 7238   effect(KILL cr);
        // The format omits the LOCK prefix that the encoding emits.
 7239   format %{ "ADDB  [$mem],$add" %}
 7240   ins_encode %{
 7241     __ lock();
 7242     __ addb($mem$$Address, $add$$constant);
 7243   %}
 7244   ins_pipe( pipe_cmpxchg );
 7245 %}
 7246 
 7247 // Important to match to xRegI: only 8-bit regs.
 7248 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
        // Atomic byte fetch-and-add: newval supplies the addend and is also
        // the rule's result operand. Emitted as LOCK XADDB. Kills flags.
 7249   match(Set newval (GetAndAddB mem newval));
 7250   effect(KILL cr);
        // The format omits the LOCK prefix that the encoding emits.
 7251   format %{ "XADDB  [$mem],$newval" %}
 7252   ins_encode %{
 7253     __ lock();
 7254     __ xaddb($mem$$Address, $newval$$Register);
 7255   %}
 7256   ins_pipe( pipe_cmpxchg );
 7257 %}
 7258 
 7259 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // Atomic short (16-bit) add of a constant when the fetched value is
        // not needed (predicate: result_not_used()). The result is bound to a
        // Universe placeholder operand and the code is a LOCK-prefixed ADDW
        // instead of XADD. Kills flags.
 7260   predicate(n->as_LoadStore()->result_not_used());
 7261   match(Set dummy (GetAndAddS mem add));
 7262   effect(KILL cr);
        // Mnemonic uses the W suffix to match the addw() actually emitted and
        // the file's other 16-bit forms (XCHGW, CMPXCHGW). The LOCK prefix is
        // not shown, as in the sibling xadd* formats.
 7263   format %{ "ADDW  [$mem],$add" %}
 7264   ins_encode %{
 7265     __ lock();
 7266     __ addw($mem$$Address, $add$$constant);
 7267   %}
 7268   ins_pipe( pipe_cmpxchg );
 7269 %}
 7270 
 7271 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
        // Atomic short (16-bit) fetch-and-add: newval supplies the addend and
        // is also the rule's result operand. Emitted as LOCK XADDW. Kills
        // flags.
 7272   match(Set newval (GetAndAddS mem newval));
 7273   effect(KILL cr);
        // Mnemonic uses the W suffix to match the xaddw() actually emitted and
        // the file's other 16-bit forms (XCHGW, CMPXCHGW). The LOCK prefix is
        // not shown, as in the sibling xadd* formats.
 7274   format %{ "XADDW  [$mem],$newval" %}
 7275   ins_encode %{
 7276     __ lock();
 7277     __ xaddw($mem$$Address, $newval$$Register);
 7278   %}
 7279   ins_pipe( pipe_cmpxchg );
 7280 %}
 7281 
 7282 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // Atomic int add of a constant when the fetched value is not needed
        // (predicate: result_not_used()). The result is bound to a Universe
        // placeholder operand and the code is a LOCK-prefixed ADDL instead of
        // XADD. Kills flags.
 7283   predicate(n->as_LoadStore()->result_not_used());
 7284   match(Set dummy (GetAndAddI mem add));
 7285   effect(KILL cr);
        // The format omits the LOCK prefix that the encoding emits.
 7286   format %{ "ADDL  [$mem],$add" %}
 7287   ins_encode %{
 7288     __ lock();
 7289     __ addl($mem$$Address, $add$$constant);
 7290   %}
 7291   ins_pipe( pipe_cmpxchg );
 7292 %}
 7293 
 7294 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
        // Atomic int fetch-and-add: newval supplies the addend and is also the
        // rule's result operand. Emitted as LOCK XADDL. Kills flags.
 7295   match(Set newval (GetAndAddI mem newval));
 7296   effect(KILL cr);
        // The format omits the LOCK prefix that the encoding emits.
 7297   format %{ "XADDL  [$mem],$newval" %}
 7298   ins_encode %{
 7299     __ lock();
 7300     __ xaddl($mem$$Address, $newval$$Register);
 7301   %}
 7302   ins_pipe( pipe_cmpxchg );
 7303 %}
 7304 
 7305 // Important to match to xRegI: only 8-bit regs.
 7306 instruct xchgB( memory mem, xRegI newval) %{
        // Atomic byte exchange (GetAndSetB): newval supplies the value to
        // store and is also the rule's result operand. No flags operand is
        // declared and no lock() is emitted.
        // NOTE(review): XCHG with a memory operand is implicitly locked per
        // the Intel SDM, which is presumably why no prefix is needed -- confirm.
 7307   match(Set newval (GetAndSetB mem newval));
 7308   format %{ "XCHGB  $newval,[$mem]" %}
 7309   ins_encode %{
 7310     __ xchgb($newval$$Register, $mem$$Address);
 7311   %}
 7312   ins_pipe( pipe_cmpxchg );
 7313 %}
 7314 
 7315 instruct xchgS( memory mem, rRegI newval) %{
        // Atomic short (16-bit) exchange (GetAndSetS): newval supplies the
        // value to store and is also the rule's result operand. No flags
        // operand is declared and no lock() is emitted.
 7316   match(Set newval (GetAndSetS mem newval));
 7317   format %{ "XCHGW  $newval,[$mem]" %}
 7318   ins_encode %{
 7319     __ xchgw($newval$$Register, $mem$$Address);
 7320   %}
 7321   ins_pipe( pipe_cmpxchg );
 7322 %}
 7323 
 7324 instruct xchgI( memory mem, rRegI newval) %{
        // Atomic int exchange (GetAndSetI): newval supplies the value to store
        // and is also the rule's result operand. No flags operand is declared
        // and no lock() is emitted.
 7325   match(Set newval (GetAndSetI mem newval));
 7326   format %{ "XCHGL  $newval,[$mem]" %}
 7327   ins_encode %{
 7328     __ xchgl($newval$$Register, $mem$$Address);
 7329   %}
 7330   ins_pipe( pipe_cmpxchg );
 7331 %}
 7332 
 7333 instruct xchgP( memory mem, pRegP newval) %{
        // Atomic pointer exchange (GetAndSetP). It uses the same 32-bit
        // xchgl() as xchgI; only the operand class (pRegP) and the matched
        // ideal node differ.
 7334   match(Set newval (GetAndSetP mem newval));
 7335   format %{ "XCHGL  $newval,[$mem]" %}
 7336   ins_encode %{
 7337     __ xchgl($newval$$Register, $mem$$Address);
 7338   %}
 7339   ins_pipe( pipe_cmpxchg );
 7340 %}
 7341 
 7342 //----------Subtraction Instructions-------------------------------------------
 7343 
 7344 // Integer Subtraction Instructions
 7345 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Two-address int subtract: dst = dst - src. Kills flags. Declared
        // size is 2 bytes (opcode 0x2B + operands).
 7346   match(Set dst (SubI dst src));
 7347   effect(KILL cr);
 7348 
 7349   size(2);
 7350   format %{ "SUB    $dst,$src" %}
 7351   opcode(0x2B);
 7352   ins_encode( OpcP, RegReg( dst, src) );
 7353   ins_pipe( ialu_reg_reg );
 7354 %}
 7355 
 7356 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
        // Int subtract of an immediate: dst = dst - src (constant). Kills
        // flags. Same encoder pair as addI_eReg_imm, with opcode extension /5.
 7357   match(Set dst (SubI dst src));
 7358   effect(KILL cr);
 7359 
 7360   format %{ "SUB    $dst,$src" %}
 7361   opcode(0x81,0x05);  /* Opcode 81 /5 */
        // The commented-out line below is an older encoding kept for reference.
 7362   // ins_encode( RegImm( dst, src) );
 7363   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7364   ins_pipe( ialu_reg );
 7365 %}
 7366 
 7367 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
        // Int subtract with the subtrahend loaded from memory: the LoadI is
        // folded into the SUB. dst = dst - [src]; kills flags.
 7368   match(Set dst (SubI dst (LoadI src)));
 7369   effect(KILL cr);
 7370 
 7371   ins_cost(150);
 7372   format %{ "SUB    $dst,$src" %}
 7373   opcode(0x2B);
 7374   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 7375   ins_pipe( ialu_reg_mem );
 7376 %}
 7377 
 7378 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
        // Read-modify-write subtract on memory: [dst] = [dst] - src. The match
        // rule requires the load and the store to use the same memory operand.
 7379   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7380   effect(KILL cr);
 7381 
 7382   ins_cost(150);
 7383   format %{ "SUB    $dst,$src" %}
 7384   opcode(0x29);  /* Opcode 29 /r */
        // RegMem takes the register operand first and the memory operand
        // second, hence (src, dst) here.
 7385   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 7386   ins_pipe( ialu_mem_reg );
 7387 %}
 7388 
 7389 // Subtract from a pointer
 7390 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
        // Pointer minus int. The ideal graph expresses this as
        // AddP(dst, SubI(0, src)), i.e. adding the negated int; the whole
        // subtree is matched and emitted as a single SUB. Kills flags.
 7391   match(Set dst (AddP dst (SubI zero src)));
 7392   effect(KILL cr);
 7393 
 7394   size(2);
 7395   format %{ "SUB    $dst,$src" %}
 7396   opcode(0x2B);
 7397   ins_encode( OpcP, RegReg( dst, src) );
 7398   ins_pipe( ialu_reg_reg );
 7399 %}
 7400 
 7401 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
        // Int negate: matches SubI(0, dst) and emits NEG in place. Kills
        // flags. Declared size is 2 bytes.
 7402   match(Set dst (SubI zero dst));
 7403   effect(KILL cr);
 7404 
 7405   size(2);
 7406   format %{ "NEG    $dst" %}
 7407   opcode(0xF7,0x03);  // Opcode F7 /3
 7408   ins_encode( OpcP, RegOpc( dst ) );
 7409   ins_pipe( ialu_reg );
 7410 %}
 7411 
 7412 //----------Multiplication/Division Instructions-------------------------------
 7413 // Integer Multiplication Instructions
 7414 // Multiply Register
 7415 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Two-address int multiply: dst = dst * src. Kills flags. Declared
        // size is 3 bytes.
 7416   match(Set dst (MulI dst src));
 7417   effect(KILL cr);
 7418 
 7419   size(3);
 7420   ins_cost(300);
 7421   format %{ "IMUL   $dst,$src" %}
        // Two-byte opcode 0F AF: the secondary opcode (0x0F) is emitted first
        // via OpcS, then the primary (0xAF) via OpcP.
 7422   opcode(0xAF, 0x0F);
 7423   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7424   ins_pipe( ialu_reg_reg_alu0 );
 7425 %}
 7426 
 7427 // Multiply 32-bit Immediate
 7428 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
        // Three-operand int multiply by a constant: dst = src * imm. dst is
        // not an input, so src is left intact. Kills flags.
 7429   match(Set dst (MulI src imm));
 7430   effect(KILL cr);
 7431 
 7432   ins_cost(300);
 7433   format %{ "IMUL   $dst,$src,$imm" %}
 7434   opcode(0x69);  /* 69 /r id */
 7435   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7436   ins_pipe( ialu_reg_reg_alu0 );
 7437 %}
 7438 
 7439 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
        // Materializes only the low word of a long constant (operand class
        // immL32) into an eADXRegL_low_only operand; the format shows a MOV
        // into EAX. It feeds the mulI_imm_high / mulI_imm_RShift_high rules,
        // which take an eADXRegL_low_only input. Flags are declared killed.
 7440   match(Set dst src);
 7441   effect(KILL cr);
 7442 
 7443   // Note that this is artificially increased to make it more expensive than loadConL
 7444   ins_cost(250);
 7445   format %{ "MOV    EAX,$src\t// low word only" %}
 7446   opcode(0xB8);
 7447   ins_encode( LdImmL_Lo(dst, src) );
 7448   ins_pipe( ialu_reg_fat );
 7449 %}
 7450 
 7451 // Multiply by 32-bit Immediate, taking the shifted high order results
 7452 //  (special case for shift by 32)
 7453 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
        // Matches (int)(((long)src1 * con) >> 32): the high 32 bits of a
        // widening multiply by a constant. The result operand is in the
        // eDXRegI class; the format shows a single IMUL producing EDX:EAX.
 7454   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
        // The predicate walks the matcher state tree down to the MulL's second
        // input and requires it to be a long constant that fits in a jint.
 7455   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7456              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7457              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7458   effect(USE src1, KILL cr);
 7459 
 7460   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7461   ins_cost(0*100 + 1*400 - 150);
 7462   format %{ "IMUL   EDX:EAX,$src1" %}
 7463   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7464   ins_pipe( pipe_slow );
 7465 %}
 7466 
 7467 // Multiply by 32-bit Immediate, taking the shifted high order results
 7468 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
        // Same tree and predicate as mulI_imm_high, but for shift counts in
        // the immI_32_63 operand class. Per the format, after the widening
        // IMUL the high word is additionally shifted right arithmetically by
        // cnt-32. The cost is 100 higher than mulI_imm_high's.
 7469   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
        // Requires the MulL's second input to be a long constant that fits in
        // a jint.
 7470   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7471              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7472              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7473   effect(USE src1, KILL cr);
 7474 
 7475   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7476   ins_cost(1*100 + 1*400 - 150);
 7477   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7478             "SAR    EDX,$cnt-32" %}
 7479   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7480   ins_pipe( pipe_slow );
 7481 %}
 7482 
 7483 // Multiply Memory 32-bit Immediate
 7484 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
        // Three-operand int multiply of a memory operand by a constant:
        // dst = [src] * imm, with the LoadI folded into the IMUL. Kills flags.
 7485   match(Set dst (MulI (LoadI src) imm));
 7486   effect(KILL cr);
 7487 
 7488   ins_cost(300);
 7489   format %{ "IMUL   $dst,$src,$imm" %}
 7490   opcode(0x69);  /* 69 /r id */
 7491   ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark );
 7492   ins_pipe( ialu_reg_mem_alu0 );
 7493 %}
 7494 
 7495 // Multiply Memory
 7496 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
        // Two-address int multiply by a memory operand: dst = dst * [src],
        // with the LoadI folded into the IMUL. Kills flags.
 7497   match(Set dst (MulI dst (LoadI src)));
 7498   effect(KILL cr);
 7499 
 7500   ins_cost(350);
 7501   format %{ "IMUL   $dst,$src" %}
        // Two-byte opcode 0F AF, secondary byte emitted first (as in mulI_eReg).
 7502   opcode(0xAF, 0x0F);
 7503   ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark );
 7504   ins_pipe( ialu_reg_mem_alu0 );
 7505 %}
 7506 
 7507 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7508 %{
        // MulAddS2I: dst = dst*src1 + src2*src3. It has no encoding of its
        // own and expands into two mulI_eReg and one addI_eReg. src2 holds
        // the second product in the expansion, so it is declared killed along
        // with the flags.
 7509   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7510   effect(KILL cr, KILL src2);
 7511 
 7512   expand %{ mulI_eReg(dst, src1, cr);
 7513            mulI_eReg(src2, src3, cr);
 7514            addI_eReg(dst, src2, cr); %}
 7515 %}
 7516 
 7517 // Multiply Register Int to Long
 7518 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
        // Widening signed multiply of two ints that were each converted to
        // long. src is in the eAXRegI class and the result in eADXRegL; the
        // encoder is handed only dst and src1, so the first factor is
        // presumably implicit in EAX -- confirm in long_int_multiply.
 7519   // Basic Idea: long = (long)int * (long)int
 7520   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7521   effect(DEF dst, USE src, USE src1, KILL flags);
 7522 
 7523   ins_cost(300);
 7524   format %{ "IMUL   $dst,$src1" %}
 7525 
 7526   ins_encode( long_int_multiply( dst, src1 ) );
 7527   ins_pipe( ialu_reg_reg_alu0 );
 7528 %}
 7529 
 7530 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
        // Widening multiply of two ints that were each converted to long and
        // masked with the same immL_32bits constant, i.e. treated as unsigned
        // 32-bit values. Emitted through long_uint_multiply (format: MUL).
        // Kills flags.
 7531   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7532   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7533   effect(KILL flags);
 7534 
 7535   ins_cost(300);
 7536   format %{ "MUL    $dst,$src1" %}
 7537 
 7538   ins_encode( long_uint_multiply(dst, src1) );
 7539   ins_pipe( ialu_reg_reg_alu0 );
 7540 %}
 7541 
 7542 // Multiply Register Long
 7543 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // General 64x64 -> 64 long multiply: dst = dst * src, with dst in the
        // eADXRegL class (the format refers to it as EDX:EAX). It needs one
        // int temp and kills flags. Per the format, the two cross products are
        // summed in tmp, then added to the high word of the unsigned lo*lo
        // product.
 7544   match(Set dst (MulL dst src));
 7545   effect(KILL cr, TEMP tmp);
 7546   ins_cost(4*100+3*400);
 7547 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7548 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7549   format %{ "MOV    $tmp,$src.lo\n\t"
 7550             "IMUL   $tmp,EDX\n\t"
 7551             "MOV    EDX,$src.hi\n\t"
 7552             "IMUL   EDX,EAX\n\t"
 7553             "ADD    $tmp,EDX\n\t"
 7554             "MUL    EDX:EAX,$src.lo\n\t"
 7555             "ADD    EDX,$tmp" %}
 7556   ins_encode( long_multiply( dst, src, tmp ) );
 7557   ins_pipe( pipe_slow );
 7558 %}
 7559 
 7560 // Multiply Register Long where the left operand's high 32 bits are zero
 7561 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // Cheaper long multiply for when the left operand (n->in(1), i.e. dst)
        // is known to have zero high 32 bits, so the x_hi * y_lo cross product
        // drops out.
 7562   predicate(is_operand_hi32_zero(n->in(1)));
 7563   match(Set dst (MulL dst src));
 7564   effect(KILL cr, TEMP tmp);
 7565   ins_cost(2*100+2*400);
 7566 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7567 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7568   format %{ "MOV    $tmp,$src.hi\n\t"
 7569             "IMUL   $tmp,EAX\n\t"
 7570             "MUL    EDX:EAX,$src.lo\n\t"
 7571             "ADD    EDX,$tmp" %}
 7572   ins_encode %{
          // tmp = y_hi
 7573     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
          // tmp = lo(x_lo * y_hi)
 7574     __ imull($tmp$$Register, rax);
          // EDX:EAX = x_lo * y_lo (unsigned widening multiply)
 7575     __ mull($src$$Register);
          // fold the cross product into the high word
 7576     __ addl(rdx, $tmp$$Register);
 7577   %}
 7578   ins_pipe( pipe_slow );
 7579 %}
 7580 
 7581 // Multiply Register Long where the right operand's high 32 bits are zero
 7582 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // Cheaper long multiply for when the right operand (n->in(2), i.e.
        // src) is known to have zero high 32 bits, so the x_lo * y_hi cross
        // product drops out.
 7583   predicate(is_operand_hi32_zero(n->in(2)));
 7584   match(Set dst (MulL dst src));
 7585   effect(KILL cr, TEMP tmp);
 7586   ins_cost(2*100+2*400);
 7587 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7588 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7589   format %{ "MOV    $tmp,$src.lo\n\t"
 7590             "IMUL   $tmp,EDX\n\t"
 7591             "MUL    EDX:EAX,$src.lo\n\t"
 7592             "ADD    EDX,$tmp" %}
 7593   ins_encode %{
          // tmp = y_lo
 7594     __ movl($tmp$$Register, $src$$Register);
          // tmp = lo(x_hi * y_lo); must happen before MUL overwrites EDX
 7595     __ imull($tmp$$Register, rdx);
          // EDX:EAX = x_lo * y_lo (unsigned widening multiply)
 7596     __ mull($src$$Register);
          // fold the cross product into the high word
 7597     __ addl(rdx, $tmp$$Register);
 7598   %}
 7599   ins_pipe( pipe_slow );
 7600 %}
 7601 
 7602 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7603 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
        // Cheapest long multiply: both operands are known to have zero high
        // 32 bits, so both cross products drop out and a single unsigned
        // widening MUL yields the whole result. No temp register is needed.
 7604   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7605   match(Set dst (MulL dst src));
 7606   effect(KILL cr);
 7607   ins_cost(1*400);
 7608 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7609 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7610   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7611   ins_encode %{
 7612     __ mull($src$$Register);
 7613   %}
 7614   ins_pipe( pipe_slow );
 7615 %}
 7616 
 7617 // Multiply Register Long by small constant
 7618 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
        // Long multiply by a small constant (operand class immL_127). Per the
        // format: tmp = EDX * src, then EDX is loaded with src and multiplied
        // by EAX, and tmp is added to the high word. Declared size is exactly
        // 12 bytes. Needs one int temp and kills flags.
 7619   match(Set dst (MulL dst src));
 7620   effect(KILL cr, TEMP tmp);
 7621   ins_cost(2*100+2*400);
 7622   size(12);
 7623 // Basic idea: lo(result) = lo(src * EAX)
 7624 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7625   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7626             "MOV    EDX,$src\n\t"
 7627             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7628             "ADD    EDX,$tmp" %}
 7629   ins_encode( long_multiply_con( dst, src, tmp ) );
 7630   ins_pipe( pipe_slow );
 7631 %}
 7632 
 7633 // Integer DIV with Register
 7634 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
        // Signed int divide: rax = rax / div, with the divisor pinned to the
        // eCXRegI class. rdx and the flags are killed.
        // Per the format, 0x80000000 / -1 is special-cased: IDIV is skipped,
        // EAX is left unchanged and EDX is zeroed. (IDIV itself would fault on
        // that quotient overflow -- see the Intel SDM.)
 7635   match(Set rax (DivI rax div));
 7636   effect(KILL rdx, KILL cr);
        // Fixed size covering the whole guarded sequence shown in the format.
 7637   size(26);
 7638   ins_cost(30*100+10*100);
 7639   format %{ "CMP    EAX,0x80000000\n\t"
 7640             "JNE,s  normal\n\t"
 7641             "XOR    EDX,EDX\n\t"
 7642             "CMP    ECX,-1\n\t"
 7643             "JE,s   done\n"
 7644     "normal: CDQ\n\t"
 7645             "IDIV   $div\n\t"
 7646     "done:"        %}
 7647   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7648   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7649   ins_pipe( ialu_reg_reg_alu0 );
 7650 %}
 7651 
 7652 // Divide Register Long
 7653 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
        // General long divide, implemented as a call rather than inline code.
        // Per the format, both operands are pushed on the stack,
        // SharedRuntime::ldiv is called, and the 16 bytes of arguments are
        // popped afterwards. The result operand is in the eADXRegL class.
        // effect(CALL) marks the instruction as making a call; the very high
        // cost makes cheaper specialised rules win when they apply.
 7654   match(Set dst (DivL src1 src2));
 7655   effect(CALL);
 7656   ins_cost(10000);
 7657   format %{ "PUSH   $src1.hi\n\t"
 7658             "PUSH   $src1.lo\n\t"
 7659             "PUSH   $src2.hi\n\t"
 7660             "PUSH   $src2.lo\n\t"
 7661             "CALL   SharedRuntime::ldiv\n\t"
 7662             "ADD    ESP,16" %}
 7663   ins_encode( long_div(src1,src2) );
 7664   ins_pipe( pipe_slow );
 7665 %}
 7666 
 7667 // Integer DIVMOD with Register, both quotient and mod results
 7668 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
        // Combined int divide/modulo (DivModI). The match rule has no Set, and
        // rdx is an operand but is not killed; only the flags are.
        // Presumably quotient (EAX) and remainder (EDX) are both live results
        // -- confirm against the DivModI projections. Same encoding, size and
        // guarded 0x80000000 / -1 sequence as divI_eReg.
 7669   match(DivModI rax div);
 7670   effect(KILL cr);
 7671   size(26);
 7672   ins_cost(30*100+10*100);
 7673   format %{ "CMP    EAX,0x80000000\n\t"
 7674             "JNE,s  normal\n\t"
 7675             "XOR    EDX,EDX\n\t"
 7676             "CMP    ECX,-1\n\t"
 7677             "JE,s   done\n"
 7678     "normal: CDQ\n\t"
 7679             "IDIV   $div\n\t"
 7680     "done:"        %}
 7681   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7682   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7683   ins_pipe( pipe_slow );
 7684 %}
 7685 
 7686 // Integer MOD with Register
 7687 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
        // Signed int remainder: rdx = rax % div, with the divisor pinned to
        // the eCXRegI class. rax and the flags are killed.
        // The encoding and size(26) are identical to divI_eReg and
        // divModI_eReg_divmod, so the listing format below shows the same
        // guarded sequence, including the 0x80000000 / -1 case where IDIV is
        // skipped and EDX is zeroed.
 7688   match(Set rdx (ModI rax div));
 7689   effect(KILL rax, KILL cr);
 7690 
 7691   size(26);
 7692   ins_cost(300);
 7693   format %{ "CMP    EAX,0x80000000\n\t"
                  "JNE,s  normal\n\t"
                  "XOR    EDX,EDX\n\t"
                  "CMP    ECX,-1\n\t"
                  "JE,s   done\n"
          "normal: CDQ\n\t"
 7694             "IDIV   $div\n\t"
          "done:"        %}
 7695   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7696   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7697   ins_pipe( ialu_reg_reg_alu0 );
 7698 %}
 7699 
 7700 // Remainder Register Long
 7701 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
        // General long remainder, implemented as a call rather than inline
        // code. Per the format, both operands are pushed on the stack,
        // SharedRuntime::lrem is called, and the 16 bytes of arguments are
        // popped afterwards. The result operand is in the eADXRegL class.
 7702   match(Set dst (ModL src1 src2));
 7703   effect(CALL);
 7704   ins_cost(10000);
 7705   format %{ "PUSH   $src1.hi\n\t"
 7706             "PUSH   $src1.lo\n\t"
 7707             "PUSH   $src2.hi\n\t"
 7708             "PUSH   $src2.lo\n\t"
 7709             "CALL   SharedRuntime::lrem\n\t"
 7710             "ADD    ESP,16" %}
 7711   ins_encode( long_mod(src1,src2) );
 7712   ins_pipe( pipe_slow );
 7713 %}
 7714 
 7715 // Divide Register Long (no special case since divisor != -1)
 7716 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
        // Inline long divide by a constant in the immL32 operand class:
        // dst = dst / imm, without calling SharedRuntime::ldiv. The division
        // is done on magnitudes with the unsigned 32-bit DIV (up to two steps,
        // high word first), and the sign is fixed up afterwards. Needs two int
        // temps and kills flags. The encoder asserts that the constant is not
        // 0, -1 or min_jint.
 7717   match(Set dst (DivL dst imm));
 7718   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7719   ins_cost(1000);
 7720   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7721             "XOR    $tmp2,$tmp2\n\t"
 7722             "CMP    $tmp,EDX\n\t"
 7723             "JA,s   fast\n\t"
 7724             "MOV    $tmp2,EAX\n\t"
 7725             "MOV    EAX,EDX\n\t"
 7726             "MOV    EDX,0\n\t"
 7727             "JLE,s  pos\n\t"
 7728             "LNEG   EAX : $tmp2\n\t"
 7729             "DIV    $tmp # unsigned division\n\t"
 7730             "XCHG   EAX,$tmp2\n\t"
 7731             "DIV    $tmp\n\t"
 7732             "LNEG   $tmp2 : EAX\n\t"
 7733             "JMP,s  done\n"
 7734     "pos:\n\t"
 7735             "DIV    $tmp\n\t"
 7736             "XCHG   EAX,$tmp2\n"
 7737     "fast:\n\t"
 7738             "DIV    $tmp\n"
 7739     "done:\n\t"
 7740             "MOV    EDX,$tmp2\n\t"
 7741             "NEG    EDX:EAX # if $imm < 0" %}
 7742   ins_encode %{
 7743     int con = (int)$imm$$constant;
 7744     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
          // Divide by |con|; -con cannot overflow because min_jint is excluded.
 7745     int pcon = (con > 0) ? con : -con;
 7746     Label Lfast, Lpos, Ldone;
 7747 
 7748     __ movl($tmp$$Register, pcon);
          // tmp2 will hold the high word of the quotient; 0 on the fast path.
 7749     __ xorl($tmp2$$Register,$tmp2$$Register);
          // Unsigned compare of |con| against the dividend's high word: if the
          // divisor is above it, one DIV of EDX:EAX suffices.
 7750     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7751     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7752 
          // Slow path: set up to divide the high word first (EDX:EAX = 0:hi),
          // keeping the low word in tmp2. Only MOVs are used here so the flags
          // from the cmpl above are still valid for the signed branch below.
 7753     __ movl($tmp2$$Register, $dst$$Register); // save
 7754     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7755     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7756     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7757 
 7758     // Negative dividend.
 7759     // convert value to positive to use unsigned division
 7760     __ lneg($dst$$Register, $tmp2$$Register);
          // First DIV: high word / |con|; remainder stays in EDX for the next step.
 7761     __ divl($tmp$$Register);
          // Swap: tmp2 = high quotient word, EAX = low dividend word.
 7762     __ xchgl($dst$$Register, $tmp2$$Register);
          // Second DIV: (remainder:low word) / |con| gives the low quotient word.
 7763     __ divl($tmp$$Register);
 7764     // revert result back to negative
 7765     __ lneg($tmp2$$Register, $dst$$Register);
 7766     __ jmpb(Ldone);
 7767 
 7768     __ bind(Lpos);
 7769     __ divl($tmp$$Register); // Use unsigned division
 7770     __ xchgl($dst$$Register, $tmp2$$Register);
 7771     // Fall through for final divide, tmp2 has 32 bit hi result
 7772 
 7773     __ bind(Lfast);
 7774     // fast path: src is positive
 7775     __ divl($tmp$$Register); // Use unsigned division
 7776 
 7777     __ bind(Ldone);
          // Reassemble the 64-bit quotient: high word from tmp2, low word in EAX.
 7778     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
          // The division used |con|; negate the quotient for a negative constant.
 7779     if (con < 0) {
 7780       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7781     }
 7782   %}
 7783   ins_pipe( pipe_slow );
 7784 %}
 7785 
 7786 // Remainder Register Long (remainder fit into 32 bits)
 7787 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7788   match(Set dst (ModL dst imm));
 7789   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7790   ins_cost(1000);
 7791   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7792             "CMP    $tmp,EDX\n\t"
 7793             "JA,s   fast\n\t"
 7794             "MOV    $tmp2,EAX\n\t"
 7795             "MOV    EAX,EDX\n\t"
 7796             "MOV    EDX,0\n\t"
 7797             "JLE,s  pos\n\t"
 7798             "LNEG   EAX : $tmp2\n\t"
 7799             "DIV    $tmp # unsigned division\n\t"
 7800             "MOV    EAX,$tmp2\n\t"
 7801             "DIV    $tmp\n\t"
 7802             "NEG    EDX\n\t"
 7803             "JMP,s  done\n"
 7804     "pos:\n\t"
 7805             "DIV    $tmp\n\t"
 7806             "MOV    EAX,$tmp2\n"
 7807     "fast:\n\t"
 7808             "DIV    $tmp\n"
 7809     "done:\n\t"
 7810             "MOV    EAX,EDX\n\t"
 7811             "SAR    EDX,31\n\t" %}
 7812   ins_encode %{
 7813     int con = (int)$imm$$constant;
 7814     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7815     int pcon = (con > 0) ? con : -con;
 7816     Label  Lfast, Lpos, Ldone;
 7817 
 7818     __ movl($tmp$$Register, pcon);
 7819     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7820     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7821 
 7822     __ movl($tmp2$$Register, $dst$$Register); // save
 7823     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7824     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7825     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7826 
 7827     // Negative dividend.
 7828     // convert value to positive to use unsigned division
 7829     __ lneg($dst$$Register, $tmp2$$Register);
 7830     __ divl($tmp$$Register);
 7831     __ movl($dst$$Register, $tmp2$$Register);
 7832     __ divl($tmp$$Register);
 7833     // revert remainder back to negative
 7834     __ negl(HIGH_FROM_LOW($dst$$Register));
 7835     __ jmpb(Ldone);
 7836 
 7837     __ bind(Lpos);
 7838     __ divl($tmp$$Register);
 7839     __ movl($dst$$Register, $tmp2$$Register);
 7840 
 7841     __ bind(Lfast);
 7842     // fast path: src is positive
 7843     __ divl($tmp$$Register);
 7844 
 7845     __ bind(Ldone);
 7846     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7847     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 7848 
 7849   %}
 7850   ins_pipe( pipe_slow );
 7851 %}
 7852 
 7853 // Integer Shift Instructions
 7854 // Shift Left by one
 7855 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7856   match(Set dst (LShiftI dst shift));
 7857   effect(KILL cr);
 7858 
 7859   size(2);
 7860   format %{ "SHL    $dst,$shift" %}
 7861   opcode(0xD1, 0x4);  /* D1 /4 */
 7862   ins_encode( OpcP, RegOpc( dst ) );
 7863   ins_pipe( ialu_reg );
 7864 %}
 7865 
 7866 // Shift Left by 8-bit immediate
 7867 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 7868   match(Set dst (LShiftI dst shift));
 7869   effect(KILL cr);
 7870 
 7871   size(3);
 7872   format %{ "SHL    $dst,$shift" %}
 7873   opcode(0xC1, 0x4);  /* C1 /4 ib */
 7874   ins_encode( RegOpcImm( dst, shift) );
 7875   ins_pipe( ialu_reg );
 7876 %}
 7877 
 7878 // Shift Left by variable
 7879 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 7880   match(Set dst (LShiftI dst shift));
 7881   effect(KILL cr);
 7882 
 7883   size(2);
 7884   format %{ "SHL    $dst,$shift" %}
 7885   opcode(0xD3, 0x4);  /* D3 /4 */
 7886   ins_encode( OpcP, RegOpc( dst ) );
 7887   ins_pipe( ialu_reg_reg );
 7888 %}
 7889 
 7890 // Arithmetic shift right by one
 7891 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7892   match(Set dst (RShiftI dst shift));
 7893   effect(KILL cr);
 7894 
 7895   size(2);
 7896   format %{ "SAR    $dst,$shift" %}
 7897   opcode(0xD1, 0x7);  /* D1 /7 */
 7898   ins_encode( OpcP, RegOpc( dst ) );
 7899   ins_pipe( ialu_reg );
 7900 %}
 7901 
 7902 // Arithmetic shift right by one
 7903 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 7904   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 7905   effect(KILL cr);
 7906   format %{ "SAR    $dst,$shift" %}
 7907   opcode(0xD1, 0x7);  /* D1 /7 */
 7908   ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark );
 7909   ins_pipe( ialu_mem_imm );
 7910 %}
 7911 
 7912 // Arithmetic Shift Right by 8-bit immediate
 7913 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 7914   match(Set dst (RShiftI dst shift));
 7915   effect(KILL cr);
 7916 
 7917   size(3);
 7918   format %{ "SAR    $dst,$shift" %}
 7919   opcode(0xC1, 0x7);  /* C1 /7 ib */
 7920   ins_encode( RegOpcImm( dst, shift ) );
 7921   ins_pipe( ialu_mem_imm );
 7922 %}
 7923 
 7924 // Arithmetic Shift Right by 8-bit immediate
 7925 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 7926   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 7927   effect(KILL cr);
 7928 
 7929   format %{ "SAR    $dst,$shift" %}
 7930   opcode(0xC1, 0x7);  /* C1 /7 ib */
 7931   ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark );
 7932   ins_pipe( ialu_mem_imm );
 7933 %}
 7934 
 7935 // Arithmetic Shift Right by variable
 7936 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 7937   match(Set dst (RShiftI dst shift));
 7938   effect(KILL cr);
 7939 
 7940   size(2);
 7941   format %{ "SAR    $dst,$shift" %}
 7942   opcode(0xD3, 0x7);  /* D3 /7 */
 7943   ins_encode( OpcP, RegOpc( dst ) );
 7944   ins_pipe( ialu_reg_reg );
 7945 %}
 7946 
 7947 // Logical shift right by one
 7948 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7949   match(Set dst (URShiftI dst shift));
 7950   effect(KILL cr);
 7951 
 7952   size(2);
 7953   format %{ "SHR    $dst,$shift" %}
 7954   opcode(0xD1, 0x5);  /* D1 /5 */
 7955   ins_encode( OpcP, RegOpc( dst ) );
 7956   ins_pipe( ialu_reg );
 7957 %}
 7958 
 7959 // Logical Shift Right by 8-bit immediate
 7960 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 7961   match(Set dst (URShiftI dst shift));
 7962   effect(KILL cr);
 7963 
 7964   size(3);
 7965   format %{ "SHR    $dst,$shift" %}
 7966   opcode(0xC1, 0x5);  /* C1 /5 ib */
 7967   ins_encode( RegOpcImm( dst, shift) );
 7968   ins_pipe( ialu_reg );
 7969 %}
 7970 
 7971 
 7972 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
 7973 // This idiom is used by the compiler for the i2b bytecode.
 7974 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 7975   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 7976 
 7977   size(3);
 7978   format %{ "MOVSX  $dst,$src :8" %}
 7979   ins_encode %{
 7980     __ movsbl($dst$$Register, $src$$Register);
 7981   %}
 7982   ins_pipe(ialu_reg_reg);
 7983 %}
 7984 
 7985 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
 7986 // This idiom is used by the compiler the i2s bytecode.
 7987 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 7988   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 7989 
 7990   size(3);
 7991   format %{ "MOVSX  $dst,$src :16" %}
 7992   ins_encode %{
 7993     __ movswl($dst$$Register, $src$$Register);
 7994   %}
 7995   ins_pipe(ialu_reg_reg);
 7996 %}
 7997 
 7998 
 7999 // Logical Shift Right by variable
 8000 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8001   match(Set dst (URShiftI dst shift));
 8002   effect(KILL cr);
 8003 
 8004   size(2);
 8005   format %{ "SHR    $dst,$shift" %}
 8006   opcode(0xD3, 0x5);  /* D3 /5 */
 8007   ins_encode( OpcP, RegOpc( dst ) );
 8008   ins_pipe( ialu_reg_reg );
 8009 %}
 8010 
 8011 
 8012 //----------Logical Instructions-----------------------------------------------
 8013 //----------Integer Logical Instructions---------------------------------------
 8014 // And Instructions
 8015 // And Register with Register
 8016 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8017   match(Set dst (AndI dst src));
 8018   effect(KILL cr);
 8019 
 8020   size(2);
 8021   format %{ "AND    $dst,$src" %}
 8022   opcode(0x23);
 8023   ins_encode( OpcP, RegReg( dst, src) );
 8024   ins_pipe( ialu_reg_reg );
 8025 %}
 8026 
 8027 // And Register with Immediate
 8028 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8029   match(Set dst (AndI dst src));
 8030   effect(KILL cr);
 8031 
 8032   format %{ "AND    $dst,$src" %}
 8033   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8034   // ins_encode( RegImm( dst, src) );
 8035   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8036   ins_pipe( ialu_reg );
 8037 %}
 8038 
 8039 // And Register with Memory
 8040 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8041   match(Set dst (AndI dst (LoadI src)));
 8042   effect(KILL cr);
 8043 
 8044   ins_cost(150);
 8045   format %{ "AND    $dst,$src" %}
 8046   opcode(0x23);
 8047   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 8048   ins_pipe( ialu_reg_mem );
 8049 %}
 8050 
 8051 // And Memory with Register
 8052 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8053   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8054   effect(KILL cr);
 8055 
 8056   ins_cost(150);
 8057   format %{ "AND    $dst,$src" %}
 8058   opcode(0x21);  /* Opcode 21 /r */
 8059   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 8060   ins_pipe( ialu_mem_reg );
 8061 %}
 8062 
 8063 // And Memory with Immediate
 8064 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8065   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8066   effect(KILL cr);
 8067 
 8068   ins_cost(125);
 8069   format %{ "AND    $dst,$src" %}
 8070   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8071   // ins_encode( MemImm( dst, src) );
 8072   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
 8073   ins_pipe( ialu_mem_imm );
 8074 %}
 8075 
 8076 // BMI1 instructions
 8077 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8078   match(Set dst (AndI (XorI src1 minus_1) src2));
 8079   predicate(UseBMI1Instructions);
 8080   effect(KILL cr);
 8081 
 8082   format %{ "ANDNL  $dst, $src1, $src2" %}
 8083 
 8084   ins_encode %{
 8085     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8086   %}
 8087   ins_pipe(ialu_reg);
 8088 %}
 8089 
 8090 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8091   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8092   predicate(UseBMI1Instructions);
 8093   effect(KILL cr);
 8094 
 8095   ins_cost(125);
 8096   format %{ "ANDNL  $dst, $src1, $src2" %}
 8097 
 8098   ins_encode %{
 8099     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8100   %}
 8101   ins_pipe(ialu_reg_mem);
 8102 %}
 8103 
 8104 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8105   match(Set dst (AndI (SubI imm_zero src) src));
 8106   predicate(UseBMI1Instructions);
 8107   effect(KILL cr);
 8108 
 8109   format %{ "BLSIL  $dst, $src" %}
 8110 
 8111   ins_encode %{
 8112     __ blsil($dst$$Register, $src$$Register);
 8113   %}
 8114   ins_pipe(ialu_reg);
 8115 %}
 8116 
 8117 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8118   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8119   predicate(UseBMI1Instructions);
 8120   effect(KILL cr);
 8121 
 8122   ins_cost(125);
 8123   format %{ "BLSIL  $dst, $src" %}
 8124 
 8125   ins_encode %{
 8126     __ blsil($dst$$Register, $src$$Address);
 8127   %}
 8128   ins_pipe(ialu_reg_mem);
 8129 %}
 8130 
 8131 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8132 %{
 8133   match(Set dst (XorI (AddI src minus_1) src));
 8134   predicate(UseBMI1Instructions);
 8135   effect(KILL cr);
 8136 
 8137   format %{ "BLSMSKL $dst, $src" %}
 8138 
 8139   ins_encode %{
 8140     __ blsmskl($dst$$Register, $src$$Register);
 8141   %}
 8142 
 8143   ins_pipe(ialu_reg);
 8144 %}
 8145 
 8146 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8147 %{
 8148   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8149   predicate(UseBMI1Instructions);
 8150   effect(KILL cr);
 8151 
 8152   ins_cost(125);
 8153   format %{ "BLSMSKL $dst, $src" %}
 8154 
 8155   ins_encode %{
 8156     __ blsmskl($dst$$Register, $src$$Address);
 8157   %}
 8158 
 8159   ins_pipe(ialu_reg_mem);
 8160 %}
 8161 
 8162 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8163 %{
 8164   match(Set dst (AndI (AddI src minus_1) src) );
 8165   predicate(UseBMI1Instructions);
 8166   effect(KILL cr);
 8167 
 8168   format %{ "BLSRL  $dst, $src" %}
 8169 
 8170   ins_encode %{
 8171     __ blsrl($dst$$Register, $src$$Register);
 8172   %}
 8173 
 8174   ins_pipe(ialu_reg);
 8175 %}
 8176 
 8177 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8178 %{
 8179   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8180   predicate(UseBMI1Instructions);
 8181   effect(KILL cr);
 8182 
 8183   ins_cost(125);
 8184   format %{ "BLSRL  $dst, $src" %}
 8185 
 8186   ins_encode %{
 8187     __ blsrl($dst$$Register, $src$$Address);
 8188   %}
 8189 
 8190   ins_pipe(ialu_reg_mem);
 8191 %}
 8192 
 8193 // Or Instructions
 8194 // Or Register with Register
 8195 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8196   match(Set dst (OrI dst src));
 8197   effect(KILL cr);
 8198 
 8199   size(2);
 8200   format %{ "OR     $dst,$src" %}
 8201   opcode(0x0B);
 8202   ins_encode( OpcP, RegReg( dst, src) );
 8203   ins_pipe( ialu_reg_reg );
 8204 %}
 8205 
 8206 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8207   match(Set dst (OrI dst (CastP2X src)));
 8208   effect(KILL cr);
 8209 
 8210   size(2);
 8211   format %{ "OR     $dst,$src" %}
 8212   opcode(0x0B);
 8213   ins_encode( OpcP, RegReg( dst, src) );
 8214   ins_pipe( ialu_reg_reg );
 8215 %}
 8216 
 8217 
 8218 // Or Register with Immediate
 8219 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8220   match(Set dst (OrI dst src));
 8221   effect(KILL cr);
 8222 
 8223   format %{ "OR     $dst,$src" %}
 8224   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8225   // ins_encode( RegImm( dst, src) );
 8226   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8227   ins_pipe( ialu_reg );
 8228 %}
 8229 
 8230 // Or Register with Memory
 8231 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8232   match(Set dst (OrI dst (LoadI src)));
 8233   effect(KILL cr);
 8234 
 8235   ins_cost(150);
 8236   format %{ "OR     $dst,$src" %}
 8237   opcode(0x0B);
 8238   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 8239   ins_pipe( ialu_reg_mem );
 8240 %}
 8241 
 8242 // Or Memory with Register
 8243 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8244   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8245   effect(KILL cr);
 8246 
 8247   ins_cost(150);
 8248   format %{ "OR     $dst,$src" %}
 8249   opcode(0x09);  /* Opcode 09 /r */
 8250   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 8251   ins_pipe( ialu_mem_reg );
 8252 %}
 8253 
 8254 // Or Memory with Immediate
 8255 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8256   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8257   effect(KILL cr);
 8258 
 8259   ins_cost(125);
 8260   format %{ "OR     $dst,$src" %}
 8261   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8262   // ins_encode( MemImm( dst, src) );
 8263   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
 8264   ins_pipe( ialu_mem_imm );
 8265 %}
 8266 
 8267 // ROL/ROR
 8268 // ROL expand
 8269 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8270   effect(USE_DEF dst, USE shift, KILL cr);
 8271 
 8272   format %{ "ROL    $dst, $shift" %}
 8273   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8274   ins_encode( OpcP, RegOpc( dst ));
 8275   ins_pipe( ialu_reg );
 8276 %}
 8277 
 8278 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8279   effect(USE_DEF dst, USE shift, KILL cr);
 8280 
 8281   format %{ "ROL    $dst, $shift" %}
 8282   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
 8283   ins_encode( RegOpcImm(dst, shift) );
 8284   ins_pipe(ialu_reg);
 8285 %}
 8286 
 8287 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8288   effect(USE_DEF dst, USE shift, KILL cr);
 8289 
 8290   format %{ "ROL    $dst, $shift" %}
 8291   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8292   ins_encode(OpcP, RegOpc(dst));
 8293   ins_pipe( ialu_reg_reg );
 8294 %}
 8295 // end of ROL expand
 8296 
 8297 // ROL 32bit by one once
 8298 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8299   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8300 
 8301   expand %{
 8302     rolI_eReg_imm1(dst, lshift, cr);
 8303   %}
 8304 %}
 8305 
 8306 // ROL 32bit var by imm8 once
 8307 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8308   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8309   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8310 
 8311   expand %{
 8312     rolI_eReg_imm8(dst, lshift, cr);
 8313   %}
 8314 %}
 8315 
 8316 // ROL 32bit var by var once
 8317 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8318   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8319 
 8320   expand %{
 8321     rolI_eReg_CL(dst, shift, cr);
 8322   %}
 8323 %}
 8324 
 8325 // ROL 32bit var by var once
 8326 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8327   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8328 
 8329   expand %{
 8330     rolI_eReg_CL(dst, shift, cr);
 8331   %}
 8332 %}
 8333 
 8334 // ROR expand
 8335 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8336   effect(USE_DEF dst, USE shift, KILL cr);
 8337 
 8338   format %{ "ROR    $dst, $shift" %}
 8339   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8340   ins_encode( OpcP, RegOpc( dst ) );
 8341   ins_pipe( ialu_reg );
 8342 %}
 8343 
 8344 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8345   effect (USE_DEF dst, USE shift, KILL cr);
 8346 
 8347   format %{ "ROR    $dst, $shift" %}
 8348   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
 8349   ins_encode( RegOpcImm(dst, shift) );
 8350   ins_pipe( ialu_reg );
 8351 %}
 8352 
 8353 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8354   effect(USE_DEF dst, USE shift, KILL cr);
 8355 
 8356   format %{ "ROR    $dst, $shift" %}
 8357   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8358   ins_encode(OpcP, RegOpc(dst));
 8359   ins_pipe( ialu_reg_reg );
 8360 %}
 8361 // end of ROR expand
 8362 
 8363 // ROR right once
 8364 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8365   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8366 
 8367   expand %{
 8368     rorI_eReg_imm1(dst, rshift, cr);
 8369   %}
 8370 %}
 8371 
 8372 // ROR 32bit by immI8 once
 8373 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8374   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8375   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8376 
 8377   expand %{
 8378     rorI_eReg_imm8(dst, rshift, cr);
 8379   %}
 8380 %}
 8381 
 8382 // ROR 32bit var by var once
 8383 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8384   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8385 
 8386   expand %{
 8387     rorI_eReg_CL(dst, shift, cr);
 8388   %}
 8389 %}
 8390 
 8391 // ROR 32bit var by var once
 8392 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8393   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8394 
 8395   expand %{
 8396     rorI_eReg_CL(dst, shift, cr);
 8397   %}
 8398 %}
 8399 
 8400 // Xor Instructions
 8401 // Xor Register with Register
 8402 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8403   match(Set dst (XorI dst src));
 8404   effect(KILL cr);
 8405 
 8406   size(2);
 8407   format %{ "XOR    $dst,$src" %}
 8408   opcode(0x33);
 8409   ins_encode( OpcP, RegReg( dst, src) );
 8410   ins_pipe( ialu_reg_reg );
 8411 %}
 8412 
 8413 // Xor Register with Immediate -1
 8414 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8415   match(Set dst (XorI dst imm));
 8416 
 8417   size(2);
 8418   format %{ "NOT    $dst" %}
 8419   ins_encode %{
 8420      __ notl($dst$$Register);
 8421   %}
 8422   ins_pipe( ialu_reg );
 8423 %}
 8424 
 8425 // Xor Register with Immediate
 8426 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8427   match(Set dst (XorI dst src));
 8428   effect(KILL cr);
 8429 
 8430   format %{ "XOR    $dst,$src" %}
 8431   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8432   // ins_encode( RegImm( dst, src) );
 8433   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8434   ins_pipe( ialu_reg );
 8435 %}
 8436 
 8437 // Xor Register with Memory
 8438 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8439   match(Set dst (XorI dst (LoadI src)));
 8440   effect(KILL cr);
 8441 
 8442   ins_cost(150);
 8443   format %{ "XOR    $dst,$src" %}
 8444   opcode(0x33);
 8445   ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark );
 8446   ins_pipe( ialu_reg_mem );
 8447 %}
 8448 
 8449 // Xor Memory with Register
 8450 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8451   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8452   effect(KILL cr);
 8453 
 8454   ins_cost(150);
 8455   format %{ "XOR    $dst,$src" %}
 8456   opcode(0x31);  /* Opcode 31 /r */
 8457   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 8458   ins_pipe( ialu_mem_reg );
 8459 %}
 8460 
 8461 // Xor Memory with Immediate
 8462 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8463   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8464   effect(KILL cr);
 8465 
 8466   ins_cost(125);
 8467   format %{ "XOR    $dst,$src" %}
 8468   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8469   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
 8470   ins_pipe( ialu_mem_imm );
 8471 %}
 8472 
 8473 //----------Convert Int to Boolean---------------------------------------------
 8474 
 8475 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8476   effect( DEF dst, USE src );
 8477   format %{ "MOV    $dst,$src" %}
 8478   ins_encode( enc_Copy( dst, src) );
 8479   ins_pipe( ialu_reg_reg );
 8480 %}
 8481 
 8482 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8483   effect( USE_DEF dst, USE src, KILL cr );
 8484 
 8485   size(4);
 8486   format %{ "NEG    $dst\n\t"
 8487             "ADC    $dst,$src" %}
 8488   ins_encode( neg_reg(dst),
 8489               OpcRegReg(0x13,dst,src) );
 8490   ins_pipe( ialu_reg_reg_long );
 8491 %}
 8492 
 8493 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8494   match(Set dst (Conv2B src));
 8495 
 8496   expand %{
 8497     movI_nocopy(dst,src);
 8498     ci2b(dst,src,cr);
 8499   %}
 8500 %}
 8501 
 8502 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8503   effect( DEF dst, USE src );
 8504   format %{ "MOV    $dst,$src" %}
 8505   ins_encode( enc_Copy( dst, src) );
 8506   ins_pipe( ialu_reg_reg );
 8507 %}
 8508 
 8509 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8510   effect( USE_DEF dst, USE src, KILL cr );
 8511   format %{ "NEG    $dst\n\t"
 8512             "ADC    $dst,$src" %}
 8513   ins_encode( neg_reg(dst),
 8514               OpcRegReg(0x13,dst,src) );
 8515   ins_pipe( ialu_reg_reg_long );
 8516 %}
 8517 
 8518 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8519   match(Set dst (Conv2B src));
 8520 
 8521   expand %{
 8522     movP_nocopy(dst,src);
 8523     cp2b(dst,src,cr);
 8524   %}
 8525 %}
 8526 
 8527 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8528   match(Set dst (CmpLTMask p q));
 8529   effect(KILL cr);
 8530   ins_cost(400);
 8531 
 8532   // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
 8533   format %{ "XOR    $dst,$dst\n\t"
 8534             "CMP    $p,$q\n\t"
 8535             "SETlt  $dst\n\t"
 8536             "NEG    $dst" %}
 8537   ins_encode %{
 8538     Register Rp = $p$$Register;
 8539     Register Rq = $q$$Register;
 8540     Register Rd = $dst$$Register;
 8541     Label done;
 8542     __ xorl(Rd, Rd);
 8543     __ cmpl(Rp, Rq);
 8544     __ setb(Assembler::less, Rd);
 8545     __ negl(Rd);
 8546   %}
 8547 
 8548   ins_pipe(pipe_slow);
 8549 %}
 8550 
 8551 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8552   match(Set dst (CmpLTMask dst zero));
 8553   effect(DEF dst, KILL cr);
 8554   ins_cost(100);
 8555 
 8556   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8557   ins_encode %{
 8558   __ sarl($dst$$Register, 31);
 8559   %}
 8560   ins_pipe(ialu_reg);
 8561 %}
 8562 
 8563 /* better to save a register than avoid a branch */
 8564 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8565   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8566   effect(KILL cr);
 8567   ins_cost(400);
 8568   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8569             "JGE    done\n\t"
 8570             "ADD    $p,$y\n"
 8571             "done:  " %}
 8572   ins_encode %{
 8573     Register Rp = $p$$Register;
 8574     Register Rq = $q$$Register;
 8575     Register Ry = $y$$Register;
 8576     Label done;
 8577     __ subl(Rp, Rq);
 8578     __ jccb(Assembler::greaterEqual, done);
 8579     __ addl(Rp, Ry);
 8580     __ bind(done);
 8581   %}
 8582 
 8583   ins_pipe(pipe_cmplt);
 8584 %}
 8585 
 8586 /* better to save a register than avoid a branch */
 8587 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8588   match(Set y (AndI (CmpLTMask p q) y));
 8589   effect(KILL cr);
 8590 
 8591   ins_cost(300);
 8592 
 8593   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8594             "JLT      done\n\t"
 8595             "XORL     $y, $y\n"
 8596             "done:  " %}
 8597   ins_encode %{
 8598     Register Rp = $p$$Register;
 8599     Register Rq = $q$$Register;
 8600     Register Ry = $y$$Register;
 8601     Label done;
 8602     __ cmpl(Rp, Rq);
 8603     __ jccb(Assembler::less, done);
 8604     __ xorl(Ry, Ry);
 8605     __ bind(done);
 8606   %}
 8607 
 8608   ins_pipe(pipe_cmplt);
 8609 %}
 8610 
 8611 /* If I enable this, I encourage spilling in the inner loop of compress.
 8612 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8613   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8614 */
 8615 //----------Overflow Math Instructions-----------------------------------------
 8616 
 8617 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8618 %{
 8619   match(Set cr (OverflowAddI op1 op2));
 8620   effect(DEF cr, USE_KILL op1, USE op2);
 8621 
 8622   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8623 
 8624   ins_encode %{
 8625     __ addl($op1$$Register, $op2$$Register);
 8626   %}
 8627   ins_pipe(ialu_reg_reg);
 8628 %}
 8629 
 8630 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8631 %{
 8632   match(Set cr (OverflowAddI op1 op2));
 8633   effect(DEF cr, USE_KILL op1, USE op2);
 8634 
 8635   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8636 
 8637   ins_encode %{
 8638     __ addl($op1$$Register, $op2$$constant);
 8639   %}
 8640   ins_pipe(ialu_reg_reg);
 8641 %}
 8642 
 8643 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8644 %{
 8645   match(Set cr (OverflowSubI op1 op2));
 8646 
 8647   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8648   ins_encode %{
 8649     __ cmpl($op1$$Register, $op2$$Register);
 8650   %}
 8651   ins_pipe(ialu_reg_reg);
 8652 %}
 8653 
 8654 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8655 %{
 8656   match(Set cr (OverflowSubI op1 op2));
 8657 
 8658   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8659   ins_encode %{
 8660     __ cmpl($op1$$Register, $op2$$constant);
 8661   %}
 8662   ins_pipe(ialu_reg_reg);
 8663 %}
 8664 
 8665 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8666 %{
 8667   match(Set cr (OverflowSubI zero op2));
 8668   effect(DEF cr, USE_KILL op2);
 8669 
 8670   format %{ "NEG    $op2\t# overflow check int" %}
 8671   ins_encode %{
 8672     __ negl($op2$$Register);
 8673   %}
 8674   ins_pipe(ialu_reg_reg);
 8675 %}
 8676 
 8677 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8678 %{
 8679   match(Set cr (OverflowMulI op1 op2));
 8680   effect(DEF cr, USE_KILL op1, USE op2);
 8681 
 8682   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8683   ins_encode %{
 8684     __ imull($op1$$Register, $op2$$Register);
 8685   %}
 8686   ins_pipe(ialu_reg_reg_alu0);
 8687 %}
 8688 
 8689 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8690 %{
 8691   match(Set cr (OverflowMulI op1 op2));
 8692   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8693 
 8694   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8695   ins_encode %{
 8696     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8697   %}
 8698   ins_pipe(ialu_reg_reg_alu0);
 8699 %}
 8700 
 8701 // Integer Absolute Instructions
 8702 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8703 %{
 8704   match(Set dst (AbsI src));
 8705   effect(TEMP dst, TEMP tmp, KILL cr);
 8706   format %{ "movl $tmp, $src\n\t"
 8707             "sarl $tmp, 31\n\t"
 8708             "movl $dst, $src\n\t"
 8709             "xorl $dst, $tmp\n\t"
 8710             "subl $dst, $tmp\n"
 8711           %}
 8712   ins_encode %{
 8713     __ movl($tmp$$Register, $src$$Register);
 8714     __ sarl($tmp$$Register, 31);
 8715     __ movl($dst$$Register, $src$$Register);
 8716     __ xorl($dst$$Register, $tmp$$Register);
 8717     __ subl($dst$$Register, $tmp$$Register);
 8718   %}
 8719 
 8720   ins_pipe(ialu_reg_reg);
 8721 %}
 8722 
 8723 //----------Long Instructions------------------------------------------------
 8724 // Add Long Register with Register
 8725 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
        // 64-bit dst += src on register pairs. Per the format: ADD on the low
        // words, then ADC on the high words so the carry propagates.
        // opcode() supplies the two opcode bytes used by the RegReg_Lo /
        // RegReg_Hi encode classes (defined elsewhere). Flags are clobbered.
 8726   match(Set dst (AddL dst src));
 8727   effect(KILL cr);
 8728   ins_cost(200);
 8729   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8730             "ADC    $dst.hi,$src.hi" %}
 8731   opcode(0x03, 0x13);
 8732   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8733   ins_pipe( ialu_reg_reg_long );
 8734 %}
 8735 
 8736 // Add Long Register with Immediate
 8737 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
        // 64-bit dst += constant. Per the format: ADD of the low half of the
        // immediate, then ADC of the high half. The Long_OpcSErm_Lo/_Hi encode
        // classes (defined elsewhere) consume the opcode() triple below.
 8738   match(Set dst (AddL dst src));
 8739   effect(KILL cr);
 8740   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8741             "ADC    $dst.hi,$src.hi" %}
 8742   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8743   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8744   ins_pipe( ialu_reg_long );
 8745 %}
 8746 
 8747 // Add Long Register with Memory
 8748 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
        // 64-bit dst += [mem], folding the LoadL into the add. Per the format
        // the low word is at $mem and the high word at $mem+4. Costed at 125,
        // below the 200 of the register-register form.
 8749   match(Set dst (AddL dst (LoadL mem)));
 8750   effect(KILL cr);
 8751   ins_cost(125);
 8752   format %{ "ADD    $dst.lo,$mem\n\t"
 8753             "ADC    $dst.hi,$mem+4" %}
 8754   opcode(0x03, 0x13);
        // Primary opcode + low-word operand, then secondary opcode + high-word operand.
 8755   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 8756   ins_pipe( ialu_reg_long_mem );
 8757 %}
 8758 
 8759 // Subtract Long Register with Register.
 8760 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
        // 64-bit dst -= src on register pairs. Per the format: SUB on the low
        // words, then SBB on the high words so the borrow propagates.
        // Encode classes RegReg_Lo / RegReg_Hi are defined elsewhere.
 8761   match(Set dst (SubL dst src));
 8762   effect(KILL cr);
 8763   ins_cost(200);
 8764   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8765             "SBB    $dst.hi,$src.hi" %}
 8766   opcode(0x2B, 0x1B);
 8767   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8768   ins_pipe( ialu_reg_reg_long );
 8769 %}
 8770 
 8771 // Subtract Long Register with Immediate
 8772 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
        // 64-bit dst -= constant. Per the format: SUB of the low half of the
        // immediate, then SBB of the high half. Encoded through the
        // Long_OpcSErm_Lo/_Hi encode classes (defined elsewhere).
 8773   match(Set dst (SubL dst src));
 8774   effect(KILL cr);
 8775   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8776             "SBB    $dst.hi,$src.hi" %}
 8777   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8778   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8779   ins_pipe( ialu_reg_long );
 8780 %}
 8781 
 8782 // Subtract Long Register with Memory
 8783 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
        // 64-bit dst -= [mem], folding the LoadL into the subtract. Per the
        // format the low word is at $mem and the high word at $mem+4.
 8784   match(Set dst (SubL dst (LoadL mem)));
 8785   effect(KILL cr);
 8786   ins_cost(125);
 8787   format %{ "SUB    $dst.lo,$mem\n\t"
 8788             "SBB    $dst.hi,$mem+4" %}
 8789   opcode(0x2B, 0x1B);
        // Primary opcode + low-word operand, then secondary opcode + high-word operand.
 8790   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 8791   ins_pipe( ialu_reg_long_mem );
 8792 %}
 8793 
 8794 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
        // Negate Long: matches (0 - dst) in place. Per the format the sequence
        // is NEG hi; NEG lo; SBB hi,0 (the SBB applies the borrow from the low
        // word). The neg_long encode class is defined elsewhere.
 8795   match(Set dst (SubL zero dst));
 8796   effect(KILL cr);
 8797   ins_cost(300);
 8798   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8799   ins_encode( neg_long(dst) );
 8800   ins_pipe( ialu_reg_reg_long );
 8801 %}
 8802 
 8803 // And Long Register with Register
 8804 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
        // 64-bit dst &= src as two independent 32-bit ANDs (low, then high).
        // The same opcode byte is given as primary and secondary because both
        // halves use the same operation. Flags are clobbered.
 8805   match(Set dst (AndL dst src));
 8806   effect(KILL cr);
 8807   format %{ "AND    $dst.lo,$src.lo\n\t"
 8808             "AND    $dst.hi,$src.hi" %}
 8809   opcode(0x23,0x23);
 8810   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8811   ins_pipe( ialu_reg_reg_long );
 8812 %}
 8813 
 8814 // And Long Register with Immediate
 8815 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
        // 64-bit dst &= constant: AND of the low half of the immediate, then
        // AND of the high half. Encoded through the Long_OpcSErm_Lo/_Hi encode
        // classes (defined elsewhere).
 8816   match(Set dst (AndL dst src));
 8817   effect(KILL cr);
 8818   format %{ "AND    $dst.lo,$src.lo\n\t"
 8819             "AND    $dst.hi,$src.hi" %}
 8820   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8821   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8822   ins_pipe( ialu_reg_long );
 8823 %}
 8824 
 8825 // And Long Register with Memory
 8826 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
        // 64-bit dst &= [mem], folding the LoadL into the AND. Per the format
        // the low word is at $mem and the high word at $mem+4.
 8827   match(Set dst (AndL dst (LoadL mem)));
 8828   effect(KILL cr);
 8829   ins_cost(125);
 8830   format %{ "AND    $dst.lo,$mem\n\t"
 8831             "AND    $dst.hi,$mem+4" %}
 8832   opcode(0x23, 0x23);
        // Primary opcode + low-word operand, then secondary opcode + high-word operand.
 8833   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 8834   ins_pipe( ialu_reg_long_mem );
 8835 %}
 8836 
 8837 // BMI1 instructions
 8838 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
        // Long and-not: matches (src1 ^ -1) & src2, i.e. (~src1) & src2, and
        // emits one 32-bit andnl per half. Only selected when
        // UseBMI1Instructions is set. dst is TEMP; presumably so it cannot
        // share a register with an input, since the low half is written before
        // the high halves of the sources are read.
 8839   match(Set dst (AndL (XorL src1 minus_1) src2));
 8840   predicate(UseBMI1Instructions);
 8841   effect(KILL cr, TEMP dst);
 8842
 8843   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8844             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 8845          %}
 8846
 8847   ins_encode %{
 8848     Register Rdst = $dst$$Register;
 8849     Register Rsrc1 = $src1$$Register;
 8850     Register Rsrc2 = $src2$$Register;
 8851     __ andnl(Rdst, Rsrc1, Rsrc2);
          // HIGH_FROM_LOW (defined elsewhere) yields the high-half register of a pair.
 8852     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 8853   %}
 8854   ins_pipe(ialu_reg_reg_long);
 8855 %}
 8856 
 8857 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
        // Long and-not with the second operand loaded from memory:
        // (~src1) & [src2]. Emits one 32-bit andnl per half; the high word is
        // addressed at displacement + 4. Requires UseBMI1Instructions.
 8858   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 8859   predicate(UseBMI1Instructions);
 8860   effect(KILL cr, TEMP dst);
 8861
 8862   ins_cost(125);
 8863   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 8864             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 8865          %}
 8866
 8867   ins_encode %{
 8868     Register Rdst = $dst$$Register;
 8869     Register Rsrc1 = $src1$$Register;
          // Same base/index/scale as src2, displacement advanced by 4 for the high word.
 8870     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 8871
 8872     __ andnl(Rdst, Rsrc1, $src2$$Address);
 8873     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 8874   %}
 8875   ins_pipe(ialu_reg_mem);
 8876 %}
 8877 
 8878 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
        // Long "isolate lowest set bit": matches (0 - src) & src using two
        // 32-bit BLSI instructions. The high result word is preset to 0; if
        // the low-word BLSI result is non-zero (JNZ) the answer is complete,
        // otherwise BLSI is applied to the high word. Requires
        // UseBMI1Instructions. dst is TEMP and flags are clobbered.
 8879   match(Set dst (AndL (SubL imm_zero src) src));
 8880   predicate(UseBMI1Instructions);
 8881   effect(KILL cr, TEMP dst);
 8882
 8883   format %{ "MOVL   $dst.hi, 0\n\t"
 8884             "BLSIL  $dst.lo, $src.lo\n\t"
 8885             "JNZ    done\n\t"
 8886             "BLSIL  $dst.hi, $src.hi\n"
 8887             "done:"
 8888          %}
 8889
 8890   ins_encode %{
 8891     Label done;
 8892     Register Rdst = $dst$$Register;
 8893     Register Rsrc = $src$$Register;
 8894     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8895     __ blsil(Rdst, Rsrc);
 8896     __ jccb(Assembler::notZero, done);   // low result non-zero: high word stays 0
 8897     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 8898     __ bind(done);
 8899   %}
 8900   ins_pipe(ialu_reg);
 8901 %}
 8902 
 8903 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
        // Memory-operand form of the long "isolate lowest set bit": matches
        // (0 - [src]) & [src]. Same control flow as the register form: preset
        // the high word to 0, BLSI the low word, and only process the high
        // word (at displacement + 4) when the low result was zero.
 8904   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 8905   predicate(UseBMI1Instructions);
 8906   effect(KILL cr, TEMP dst);
 8907
 8908   ins_cost(125);
 8909   format %{ "MOVL   $dst.hi, 0\n\t"
 8910             "BLSIL  $dst.lo, $src\n\t"
 8911             "JNZ    done\n\t"
 8912             "BLSIL  $dst.hi, $src+4\n"
 8913             "done:"
 8914          %}
 8915
 8916   ins_encode %{
 8917     Label done;
 8918     Register Rdst = $dst$$Register;
          // Address of the high 32 bits of the long in memory.
 8919     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 8920
 8921     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8922     __ blsil(Rdst, $src$$Address);
 8923     __ jccb(Assembler::notZero, done);   // low result non-zero: high word stays 0
 8924     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 8925     __ bind(done);
 8926   %}
 8927   ins_pipe(ialu_reg_mem);
 8928 %}
 8929 
 8930 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 8931 %{
        // Long "mask up to lowest set bit": matches (src + -1) ^ src using two
        // 32-bit BLSMSK instructions. The high result word is preset to 0 and
        // is only recomputed from src.hi when the low-word BLSMSK leaves the
        // carry flag set.
        // NOTE(review): relies on BLSMSK setting CF exactly when its source is
        // zero (Intel SDM); confirm against the ISA reference.
 8932   match(Set dst (XorL (AddL src minus_1) src));
 8933   predicate(UseBMI1Instructions);
 8934   effect(KILL cr, TEMP dst);
 8935
 8936   format %{ "MOVL    $dst.hi, 0\n\t"
 8937             "BLSMSKL $dst.lo, $src.lo\n\t"
 8938             "JNC     done\n\t"
 8939             "BLSMSKL $dst.hi, $src.hi\n"
 8940             "done:"
 8941          %}
 8942
 8943   ins_encode %{
 8944     Label done;
 8945     Register Rdst = $dst$$Register;
 8946     Register Rsrc = $src$$Register;
 8947     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8948     __ blsmskl(Rdst, Rsrc);
 8949     __ jccb(Assembler::carryClear, done);   // carry clear: high word stays 0
 8950     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 8951     __ bind(done);
 8952   %}
 8953
 8954   ins_pipe(ialu_reg);
 8955 %}
 8956 
 8957 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 8958 %{
        // Memory-operand form of the long BLSMSK pattern: matches
        // ([src] + -1) ^ [src]. Same control flow as the register form; the
        // high word is read from displacement + 4 and only processed when the
        // low-word BLSMSK leaves the carry flag set.
 8959   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 8960   predicate(UseBMI1Instructions);
 8961   effect(KILL cr, TEMP dst);
 8962
 8963   ins_cost(125);
 8964   format %{ "MOVL    $dst.hi, 0\n\t"
 8965             "BLSMSKL $dst.lo, $src\n\t"
 8966             "JNC     done\n\t"
 8967             "BLSMSKL $dst.hi, $src+4\n"
 8968             "done:"
 8969          %}
 8970
 8971   ins_encode %{
 8972     Label done;
 8973     Register Rdst = $dst$$Register;
          // Address of the high 32 bits of the long in memory.
 8974     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 8975
 8976     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8977     __ blsmskl(Rdst, $src$$Address);
 8978     __ jccb(Assembler::carryClear, done);   // carry clear: high word stays 0
 8979     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 8980     __ bind(done);
 8981   %}
 8982
 8983   ins_pipe(ialu_reg_mem);
 8984 %}
 8985 
 8986 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 8987 %{
        // Long "reset lowest set bit": matches (src + -1) & src using two
        // 32-bit BLSR instructions. The high result word is first copied from
        // src.hi unchanged; it is only run through BLSR when the low-word BLSR
        // leaves the carry flag set.
        // NOTE(review): relies on BLSR setting CF exactly when its source is
        // zero (Intel SDM); confirm against the ISA reference.
 8988   match(Set dst (AndL (AddL src minus_1) src) );
 8989   predicate(UseBMI1Instructions);
 8990   effect(KILL cr, TEMP dst);
 8991
 8992   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 8993             "BLSRL  $dst.lo, $src.lo\n\t"
 8994             "JNC    done\n\t"
 8995             "BLSRL  $dst.hi, $src.hi\n"
 8996             "done:"
 8997   %}
 8998
 8999   ins_encode %{
 9000     Label done;
 9001     Register Rdst = $dst$$Register;
 9002     Register Rsrc = $src$$Register;
 9003     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9004     __ blsrl(Rdst, Rsrc);
 9005     __ jccb(Assembler::carryClear, done);   // carry clear: high word is already final
 9006     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9007     __ bind(done);
 9008   %}
 9009
 9010   ins_pipe(ialu_reg);
 9011 %}
 9012 
 9013 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9014 %{
        // Memory-operand form of the long BLSR pattern: matches
        // ([src] + -1) & [src]. The high result word is first loaded unchanged
        // from displacement + 4, and only run through BLSR when the low-word
        // BLSR leaves the carry flag set.
 9015   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9016   predicate(UseBMI1Instructions);
 9017   effect(KILL cr, TEMP dst);
 9018
 9019   ins_cost(125);
 9020   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9021             "BLSRL  $dst.lo, $src\n\t"
 9022             "JNC    done\n\t"
 9023             "BLSRL  $dst.hi, $src+4\n"
 9024             "done:"
 9025   %}
 9026
 9027   ins_encode %{
 9028     Label done;
 9029     Register Rdst = $dst$$Register;
          // Address of the high 32 bits of the long in memory.
 9030     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9031     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9032     __ blsrl(Rdst, $src$$Address);
 9033     __ jccb(Assembler::carryClear, done);   // carry clear: high word is already final
 9034     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9035     __ bind(done);
 9036   %}
 9037
 9038   ins_pipe(ialu_reg_mem);
 9039 %}
 9040 
 9041 // Or Long Register with Register
 9042 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
        // 64-bit dst |= src as two independent 32-bit ORs (low, then high).
        // The same opcode byte serves as primary and secondary. Flags are
        // clobbered.
 9043   match(Set dst (OrL dst src));
 9044   effect(KILL cr);
 9045   format %{ "OR     $dst.lo,$src.lo\n\t"
 9046             "OR     $dst.hi,$src.hi" %}
 9047   opcode(0x0B,0x0B);
 9048   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9049   ins_pipe( ialu_reg_reg_long );
 9050 %}
 9051 
 9052 // Or Long Register with Immediate
 9053 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
        // 64-bit dst |= constant: OR of the low half of the immediate, then OR
        // of the high half. Encoded through the Long_OpcSErm_Lo/_Hi encode
        // classes (defined elsewhere).
 9054   match(Set dst (OrL dst src));
 9055   effect(KILL cr);
 9056   format %{ "OR     $dst.lo,$src.lo\n\t"
 9057             "OR     $dst.hi,$src.hi" %}
 9058   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9059   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9060   ins_pipe( ialu_reg_long );
 9061 %}
 9062 
 9063 // Or Long Register with Memory
 9064 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
        // 64-bit dst |= [mem], folding the LoadL into the OR. Per the format
        // the low word is at $mem and the high word at $mem+4.
 9065   match(Set dst (OrL dst (LoadL mem)));
 9066   effect(KILL cr);
 9067   ins_cost(125);
 9068   format %{ "OR     $dst.lo,$mem\n\t"
 9069             "OR     $dst.hi,$mem+4" %}
 9070   opcode(0x0B,0x0B);
        // Primary opcode + low-word operand, then secondary opcode + high-word operand.
 9071   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 9072   ins_pipe( ialu_reg_long_mem );
 9073 %}
 9074 
 9075 // Xor Long Register with Register
 9076 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
        // 64-bit dst ^= src as two independent 32-bit XORs (low, then high).
        // The same opcode byte serves as primary and secondary. Flags are
        // clobbered.
 9077   match(Set dst (XorL dst src));
 9078   effect(KILL cr);
 9079   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9080             "XOR    $dst.hi,$src.hi" %}
 9081   opcode(0x33,0x33);
 9082   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9083   ins_pipe( ialu_reg_reg_long );
 9084 %}
 9085 
 9086 // Xor Long Register with Immediate -1
 9087 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
        // dst ^ -1 is a bitwise complement, so this emits NOT on each half
        // instead of XOR. Unlike the other long XOR forms in this file, no
        // flags operand is declared or killed here.
 9088   match(Set dst (XorL dst imm));
 9089   format %{ "NOT    $dst.lo\n\t"
 9090             "NOT    $dst.hi" %}
 9091   ins_encode %{
 9092      __ notl($dst$$Register);
 9093      __ notl(HIGH_FROM_LOW($dst$$Register));
 9094   %}
 9095   ins_pipe( ialu_reg_long );
 9096 %}
 9097 
 9098 // Xor Long Register with Immediate
 9099 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
        // 64-bit dst ^= constant: XOR of the low half of the immediate, then
        // XOR of the high half. Encoded through the Long_OpcSErm_Lo/_Hi encode
        // classes (defined elsewhere).
 9100   match(Set dst (XorL dst src));
 9101   effect(KILL cr);
 9102   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9103             "XOR    $dst.hi,$src.hi" %}
 9104   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9105   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9106   ins_pipe( ialu_reg_long );
 9107 %}
 9108 
 9109 // Xor Long Register with Memory
 9110 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
        // 64-bit dst ^= [mem], folding the LoadL into the XOR. Per the format
        // the low word is at $mem and the high word at $mem+4.
 9111   match(Set dst (XorL dst (LoadL mem)));
 9112   effect(KILL cr);
 9113   ins_cost(125);
 9114   format %{ "XOR    $dst.lo,$mem\n\t"
 9115             "XOR    $dst.hi,$mem+4" %}
 9116   opcode(0x33,0x33);
        // Primary opcode + low-word operand, then secondary opcode + high-word operand.
 9117   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 9118   ins_pipe( ialu_reg_long_mem );
 9119 %}
 9120 
 9121 // Shift Left Long by 1
 9122 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
        // Long << 1 implemented as a 64-bit self-add: ADD lo,lo doubles the low
        // word and ADC hi,hi doubles the high word plus the carry out of the
        // low word. Only selected when UseNewLongLShift is set; cost 100 versus
        // 200 for the SHLD-based 1-31 form.
 9123   predicate(UseNewLongLShift);
 9124   match(Set dst (LShiftL dst cnt));
 9125   effect(KILL cr);
 9126   ins_cost(100);
 9127   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9128             "ADC    $dst.hi,$dst.hi" %}
 9129   ins_encode %{
 9130     __ addl($dst$$Register,$dst$$Register);
 9131     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9132   %}
 9133   ins_pipe( ialu_reg_long );
 9134 %}
 9135 
 9136 // Shift Left Long by 2
 9137 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
        // Long << 2 implemented as two consecutive 64-bit self-adds (each
        // ADD lo,lo / ADC hi,hi pair doubles the value). Only selected when
        // UseNewLongLShift is set.
 9138   predicate(UseNewLongLShift);
 9139   match(Set dst (LShiftL dst cnt));
 9140   effect(KILL cr);
 9141   ins_cost(100);
 9142   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9143             "ADC    $dst.hi,$dst.hi\n\t"
 9144             "ADD    $dst.lo,$dst.lo\n\t"
 9145             "ADC    $dst.hi,$dst.hi" %}
 9146   ins_encode %{
 9147     __ addl($dst$$Register,$dst$$Register);
 9148     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9149     __ addl($dst$$Register,$dst$$Register);
 9150     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9151   %}
 9152   ins_pipe( ialu_reg_long );
 9153 %}
 9154 
 9155 // Shift Left Long by 3
 9156 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
        // Long << 3 implemented as three consecutive 64-bit self-adds (each
        // ADD lo,lo / ADC hi,hi pair doubles the value). Only selected when
        // UseNewLongLShift is set.
 9157   predicate(UseNewLongLShift);
 9158   match(Set dst (LShiftL dst cnt));
 9159   effect(KILL cr);
 9160   ins_cost(100);
 9161   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9162             "ADC    $dst.hi,$dst.hi\n\t"
 9163             "ADD    $dst.lo,$dst.lo\n\t"
 9164             "ADC    $dst.hi,$dst.hi\n\t"
 9165             "ADD    $dst.lo,$dst.lo\n\t"
 9166             "ADC    $dst.hi,$dst.hi" %}
 9167   ins_encode %{
 9168     __ addl($dst$$Register,$dst$$Register);
 9169     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9170     __ addl($dst$$Register,$dst$$Register);
 9171     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9172     __ addl($dst$$Register,$dst$$Register);
 9173     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9174   %}
 9175   ins_pipe( ialu_reg_long );
 9176 %}
 9177 
 9178 // Shift Left Long by 1-31
 9179 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
        // Long << constant in [1,31]. Per the format: SHLD shifts the high word
        // left while pulling bits in from the low word, then SHL shifts the
        // low word. The move_long_small_shift encode class (defined elsewhere)
        // consumes the opcode() triple below.
 9180   match(Set dst (LShiftL dst cnt));
 9181   effect(KILL cr);
 9182   ins_cost(200);
 9183   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9184             "SHL    $dst.lo,$cnt" %}
 9185   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9186   ins_encode( move_long_small_shift(dst,cnt) );
 9187   ins_pipe( ialu_reg_long );
 9188 %}
 9189 
 9190 // Shift Left Long by 32-63
 9191 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
        // Long << constant in [32,63]. Per the format: the low word moves into
        // the high word, the high word is shifted left by (cnt - 32), and the
        // low word is zeroed. The move_long_big_shift_clr encode class is
        // defined elsewhere.
 9192   match(Set dst (LShiftL dst cnt));
 9193   effect(KILL cr);
 9194   ins_cost(300);
 9195   format %{ "MOV    $dst.hi,$dst.lo\n"
 9196           "\tSHL    $dst.hi,$cnt-32\n"
 9197           "\tXOR    $dst.lo,$dst.lo" %}
 9198   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9199   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9200   ins_pipe( ialu_reg_long );
 9201 %}
 9202 
 9203 // Shift Left Long by variable
 9204 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
        // Long << variable count held in the eCXRegI operand. Per the format:
        // if bit 5 of the count (value 32) is set, the low word is first moved
        // into the high word and the low word zeroed; then SHLD/SHL apply the
        // count. size(17) declares the fixed byte length of the sequence
        // emitted by the shift_left_long encode class (defined elsewhere).
 9205   match(Set dst (LShiftL dst shift));
 9206   effect(KILL cr);
 9207   ins_cost(500+200);
 9208   size(17);
 9209   format %{ "TEST   $shift,32\n\t"
 9210             "JEQ,s  small\n\t"
 9211             "MOV    $dst.hi,$dst.lo\n\t"
 9212             "XOR    $dst.lo,$dst.lo\n"
 9213     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9214             "SHL    $dst.lo,$shift" %}
 9215   ins_encode( shift_left_long( dst, shift ) );
 9216   ins_pipe( pipe_slow );
 9217 %}
 9218 
 9219 // Shift Right Long by 1-31
 9220 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
        // Unsigned long >>> constant in [1,31] (matches URShiftL). Per the
        // format: SHRD shifts the low word right while pulling bits in from
        // the high word, then SHR shifts the high word.
 9221   match(Set dst (URShiftL dst cnt));
 9222   effect(KILL cr);
 9223   ins_cost(200);
 9224   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9225             "SHR    $dst.hi,$cnt" %}
 9226   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9227   ins_encode( move_long_small_shift(dst,cnt) );
 9228   ins_pipe( ialu_reg_long );
 9229 %}
 9230 
 9231 // Shift Right Long by 32-63
 9232 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
        // Unsigned long >>> constant in [32,63] (matches URShiftL). Per the
        // format: the high word moves into the low word, the low word is
        // shifted right by (cnt - 32), and the high word is zeroed.
 9233   match(Set dst (URShiftL dst cnt));
 9234   effect(KILL cr);
 9235   ins_cost(300);
 9236   format %{ "MOV    $dst.lo,$dst.hi\n"
 9237           "\tSHR    $dst.lo,$cnt-32\n"
 9238           "\tXOR    $dst.hi,$dst.hi" %}
 9239   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9240   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9241   ins_pipe( ialu_reg_long );
 9242 %}
 9243 
 9244 // Shift Right Long by variable
 9245 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
        // Unsigned long >>> variable count held in the eCXRegI operand. Per the
        // format: if bit 5 of the count (value 32) is set, the high word is
        // first moved into the low word and the high word zeroed; then
        // SHRD/SHR apply the count. size(17) declares the fixed byte length of
        // the sequence emitted by shift_right_long (defined elsewhere).
 9246   match(Set dst (URShiftL dst shift));
 9247   effect(KILL cr);
 9248   ins_cost(600);
 9249   size(17);
 9250   format %{ "TEST   $shift,32\n\t"
 9251             "JEQ,s  small\n\t"
 9252             "MOV    $dst.lo,$dst.hi\n\t"
 9253             "XOR    $dst.hi,$dst.hi\n"
 9254     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9255             "SHR    $dst.hi,$shift" %}
 9256   ins_encode( shift_right_long( dst, shift ) );
 9257   ins_pipe( pipe_slow );
 9258 %}
 9259 
 9260 // Shift Right arithmetic Long by 1-31
 9261 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
        // Signed long >> constant in [1,31] (matches RShiftL). Per the format:
        // SHRD shifts the low word right while pulling bits in from the high
        // word, then SAR shifts the high word, preserving its sign.
 9262   match(Set dst (RShiftL dst cnt));
 9263   effect(KILL cr);
 9264   ins_cost(200);
 9265   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9266             "SAR    $dst.hi,$cnt" %}
 9267   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9268   ins_encode( move_long_small_shift(dst,cnt) );
 9269   ins_pipe( ialu_reg_long );
 9270 %}
 9271 
 9272 // Shift Right arithmetic Long by 32-63
 9273 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
        // Signed long >> constant in [32,63] (matches RShiftL). Per the format:
        // the high word moves into the low word, the low word is shifted
        // arithmetically by (cnt - 32), and the high word is filled with the
        // sign via SAR by 31.
 9274   match(Set dst (RShiftL dst cnt));
 9275   effect(KILL cr);
 9276   ins_cost(300);
 9277   format %{ "MOV    $dst.lo,$dst.hi\n"
 9278           "\tSAR    $dst.lo,$cnt-32\n"
 9279           "\tSAR    $dst.hi,31" %}
 9280   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9281   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9282   ins_pipe( ialu_reg_long );
 9283 %}
 9284 
 9285 // Shift Right arithmetic Long by variable
 9286 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
        // Signed long >> variable count held in the eCXRegI operand. Per the
        // format: if bit 5 of the count (value 32) is set, the high word is
        // first moved into the low word and the high word sign-filled (SAR 31);
        // then SHRD/SAR apply the count. size(18) declares the fixed byte
        // length of the sequence emitted by shift_right_arith_long (defined
        // elsewhere); it is one byte more than the unsigned variant's 17.
 9287   match(Set dst (RShiftL dst shift));
 9288   effect(KILL cr);
 9289   ins_cost(600);
 9290   size(18);
 9291   format %{ "TEST   $shift,32\n\t"
 9292             "JEQ,s  small\n\t"
 9293             "MOV    $dst.lo,$dst.hi\n\t"
 9294             "SAR    $dst.hi,31\n"
 9295     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9296             "SAR    $dst.hi,$shift" %}
 9297   ins_encode( shift_right_arith_long( dst, shift ) );
 9298   ins_pipe( pipe_slow );
 9299 %}
 9300 
 9301 
 9302 //----------Double Instructions------------------------------------------------
 9303 // Double Math
 9304 
 9305 // Compare & branch
 9306 
 9307 // P6 version of double compare (x87), sets condition codes in EFLAGS
 9308 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
        // Selected when cmov is supported and UseSSE <= 1. Pushes src1 and
        // compares it with src2 using FUCOMIP, then runs the cmpF_P6_fixup
        // encode class (defined elsewhere). Per the format, the fixup handles
        // the NaN case by writing AH and executing SAHF, which is why rax is
        // declared KILL.
 9309   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9310   match(Set cr (CmpD src1 src2));
 9311   effect(KILL rax);
 9312   ins_cost(150);
 9313   format %{ "FLD    $src1\n\t"
 9314             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9315             "JNP    exit\n\t"
 9316             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9317             "SAHF\n"
 9318      "exit:\tNOP               // avoid branch to branch" %}
 9319   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9320   ins_encode( Push_Reg_DPR(src1),
 9321               OpcP, RegOpc(src2),
 9322               cmpF_P6_fixup );
 9323   ins_pipe( pipe_slow );
 9324 %}
 9325 
 9326 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
        // Same FLD + FUCOMIP compare as cmpDPR_cc_P6 but without the NaN fixup
        // and without killing rax. The result is typed eFlagsRegUCF rather
        // than eFlagsRegU.
        // NOTE(review): presumably eFlagsRegUCF marks consumers that do not
        // need the unordered fixup; confirm against the operand definition.
 9327   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9328   match(Set cr (CmpD src1 src2));
 9329   ins_cost(150);
 9330   format %{ "FLD    $src1\n\t"
 9331             "FUCOMIP ST,$src2  // P6 instruction" %}
 9332   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9333   ins_encode( Push_Reg_DPR(src1),
 9334               OpcP, RegOpc(src2));
 9335   ins_pipe( pipe_slow );
 9336 %}
 9337 
 9338 // Compare & branch
 9339 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
        // x87 double compare for UseSSE <= 1 with no cmov requirement; costed
        // at 200, above the 150 of the P6 forms. Per the format: FCOMp, then
        // the FPU status word is copied to AX (FNSTSW), adjusted so that
        // unordered is treated as "less than", and transferred to EFLAGS with
        // SAHF. rax is KILLed because the sequence goes through AX. The
        // fpu_flags encode class is defined elsewhere.
 9340   predicate(UseSSE<=1);
 9341   match(Set cr (CmpD src1 src2));
 9342   effect(KILL rax);
 9343   ins_cost(200);
 9344   format %{ "FLD    $src1\n\t"
 9345             "FCOMp  $src2\n\t"
 9346             "FNSTSW AX\n\t"
 9347             "TEST   AX,0x400\n\t"
 9348             "JZ,s   flags\n\t"
 9349             "MOV    AH,1\t# unordered treat as LT\n"
 9350     "flags:\tSAHF" %}
 9351   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9352   ins_encode( Push_Reg_DPR(src1),
 9353               OpcP, RegOpc(src2),
 9354               fpu_flags);
 9355   ins_pipe( pipe_slow );
 9356 %}
 9357 
 9358 // Compare vs zero into -1,0,1
 9359 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
        // Three-way compare (CmpD3) of an x87 double against the constant
        // zero, producing an int in dst. The opcode bytes are emitted
        // secondary-first (OpcS, OpcP), i.e. D9 E4, which the format names as
        // FTSTD. PopFPU then removes the value pushed by Push_Reg_DPR, and
        // CmpF_Result (defined elsewhere) materialises the result in dst.
        // Both the flags and rax are clobbered.
 9360   predicate(UseSSE<=1);
 9361   match(Set dst (CmpD3 src1 zero));
 9362   effect(KILL cr, KILL rax);
 9363   ins_cost(280);
 9364   format %{ "FTSTD  $dst,$src1" %}
 9365   opcode(0xE4, 0xD9);
 9366   ins_encode( Push_Reg_DPR(src1),
 9367               OpcS, OpcP, PopFPU,
 9368               CmpF_Result(dst));
 9369   ins_pipe( pipe_slow );
 9370 %}
 9371 
 9372 // Compare into -1,0,1
 9373 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
        // Three-way compare (CmpD3) of two x87 doubles, producing an int in
        // dst. Uses the same push + D8 /3 compare as cmpDPR_cc, followed by the
        // CmpF_Result encode class (defined elsewhere) to materialise the
        // result. Both the flags and rax are clobbered.
 9374   predicate(UseSSE<=1);
 9375   match(Set dst (CmpD3 src1 src2));
 9376   effect(KILL cr, KILL rax);
 9377   ins_cost(300);
 9378   format %{ "FCMPD  $dst,$src1,$src2" %}
 9379   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9380   ins_encode( Push_Reg_DPR(src1),
 9381               OpcP, RegOpc(src2),
 9382               CmpF_Result(dst));
 9383   ins_pipe( pipe_slow );
 9384 %}
 9385 
 9386 // double compare and set condition codes in EFLAGS by XMM regs
 9387 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
        // SSE2 double compare (UseSSE >= 2): UCOMISD followed by
        // emit_cmpfp_fixup (defined elsewhere). Per the format, the fixup is
        // skipped when the parity flag is clear and otherwise rewrites the
        // saved flags via PUSHF / AND / POPF for the NaN case.
 9388   predicate(UseSSE>=2);
 9389   match(Set cr (CmpD src1 src2));
 9390   ins_cost(145);
 9391   format %{ "UCOMISD $src1,$src2\n\t"
 9392             "JNP,s   exit\n\t"
 9393             "PUSHF\t# saw NaN, set CF\n\t"
 9394             "AND     [rsp], #0xffffff2b\n\t"
 9395             "POPF\n"
 9396     "exit:" %}
 9397   ins_encode %{
 9398     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9399     emit_cmpfp_fixup(masm);
 9400   %}
 9401   ins_pipe( pipe_slow );
 9402 %}
 9403 
 9404 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
        // SSE2 double compare without the NaN fixup: a bare UCOMISD whose
        // result is typed eFlagsRegUCF. Costed at 100, below the 145 of the
        // fixup variant cmpD_cc.
        // NOTE(review): presumably eFlagsRegUCF marks consumers that do not
        // need the unordered fixup; confirm against the operand definition.
 9405   predicate(UseSSE>=2);
 9406   match(Set cr (CmpD src1 src2));
 9407   ins_cost(100);
 9408   format %{ "UCOMISD $src1,$src2" %}
 9409   ins_encode %{
 9410     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9411   %}
 9412   ins_pipe( pipe_slow );
 9413 %}
 9414 
 9415 // double compare against memory and set condition codes in EFLAGS by XMM regs
 9416 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
        // SSE2 double compare with the second operand loaded directly from
        // memory (the LoadD is folded into UCOMISD), followed by the same
        // emit_cmpfp_fixup NaN fixup as the register form.
 9417   predicate(UseSSE>=2);
 9418   match(Set cr (CmpD src1 (LoadD src2)));
 9419   ins_cost(145);
 9420   format %{ "UCOMISD $src1,$src2\n\t"
 9421             "JNP,s   exit\n\t"
 9422             "PUSHF\t# saw NaN, set CF\n\t"
 9423             "AND     [rsp], #0xffffff2b\n\t"
 9424             "POPF\n"
 9425     "exit:" %}
 9426   ins_encode %{
 9427     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9428     emit_cmpfp_fixup(masm);
 9429   %}
 9430   ins_pipe( pipe_slow );
 9431 %}
 9432 
 9433 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
        // SSE2 double compare against memory without the NaN fixup: a bare
        // UCOMISD with a memory operand whose result is typed eFlagsRegUCF.
        // Costed at 100, below the 145 of the fixup variant cmpD_ccmem.
 9434   predicate(UseSSE>=2);
 9435   match(Set cr (CmpD src1 (LoadD src2)));
 9436   ins_cost(100);
 9437   format %{ "UCOMISD $src1,$src2" %}
 9438   ins_encode %{
 9439     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9440   %}
 9441   ins_pipe( pipe_slow );
 9442 %}
 9443 
 9444 // Compare into -1,0,1 in XMM
 9445 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
        // SSE2 three-way compare (CmpD3) of two XMM doubles into an int.
        // UCOMISD sets the flags and emit_cmpfp3 (defined elsewhere) turns
        // them into the result. Per the format: dst starts at -1 and keeps it
        // for unordered (JP) or below (JB); otherwise SETNE/MOVZB yields 0 or
        // 1. Flags are clobbered.
 9446   predicate(UseSSE>=2);
 9447   match(Set dst (CmpD3 src1 src2));
 9448   effect(KILL cr);
 9449   ins_cost(255);
 9450   format %{ "UCOMISD $src1, $src2\n\t"
 9451             "MOV     $dst, #-1\n\t"
 9452             "JP,s    done\n\t"
 9453             "JB,s    done\n\t"
 9454             "SETNE   $dst\n\t"
 9455             "MOVZB   $dst, $dst\n"
 9456     "done:" %}
 9457   ins_encode %{
 9458     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9459     emit_cmpfp3(masm, $dst$$Register);
 9460   %}
 9461   ins_pipe( pipe_slow );
 9462 %}
 9463 
 9464 // Compare into -1,0,1 in XMM and memory
 9465 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
        // SSE2 three-way compare (CmpD3) of an XMM double against a double
        // loaded from memory (the LoadD is folded into UCOMISD). The result is
        // materialised by emit_cmpfp3 exactly as in cmpD_reg.
 9466   predicate(UseSSE>=2);
 9467   match(Set dst (CmpD3 src1 (LoadD src2)));
 9468   effect(KILL cr);
 9469   ins_cost(275);
 9470   format %{ "UCOMISD $src1, $src2\n\t"
 9471             "MOV     $dst, #-1\n\t"
 9472             "JP,s    done\n\t"
 9473             "JB,s    done\n\t"
 9474             "SETNE   $dst\n\t"
 9475             "MOVZB   $dst, $dst\n"
 9476     "done:" %}
 9477   ins_encode %{
 9478     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9479     emit_cmpfp3(masm, $dst$$Register);
 9480   %}
 9481   ins_pipe( pipe_slow );
 9482 %}
 9483 
 9484 
 9485 instruct subDPR_reg(regDPR dst, regDPR src) %{
        // x87 double subtract in place (dst = dst - src) for UseSSE <= 1.
        // Pushes src onto the FPU stack, then emits DE /5 against dst; the
        // format names this as a popping subtract (DSUBp).
 9486   predicate (UseSSE <=1);
 9487   match(Set dst (SubD dst src));
 9488
 9489   format %{ "FLD    $src\n\t"
 9490             "DSUBp  $dst,ST" %}
 9491   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9492   ins_cost(150);
 9493   ins_encode( Push_Reg_DPR(src),
 9494               OpcP, RegOpc(dst) );
 9495   ins_pipe( fpu_reg_reg );
 9496 %}
 9497 
 9498 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
        // x87 double subtract whose result is rounded by storing it to a
        // stack slot: matches RoundDouble(src1 - src2). Pushes src2, emits
        // D8 /5 against src1, then pops the result to memory via Pop_Mem_DPR.
        // NOTE(review): with src2 on top of the stack, D8 /5 must compute
        // src1 - src2 (FSUBR per the Intel SDM) although the format prints
        // "DSUB"; confirm against the ISA reference.
 9499   predicate (UseSSE <=1);
 9500   match(Set dst (RoundDouble (SubD src1 src2)));
 9501   ins_cost(250);
 9502
 9503   format %{ "FLD    $src2\n\t"
 9504             "DSUB   ST,$src1\n\t"
 9505             "FSTP_D $dst\t# D-round" %}
 9506   opcode(0xD8, 0x5);
 9507   ins_encode( Push_Reg_DPR(src2),
 9508               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9509   ins_pipe( fpu_mem_reg_reg );
 9510 %}
 9511 
 9512 
 9513 instruct subDPR_reg_mem(regDPR dst, memory src) %{
        // x87 double subtract with the subtrahend loaded from memory:
        // dst = dst - [src]. The tertiary opcode (DD /0) loads the double onto
        // the FPU stack, then the primary/secondary pair performs the same
        // DE /5 popping subtract as subDPR_reg.
 9514   predicate (UseSSE <=1);
 9515   match(Set dst (SubD dst (LoadD src)));
 9516   ins_cost(150);
 9517
 9518   format %{ "FLD    $src\n\t"
 9519             "DSUBp  $dst,ST" %}
 9520   opcode(0xDE, 0x5, 0xDD); /* DE E8+i */  /* LoadD  DD /0 */
 9521   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
 9522               OpcP, RegOpc(dst), ClearInstMark );
 9523   ins_pipe( fpu_reg_mem );
 9524 %}
 9525 
 9526 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
        // x87 double absolute value for UseSSE <= 1. Both operands use the
        // regDPR1 class and the encoding is just two opcode bytes with no
        // register field: OpcS then OpcP, i.e. D9 E1, named FABS in the format.
        // NOTE(review): presumably regDPR1 is the FPU top-of-stack register so
        // the operation is in place; confirm against the operand definition.
 9527   predicate (UseSSE<=1);
 9528   match(Set dst (AbsD src));
 9529   ins_cost(100);
 9530   format %{ "FABS" %}
 9531   opcode(0xE1, 0xD9);
 9532   ins_encode( OpcS, OpcP );
 9533   ins_pipe( fpu_reg_reg );
 9534 %}
 9535 
 9536 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
        // x87 double negate for UseSSE <= 1. Both operands use the regDPR1
        // class and the encoding is just two opcode bytes with no register
        // field: OpcS then OpcP, i.e. D9 E0, named FCHS in the format.
        // NOTE(review): presumably regDPR1 is the FPU top-of-stack register so
        // the operation is in place; confirm against the operand definition.
 9537   predicate(UseSSE<=1);
 9538   match(Set dst (NegD src));
 9539   ins_cost(100);
 9540   format %{ "FCHS" %}
 9541   opcode(0xE0, 0xD9);
 9542   ins_encode( OpcS, OpcP );
 9543   ins_pipe( fpu_reg_reg );
 9544 %}
 9545 
 9546 instruct addDPR_reg(regDPR dst, regDPR src) %{
        // x87 double add in place (dst = dst + src) for UseSSE <= 1.
        // Pushes src onto the FPU stack, then emits DE C0+i against dst, which
        // adds and pops the pushed value. The format prints the popping
        // mnemonic (DADDp) to match the encoding and the sibling
        // subDPR_reg / mulDPR_reg / addDPR_reg_mem listings.
        // size(4) declares the fixed byte length of the two instructions.
 9547   predicate(UseSSE<=1);
 9548   match(Set dst (AddD dst src));
 9549   format %{ "FLD    $src\n\t"
 9550             "DADDp  $dst,ST" %}
 9551   size(4);
 9552   ins_cost(150);
 9553   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9554   ins_encode( Push_Reg_DPR(src),
 9555               OpcP, RegOpc(dst) );
 9556   ins_pipe( fpu_reg_reg );
 9557 %}
 9558 
 9559 
 9560 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
        // x87 double add whose result is rounded by storing it to a stack
        // slot: matches RoundDouble(src1 + src2). Pushes src2, emits D8 /0
        // against src1 (non-popping add into ST), then pops the sum to memory
        // via Pop_Mem_DPR (defined elsewhere).
 9561   predicate(UseSSE<=1);
 9562   match(Set dst (RoundDouble (AddD src1 src2)));
 9563   ins_cost(250);
 9564
 9565   format %{ "FLD    $src2\n\t"
 9566             "DADD   ST,$src1\n\t"
 9567             "FSTP_D $dst\t# D-round" %}
 9568   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9569   ins_encode( Push_Reg_DPR(src2),
 9570               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9571   ins_pipe( fpu_mem_reg_reg );
 9572 %}
 9573 
 9574 
 9575 instruct addDPR_reg_mem(regDPR dst, memory src) %{
        // x87 double add with the addend loaded from memory:
        // dst = dst + [src]. The tertiary opcode (DD /0) loads the double onto
        // the FPU stack, then the primary/secondary pair (DE /0) performs the
        // popping add into dst.
 9576   predicate(UseSSE<=1);
 9577   match(Set dst (AddD dst (LoadD src)));
 9578   ins_cost(150);
 9579
 9580   format %{ "FLD    $src\n\t"
 9581             "DADDp  $dst,ST" %}
 9582   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9583   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
 9584               OpcP, RegOpc(dst), ClearInstMark );
 9585   ins_pipe( fpu_reg_mem );
 9586 %}
 9587 
 9588 // add-to-memory
 9589 instruct addDPR_mem_reg(memory dst, regDPR src) %{
        // x87 read-modify-write double add: [dst] = round([dst] + src).
        // Three instructions are emitted: DD /0 loads [dst] onto the FPU
        // stack, D8 C0+i adds src into ST, and DD /3 stores the sum back to
        // [dst]. DD /3 is the popping store, which also removes the value
        // pushed by the initial load, so the format prints FSTP_D like the
        // other rounding stores in this file.
 9590   predicate(UseSSE<=1);
 9591   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9592   ins_cost(150);
 9593
 9594   format %{ "FLD_D  $dst\n\t"
 9595             "DADD   ST,$src\n\t"
 9596             "FSTP_D $dst" %}
 9597   opcode(0xDD, 0x0);
        // The load/add pair and the store are each bracketed by their own
        // SetInstMark / ClearInstMark pair.
 9598   ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst),
 9599               Opcode(0xD8), RegOpc(src), ClearInstMark,
 9600               SetInstMark,
 9601               Opcode(0xDD), RMopc_Mem(0x03,dst),
 9602               ClearInstMark);
 9603   ins_pipe( fpu_reg_mem );
 9604 %}
 9605 
 9606 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
        // x87 dst = dst + con where con is matched by the immDPR1 operand.
        // Uses FLD1 to push the constant instead of a constant-table load,
        // then a popping add into dst. Costed at 125, below the 200 of the
        // general-constant form addDPR_reg_imm.
 9607   predicate(UseSSE<=1);
 9608   match(Set dst (AddD dst con));
 9609   ins_cost(125);
 9610   format %{ "FLD1\n\t"
 9611             "DADDp  $dst,ST" %}
 9612   ins_encode %{
 9613     __ fld1();
 9614     __ faddp($dst$$reg);
 9615   %}
 9616   ins_pipe(fpu_reg);
 9617 %}
 9618 
 9619 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
        // x87 dst = dst + constant, loading the constant from the constant
        // table and then doing a popping add into dst. The predicate inspects
        // the matched constant (_kids[1] is the second child of AddD) and
        // rejects 0.0 and 1.0, leaving those values to other rules such as
        // addDPR_reg_imm1.
 9620   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9621   match(Set dst (AddD dst con));
 9622   ins_cost(200);
 9623   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9624             "DADDp  $dst,ST" %}
 9625   ins_encode %{
 9626     __ fld_d($constantaddress($con));
 9627     __ faddp($dst$$reg);
 9628   %}
 9629   ins_pipe(fpu_reg_mem);
 9630 %}
 9631 
 9632 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9633   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9634   match(Set dst (RoundDouble (AddD src con)));
 9635   ins_cost(200);
 9636   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9637             "DADD   ST,$src\n\t"
 9638             "FSTP_D $dst\t# D-round" %}
 9639   ins_encode %{
 9640     __ fld_d($constantaddress($con));
 9641     __ fadd($src$$reg);
 9642     __ fstp_d(Address(rsp, $dst$$disp));
 9643   %}
 9644   ins_pipe(fpu_mem_reg_con);
 9645 %}
 9646 
 9647 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9648   predicate(UseSSE<=1);
 9649   match(Set dst (MulD dst src));
 9650   format %{ "FLD    $src\n\t"
 9651             "DMULp  $dst,ST" %}
 9652   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9653   ins_cost(150);
 9654   ins_encode( Push_Reg_DPR(src),
 9655               OpcP, RegOpc(dst) );
 9656   ins_pipe( fpu_reg_reg );
 9657 %}
 9658 
 9659 // Strict FP instruction biases argument before multiply then
 9660 // biases result to avoid double rounding of subnormals.
 9661 //
 9662 // scale arg1 by multiplying arg1 by 2^(-15360)
 9663 // load arg2
 9664 // multiply scaled arg1 by arg2
 9665 // rescale product by 2^(15360)
 9666 //
 9667 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9668   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9669   match(Set dst (MulD dst src));
 9670   ins_cost(1);   // Select this instruction for all FP double multiplies
 9671 
 9672   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9673             "DMULp  $dst,ST\n\t"
 9674             "FLD    $src\n\t"
 9675             "DMULp  $dst,ST\n\t"
 9676             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9677             "DMULp  $dst,ST\n\t" %}
 9678   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9679   ins_encode( strictfp_bias1(dst),
 9680               Push_Reg_DPR(src),
 9681               OpcP, RegOpc(dst),
 9682               strictfp_bias2(dst) );
 9683   ins_pipe( fpu_reg_reg );
 9684 %}
 9685 
 9686 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9687   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9688   match(Set dst (MulD dst con));
 9689   ins_cost(200);
 9690   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9691             "DMULp  $dst,ST" %}
 9692   ins_encode %{
 9693     __ fld_d($constantaddress($con));
 9694     __ fmulp($dst$$reg);
 9695   %}
 9696   ins_pipe(fpu_reg_mem);
 9697 %}
 9698 
 9699 
 9700 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9701   predicate( UseSSE<=1 );
 9702   match(Set dst (MulD dst (LoadD src)));
 9703   ins_cost(200);
 9704   format %{ "FLD_D  $src\n\t"
 9705             "DMULp  $dst,ST" %}
 9706   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9707   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
 9708               OpcP, RegOpc(dst), ClearInstMark );
 9709   ins_pipe( fpu_reg_mem );
 9710 %}
 9711 
 9712 //
 9713 // Cisc-alternate to reg-reg multiply
 9714 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9715   predicate( UseSSE<=1 );
 9716   match(Set dst (MulD src (LoadD mem)));
 9717   ins_cost(250);
 9718   format %{ "FLD_D  $mem\n\t"
 9719             "DMUL   ST,$src\n\t"
 9720             "FSTP_D $dst" %}
 9721   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9722   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem),
 9723               OpcReg_FPR(src),
 9724               Pop_Reg_DPR(dst), ClearInstMark );
 9725   ins_pipe( fpu_reg_reg_mem );
 9726 %}
 9727 
 9728 
 9729 // MACRO3 -- addDPR a mulDPR
 9730 // This instruction is a '2-address' instruction in that the result goes
 9731 // back to src2.  This eliminates a move from the macro; possibly the
 9732 // register allocator will have to add it back (and maybe not).
 9733 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9734   predicate( UseSSE<=1 );
 9735   match(Set src2 (AddD (MulD src0 src1) src2));
 9736   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9737             "DMUL   ST,$src1\n\t"
 9738             "DADDp  $src2,ST" %}
 9739   ins_cost(250);
 9740   opcode(0xDD); /* LoadD DD /0 */
 9741   ins_encode( Push_Reg_FPR(src0),
 9742               FMul_ST_reg(src1),
 9743               FAddP_reg_ST(src2) );
 9744   ins_pipe( fpu_reg_reg_reg );
 9745 %}
 9746 
 9747 
 9748 // MACRO3 -- subDPR a mulDPR
 9749 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9750   predicate( UseSSE<=1 );
 9751   match(Set src2 (SubD (MulD src0 src1) src2));
 9752   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9753             "DMUL   ST,$src1\n\t"
 9754             "DSUBRp $src2,ST" %}
 9755   ins_cost(250);
 9756   ins_encode( Push_Reg_FPR(src0),
 9757               FMul_ST_reg(src1),
 9758               Opcode(0xDE), Opc_plus(0xE0,src2));
 9759   ins_pipe( fpu_reg_reg_reg );
 9760 %}
 9761 
 9762 
 9763 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9764   predicate( UseSSE<=1 );
 9765   match(Set dst (DivD dst src));
 9766 
 9767   format %{ "FLD    $src\n\t"
 9768             "FDIVp  $dst,ST" %}
 9769   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9770   ins_cost(150);
 9771   ins_encode( Push_Reg_DPR(src),
 9772               OpcP, RegOpc(dst) );
 9773   ins_pipe( fpu_reg_reg );
 9774 %}
 9775 
 9776 // Strict FP instruction biases argument before division then
 9777 // biases result, to avoid double rounding of subnormals.
 9778 //
 9779 // scale dividend by multiplying dividend by 2^(-15360)
 9780 // load divisor
 9781 // divide scaled dividend by divisor
 9782 // rescale quotient by 2^(15360)
 9783 //
 9784 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9785   predicate (UseSSE<=1);
 9786   match(Set dst (DivD dst src));
 9787   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9788   ins_cost(01);
 9789 
 9790   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9791             "DMULp  $dst,ST\n\t"
 9792             "FLD    $src\n\t"
 9793             "FDIVp  $dst,ST\n\t"
 9794             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9795             "DMULp  $dst,ST\n\t" %}
 9796   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9797   ins_encode( strictfp_bias1(dst),
 9798               Push_Reg_DPR(src),
 9799               OpcP, RegOpc(dst),
 9800               strictfp_bias2(dst) );
 9801   ins_pipe( fpu_reg_reg );
 9802 %}
 9803 
 9804 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9805   predicate(UseSSE<=1);
 9806   match(Set dst (ModD dst src));
 9807   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9808 
 9809   format %{ "DMOD   $dst,$src" %}
 9810   ins_cost(250);
 9811   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9812               emitModDPR(),
 9813               Push_Result_Mod_DPR(src),
 9814               Pop_Reg_DPR(dst));
 9815   ins_pipe( pipe_slow );
 9816 %}
 9817 
 9818 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9819   predicate(UseSSE>=2);
 9820   match(Set dst (ModD src0 src1));
 9821   effect(KILL rax, KILL cr);
 9822 
 9823   format %{ "SUB    ESP,8\t # DMOD\n"
 9824           "\tMOVSD  [ESP+0],$src1\n"
 9825           "\tFLD_D  [ESP+0]\n"
 9826           "\tMOVSD  [ESP+0],$src0\n"
 9827           "\tFLD_D  [ESP+0]\n"
 9828      "loop:\tFPREM\n"
 9829           "\tFWAIT\n"
 9830           "\tFNSTSW AX\n"
 9831           "\tSAHF\n"
 9832           "\tJP     loop\n"
 9833           "\tFSTP_D [ESP+0]\n"
 9834           "\tMOVSD  $dst,[ESP+0]\n"
 9835           "\tADD    ESP,8\n"
 9836           "\tFSTP   ST0\t # Restore FPU Stack"
 9837     %}
 9838   ins_cost(250);
 9839   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9840   ins_pipe( pipe_slow );
 9841 %}
 9842 
 9843 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9844   predicate (UseSSE<=1);
 9845   match(Set dst(AtanD dst src));
 9846   format %{ "DATA   $dst,$src" %}
 9847   opcode(0xD9, 0xF3);
 9848   ins_encode( Push_Reg_DPR(src),
 9849               OpcP, OpcS, RegOpc(dst) );
 9850   ins_pipe( pipe_slow );
 9851 %}
 9852 
 9853 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
 9854   predicate (UseSSE>=2);
 9855   match(Set dst(AtanD dst src));
 9856   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
 9857   format %{ "DATA   $dst,$src" %}
 9858   opcode(0xD9, 0xF3);
 9859   ins_encode( Push_SrcD(src),
 9860               OpcP, OpcS, Push_ResultD(dst) );
 9861   ins_pipe( pipe_slow );
 9862 %}
 9863 
 9864 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
 9865   predicate (UseSSE<=1);
 9866   match(Set dst (SqrtD src));
 9867   format %{ "DSQRT  $dst,$src" %}
 9868   opcode(0xFA, 0xD9);
 9869   ins_encode( Push_Reg_DPR(src),
 9870               OpcS, OpcP, Pop_Reg_DPR(dst) );
 9871   ins_pipe( pipe_slow );
 9872 %}
 9873 
 9874 //-------------Float Instructions-------------------------------
 9875 // Float Math
 9876 
 9877 // Code for float compare:
 9878 //     fcompp();
 9879 //     fwait(); fnstsw_ax();
 9880 //     sahf();
 9881 //     movl(dst, unordered_result);
 9882 //     jcc(Assembler::parity, exit);
 9883 //     movl(dst, less_result);
 9884 //     jcc(Assembler::below, exit);
 9885 //     movl(dst, equal_result);
 9886 //     jcc(Assembler::equal, exit);
 9887 //     movl(dst, greater_result);
 9888 //   exit:
 9889 
 9890 // P6 version of float compare, sets condition codes in EFLAGS
 9891 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
 9892   predicate(VM_Version::supports_cmov() && UseSSE == 0);
 9893   match(Set cr (CmpF src1 src2));
 9894   effect(KILL rax);
 9895   ins_cost(150);
 9896   format %{ "FLD    $src1\n\t"
 9897             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9898             "JNP    exit\n\t"
 9899             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
 9900             "SAHF\n"
 9901      "exit:\tNOP               // avoid branch to branch" %}
 9902   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9903   ins_encode( Push_Reg_DPR(src1),
 9904               OpcP, RegOpc(src2),
 9905               cmpF_P6_fixup );
 9906   ins_pipe( pipe_slow );
 9907 %}
 9908 
 9909 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
 9910   predicate(VM_Version::supports_cmov() && UseSSE == 0);
 9911   match(Set cr (CmpF src1 src2));
 9912   ins_cost(100);
 9913   format %{ "FLD    $src1\n\t"
 9914             "FUCOMIP ST,$src2  // P6 instruction" %}
 9915   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9916   ins_encode( Push_Reg_DPR(src1),
 9917               OpcP, RegOpc(src2));
 9918   ins_pipe( pipe_slow );
 9919 %}
 9920 
 9921 
 9922 // Compare & branch
 9923 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
 9924   predicate(UseSSE == 0);
 9925   match(Set cr (CmpF src1 src2));
 9926   effect(KILL rax);
 9927   ins_cost(200);
 9928   format %{ "FLD    $src1\n\t"
 9929             "FCOMp  $src2\n\t"
 9930             "FNSTSW AX\n\t"
 9931             "TEST   AX,0x400\n\t"
 9932             "JZ,s   flags\n\t"
 9933             "MOV    AH,1\t# unordered treat as LT\n"
 9934     "flags:\tSAHF" %}
 9935   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9936   ins_encode( Push_Reg_DPR(src1),
 9937               OpcP, RegOpc(src2),
 9938               fpu_flags);
 9939   ins_pipe( pipe_slow );
 9940 %}
 9941 
 9942 // Compare vs zero into -1,0,1
 9943 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9944   predicate(UseSSE == 0);
 9945   match(Set dst (CmpF3 src1 zero));
 9946   effect(KILL cr, KILL rax);
 9947   ins_cost(280);
 9948   format %{ "FTSTF  $dst,$src1" %}
 9949   opcode(0xE4, 0xD9);
 9950   ins_encode( Push_Reg_DPR(src1),
 9951               OpcS, OpcP, PopFPU,
 9952               CmpF_Result(dst));
 9953   ins_pipe( pipe_slow );
 9954 %}
 9955 
 9956 // Compare into -1,0,1
 9957 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
 9958   predicate(UseSSE == 0);
 9959   match(Set dst (CmpF3 src1 src2));
 9960   effect(KILL cr, KILL rax);
 9961   ins_cost(300);
 9962   format %{ "FCMPF  $dst,$src1,$src2" %}
 9963   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9964   ins_encode( Push_Reg_DPR(src1),
 9965               OpcP, RegOpc(src2),
 9966               CmpF_Result(dst));
 9967   ins_pipe( pipe_slow );
 9968 %}
 9969 
 9970 // float compare and set condition codes in EFLAGS by XMM regs
 9971 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
 9972   predicate(UseSSE>=1);
 9973   match(Set cr (CmpF src1 src2));
 9974   ins_cost(145);
 9975   format %{ "UCOMISS $src1,$src2\n\t"
 9976             "JNP,s   exit\n\t"
 9977             "PUSHF\t# saw NaN, set CF\n\t"
 9978             "AND     [rsp], #0xffffff2b\n\t"
 9979             "POPF\n"
 9980     "exit:" %}
 9981   ins_encode %{
 9982     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
 9983     emit_cmpfp_fixup(masm);
 9984   %}
 9985   ins_pipe( pipe_slow );
 9986 %}
 9987 
 9988 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
 9989   predicate(UseSSE>=1);
 9990   match(Set cr (CmpF src1 src2));
 9991   ins_cost(100);
 9992   format %{ "UCOMISS $src1,$src2" %}
 9993   ins_encode %{
 9994     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
 9995   %}
 9996   ins_pipe( pipe_slow );
 9997 %}
 9998 
 9999 // float compare and set condition codes in EFLAGS by XMM regs
10000 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10001   predicate(UseSSE>=1);
10002   match(Set cr (CmpF src1 (LoadF src2)));
10003   ins_cost(165);
10004   format %{ "UCOMISS $src1,$src2\n\t"
10005             "JNP,s   exit\n\t"
10006             "PUSHF\t# saw NaN, set CF\n\t"
10007             "AND     [rsp], #0xffffff2b\n\t"
10008             "POPF\n"
10009     "exit:" %}
10010   ins_encode %{
10011     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10012     emit_cmpfp_fixup(masm);
10013   %}
10014   ins_pipe( pipe_slow );
10015 %}
10016 
// Float compare of an XMM register against a memory operand producing an
// eFlagsRegUCF result (UseSSE>=1): a bare UCOMISS with no fixup sequence.
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  match(Set cr (CmpF src1 (LoadF src2)));
  predicate(UseSSE>=1);
  ins_cost(100);
  ins_pipe(pipe_slow);

  format %{ "UCOMISS $src1,$src2" %}

  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
%}
10027 
// Three-way float compare of two XMM registers, yielding -1, 0 or 1 in an
// integer register (UseSSE>=1).  Per the format, -1 is preloaded and kept
// for the unordered and below cases; otherwise SETNE/MOVZB produce 0 or 1.
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  match(Set dst (CmpF3 src1 src2));
  predicate(UseSSE>=1);
  effect(KILL cr);
  ins_cost(255);
  ins_pipe(pipe_slow);

  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV     $dst, #-1\n\t"
            "JP,s    done\n\t"
            "JB,s    done\n\t"
            "SETNE   $dst\n\t"
            "MOVZB   $dst, $dst\n"
            "done:" %}

  ins_encode %{
    // Compare, then let the shared helper materialise the three-way result.
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
%}
10047 
// Three-way float compare of an XMM register against a memory operand,
// yielding -1, 0 or 1 in an integer register (UseSSE>=1).
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  predicate(UseSSE>=1);
  effect(KILL cr);
  ins_cost(275);
  ins_pipe(pipe_slow);

  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV     $dst, #-1\n\t"
            "JP,s    done\n\t"
            "JB,s    done\n\t"
            "SETNE   $dst\n\t"
            "MOVZB   $dst, $dst\n"
            "done:" %}

  ins_encode %{
    // Compare against memory, then materialise the three-way result.
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
%}
10067 
// Float subtract, 24-bit variant: the result is spilled to a float stack
// slot to obtain 24-bit precision.  Used when UseSSE==0 and the compile
// selects 24-bit instructions.
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  match(Set dst (SubF src1 src2));
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_mem_reg_reg);

  format %{ "FSUB   $dst,$src1 - $src2" %}

  // primary/secondary: D8 E0+i or D8 /4 mod==0x3 ;; result in TOS
  opcode(0xD8, 0x4);
  ins_encode(
    Push_Reg_FPR(src1),
    OpcReg_FPR(src2),
    Pop_Mem_FPR(dst)
  );
%}
//
// Float subtract on FPU registers.  This variant does not round to 24 bits;
// it is used when UseSSE==0 and 24-bit instructions are not selected.
instruct subFPR_reg(regFPR dst, regFPR src) %{
  match(Set dst (SubF dst src));
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_reg_reg);

  format %{ "FSUB   $dst,$src" %}

  // primary/secondary: DE E8+i or DE /5
  opcode(0xDE, 0x5);
  ins_encode(
    Push_Reg_FPR(src),
    OpcP, RegOpc(dst)
  );
%}
10092 
// Float add, 24-bit variant: the result is spilled to a float stack slot to
// obtain 24-bit precision.  The encoding pushes src2 and adds src1.
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  match(Set dst (AddF src1 src2));
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_mem_reg_reg);

  format %{ "FADD   $dst,$src1,$src2" %}

  // primary/secondary: D8 C0+i
  opcode(0xD8, 0x0);
  ins_encode(
    Push_Reg_FPR(src2),
    OpcReg_FPR(src1),
    Pop_Mem_FPR(dst)
  );
%}
//
// Float add on FPU registers.  This variant does not round to 24 bits: $src
// is pushed and a popping add folds it into $dst.
instruct addFPR_reg(regFPR dst, regFPR src) %{
  match(Set dst (AddF dst src));
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_reg_reg);

  format %{ "FLD    $src\n\t"
            "FADDp  $dst,ST" %}

  // primary/secondary: DE C0+i or DE /0
  opcode(0xDE, 0x0);
  ins_encode(
    Push_Reg_FPR(src),
    OpcP, RegOpc(dst)
  );
%}
10118 
// Float absolute value (UseSSE==0).  Both operands are of class regFPR1, and
// the two opcode bytes are emitted secondary-then-primary (D9 E1).
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  match(Set dst (AbsF src));
  predicate(UseSSE==0);
  ins_cost(100);
  ins_pipe(fpu_reg_reg);

  format %{ "FABS" %}

  opcode(0xE1, 0xD9);
  ins_encode(
    OpcS, OpcP
  );
%}
10128 
// Float negate (UseSSE==0).  Both operands are of class regFPR1, and the two
// opcode bytes are emitted secondary-then-primary (D9 E0).
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  match(Set dst (NegF src));
  predicate(UseSSE==0);
  ins_cost(100);
  ins_pipe(fpu_reg_reg);

  format %{ "FCHS" %}

  opcode(0xE0, 0xD9);
  ins_encode(
    OpcS, OpcP
  );
%}
10138 
// Cisc-alternate to addFPR_reg, 24-bit variant: the float at $src2 is
// loaded, $src1 is added, and the sum is spilled to a float stack slot to
// obtain 24-bit precision.
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  match(Set dst (AddF src1 (LoadF src2)));
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_mem_reg_mem);

  format %{ "FLD    $src2\n\t"
            "FADD   ST,$src1\n\t"
            "FSTP_S $dst" %}

  // primary/secondary: D8 C0+i (add);  tertiary: D9 /0 (LoadF)
  opcode(0xD8, 0x0, 0xD9);
  ins_encode(
    SetInstMark,
    Opcode(tertiary), RMopc_Mem(0x00,src2),
    OpcReg_FPR(src1),
    Pop_Mem_FPR(dst),
    ClearInstMark
  );
%}
//
// Cisc-alternate to addFPR_reg.  This variant does not round to 24 bits: the
// float at $src is loaded and added into the register $dst.
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  match(Set dst (AddF dst (LoadF src)));
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_reg_mem);

  format %{ "FADD   $dst,$src" %}

  // primary/secondary: DE C0+i or DE /0 (add);  tertiary: D9 /0 (LoadF)
  opcode(0xDE, 0x0, 0xD9);
  ins_encode(
    SetInstMark,
    Opcode(tertiary), RMopc_Mem(0x00,src),
    OpcP, RegOpc(dst),
    ClearInstMark
  );
%}
10167 
// The following two instructions are for _222_mpegaudio.
// Float add of a memory operand and a register, 24-bit variant: the result
// is spilled to a float stack slot to obtain 24-bit precision.
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  match(Set dst (AddF src1 src2));
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_mem_reg_mem);

  format %{ "FADD   $dst,$src1,$src2" %}

  // primary/secondary: D8 C0+i (add);  tertiary: D9 /0 (LoadF)
  opcode(0xD8, 0x0, 0xD9);
  ins_encode(
    SetInstMark,
    Opcode(tertiary), RMopc_Mem(0x00,src1),
    OpcReg_FPR(src2),
    Pop_Mem_FPR(dst),
    ClearInstMark
  );
%}
10181 
// Cisc-spill variant of the 24-bit float add: src2 is loaded from memory,
// src1 is added from memory, and the sum is spilled to a float stack slot to
// obtain 24-bit precision.
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  match(Set dst (AddF src1 (LoadF src2)));
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_mem_mem_mem);

  format %{ "FADD   $dst,$src1,$src2 cisc" %}

  // primary/secondary: D8 C0+i (add);  tertiary: D9 /0 (LoadF)
  opcode(0xD8, 0x0, 0xD9);
  ins_encode(
    SetInstMark,
    Opcode(tertiary), RMopc_Mem(0x00,src2),
    OpcP, RMopc_Mem(secondary,src1),
    Pop_Mem_FPR(dst),
    ClearInstMark
  );
%}
10196 
// Float add of two memory operands, 24-bit variant: src2 is loaded, src1 is
// added from memory, and the sum is spilled to a float stack slot to obtain
// 24-bit precision.
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  match(Set dst (AddF src1 src2));
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_mem_mem_mem);

  format %{ "FADD   $dst,$src1,$src2" %}

  // primary/secondary: D8 /0 (add);  tertiary: D9 /0 (LoadF)
  opcode(0xD8, 0x0, 0xD9);
  ins_encode(
    SetInstMark,
    Opcode(tertiary), RMopc_Mem(0x00,src2),
    OpcP, RMopc_Mem(secondary,src1),
    Pop_Mem_FPR(dst),
    ClearInstMark
  );
%}
10210 
10211 
// Float add of a register and a constant, 24-bit variant: the sum is spilled
// to a float stack slot to obtain 24-bit precision.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  match(Set dst (AddF src con));
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_mem_reg_con);

  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst"  %}

  ins_encode %{
    // Push src, add the constant-table float, store-and-pop to the slot.
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
%}
//
// Float add of a register and a constant into a register.  This variant does
// not round to 24 bits.
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  match(Set dst (AddF src con));
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_reg_reg_con);

  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst"  %}

  ins_encode %{
    // Push src, add the constant-table float, pop into the dst register.
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
%}
10241 
// Float multiply of two registers, 24-bit variant: the product is spilled to
// a float stack slot to obtain 24-bit precision.
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  match(Set dst (MulF src1 src2));
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_mem_reg_reg);

  format %{ "FLD    $src1\n\t"
            "FMUL   $src2\n\t"
            "FSTP_S $dst"  %}

  // primary/secondary: D8 C8+i or D8 /1 ;; result in TOS
  opcode(0xD8, 0x1);
  ins_encode(
    Push_Reg_FPR(src1),
    OpcReg_FPR(src2),
    Pop_Mem_FPR(dst)
  );
%}
//
// Float multiply of two registers into a register.  This instruction does
// not round to 24-bits.
//
// The encoding pushes $src2, multiplies the top of stack by $src1 and pops
// the product into the register $dst, so the format lists the operands in
// that order and shows a register pop (FSTP) rather than a single-precision
// memory store (FSTP_S).  The operation is commutative, so only the printed
// text changes.
// NOTE(review): Pop_Reg_FPR is assumed to be a register pop from its name
// and its regFPR destination -- confirm against the encoding class.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD    $src2\n\t"
            "FMUL   $src1\n\t"
            "FSTP   $dst"  %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}
10271 
10272 
// Cisc-alternate to the reg-reg multiply, 24-bit variant: the float at $src2
// is loaded, multiplied by $src1, and the product is spilled to a float
// stack slot to obtain 24-bit precision.
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  match(Set dst (MulF src1 (LoadF src2)));
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_mem_reg_mem);

  format %{ "FLD_S  $src2\n\t"
            "FMUL   $src1\n\t"
            "FSTP_S $dst"  %}

  // primary/secondary: D8 C8+i or D8 /1 (multiply);  tertiary: D9 /0 (LoadF)
  opcode(0xD8, 0x1, 0xD9);
  ins_encode(
    SetInstMark,
    Opcode(tertiary), RMopc_Mem(0x00,src2),
    OpcReg_FPR(src1),
    Pop_Mem_FPR(dst),
    ClearInstMark
  );
%}
//
// Cisc-alternate to the reg-reg multiply.  This variant does not round to 24
// bits: the float at $src2 is loaded, multiplied by $src1, and popped into
// the register $dst.
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  match(Set dst (MulF src1 (LoadF src2)));
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_reg_reg_mem);

  format %{ "FMUL   $dst,$src1,$src2" %}

  // primary/secondary: D8 C8+i (multiply);  tertiary: D9 /0 (LoadF)
  opcode(0xD8, 0x1, 0xD9);
  ins_encode(
    SetInstMark,
    Opcode(tertiary), RMopc_Mem(0x00,src2),
    OpcReg_FPR(src1),
    Pop_Reg_FPR(dst),
    ClearInstMark
  );
%}
10302 
// Float multiply of two memory operands, 24-bit variant: src2 is loaded,
// multiplied by src1 from memory, and the product is spilled to a float
// stack slot to obtain 24-bit precision.
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  match(Set dst (MulF src1 src2));
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_mem_mem_mem);

  format %{ "FMUL   $dst,$src1,$src2" %}

  // primary/secondary: D8 /1 (multiply);  tertiary: D9 /0 (LoadF)
  opcode(0xD8, 0x1, 0xD9);
  ins_encode(
    SetInstMark,
    Opcode(tertiary), RMopc_Mem(0x00,src2),
    OpcP, RMopc_Mem(secondary,src1),
    Pop_Mem_FPR(dst),
    ClearInstMark
  );
%}
10316 
// Float multiply of a register by a constant, 24-bit variant: the product is
// spilled to a float stack slot to obtain 24-bit precision.
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  match(Set dst (MulF src con));
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_mem_reg_con);

  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst"  %}

  ins_encode %{
    // Push src, multiply by the constant-table float, store-and-pop to slot.
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
%}
//
// Float multiply of a register by a constant into a register.  This variant
// does not round to 24 bits.
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  match(Set dst (MulF src con));
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_reg_reg_con);

  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst"  %}

  ins_encode %{
    // Push src, multiply by the constant-table float, pop into dst register.
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
%}
10348 
10349 
//
// MACRO1 -- subsume an unshared load into mulFPR.
// This variant does not round to 24 bits: the float at $mem1 is loaded,
// multiplied by $src, and popped into the register $dst.
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  match(Set dst (MulF (LoadF mem1) src));
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_reg_reg_mem);

  format %{ "FLD    $mem1    ===MACRO1===\n\t"
            "FMUL   ST,$src\n\t"
            "FSTP   $dst" %}

  // primary/secondary: D8 C8+i or D8 /1 (multiply);  tertiary: D9 /0 (LoadF)
  opcode(0xD8, 0x1, 0xD9);
  ins_encode(
    SetInstMark,
    Opcode(tertiary), RMopc_Mem(0x00,mem1),
    OpcReg_FPR(src),
    Pop_Reg_FPR(dst),
    ClearInstMark
  );
%}
//
// MACRO2 -- addFPR of a mulFPR which subsumed an unshared load.
// This variant does not round to 24 bits: the float at $mem1 is loaded,
// multiplied by $src1, $src2 is added, and the result is popped into $dst.
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  ins_cost(95);
  ins_pipe(fpu_reg_mem_reg_reg);

  format %{ "FLD    $mem1     ===MACRO2===\n\t"
            "FMUL   ST,$src1  subsume mulFPR left load\n\t"
            "FADD   ST,$src2\n\t"
            "FSTP   $dst" %}

  // primary: D9 /0 (LoadF)
  opcode(0xD9);
  ins_encode(
    SetInstMark,
    OpcP, RMopc_Mem(0x00,mem1),
    FMul_ST_reg(src1),
    FAdd_ST_reg(src2),
    Pop_Reg_FPR(dst),
    ClearInstMark
  );
%}
10385 
// MACRO3 -- addFPR of a mulFPR.
// This variant does not round to 24 bits.  It is a '2-address' instruction:
// the result goes back into src2, which removes a move from the macro; the
// register allocator may or may not have to add it back.
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  match(Set src2 (AddF (MulF src0 src1) src2));
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_reg_reg_reg);

  format %{ "FLD    $src0     ===MACRO3===\n\t"
            "FMUL   ST,$src1\n\t"
            "FADDP  $src2,ST" %}

  // LoadF D9 /0
  opcode(0xD9);
  ins_encode(
    Push_Reg_FPR(src0),
    FMul_ST_reg(src1),
    FAddP_reg_ST(src2)
  );
%}
10404 
// MACRO4 -- divFPR of a subFPR: computes (src2 - src1) / src3.
// This variant does not round to 24 bits.  Per the format, src2 is pushed,
// src1 is subtracted, the difference is divided by src3, and the result is
// popped into $dst.
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  match(Set dst (DivF (SubF src2 src1) src3));
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_reg_reg_reg_reg);

  format %{ "FLD    $src2   ===MACRO4===\n\t"
            "FSUB   ST,$src1\n\t"
            "FDIV   ST,$src3\n\t"
            "FSTP  $dst" %}

  // primary/secondary: DE F8+i or DE /7
  opcode(0xDE, 0x7);
  ins_encode(
    Push_Reg_FPR(src2),
    subFPR_divFPR_encode(src1,src3),
    Pop_Reg_FPR(dst)
  );
%}
10421 
// Float divide of two registers, 24-bit variant: the quotient is spilled to
// a float stack slot to obtain 24-bit precision.
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  match(Set dst (DivF src1 src2));
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_mem_reg_reg);

  format %{ "FDIV   $dst,$src1,$src2" %}

  // primary/secondary: D8 F0+i or D8 /6
  opcode(0xD8, 0x6);
  ins_encode(
    Push_Reg_FPR(src1),
    OpcReg_FPR(src2),
    Pop_Mem_FPR(dst)
  );
%}
//
// Float divide on FPU registers.  This variant does not round to 24 bits:
// $src is pushed and a popping divide leaves the quotient in $dst.
instruct divFPR_reg(regFPR dst, regFPR src) %{
  match(Set dst (DivF dst src));
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  ins_pipe(fpu_reg_reg);

  format %{ "FDIV   $dst,$src" %}

  // primary/secondary: DE F8+i or DE /7
  opcode(0xDE, 0x7);
  ins_encode(
    Push_Reg_FPR(src),
    OpcP, RegOpc(dst)
  );
%}
10446 
10447 
// Float remainder, 24-bit variant: the result is spilled to a float stack
// slot to obtain 24-bit precision.  EAX and EFLAGS are listed as killed
// because emitModDPR() uses them.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  match(Set dst (ModF src1 src2));
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
  ins_pipe(pipe_slow);

  format %{ "FMOD   $dst,$src1,$src2" %}

  ins_encode(
    Push_Reg_Mod_DPR(src1, src2),
    emitModDPR(),
    Push_Result_Mod_DPR(src2),
    Pop_Mem_FPR(dst)
  );
%}
10461 //
10462 // This instruction does not round to 24-bits
      // Two-operand float remainder dst = dst mod src. Selected when UseSSE==0
      // and 24-bit instruction selection is off. EAX and EFLAGS are declared
      // killed.
10463 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10464   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10465   match(Set dst (ModF dst src));
10466   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10467
10468   format %{ "FMOD   $dst,$src" %}
        // Same DPR mod helper sequence as modFPR24_reg, but the result is popped
        // into a register instead of memory.
10469   ins_encode(Push_Reg_Mod_DPR(dst, src),
10470               emitModDPR(),
10471               Push_Result_Mod_DPR(src),
10472               Pop_Reg_FPR(dst));
10473   ins_pipe( pipe_slow );
10474 %}
10475 
10476 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
        // Float remainder dst = src0 mod src1 for operands held in XMM registers
        // (UseSSE>=1). Per the format text below, both operands are bounced
        // through a 4-byte stack temporary onto the x87 stack, FPREM is repeated
        // while the parity flag is set (JP loop), and the result is moved back
        // to the XMM destination. EAX and EFLAGS are declared killed.
10477   predicate(UseSSE>=1);
10478   match(Set dst (ModF src0 src1));
10479   effect(KILL rax, KILL cr);
10480   format %{ "SUB    ESP,4\t # FMOD\n"
10481           "\tMOVSS  [ESP+0],$src1\n"
10482           "\tFLD_S  [ESP+0]\n"
10483           "\tMOVSS  [ESP+0],$src0\n"
10484           "\tFLD_S  [ESP+0]\n"
10485      "loop:\tFPREM\n"
10486           "\tFWAIT\n"
10487           "\tFNSTSW AX\n"
10488           "\tSAHF\n"
10489           "\tJP     loop\n"
10490           "\tFSTP_S [ESP+0]\n"
10491           "\tMOVSS  $dst,[ESP+0]\n"
10492           "\tADD    ESP,4\n"
10493           "\tFSTP   ST0\t # Restore FPU Stack"
10494     %}
10495   ins_cost(250);
        // 0x4 passed to Push_ResultF matches the 4-byte temporary shown in the
        // format text; PopFPU corresponds to the final "FSTP ST0".
10496   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10497   ins_pipe( pipe_slow );
10498 %}
10499 
10500 
10501 //----------Arithmetic Conversion Instructions---------------------------------
10502 // The conversion operations are all alphabetically sorted.  Please keep it that way!
10503 
10504 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
        // Matches RoundFloat on the x87 path (UseSSE==0): the x87 register is
        // stored to a float stack slot (FST_S), which performs the rounding.
        // Also used as the expansion target of convDPR2FPR_reg.
10505   predicate(UseSSE==0);
10506   match(Set dst (RoundFloat src));
10507   ins_cost(125);
10508   format %{ "FST_S  $dst,$src\t# F-round" %}
10509   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10510   ins_pipe( fpu_mem_reg );
10511 %}
10512 
10513 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
        // Matches RoundDouble when doubles live on the x87 stack (UseSSE<=1):
        // the x87 register is stored to a double stack slot (FST_D).
        // Also used as the expansion target of convFPR2D_reg.
10514   predicate(UseSSE<=1);
10515   match(Set dst (RoundDouble src));
10516   ins_cost(125);
10517   format %{ "FST_D  $dst,$src\t# D-round" %}
10518   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10519   ins_pipe( fpu_mem_reg );
10520 %}
10521 
10522 // Force rounding to 24-bit precision and 8-bit exponent (IEEE 754 single)
      // ConvD2F on the pure x87 path (UseSSE==0); the result is produced in a
      // float stack slot by expanding to roundFloat_mem_reg.
10523 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10524   predicate(UseSSE==0);
10525   match(Set dst (ConvD2F src));
10526   format %{ "FST_S  $dst,$src\t# F-round" %}
10527   expand %{
10528     roundFloat_mem_reg(dst,src);
10529   %}
10530 %}
10531 
10532 // Force rounding to 24-bit precision and 8-bit exponent (IEEE 754 single)
      // ConvD2F when UseSSE==1: the source is an x87 register (regDPR) and the
      // destination an XMM register (regF), so the value is bounced through a
      // 4-byte stack temporary. Flags are declared killed.
10533 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10534   predicate(UseSSE==1);
10535   match(Set dst (ConvD2F src));
10536   effect( KILL cr );
10537   format %{ "SUB    ESP,4\n\t"
10538             "FST_S  [ESP],$src\t# F-round\n\t"
10539             "MOVSS  $dst,[ESP]\n\t"
10540             "ADD ESP,4" %}
10541   ins_encode %{
10542     __ subptr(rsp, 4);
          // If src is not FPR1L, load it first and store-with-pop so the FPU
          // stack depth is unchanged; otherwise store without popping.
          // NOTE(review): FPR1L presumably denotes the x87 top of stack -- confirm.
10543     if ($src$$reg != FPR1L_enc) {
10544       __ fld_s($src$$reg-1);
10545       __ fstp_s(Address(rsp, 0));
10546     } else {
10547       __ fst_s(Address(rsp, 0));
10548     }
10549     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10550     __ addptr(rsp, 4);
10551   %}
10552   ins_pipe( pipe_slow );
10553 %}
10554 
10555 // Force rounding double precision to single precision
      // ConvD2F with both operands in XMM registers (UseSSE>=2): one CVTSD2SS.
10556 instruct convD2F_reg(regF dst, regD src) %{
10557   predicate(UseSSE>=2);
10558   match(Set dst (ConvD2F src));
10559   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10560   ins_encode %{
10561     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10562   %}
10563   ins_pipe( pipe_slow );
10564 %}
10565 
10566 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
        // ConvF2D on the pure x87 path (UseSSE==0): both operands are x87
        // registers and the encoding is Pop_Reg_Reg_DPR(dst, src).
        // NOTE(review): the format text prints "FST_S" although the operands are
        // registers -- the disassembly text may be misleading; confirm.
10567   predicate(UseSSE==0);
10568   match(Set dst (ConvF2D src));
10569   format %{ "FST_S  $dst,$src\t# D-round" %}
10570   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10571   ins_pipe( fpu_reg_reg );
10572 %}
10573 
10574 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
        // ConvF2D when UseSSE==1 with an x87 source: the result is produced in a
        // double stack slot by expanding to roundDouble_mem_reg.
10575   predicate(UseSSE==1);
10576   match(Set dst (ConvF2D src));
10577   format %{ "FST_D  $dst,$src\t# D-round" %}
10578   expand %{
10579     roundDouble_mem_reg(dst,src);
10580   %}
10581 %}
10582 
10583 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
        // ConvF2D when UseSSE==1: the float source is in an XMM register and the
        // double destination is an x87 register, so the value is bounced through
        // a 4-byte stack temporary and loaded with FLD_S. Flags are declared
        // killed.
10584   predicate(UseSSE==1);
10585   match(Set dst (ConvF2D src));
10586   effect( KILL cr );
10587   format %{ "SUB    ESP,4\n\t"
10588             "MOVSS  [ESP],$src\n\t"
10589             "FLD_S  [ESP]\n\t"
10590             "ADD    ESP,4\n\t"
10591             "FSTP   $dst\t# D-round" %}
10592   ins_encode %{
10593     __ subptr(rsp, 4);
10594     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10595     __ fld_s(Address(rsp, 0));
          // The temporary is released before the value is popped into dst.
10596     __ addptr(rsp, 4);
10597     __ fstp_d($dst$$reg);
10598   %}
10599   ins_pipe( pipe_slow );
10600 %}
10601 
10602 instruct convF2D_reg(regD dst, regF src) %{
        // ConvF2D with both operands in XMM registers (UseSSE>=2): one CVTSS2SD.
10603   predicate(UseSSE>=2);
10604   match(Set dst (ConvF2D src));
10605   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10606   ins_encode %{
10607     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10608   %}
10609   ins_pipe( pipe_slow );
10610 %}
10611 
10612 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
      // x87 variant (UseSSE<=1). The result is fixed to EAX; EDX and EFLAGS are
      // declared killed. Per the format text: convert with FISTP under the
      // truncating control word, and if the result equals 0x80000000 reload the
      // source and call d2i_wrapper.
10613 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10614   predicate(UseSSE<=1);
10615   match(Set dst (ConvD2I src));
10616   effect( KILL tmp, KILL cr );
10617   format %{ "FLD    $src\t# Convert double to int \n\t"
10618             "FLDCW  trunc mode\n\t"
10619             "SUB    ESP,4\n\t"
10620             "FISTp  [ESP + #0]\n\t"
10621             "FLDCW  std/24-bit mode\n\t"
10622             "POP    EAX\n\t"
10623             "CMP    EAX,0x80000000\n\t"
10624             "JNE,s  fast\n\t"
10625             "FLD_D  $src\n\t"
10626             "CALL   d2i_wrapper\n"
10627       "fast:" %}
10628   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10629   ins_pipe( pipe_slow );
10630 %}
10631 
10632 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
      // SSE2 variant (UseSSE>=2). The result is fixed to EAX; EDX and EFLAGS are
      // declared killed.
10633 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10634   predicate(UseSSE>=2);
10635   match(Set dst (ConvD2I src));
10636   effect( KILL tmp, KILL cr );
10637   format %{ "CVTTSD2SI $dst, $src\n\t"
10638             "CMP    $dst,0x80000000\n\t"
10639             "JNE,s  fast\n\t"
10640             "SUB    ESP, 8\n\t"
10641             "MOVSD  [ESP], $src\n\t"
10642             "FLD_D  [ESP]\n\t"
10643             "ADD    ESP, 8\n\t"
10644             "CALL   d2i_wrapper\n"
10645       "fast:" %}
10646   ins_encode %{
10647     Label fast;
          // Fast path: truncating convert. 0x80000000 is the value CVTTSD2SI
          // yields for NaN/out-of-range inputs, so only that result takes the
          // slow path.
10648     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10649     __ cmpl($dst$$Register, 0x80000000);
10650     __ jccb(Assembler::notEqual, fast);
          // Slow path: hand the double to the stub on the x87 stack via an
          // 8-byte stack temporary.
10651     __ subptr(rsp, 8);
10652     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10653     __ fld_d(Address(rsp, 0));
10654     __ addptr(rsp, 8);
10655     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10656     __ post_call_nop();
10657     __ bind(fast);
10658   %}
10659   ins_pipe( pipe_slow );
10660 %}
10661 
10662 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
        // ConvD2L for an x87 source (UseSSE<=1). Per the format text the 64-bit
        // result is popped into EAX (low) and EDX (high); a result of
        // EDX==0x80000000 with EAX==0 reloads the source and calls d2l_wrapper.
        // EFLAGS is declared killed.
10663   predicate(UseSSE<=1);
10664   match(Set dst (ConvD2L src));
10665   effect( KILL cr );
10666   format %{ "FLD    $src\t# Convert double to long\n\t"
10667             "FLDCW  trunc mode\n\t"
10668             "SUB    ESP,8\n\t"
10669             "FISTp  [ESP + #0]\n\t"
10670             "FLDCW  std/24-bit mode\n\t"
10671             "POP    EAX\n\t"
10672             "POP    EDX\n\t"
10673             "CMP    EDX,0x80000000\n\t"
10674             "JNE,s  fast\n\t"
10675             "TEST   EAX,EAX\n\t"
10676             "JNE,s  fast\n\t"
10677             "FLD    $src\n\t"
10678             "CALL   d2l_wrapper\n"
10679       "fast:" %}
10680   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10681   ins_pipe( pipe_slow );
10682 %}
10683 
10684 // XMM lacks a float/double->long conversion, so use the old FPU stack.
      // ConvD2L for an XMM source (UseSSE>=2). The double is spilled to an
      // 8-byte stack temporary, converted in place with FISTP under the
      // truncating control word, and popped into EAX/EDX. EFLAGS is declared
      // killed.
10685 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10686   predicate (UseSSE>=2);
10687   match(Set dst (ConvD2L src));
10688   effect( KILL cr );
10689   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10690             "MOVSD  [ESP],$src\n\t"
10691             "FLD_D  [ESP]\n\t"
10692             "FLDCW  trunc mode\n\t"
10693             "FISTp  [ESP + #0]\n\t"
10694             "FLDCW  std/24-bit mode\n\t"
10695             "POP    EAX\n\t"
10696             "POP    EDX\n\t"
10697             "CMP    EDX,0x80000000\n\t"
10698             "JNE,s  fast\n\t"
10699             "TEST   EAX,EAX\n\t"
10700             "JNE,s  fast\n\t"
10701             "SUB    ESP,8\n\t"
10702             "MOVSD  [ESP],$src\n\t"
10703             "FLD_D  [ESP]\n\t"
10704             "ADD    ESP,8\n\t"
10705             "CALL   d2l_wrapper\n"
10706       "fast:" %}
10707   ins_encode %{
10708     Label fast;
10709     __ subptr(rsp, 8);
10710     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10711     __ fld_d(Address(rsp, 0));
10712     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10713     __ fistp_d(Address(rsp, 0));
10714     // Restore the rounding mode, mask the exception
10715     if (Compile::current()->in_24_bit_fp_mode()) {
10716       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10717     } else {
10718       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10719     }
10720     // Load the converted long, adjust CPU stack
10721     __ pop(rax);
10722     __ pop(rdx);
          // Only the bit pattern 0x80000000:00000000 (high:low) takes the slow
          // path; any other result is final.
10723     __ cmpl(rdx, 0x80000000);
10724     __ jccb(Assembler::notEqual, fast);
10725     __ testl(rax, rax);
10726     __ jccb(Assembler::notEqual, fast);
          // Slow path: reload the original double onto the x87 stack and let the
          // d2l_wrapper stub compute the result.
10727     __ subptr(rsp, 8);
10728     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10729     __ fld_d(Address(rsp, 0));
10730     __ addptr(rsp, 8);
10731     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10732     __ post_call_nop();
10733     __ bind(fast);
10734   %}
10735   ins_pipe( pipe_slow );
10736 %}
10737 
10738 // Convert a float to an int.  Java semantics require we do complex
10739 // manglations in the corner cases.  So we set the rounding mode to
10740 // 'zero', store the darned float down as an int, and reset the
10741 // rounding mode to 'nearest'.  The hardware stores a flag value down
10742 // if we would overflow or converted a NAN; we check for this and
10743 // go the slow path if needed.
      // x87 variant (UseSSE==0). The result is fixed to EAX; EDX and EFLAGS are
      // declared killed.
10744 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10745   predicate(UseSSE==0);
10746   match(Set dst (ConvF2I src));
10747   effect( KILL tmp, KILL cr );
10748   format %{ "FLD    $src\t# Convert float to int \n\t"
10749             "FLDCW  trunc mode\n\t"
10750             "SUB    ESP,4\n\t"
10751             "FISTp  [ESP + #0]\n\t"
10752             "FLDCW  std/24-bit mode\n\t"
10753             "POP    EAX\n\t"
10754             "CMP    EAX,0x80000000\n\t"
10755             "JNE,s  fast\n\t"
10756             "FLD    $src\n\t"
10757             "CALL   d2i_wrapper\n"
10758       "fast:" %}
10759   // DPR2I_encoding works for FPR2I
10760   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10761   ins_pipe( pipe_slow );
10762 %}
10763 
10764 // Convert a float in xmm to an int reg.
      // SSE variant (UseSSE>=1). The result is fixed to EAX; EDX and EFLAGS are
      // declared killed.
10765 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10766   predicate(UseSSE>=1);
10767   match(Set dst (ConvF2I src));
10768   effect( KILL tmp, KILL cr );
10769   format %{ "CVTTSS2SI $dst, $src\n\t"
10770             "CMP    $dst,0x80000000\n\t"
10771             "JNE,s  fast\n\t"
10772             "SUB    ESP, 4\n\t"
10773             "MOVSS  [ESP], $src\n\t"
10774             "FLD    [ESP]\n\t"
10775             "ADD    ESP, 4\n\t"
10776             "CALL   d2i_wrapper\n"
10777       "fast:" %}
10778   ins_encode %{
10779     Label fast;
          // Fast path: truncating convert. 0x80000000 is the value CVTTSS2SI
          // yields for NaN/out-of-range inputs, so only that result takes the
          // slow path.
10780     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10781     __ cmpl($dst$$Register, 0x80000000);
10782     __ jccb(Assembler::notEqual, fast);
          // Slow path: load the float onto the x87 stack via a 4-byte temporary
          // and call the d2i_wrapper stub (shared with the double conversion).
10783     __ subptr(rsp, 4);
10784     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10785     __ fld_s(Address(rsp, 0));
10786     __ addptr(rsp, 4);
10787     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10788     __ post_call_nop();
10789     __ bind(fast);
10790   %}
10791   ins_pipe( pipe_slow );
10792 %}
10793 
10794 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
        // ConvF2L for an x87 source (UseSSE==0). Per the format text the 64-bit
        // result is popped into EAX (low) and EDX (high); a result of
        // EDX==0x80000000 with EAX==0 reloads the source and calls d2l_wrapper.
        // EFLAGS is declared killed.
10795   predicate(UseSSE==0);
10796   match(Set dst (ConvF2L src));
10797   effect( KILL cr );
10798   format %{ "FLD    $src\t# Convert float to long\n\t"
10799             "FLDCW  trunc mode\n\t"
10800             "SUB    ESP,8\n\t"
10801             "FISTp  [ESP + #0]\n\t"
10802             "FLDCW  std/24-bit mode\n\t"
10803             "POP    EAX\n\t"
10804             "POP    EDX\n\t"
10805             "CMP    EDX,0x80000000\n\t"
10806             "JNE,s  fast\n\t"
10807             "TEST   EAX,EAX\n\t"
10808             "JNE,s  fast\n\t"
10809             "FLD    $src\n\t"
10810             "CALL   d2l_wrapper\n"
10811       "fast:" %}
10812   // DPR2L_encoding works for FPR2L
10813   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10814   ins_pipe( pipe_slow );
10815 %}
10816 
10817 // XMM lacks a float/double->long conversion, so use the old FPU stack.
      // ConvF2L for an XMM source (UseSSE>=1). The float is spilled to an 8-byte
      // stack temporary (sized for the 64-bit result), converted in place with
      // FISTP under the truncating control word, and popped into EAX/EDX.
      // EFLAGS is declared killed.
10818 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10819   predicate (UseSSE>=1);
10820   match(Set dst (ConvF2L src));
10821   effect( KILL cr );
10822   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10823             "MOVSS  [ESP],$src\n\t"
10824             "FLD_S  [ESP]\n\t"
10825             "FLDCW  trunc mode\n\t"
10826             "FISTp  [ESP + #0]\n\t"
10827             "FLDCW  std/24-bit mode\n\t"
10828             "POP    EAX\n\t"
10829             "POP    EDX\n\t"
10830             "CMP    EDX,0x80000000\n\t"
10831             "JNE,s  fast\n\t"
10832             "TEST   EAX,EAX\n\t"
10833             "JNE,s  fast\n\t"
10834             "SUB    ESP,4\t# Convert float to long\n\t"
10835             "MOVSS  [ESP],$src\n\t"
10836             "FLD_S  [ESP]\n\t"
10837             "ADD    ESP,4\n\t"
10838             "CALL   d2l_wrapper\n"
10839       "fast:" %}
10840   ins_encode %{
10841     Label fast;
10842     __ subptr(rsp, 8);
10843     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10844     __ fld_s(Address(rsp, 0));
10845     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10846     __ fistp_d(Address(rsp, 0));
10847     // Restore the rounding mode, mask the exception
10848     if (Compile::current()->in_24_bit_fp_mode()) {
10849       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10850     } else {
10851       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10852     }
10853     // Load the converted long, adjust CPU stack
10854     __ pop(rax);
10855     __ pop(rdx);
          // Only the bit pattern 0x80000000:00000000 (high:low) takes the slow
          // path; any other result is final.
10856     __ cmpl(rdx, 0x80000000);
10857     __ jccb(Assembler::notEqual, fast);
10858     __ testl(rax, rax);
10859     __ jccb(Assembler::notEqual, fast);
          // Slow path: reload the original float onto the x87 stack (4-byte
          // temporary this time) and call the d2l_wrapper stub.
10860     __ subptr(rsp, 4);
10861     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10862     __ fld_s(Address(rsp, 0));
10863     __ addptr(rsp, 4);
10864     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10865     __ post_call_nop();
10866     __ bind(fast);
10867   %}
10868   ins_pipe( pipe_slow );
10869 %}
10870 
10871 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
        // ConvI2D into an x87 register (UseSSE<=1). The int operand is taken
        // from a stack slot and loaded with FILD, then popped into dst.
10872   predicate( UseSSE<=1 );
10873   match(Set dst (ConvI2D src));
10874   format %{ "FILD   $src\n\t"
10875             "FSTP   $dst" %}
10876   opcode(0xDB, 0x0);  /* DB /0 */
10877   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10878   ins_pipe( fpu_reg_mem );
10879 %}
10880 
10881 instruct convI2D_reg(regD dst, rRegI src) %{
        // ConvI2D from a general register into an XMM register using CVTSI2SD.
        // Selected when UseSSE>=2 and UseXmmI2D is off; convXI2D_reg covers the
        // UseXmmI2D case.
10882   predicate( UseSSE>=2 && !UseXmmI2D );
10883   match(Set dst (ConvI2D src));
10884   format %{ "CVTSI2SD $dst,$src" %}
10885   ins_encode %{
10886     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10887   %}
10888   ins_pipe( pipe_slow );
10889 %}
10890 
10891 instruct convI2D_mem(regD dst, memory mem) %{
        // ConvI2D with the int load folded in: matches ConvI2D(LoadI mem) and
        // converts directly from memory with CVTSI2SD (UseSSE>=2).
10892   predicate( UseSSE>=2 );
10893   match(Set dst (ConvI2D (LoadI mem)));
10894   format %{ "CVTSI2SD $dst,$mem" %}
10895   ins_encode %{
10896     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10897   %}
10898   ins_pipe( pipe_slow );
10899 %}
10900 
10901 instruct convXI2D_reg(regD dst, rRegI src)
10902 %{
        // Alternative ConvI2D selected when UseSSE>=2 and UseXmmI2D is on: the
        // int is moved into the XMM register with MOVD and converted there with
        // CVTDQ2PD instead of using CVTSI2SD.
10903   predicate( UseSSE>=2 && UseXmmI2D );
10904   match(Set dst (ConvI2D src));
10905
10906   format %{ "MOVD  $dst,$src\n\t"
10907             "CVTDQ2PD $dst,$dst\t# i2d" %}
10908   ins_encode %{
10909     __ movdl($dst$$XMMRegister, $src$$Register);
10910     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10911   %}
10912   ins_pipe(pipe_slow); // XXX
10913 %}
10914 
10915 instruct convI2DPR_mem(regDPR dst, memory mem) %{
        // ConvI2D with the int load folded in, producing an x87 register:
        // matches ConvI2D(LoadI mem) and loads straight from memory with FILD.
        // Selected when UseSSE<=1 and 24-bit instruction selection is off.
10916   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10917   match(Set dst (ConvI2D (LoadI mem)));
10918   format %{ "FILD   $mem\n\t"
10919             "FSTP   $dst" %}
10920   opcode(0xDB);      /* DB /0 */
10921   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
10922               Pop_Reg_DPR(dst), ClearInstMark);
10923   ins_pipe( fpu_reg_mem );
10924 %}
10925 
10926 // Convert a byte to a float; no rounding step needed.
      // The predicate recognises the "byte" case structurally: the ConvI2F input
      // must be an AndI whose second input is the constant 255, so the value is
      // limited to 0..255. Only used on the x87 path (UseSSE==0).
10927 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
10928   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
10929   match(Set dst (ConvI2F src));
10930   format %{ "FILD   $src\n\t"
10931             "FSTP   $dst" %}
10932
10933   opcode(0xDB, 0x0);  /* DB /0 */
10934   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
10935   ins_pipe( fpu_reg_mem );
10936 %}
10937 
10938 // In 24-bit mode, force exponent rounding by storing back out
      // ConvI2F from an int stack slot to a float stack slot: FILD the int, then
      // store-pop it as a single-precision value. Selected when UseSSE==0 and
      // 24-bit instruction selection is on.
10939 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
10940   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10941   match(Set dst (ConvI2F src));
10942   ins_cost(200);
10943   format %{ "FILD   $src\n\t"
10944             "FSTP_S $dst" %}
10945   opcode(0xDB, 0x0);  /* DB /0 */
10946   ins_encode( Push_Mem_I(src),
10947               Pop_Mem_FPR(dst));
10948   ins_pipe( fpu_mem_mem );
10949 %}
10950 
10951 // In 24-bit mode, force exponent rounding by storing back out
      // Same as convI2FPR_SSF but with the int load folded in: matches
      // ConvI2F(LoadI mem) and FILDs directly from the memory operand.
10952 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
10953   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10954   match(Set dst (ConvI2F (LoadI mem)));
10955   ins_cost(200);
10956   format %{ "FILD   $mem\n\t"
10957             "FSTP_S $dst" %}
10958   opcode(0xDB);  /* DB /0 */
10959   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
10960               Pop_Mem_FPR(dst), ClearInstMark);
10961   ins_pipe( fpu_mem_mem );
10962 %}
10963 
10964 // This instruction does not round to 24-bits
      // ConvI2F from an int stack slot into an x87 register (FILD + FSTP).
      // Selected when UseSSE==0 and 24-bit instruction selection is off.
10965 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
10966   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10967   match(Set dst (ConvI2F src));
10968   format %{ "FILD   $src\n\t"
10969             "FSTP   $dst" %}
10970   opcode(0xDB, 0x0);  /* DB /0 */
10971   ins_encode( Push_Mem_I(src),
10972               Pop_Reg_FPR(dst));
10973   ins_pipe( fpu_reg_mem );
10974 %}
10975 
10976 // This instruction does not round to 24-bits
      // Same as convI2FPR_reg but with the int load folded in: matches
      // ConvI2F(LoadI mem) and FILDs directly from the memory operand.
10977 instruct convI2FPR_mem(regFPR dst, memory mem) %{
10978   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10979   match(Set dst (ConvI2F (LoadI mem)));
10980   format %{ "FILD   $mem\n\t"
10981             "FSTP   $dst" %}
10982   opcode(0xDB);      /* DB /0 */
10983   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
10984               Pop_Reg_FPR(dst), ClearInstMark);
10985   ins_pipe( fpu_reg_mem );
10986 %}
10987 
10988 // Convert an int to a float in xmm; no rounding step needed.
      // Uses CVTSI2SS. Selected when UseSSE==1, or when UseSSE>=2 and UseXmmI2F
      // is off; convXI2F_reg covers the UseXmmI2F case.
10989 instruct convI2F_reg(regF dst, rRegI src) %{
10990   predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F ));
10991   match(Set dst (ConvI2F src));
10992   format %{ "CVTSI2SS $dst, $src" %}
10993   ins_encode %{
10994     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
10995   %}
10996   ins_pipe( pipe_slow );
10997 %}
10998 
10999  instruct convXI2F_reg(regF dst, rRegI src)
11000 %{
        // Alternative ConvI2F selected when UseSSE>=2 and UseXmmI2F is on: the
        // int is moved into the XMM register with MOVD and converted there with
        // CVTDQ2PS instead of using CVTSI2SS.
11001   predicate( UseSSE>=2 && UseXmmI2F );
11002   match(Set dst (ConvI2F src));
11003
11004   format %{ "MOVD  $dst,$src\n\t"
11005             "CVTDQ2PS $dst,$dst\t# i2f" %}
11006   ins_encode %{
11007     __ movdl($dst$$XMMRegister, $src$$Register);
11008     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11009   %}
11010   ins_pipe(pipe_slow); // XXX
11011 %}
11012 
11013 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
        // Sign-extending ConvI2L into a register pair. Per the format text, src
        // is copied to both halves and the high half is arithmetically shifted
        // right by 31 so it holds only the sign. EFLAGS is declared killed.
11014   match(Set dst (ConvI2L src));
11015   effect(KILL cr);
11016   ins_cost(375);
11017   format %{ "MOV    $dst.lo,$src\n\t"
11018             "MOV    $dst.hi,$src\n\t"
11019             "SAR    $dst.hi,31" %}
11020   ins_encode(convert_int_long(dst,src));
11021   ins_pipe( ialu_reg_reg_long );
11022 %}
11023 
11024 // Zero-extend convert int to long
      // Matches (ConvI2L src) & mask where mask is an immL_32bits operand, so
      // the sign extension is unnecessary: copy src to the low half and clear
      // the high half with XOR. Cheaper (cost 250) than convI2L_reg (cost 375).
      // EFLAGS is declared killed.
11025 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11026   match(Set dst (AndL (ConvI2L src) mask) );
11027   effect( KILL flags );
11028   ins_cost(250);
11029   format %{ "MOV    $dst.lo,$src\n\t"
11030             "XOR    $dst.hi,$dst.hi" %}
11031   opcode(0x33); // XOR
11032   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11033   ins_pipe( ialu_reg_reg_long );
11034 %}
11035 
11036 // Zero-extend long
      // Matches src & mask where mask is an immL_32bits operand: copy the low
      // half of src to the low half of dst and clear the high half with XOR.
      // EFLAGS is declared killed.
11037 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11038   match(Set dst (AndL src mask) );
11039   effect( KILL flags );
11040   ins_cost(250);
        // The format text ends without a trailing "\n\t", consistent with the
        // other instruct formats in this file.
11041   format %{ "MOV    $dst.lo,$src.lo\n\t"
11042             "XOR    $dst.hi,$dst.hi" %}
11043   opcode(0x33); // XOR
11044   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11045   ins_pipe( ialu_reg_reg_long );
11046 %}
11047 
11048 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
        // ConvL2D on the x87 path (UseSSE<=1). Per the format text the long
        // register pair is pushed on the CPU stack, loaded with FILD, and the
        // result is store-popped to a double stack slot. EFLAGS is declared
        // killed.
11049   predicate (UseSSE<=1);
11050   match(Set dst (ConvL2D src));
11051   effect( KILL cr );
11052   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11053             "PUSH   $src.lo\n\t"
11054             "FILD   ST,[ESP + #0]\n\t"
11055             "ADD    ESP,8\n\t"
11056             "FSTP_D $dst\t# D-round" %}
11057   opcode(0xDF, 0x5);  /* DF /5 */
11058   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11059   ins_pipe( pipe_slow );
11060 %}
11061 
11062 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
        // ConvL2D into an XMM register (UseSSE>=2). Per the format text the long
        // is pushed, converted on the x87 stack (FILD_D/FSTP_D in place), and
        // the double is then loaded into dst with MOVSD before the 8 bytes are
        // released. EFLAGS is declared killed.
11063   predicate (UseSSE>=2);
11064   match(Set dst (ConvL2D src));
11065   effect( KILL cr );
11066   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11067             "PUSH   $src.lo\n\t"
11068             "FILD_D [ESP]\n\t"
11069             "FSTP_D [ESP]\n\t"
11070             "MOVSD  $dst,[ESP]\n\t"
11071             "ADD    ESP,8" %}
11072   opcode(0xDF, 0x5);  /* DF /5 */
11073   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11074   ins_pipe( pipe_slow );
11075 %}
11076 
11077 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
        // ConvL2F into an XMM register (UseSSE>=1). Per the format text the long
        // is pushed, loaded with FILD_D, stored back as a single (FSTP_S), and
        // moved into dst with MOVSS. The 0x8 passed to Push_ResultF matches the
        // 8 bytes pushed for the long. EFLAGS is declared killed.
11078   predicate (UseSSE>=1);
11079   match(Set dst (ConvL2F src));
11080   effect( KILL cr );
11081   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11082             "PUSH   $src.lo\n\t"
11083             "FILD_D [ESP]\n\t"
11084             "FSTP_S [ESP]\n\t"
11085             "MOVSS  $dst,[ESP]\n\t"
11086             "ADD    ESP,8" %}
11087   opcode(0xDF, 0x5);  /* DF /5 */
11088   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11089   ins_pipe( pipe_slow );
11090 %}
11091 
11092 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
        // ConvL2F producing its result in a float stack slot. Per the format
        // text the long is pushed, loaded with FILD, and store-popped as a
        // single (FSTP_S). EFLAGS is declared killed.
        // NOTE(review): unlike its siblings this rule has no UseSSE predicate;
        // presumably the stackSlotF destination is what distinguishes it from
        // convL2F_reg -- confirm.
11093   match(Set dst (ConvL2F src));
11094   effect( KILL cr );
11095   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11096             "PUSH   $src.lo\n\t"
11097             "FILD   ST,[ESP + #0]\n\t"
11098             "ADD    ESP,8\n\t"
11099             "FSTP_S $dst\t# F-round" %}
11100   opcode(0xDF, 0x5);  /* DF /5 */
11101   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11102   ins_pipe( pipe_slow );
11103 %}
11104 
11105 instruct convL2I_reg( rRegI dst, eRegL src ) %{
        // ConvL2I: truncation is just a copy of the low half of the long pair
        // (see format text); no flags effect is declared.
11106   match(Set dst (ConvL2I src));
11107   effect( DEF dst, USE src );
11108   format %{ "MOV    $dst,$src.lo" %}
11109   ins_encode(enc_CopyL_Lo(dst,src));
11110   ins_pipe( ialu_reg_reg );
11111 %}
11112 
11113 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
        // MoveF2I (raw bit move, no conversion) when the float already sits in a
        // stack slot: a plain 32-bit load from [rsp + disp] into dst.
11114   match(Set dst (MoveF2I src));
11115   effect( DEF dst, USE src );
11116   ins_cost(100);
11117   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11118   ins_encode %{
11119     __ movl($dst$$Register, Address(rsp, $src$$disp));
11120   %}
11121   ins_pipe( ialu_reg_mem );
11122 %}
11123 
11124 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
        // MoveF2I from an x87 register (UseSSE==0): the float is stored to an
        // int stack slot (FST_S per the format text).
11125   predicate(UseSSE==0);
11126   match(Set dst (MoveF2I src));
11127   effect( DEF dst, USE src );
11128
11129   ins_cost(125);
11130   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11131   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11132   ins_pipe( fpu_mem_reg );
11133 %}
11134 
11135 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
        // MoveF2I from an XMM register (UseSSE>=1): the float is stored to an
        // int stack slot at [rsp + disp] with movflt.
11136   predicate(UseSSE>=1);
11137   match(Set dst (MoveF2I src));
11138   effect( DEF dst, USE src );
11139
11140   ins_cost(95);
11141   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11142   ins_encode %{
11143     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11144   %}
11145   ins_pipe( pipe_slow );
11146 %}
11147 
11148 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
        // MoveF2I directly from an XMM register to a general register with MOVD
        // (UseSSE>=2). Lowest cost (85) of the MoveF2I rules in this group.
11149   predicate(UseSSE>=2);
11150   match(Set dst (MoveF2I src));
11151   effect( DEF dst, USE src );
11152   ins_cost(85);
11153   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11154   ins_encode %{
11155     __ movdl($dst$$Register, $src$$XMMRegister);
11156   %}
11157   ins_pipe( pipe_slow );
11158 %}
11159 
11160 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
        // MoveI2F (raw bit move, no conversion) from a general register to a
        // float stack slot: a plain 32-bit store to [rsp + disp].
11161   match(Set dst (MoveI2F src));
11162   effect( DEF dst, USE src );
11163
11164   ins_cost(100);
11165   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11166   ins_encode %{
11167     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11168   %}
11169   ins_pipe( ialu_mem_reg );
11170 %}
11171 
11172 
11173 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
        // MoveI2F into an x87 register (UseSSE==0): the int stack slot is loaded
        // as a 32-bit real (FLD m32real, opcode D9 /0) and popped into dst.
11174   predicate(UseSSE==0);
11175   match(Set dst (MoveI2F src));
11176   effect(DEF dst, USE src);
11177
11178   ins_cost(125);
11179   format %{ "FLD_S  $src\n\t"
11180             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11181   opcode(0xD9);               /* D9 /0, FLD m32real */
11182   ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
11183               Pop_Reg_FPR(dst), ClearInstMark );
11184   ins_pipe( fpu_reg_mem );
11185 %}
11186 
11187 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
        // MoveI2F into an XMM register (UseSSE>=1): the int stack slot at
        // [rsp + disp] is loaded with movflt.
11188   predicate(UseSSE>=1);
11189   match(Set dst (MoveI2F src));
11190   effect( DEF dst, USE src );
11191
11192   ins_cost(95);
11193   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11194   ins_encode %{
11195     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11196   %}
11197   ins_pipe( pipe_slow );
11198 %}
11199 
11200 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
        // MoveI2F directly from a general register to an XMM register with MOVD
        // (UseSSE>=2). Lowest cost (85) of the MoveI2F rules in this group.
11201   predicate(UseSSE>=2);
11202   match(Set dst (MoveI2F src));
11203   effect( DEF dst, USE src );
11204
11205   ins_cost(85);
11206   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11207   ins_encode %{
11208     __ movdl($dst$$XMMRegister, $src$$Register);
11209   %}
11210   ins_pipe( pipe_slow );
11211 %}
11212 
11213 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
        // MoveD2L (raw bit move, no conversion) when the double already sits in
        // a stack slot: two 32-bit loads (opcode 0x8B for each), low half from
        // $src and high half from $src+4 per the format text.
11214   match(Set dst (MoveD2L src));
11215   effect(DEF dst, USE src);
11216
11217   ins_cost(250);
11218   format %{ "MOV    $dst.lo,$src\n\t"
11219             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11220   opcode(0x8B, 0x8B);
11221   ins_encode( SetInstMark, OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src), ClearInstMark);
11222   ins_pipe( ialu_mem_long_reg );
11223 %}
11224 
11225 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
        // MoveD2L from an x87 register (UseSSE<=1): the double is stored to a
        // long stack slot (FST_D per the format text).
11226   predicate(UseSSE<=1);
11227   match(Set dst (MoveD2L src));
11228   effect(DEF dst, USE src);
11229
11230   ins_cost(125);
11231   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11232   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11233   ins_pipe( fpu_mem_reg );
11234 %}
11235 
11236 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
        // MoveD2L from an XMM register (UseSSE>=2): the double is stored to a
        // long stack slot at [rsp + disp] with movdbl.
11237   predicate(UseSSE>=2);
11238   match(Set dst (MoveD2L src));
11239   effect(DEF dst, USE src);
11240   ins_cost(95);
11241   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11242   ins_encode %{
11243     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11244   %}
11245   ins_pipe( pipe_slow );
11246 %}
11247 
11248 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
        // MoveD2L directly from an XMM register to a long register pair
        // (UseSSE>=2), using an XMM temporary to reach the upper 32 bits.
11249   predicate(UseSSE>=2);
11250   match(Set dst (MoveD2L src));
11251   effect(DEF dst, USE src, TEMP tmp);
11252   ins_cost(85);
11253   format %{ "MOVD   $dst.lo,$src\n\t"
11254             "PSHUFLW $tmp,$src,0x4E\n\t"
11255             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11256   ins_encode %{
          // Low 32 bits go straight to the low register of the pair.
11257     __ movdl($dst$$Register, $src$$XMMRegister);
          // PSHUFLW with 0x4E selects low words in the order 2,3,0,1, i.e. it
          // swaps the two low dwords, so tmp's low dword is src's bits 63..32.
11258     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
          // NOTE(review): HIGH_FROM_LOW presumably maps the low register of the
          // pair to its high partner -- confirm against its definition.
11259     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11260   %}
11261   ins_pipe( pipe_slow );
11262 %}
11263 
11264 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
        // MoveL2D (raw bit move, no conversion) from a long register pair to a
        // double stack slot: two 32-bit stores (opcode 0x89 for each), low half
        // to $dst and high half to $dst+4 per the format text.
11265   match(Set dst (MoveL2D src));
11266   effect(DEF dst, USE src);
11267
11268   ins_cost(200);
11269   format %{ "MOV    $dst,$src.lo\n\t"
11270             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11271   opcode(0x89, 0x89);
11272   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
11273   ins_pipe( ialu_mem_long_reg );
11274 %}
11275 
11276 
11277 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
        // MoveL2D into an x87 register (UseSSE<=1): the long stack slot is
        // loaded as a 64-bit real (FLD m64real, opcode DD /0) and popped into
        // dst.
11278   predicate(UseSSE<=1);
11279   match(Set dst (MoveL2D src));
11280   effect(DEF dst, USE src);
11281   ins_cost(125);
11282
11283   format %{ "FLD_D  $src\n\t"
11284             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11285   opcode(0xDD);               /* DD /0, FLD m64real */
11286   ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
11287               Pop_Reg_DPR(dst), ClearInstMark );
11288   ins_pipe( fpu_reg_mem );
11289 %}
11290 
11291 
11292 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
        // MoveL2D into an XMM register from a long stack slot, selected when
        // UseSSE>=2 and UseXmmLoadAndClearUpper is on (format text: MOVSD).
        // MoveL2D_stack_reg_sse_partial covers the opposite flag setting.
11293   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11294   match(Set dst (MoveL2D src));
11295   effect(DEF dst, USE src);
11296
11297   ins_cost(95);
11298   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11299   ins_encode %{
11300     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11301   %}
11302   ins_pipe( pipe_slow );
11303 %}
11304 
11305 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
        // MoveL2D into an XMM register from a long stack slot, selected when
        // UseSSE>=2 and UseXmmLoadAndClearUpper is off (format text: MOVLPD).
        // The encoding is the same movdbl call as MoveL2D_stack_reg_sse.
        // NOTE(review): presumably movdbl picks the instruction based on
        // UseXmmLoadAndClearUpper -- confirm in the macro assembler.
11306   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11307   match(Set dst (MoveL2D src));
11308   effect(DEF dst, USE src);
11309
11310   ins_cost(95);
11311   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11312   ins_encode %{
11313     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11314   %}
11315   ins_pipe( pipe_slow );
11316 %}
11317 
// MoveL2D from a long register pair directly into an XMM double register
// (UseSSE >= 2). Each 32-bit half is moved into an XMM register with MOVD and
// the two are then interleaved with PUNPCKLDQ so that $dst holds lo:hi.
// Needs one extra XMM temporary ($tmp) for the high half.
11318 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11319   predicate(UseSSE>=2);
11320   match(Set dst (MoveL2D src));
11321   effect(TEMP dst, USE src, TEMP tmp);
11322   ins_cost(85);
11323   format %{ "MOVD   $dst,$src.lo\n\t"
11324             "MOVD   $tmp,$src.hi\n\t"
11325             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11326   ins_encode %{
    // Low half -> dst, high half -> tmp, then merge tmp into dst.
11327     __ movdl($dst$$XMMRegister, $src$$Register);
11328     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11329     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11330   %}
11331   ins_pipe( pipe_slow );
11332 %}
11333 
11334 //----------------------------- CompressBits/ExpandBits ------------------------
11335 
// 64-bit CompressBits on a 32-bit target, built from two 32-bit PEXT
// operations plus a merge step. Matches only when the node's bottom type is a
// long. $rtmp (GPR) and $xtmp (XMM) are temporaries; flags are killed.
//
// Outline of the emitted code:
//   1. PEXT the low and high halves independently.
//   2. cnt = popcount(mask.lo). If cnt == 32 the low result is already full
//      and no merge is needed.
//   3. Otherwise append the high result after the cnt bits of the low result
//      (dst.lo |= dst.hi << cnt), then drop the bits that moved down from
//      dst.hi (dst.hi >>= 32 - cnt, or dst.hi = 0 when cnt == 0).
11336 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11337   predicate(n->bottom_type()->isa_long());
11338   match(Set dst (CompressBits src mask));
11339   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11340   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11341   ins_encode %{
11342     Label exit, partail_result;
11343     // Extract the upper and lower 32 bits of the source into the destination register pair
11344     // independently, then merge the two partial results such that the upper destination
11345     // result is laid out contiguously after the lower destination result.
11346     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11347     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11348     __ popcntl($rtmp$$Register, $mask$$Register);
11349     // Skip merging if the bit count of the lower mask register equals 32 (the register size).
11350     __ cmpl($rtmp$$Register, 32);
11351     __ jccb(Assembler::equal, exit);
11352     // Due to the limited number of GPRs on a 32 bit target, an XMM register is used as a spill slot
    // to preserve the bit count while $rtmp is reused for the shifted value.
11353     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11354     // Shift left the contents of the upper destination register by the set-bit count of the lower
11355     // mask register and merge with the lower destination register.
11356     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11357     __ orl($dst$$Register, $rtmp$$Register);
    // Reload the saved bit count.
11358     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11359     // Zero out the upper destination register if the set-bit count of the lower 32 bit mask is
11360     // zero, since the contents of the upper destination have already been copied to the lower
11361     // destination register.
11362     __ cmpl($rtmp$$Register, 0);
11363     __ jccb(Assembler::greater, partail_result);
11364     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11365     __ jmp(exit);
11366     __ bind(partail_result);
11367     // Right-shift the upper destination register by (32 - count) to move out the bits already
11368     // copied to the lower destination register.
11369     __ subl($rtmp$$Register, 32);
11370     __ negl($rtmp$$Register);
11371     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11372     __ bind(exit);
11373   %}
11374   ins_pipe( pipe_slow );
11375 %}
11376 
// 64-bit ExpandBits on a 32-bit target, built from 32-bit PDEP operations.
// Matches only when the node's bottom type is a long. $rtmp (GPR) and $xtmp
// (XMM) are temporaries; flags are killed.
//
// The low half of $mask is overwritten while the upper mask is being clipped;
// it is saved in $xtmp beforehand and restored before the exit label, so the
// mask operand holds its original value on both paths out of this sequence.
11377 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11378   predicate(n->bottom_type()->isa_long());
11379   match(Set dst (ExpandBits src mask));
11380   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11381   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11382   ins_encode %{
11383     // The deposit (PDEP) operation sequentially reads bits from the source register starting
11384     // from the LSB and lays them out in the destination register at the bit locations
11385     // corresponding to set bits in the mask register. Thus the number of source bits read equals
11386     // the combined set-bit count of the mask register pair.
11387     Label exit, mask_clipping;
11388     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11389     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11390     __ popcntl($rtmp$$Register, $mask$$Register);
11391     // If the set-bit count of the lower mask register is 32 then no bit of the lower source
11392     // register feeds the upper destination register.
11393     __ cmpl($rtmp$$Register, 32);
11394     __ jccb(Assembler::equal, exit);
11395     // Due to the limited number of GPRs on a 32 bit target, an XMM register is used as a spill slot.
11396     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11397     // Shift right the contents of the lower source register to remove already consumed bits.
11398     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11399     // Deposit the remaining bits of the lower source register, starting from the LSB, under the
11400     // influence of the upper mask register.
11401     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
    // Reload the saved count and turn it into (32 - count): the number of lower-source bits
    // that were left over for the upper half.
11402     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11403     __ subl($rtmp$$Register, 32);
11404     __ negl($rtmp$$Register);
    // Save the lower mask in $xtmp and use its register as a scratch copy of the upper mask.
11405     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11406     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
11407     // Clear the set bits in the upper mask copy which have been used to deposit the contents
11408     // of the lower source register: one lowest set bit per iteration, (32 - count) iterations.
11409     __ bind(mask_clipping);
11410     __ blsrl($mask$$Register, $mask$$Register);
11411     __ decrementl($rtmp$$Register, 1);
11412     __ jccb(Assembler::greater, mask_clipping);
11413     // Starting from the LSB, deposit the bits of the upper source register under the influence
11414     // of the remaining set bits in the upper mask copy.
11415     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11416     // Merge the partial results obtained from the lower and upper source register bits.
11417     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    // Restore the original lower mask.
11418     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11419     __ bind(exit);
11420   %}
11421   ins_pipe( pipe_slow );
11422 %}
11423 
11424 // =======================================================================
11425 // Fast clearing of an array
11426 // Small non-constant length ClearArray for non-AVX512 targets.
// ClearArray with a register count, for nodes that are not flagged is_large(),
// on targets with UseAVX <= 2.
// cnt and base are consumed and destroyed (USE_KILL); zero and the flags are
// killed; tmp is an XMM temporary. The format template below only describes
// the code for listings (it varies with UseFastStosb / UseXMMForObjInit); the
// actual code is produced by MacroAssembler::clear_mem.
11427 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11428   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11429   match(Set dummy (ClearArray cnt base));
11430   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11431 
11432   format %{ $$template
11433     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11434     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11435     $$emit$$"JG     LARGE\n\t"
11436     $$emit$$"SHL    ECX, 1\n\t"
11437     $$emit$$"DEC    ECX\n\t"
11438     $$emit$$"JS     DONE\t# Zero length\n\t"
11439     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11440     $$emit$$"DEC    ECX\n\t"
11441     $$emit$$"JGE    LOOP\n\t"
11442     $$emit$$"JMP    DONE\n\t"
11443     $$emit$$"# LARGE:\n\t"
11444     if (UseFastStosb) {
11445        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11446        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11447     } else if (UseXMMForObjInit) {
11448        $$emit$$"MOV     RDI,RAX\n\t"
11449        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11450        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11451        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11452        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11453        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11454        $$emit$$"ADD     0x40,RAX\n\t"
11455        $$emit$$"# L_zero_64_bytes:\n\t"
11456        $$emit$$"SUB     0x8,RCX\n\t"
11457        $$emit$$"JGE     L_loop\n\t"
11458        $$emit$$"ADD     0x4,RCX\n\t"
11459        $$emit$$"JL      L_tail\n\t"
11460        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11461        $$emit$$"ADD     0x20,RAX\n\t"
11462        $$emit$$"SUB     0x4,RCX\n\t"
11463        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11464        $$emit$$"ADD     0x4,RCX\n\t"
11465        $$emit$$"JLE     L_end\n\t"
11466        $$emit$$"DEC     RCX\n\t"
11467        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11468        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11469        $$emit$$"ADD     0x8,RAX\n\t"
11470        $$emit$$"DEC     RCX\n\t"
11471        $$emit$$"JGE     L_sloop\n\t"
11472        $$emit$$"# L_end:\n\t"
11473     } else {
11474        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11475        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11476     }
11477     $$emit$$"# DONE"
11478   %}
11479   ins_encode %{
    // The boolean argument is false here and true in the rep_stos_large* rules;
    // no opmask register is supplied (knoreg) in this non-EVEX variant.
11480     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11481                  $tmp$$XMMRegister, false, knoreg);
11482   %}
11483   ins_pipe( pipe_slow );
11484 %}
11485 
11486 // Small non-constant length ClearArray for AVX512 targets.
// ClearArray with a register count, for nodes that are not flagged is_large(),
// on targets with UseAVX > 2. Same shape as rep_stos, but additionally
// reserves an opmask temporary (ktmp) that is handed to clear_mem, and uses
// the legRegD class for the XMM temporary.
// The format template only describes the code for listings; the actual code
// is produced by MacroAssembler::clear_mem.
11487 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11488   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11489   match(Set dummy (ClearArray cnt base));
11490   ins_cost(125);
11491   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11492 
11493   format %{ $$template
11494     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11495     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11496     $$emit$$"JG     LARGE\n\t"
11497     $$emit$$"SHL    ECX, 1\n\t"
11498     $$emit$$"DEC    ECX\n\t"
11499     $$emit$$"JS     DONE\t# Zero length\n\t"
11500     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11501     $$emit$$"DEC    ECX\n\t"
11502     $$emit$$"JGE    LOOP\n\t"
11503     $$emit$$"JMP    DONE\n\t"
11504     $$emit$$"# LARGE:\n\t"
11505     if (UseFastStosb) {
11506        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11507        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11508     } else if (UseXMMForObjInit) {
11509        $$emit$$"MOV     RDI,RAX\n\t"
11510        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11511        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11512        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11513        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11514        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11515        $$emit$$"ADD     0x40,RAX\n\t"
11516        $$emit$$"# L_zero_64_bytes:\n\t"
11517        $$emit$$"SUB     0x8,RCX\n\t"
11518        $$emit$$"JGE     L_loop\n\t"
11519        $$emit$$"ADD     0x4,RCX\n\t"
11520        $$emit$$"JL      L_tail\n\t"
11521        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11522        $$emit$$"ADD     0x20,RAX\n\t"
11523        $$emit$$"SUB     0x4,RCX\n\t"
11524        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11525        $$emit$$"ADD     0x4,RCX\n\t"
11526        $$emit$$"JLE     L_end\n\t"
11527        $$emit$$"DEC     RCX\n\t"
11528        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11529        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11530        $$emit$$"ADD     0x8,RAX\n\t"
11531        $$emit$$"DEC     RCX\n\t"
11532        $$emit$$"JGE     L_sloop\n\t"
11533        $$emit$$"# L_end:\n\t"
11534     } else {
11535        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11536        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11537     }
11538     $$emit$$"# DONE"
11539   %}
11540   ins_encode %{
    // The boolean argument is false here and true in the rep_stos_large* rules.
11541     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11542                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11543   %}
11544   ins_pipe( pipe_slow );
11545 %}
11546 
11547 // Large non-constant length ClearArray for non-AVX512 targets.
// ClearArray with a register count, for nodes flagged is_large(), on targets
// with UseAVX <= 2. Unlike rep_stos, the format template has no short-length
// prologue. cnt and base are consumed and destroyed (USE_KILL); zero and the
// flags are killed; tmp is an XMM temporary.
// The format template only describes the code for listings; the actual code
// is produced by MacroAssembler::clear_mem.
11548 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11549   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11550   match(Set dummy (ClearArray cnt base));
11551   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11552   format %{ $$template
11553     if (UseFastStosb) {
11554        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11555        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11556        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11557     } else if (UseXMMForObjInit) {
11558        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11559        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11560        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11561        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11562        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11563        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11564        $$emit$$"ADD     0x40,RAX\n\t"
11565        $$emit$$"# L_zero_64_bytes:\n\t"
11566        $$emit$$"SUB     0x8,RCX\n\t"
11567        $$emit$$"JGE     L_loop\n\t"
11568        $$emit$$"ADD     0x4,RCX\n\t"
11569        $$emit$$"JL      L_tail\n\t"
11570        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11571        $$emit$$"ADD     0x20,RAX\n\t"
11572        $$emit$$"SUB     0x4,RCX\n\t"
11573        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11574        $$emit$$"ADD     0x4,RCX\n\t"
11575        $$emit$$"JLE     L_end\n\t"
11576        $$emit$$"DEC     RCX\n\t"
11577        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11578        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11579        $$emit$$"ADD     0x8,RAX\n\t"
11580        $$emit$$"DEC     RCX\n\t"
11581        $$emit$$"JGE     L_sloop\n\t"
11582        $$emit$$"# L_end:\n\t"
11583     } else {
11584        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11585        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11586        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11587     }
11588     $$emit$$"# DONE"
11589   %}
11590   ins_encode %{
    // The boolean argument is true here and false in the non-large rep_stos* rules;
    // no opmask register is supplied (knoreg) in this non-EVEX variant.
11591     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11592                  $tmp$$XMMRegister, true, knoreg);
11593   %}
11594   ins_pipe( pipe_slow );
11595 %}
11596 
11597 // Large non-constant length ClearArray for AVX512 targets.
// ClearArray with a register count, for nodes flagged is_large(), on targets
// with UseAVX > 2. Same shape as rep_stos_large, but additionally reserves an
// opmask temporary (ktmp) that is handed to clear_mem, and uses the legRegD
// class for the XMM temporary.
// The format template only describes the code for listings; the actual code
// is produced by MacroAssembler::clear_mem.
11598 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11599   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11600   match(Set dummy (ClearArray cnt base));
11601   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11602   format %{ $$template
11603     if (UseFastStosb) {
11604        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11605        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11606        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11607     } else if (UseXMMForObjInit) {
11608        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11609        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11610        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11611        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11612        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11613        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11614        $$emit$$"ADD     0x40,RAX\n\t"
11615        $$emit$$"# L_zero_64_bytes:\n\t"
11616        $$emit$$"SUB     0x8,RCX\n\t"
11617        $$emit$$"JGE     L_loop\n\t"
11618        $$emit$$"ADD     0x4,RCX\n\t"
11619        $$emit$$"JL      L_tail\n\t"
11620        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11621        $$emit$$"ADD     0x20,RAX\n\t"
11622        $$emit$$"SUB     0x4,RCX\n\t"
11623        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11624        $$emit$$"ADD     0x4,RCX\n\t"
11625        $$emit$$"JLE     L_end\n\t"
11626        $$emit$$"DEC     RCX\n\t"
11627        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11628        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11629        $$emit$$"ADD     0x8,RAX\n\t"
11630        $$emit$$"DEC     RCX\n\t"
11631        $$emit$$"JGE     L_sloop\n\t"
11632        $$emit$$"# L_end:\n\t"
11633     } else {
11634        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11635        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11636        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11637     }
11638     $$emit$$"# DONE"
11639   %}
11640   ins_encode %{
    // The boolean argument is true here and false in the non-large rep_stos* rules.
11641     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11642                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11643   %}
11644   ins_pipe( pipe_slow );
11645 %}
11646 
11647 // Small constant length ClearArray for AVX512 targets.
// ClearArray with a compile-time constant count (immI), for nodes not flagged
// is_large(). Requires MaxVectorSize >= 32 and AVX-512 VL support.
// Unlike the register-count rules, base is only read (it is not USE_KILL);
// tmp, zero and ktmp are temporaries and the flags are killed.
11648 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11649 %{
11650   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
11651   match(Set dummy (ClearArray cnt base));
11652   ins_cost(100);
11653   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11654   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11655   ins_encode %{
    // Uses the clear_mem overload that takes the count as a constant.
11656    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11657   %}
11658   ins_pipe(pipe_slow);
11659 %}
11660 
// StrComp intrinsic for the LL encoding (both operands byte[], per the format
// text), used when VM_Version::supports_avx512vlbw() is false.
// All four inputs are consumed and destroyed (USE_KILL); tmp1 is an XMM
// temporary and the flags are killed. No opmask register is passed (knoreg).
11661 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11662                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11663   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11664   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11665   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11666 
11667   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11668   ins_encode %{
11669     __ string_compare($str1$$Register, $str2$$Register,
11670                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11671                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11672   %}
11673   ins_pipe( pipe_slow );
11674 %}
11675 
// StrComp intrinsic for the LL encoding, used when
// VM_Version::supports_avx512vlbw() is true. Identical to string_compareL
// except that an opmask temporary (ktmp) is reserved and passed to
// string_compare.
11676 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11677                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11678   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11679   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11680   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11681 
11682   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11683   ins_encode %{
11684     __ string_compare($str1$$Register, $str2$$Register,
11685                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11686                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11687   %}
11688   ins_pipe( pipe_slow );
11689 %}
11690 
// StrComp intrinsic for the UU encoding (both operands char[], per the format
// text), used when VM_Version::supports_avx512vlbw() is false.
// All four inputs are consumed and destroyed (USE_KILL); tmp1 is an XMM
// temporary and the flags are killed. No opmask register is passed (knoreg).
11691 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11692                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11693   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11694   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11695   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11696 
11697   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11698   ins_encode %{
11699     __ string_compare($str1$$Register, $str2$$Register,
11700                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11701                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11702   %}
11703   ins_pipe( pipe_slow );
11704 %}
11705 
// StrComp intrinsic for the UU encoding, used when
// VM_Version::supports_avx512vlbw() is true. Identical to string_compareU
// except that an opmask temporary (ktmp) is reserved and passed to
// string_compare.
11706 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11707                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11708   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11709   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11710   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11711 
11712   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11713   ins_encode %{
11714     __ string_compare($str1$$Register, $str2$$Register,
11715                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11716                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11717   %}
11718   ins_pipe( pipe_slow );
11719 %}
11720 
// StrComp intrinsic for the mixed LU encoding, used when
// VM_Version::supports_avx512vlbw() is false.
// Operands are passed to string_compare in their declared order (contrast
// with string_compareUL, which swaps them). All four inputs are USE_KILL;
// tmp1 is an XMM temporary and the flags are killed.
11721 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11722                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11723   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11724   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11725   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11726 
11727   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11728   ins_encode %{
11729     __ string_compare($str1$$Register, $str2$$Register,
11730                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11731                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11732   %}
11733   ins_pipe( pipe_slow );
11734 %}
11735 
// StrComp intrinsic for the mixed LU encoding, used when
// VM_Version::supports_avx512vlbw() is true. Identical to string_compareLU
// except that an opmask temporary (ktmp) is reserved and passed to
// string_compare.
11736 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11737                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11738   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11739   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11740   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11741 
11742   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11743   ins_encode %{
11744     __ string_compare($str1$$Register, $str2$$Register,
11745                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11746                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11747   %}
11748   ins_pipe( pipe_slow );
11749 %}
11750 
// StrComp intrinsic for the mixed UL encoding, used when
// VM_Version::supports_avx512vlbw() is false.
// Note the operand binding differs from the other string_compare* rules:
// str1/cnt1 use the eSIRegP/eDXRegI classes and str2/cnt2 use
// eDIRegP/eCXRegI, and the pairs are handed to string_compare in swapped
// order (str2, str1, cnt2, cnt1).
11751 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11752                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11753   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11754   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11755   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11756 
11757   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11758   ins_encode %{
    // Arguments are intentionally swapped relative to the match rule.
11759     __ string_compare($str2$$Register, $str1$$Register,
11760                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11761                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11762   %}
11763   ins_pipe( pipe_slow );
11764 %}
11765 
// StrComp intrinsic for the mixed UL encoding, used when
// VM_Version::supports_avx512vlbw() is true. Identical to string_compareUL
// (including the swapped operand binding and argument order) except that an
// opmask temporary (ktmp) is reserved and passed to string_compare.
11766 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11767                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11768   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11769   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11770   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11771 
11772   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11773   ins_encode %{
    // Arguments are intentionally swapped relative to the match rule.
11774     __ string_compare($str2$$Register, $str1$$Register,
11775                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11776                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11777   %}
11778   ins_pipe( pipe_slow );
11779 %}
11780 
11781 // fast string equals
// StrEquals intrinsic, used when VM_Version::supports_avx512vlbw() is false.
// Implemented via MacroAssembler::arrays_equals with its first argument false
// and the "char" flag false. str1, str2 and cnt are consumed and destroyed
// (USE_KILL); tmp1/tmp2 are XMM temporaries; tmp3 and the flags are killed.
// No opmask register is passed (knoreg).
11782 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11783                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11784   predicate(!VM_Version::supports_avx512vlbw());
11785   match(Set result (StrEquals (Binary str1 str2) cnt));
11786   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11787 
11788   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11789   ins_encode %{
11790     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11791                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11792                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11793   %}
11794 
11795   ins_pipe( pipe_slow );
11796 %}
11797 
// StrEquals intrinsic, used when VM_Version::supports_avx512vlbw() is true.
// Identical to string_equals except that an opmask temporary (ktmp) is
// reserved and passed to arrays_equals.
11798 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11799                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11800   predicate(VM_Version::supports_avx512vlbw());
11801   match(Set result (StrEquals (Binary str1 str2) cnt));
11802   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11803 
11804   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11805   ins_encode %{
11806     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11807                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11808                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11809   %}
11810 
11811   ins_pipe( pipe_slow );
11812 %}
11813 
11814 
11815 // fast search of substring with known size.
// StrIndexOf intrinsic for the LL encoding where the substring length is a
// compile-time constant (int_cnt2). Requires UseSSE42Intrinsics.
// cnt2 is not an input here: it is declared only so its register can be
// killed and handed to the assembler routine as scratch, as is tmp.
// Substrings of at least 16 elements use string_indexofC8; shorter ones use
// string_indexof with the constant length.
11816 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11817                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11818   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11819   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11820   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11821 
11822   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11823   ins_encode %{
11824     int icnt2 = (int)$int_cnt2$$constant;
11825     if (icnt2 >= 16) {
11826       // IndexOf for constant substrings with size >= 16 elements
11827       // which don't need to be loaded through stack.
11828       __ string_indexofC8($str1$$Register, $str2$$Register,
11829                           $cnt1$$Register, $cnt2$$Register,
11830                           icnt2, $result$$Register,
11831                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11832     } else {
11833       // Small strings are loaded through stack if they cross page boundary.
11834       __ string_indexof($str1$$Register, $str2$$Register,
11835                         $cnt1$$Register, $cnt2$$Register,
11836                         icnt2, $result$$Register,
11837                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11838     }
11839   %}
11840   ins_pipe( pipe_slow );
11841 %}
11842 
11843 // fast search of substring with known size.
// StrIndexOf intrinsic for the UU encoding where the substring length is a
// compile-time constant (int_cnt2). Requires UseSSE42Intrinsics.
// cnt2 is not an input here: it is declared only so its register can be
// killed and handed to the assembler routine as scratch, as is tmp.
// Substrings of at least 8 elements use string_indexofC8; shorter ones use
// string_indexof with the constant length.
11844 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11845                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11846   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11847   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11848   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11849 
11850   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11851   ins_encode %{
11852     int icnt2 = (int)$int_cnt2$$constant;
11853     if (icnt2 >= 8) {
11854       // IndexOf for constant substrings with size >= 8 elements
11855       // which don't need to be loaded through stack.
11856       __ string_indexofC8($str1$$Register, $str2$$Register,
11857                           $cnt1$$Register, $cnt2$$Register,
11858                           icnt2, $result$$Register,
11859                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11860     } else {
11861       // Small strings are loaded through stack if they cross page boundary.
11862       __ string_indexof($str1$$Register, $str2$$Register,
11863                         $cnt1$$Register, $cnt2$$Register,
11864                         icnt2, $result$$Register,
11865                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11866     }
11867   %}
11868   ins_pipe( pipe_slow );
11869 %}
11870 
11871 // fast search of substring with known size.
// StrIndexOf intrinsic for the mixed UL encoding where the substring length
// is a compile-time constant (int_cnt2). Requires UseSSE42Intrinsics.
// cnt2 is not an input here: it is declared only so its register can be
// killed and handed to the assembler routine as scratch, as is tmp.
// Substrings of at least 8 elements use string_indexofC8; shorter ones use
// string_indexof with the constant length.
11872 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11873                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11874   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11875   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11876   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11877 
11878   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11879   ins_encode %{
11880     int icnt2 = (int)$int_cnt2$$constant;
11881     if (icnt2 >= 8) {
11882       // IndexOf for constant substrings with size >= 8 elements
11883       // which don't need to be loaded through stack.
11884       __ string_indexofC8($str1$$Register, $str2$$Register,
11885                           $cnt1$$Register, $cnt2$$Register,
11886                           icnt2, $result$$Register,
11887                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11888     } else {
11889       // Small strings are loaded through stack if they cross page boundary.
11890       __ string_indexof($str1$$Register, $str2$$Register,
11891                         $cnt1$$Register, $cnt2$$Register,
11892                         icnt2, $result$$Register,
11893                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11894     }
11895   %}
11896   ins_pipe( pipe_slow );
11897 %}
11898 
// StrIndexOf intrinsic for the LL encoding where the substring length is in a
// register (cnt2). Requires UseSSE42Intrinsics.
// All four inputs are consumed and destroyed (USE_KILL); vec1 is an XMM
// temporary; tmp and the flags are killed. The constant-length argument of
// string_indexof is passed as -1 because no constant length is available.
11899 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11900                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11901   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11902   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11903   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11904 
11905   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11906   ins_encode %{
11907     __ string_indexof($str1$$Register, $str2$$Register,
11908                       $cnt1$$Register, $cnt2$$Register,
11909                       (-1), $result$$Register,
11910                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11911   %}
11912   ins_pipe( pipe_slow );
11913 %}
11914 
// StrIndexOf intrinsic for the UU encoding where the substring length is in a
// register (cnt2). Requires UseSSE42Intrinsics.
// All four inputs are consumed and destroyed (USE_KILL); vec1 is an XMM
// temporary; tmp and the flags are killed. The constant-length argument of
// string_indexof is passed as -1 because no constant length is available.
11915 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11916                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11917   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11918   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11919   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11920 
11921   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11922   ins_encode %{
11923     __ string_indexof($str1$$Register, $str2$$Register,
11924                       $cnt1$$Register, $cnt2$$Register,
11925                       (-1), $result$$Register,
11926                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11927   %}
11928   ins_pipe( pipe_slow );
11929 %}
11930 
// StrIndexOf for the mixed UL encoding.  Selected only when
// UseSSE42Intrinsics is set and the node's encoding() is
// StrIntrinsicNode::UL.
// All four inputs are USE_KILL; tmp and the flags are KILL; vec1 is an XMM
// temporary.
// NOTE(review): the format text says "char[]" exactly like the UU rule, so
// the two are indistinguishable in printed assembly.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    // NOTE(review): (-1) in the integer-count slot presumably means "count
    // not constant" -- confirm against the assembler routine.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
11946 
// StrIndexOfChar for encoding U (labelled "StringUTF16" in the format).
// Selected only when UseSSE42Intrinsics is set.
// str1, cnt1 and ch are USE_KILL; three XMM temporaries plus the integer tmp
// are reserved as TEMP; the flags are KILL.
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11959 
// StrIndexOfChar for encoding L (labelled "StringLatin1" in the format).
// Same operand layout and effects as string_indexof_char, but emits the
// stringL_indexof_char assembler routine instead.
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11972 
11973 
11974 // fast array equals
// AryEq for the LL encoding (byte[]) on CPUs where
// VM_Version::supports_avx512vlbw() is false.  No opmask register is
// available in this variant, so knoreg is passed in the mask position.
// Both array pointers are USE_KILL; tmp3, tmp4 and the flags are KILL.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // NOTE(review): the leading 'true' presumably selects whole-array
    // comparison mode in arrays_equals -- confirm against the assembler.
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
11991 
// AryEq for the LL encoding (byte[]) on CPUs where
// VM_Version::supports_avx512vlbw() is true.  Identical to array_equalsB
// except that an opmask temporary (ktmp) is reserved and handed to
// arrays_equals instead of knoreg.
// Note that the format string's KILL list does not mention $ktmp.
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12008 
// AryEq for the UU encoding (char[]) on CPUs where
// VM_Version::supports_avx512vlbw() is false.  Differs from array_equalsB
// only in the predicate's encoding and in passing true for the "char" flag.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // knoreg: no opmask register is used by this non-EVEX variant.
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12025 
// AryEq for the UU encoding (char[]) on CPUs where
// VM_Version::supports_avx512vlbw() is true.  Same as array_equalsC but
// reserves the opmask temporary ktmp and passes it to arrays_equals.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12042 
// CountPositives over a byte[] for CPUs lacking either AVX512VL/BW or BMI2
// (the predicate is the exact negation of count_positives_evex's).
// ary1 and len are USE_KILL; tmp3 and the flags are KILL; two XMM temps.
instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                         regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // Both opmask arguments are knoreg in this non-EVEX variant.
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12058 
// CountPositives over a byte[] for CPUs that support both AVX512VL/BW and
// BMI2.  Same operands as count_positives plus two opmask temporaries
// (ktmp1, ktmp2) that are passed through to the assembler routine.
instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                              regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12074 
12075 
12076 // fast char[] to byte[] compression
// StrCompressedCopy for CPUs lacking either AVX512VL/BW or BMI2.
// src, dst and len are USE_KILL; tmp5 and the flags are KILL; four XMM
// temporaries are reserved.  knoreg is passed for both opmask arguments.
// NOTE(review): the format text names RAX/RCX/RDX although the operand
// classes here are the 32-bit eAX/eCX/eDX ones; it only affects printed
// assembly.
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12092 
// StrCompressedCopy for CPUs that support both AVX512VL/BW and BMI2.
// Same as string_compress but reserves two opmask temporaries (ktmp1,
// ktmp2) and passes them to char_array_compress instead of knoreg.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12108 
12109 // fast byte[] to char[] inflation
// StrInflatedCopy for CPUs lacking either AVX512VL/BW or BMI2.
// The match rule's destination is the 'dummy' Universe operand; no integer
// result register is declared.  src, dst and len are USE_KILL, tmp1 (XMM)
// and tmp2 are TEMP, and the flags are KILL.
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    // knoreg: no opmask register in this non-EVEX variant.
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}
12123 
// StrInflatedCopy for CPUs that support both AVX512VL/BW and BMI2.
// Same as string_inflate but reserves the opmask temporary ktmp and passes
// it to byte_array_inflate.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
12137 
12138 // encode char[] to byte[] in ISO_8859_1
// EncodeISOArray when the node is NOT flagged is_ascii().  Calls the shared
// encode_iso_array assembler routine with its final (ascii) argument false.
// src, dst and len are USE_KILL; tmp5 and the flags are KILL; four XMM
// temporaries are reserved.
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}
12154 
12155 // encode char[] to byte[] in ASCII
// EncodeISOArray when the node IS flagged is_ascii().  Uses the same
// encode_iso_array assembler routine as the ISO rule, but passes true as
// its final (ascii) argument.  Operands and effects match encode_iso_array.
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}
12171 
12172 //----------Control Flow Instructions------------------------------------------
12173 // Signed compare Instructions
// Signed 32-bit compare of two registers (CmpI); the result is the flags
// register cr.  Encoded as opcode 0x3B with a register/register ModRM.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}
12182 
// Signed 32-bit compare of a register against an integer immediate (CmpI).
// Uses the 0x81 /7 opcode group.
// NOTE(review): OpcSErm/Con8or32 presumably pick the sign-extended 8-bit
// immediate form when the constant fits -- confirm in the enc_class
// definitions.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12192 
// Cisc-spilled version of compI_eReg
// Signed 32-bit compare of a register against an int loaded from memory:
// the LoadI is folded into the CMP's memory operand.  Costed at 500, and
// the encoding is bracketed by SetInstMark/ClearInstMark.
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP    $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}
12203 
// Signed compare of a register against the constant zero (immI_0),
// implemented as TEST src,src (opcode 0x85) rather than CMP src,0.
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12213 
// Folds (src & con) compared with zero into a single TEST src,con.
// Encoded with opcode 0xF7 /0 followed by a 32-bit immediate (Con32).
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST   $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}
12222 
// Folds (src & mem) compared with zero into TEST src,[mem] (opcode 0x85).
// NOTE(review): unlike the other *_mem compare rules in this section, the
// match tree uses the memory operand directly under AndI with no explicit
// LoadI -- confirm this is intended.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST   $src,$mem" %}
  opcode(0x85);
  ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}
12231 
12232 // Unsigned compare Instructions; really, same as signed except they
12233 // produce an eFlagsRegU instead of eFlagsReg.
// Unsigned 32-bit compare of two registers (CmpU).  Same 0x3B /r encoding
// as compI_eReg; only the result operand class (eFlagsRegU) differs.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}
12242 
// Unsigned 32-bit compare of a register against an integer immediate
// (CmpU).  Same 0x81 /7 encoding as compI_eReg_imm, producing eFlagsRegU.
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12251 
// Cisc-spilled version of compU_eReg
// Unsigned 32-bit compare of a register against an int loaded from memory
// (LoadI folded into the CMP).  Costed at 500; produces eFlagsRegU.
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}
12262 
12263 // // Cisc-spilled version of cmpU_eReg
12264 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12265 //  match(Set cr (CmpU (LoadI op1) op2));
12266 //
12267 //  format %{ "CMPu   $op1,$op2" %}
12268 //  ins_cost(500);
12269 //  opcode(0x39);  /* Opcode 39 /r */
12270 //  ins_encode( OpcP, RegMem( op1, op2) );
12271 //%}
12272 
// Unsigned compare of a register against the constant zero (immI_0),
// implemented as TEST src,src (opcode 0x85); produces eFlagsRegU.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu  $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12281 
12282 // Unsigned pointer compare Instructions
// Pointer compare of two registers (CmpP), producing unsigned flags
// (eFlagsRegU).  Encoded as opcode 0x3B with a register/register ModRM.
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}
12291 
// Pointer compare of a register against a pointer immediate (immP), using
// the 0x81 /7 opcode group.  Unlike the integer immediate compares, the
// encoding is bracketed by SetInstMark/ClearInstMark.
// NOTE(review): presumably the mark is needed so a relocation can be
// attached to the pointer constant -- confirm.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( SetInstMark, OpcSErm( op1, op2 ), Con8or32( op2 ), ClearInstMark );
  ins_pipe( ialu_cr_reg_imm );
%}
12300 
// Cisc-spilled version of compP_eReg
// Pointer compare of a register against a pointer loaded from memory
// (LoadP folded into the CMP).  Costed at 500; produces eFlagsRegU.
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}
12311 
12312 // // Cisc-spilled version of cmpP_eReg
12313 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12314 //  match(Set cr (CmpP (LoadP op1) op2));
12315 //
12316 //  format %{ "CMPu   $op1,$op2" %}
12317 //  ins_cost(500);
12318 //  opcode(0x39);  /* Opcode 39 /r */
12319 //  ins_encode( OpcP, RegMem( op1, op2) );
12320 //%}
12321 
12322 // Compare raw pointer (used in out-of-heap check).
12323 // Only works because non-oop pointers must be raw pointers
12324 // and raw pointers have no anti-dependencies.
// Same match tree and encoding as compP_eReg_mem, but restricted by the
// predicate to nodes where n->in(2)->in(2)->bottom_type()->reloc() is
// relocInfo::none, and declared without an explicit ins_cost.
// NOTE(review): n->in(2) is presumably the LoadP and its in(2) the address
// input -- confirm against the ideal-graph input layout.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
  ins_pipe( ialu_cr_reg_mem );
%}
12334 
12335 //
12336 // This will generate a signed flags result. This should be ok
12337 // since any compare to a zero should be eq/neq.
// Null check of a pointer register: CmpP against immP0 is implemented as
// TEST src,src (opcode 0x85).  The result operand is the signed flags class
// eFlagsReg rather than eFlagsRegU.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12346 
12347 // Cisc-spilled version of testP_reg
12348 // This will generate a signed flags result. This should be ok
12349 // since any compare to a zero should be eq/neq.
// Null check of a pointer held in memory: emits TEST [op],0xFFFFFFFF using
// opcode 0xF7 /0 with a 32-bit immediate, so no register load is needed.
// Costed at 500.
// NOTE(review): 'zero' is declared as immI_0 although it is the second
// input of a CmpP; the register form testP_reg uses immP0.  Confirm whether
// this rule can actually match before relying on it.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST   $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);               /* Opcode F7 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark );
  ins_pipe( ialu_cr_reg_imm );
%}
12359 
12360 // Yanked all unsigned pointer compare operations.
12361 // Pointer compares are done with CmpP which is already unsigned.
12362 
12363 //----------Max and Min--------------------------------------------------------
12364 // Min Instructions
12365 ////
12366 //   *** Min and Max using the conditional move are slower than the
12367 //   *** branch version on a Pentium III.
12368 // // Conditional move for min
12369 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12370 //  effect( USE_DEF op2, USE op1, USE cr );
12371 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12372 //  opcode(0x4C,0x0F);
12373 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12374 //  ins_pipe( pipe_cmov_reg );
12375 //%}
12376 //
12377 //// Min Register with Register (P6 version)
12378 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12379 //  predicate(VM_Version::supports_cmov() );
12380 //  match(Set op2 (MinI op1 op2));
12381 //  ins_cost(200);
12382 //  expand %{
12383 //    eFlagsReg cr;
12384 //    compI_eReg(cr,op1,op2);
12385 //    cmovI_reg_lt(op2,op1,cr);
12386 //  %}
12387 //%}
12388 
12389 // Min Register with Register (generic version)
// Integer minimum, two-address form: dst = MinI(dst, src).  The flags are
// killed and the code is produced by the min_enc encoding class (defined
// elsewhere in this file).  Costed at 300.
// NOTE(review): the declared opcode 0xCC is not obviously consumed by
// min_enc -- confirm whether it is only a placeholder.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN    $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}
12400 
12401 // Max Register with Register
12402 //   *** Min and Max using the conditional move are slower than the
12403 //   *** branch version on a Pentium III.
12404 // // Conditional move for max
12405 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12406 //  effect( USE_DEF op2, USE op1, USE cr );
12407 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12408 //  opcode(0x4F,0x0F);
12409 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12410 //  ins_pipe( pipe_cmov_reg );
12411 //%}
12412 //
12413 // // Max Register with Register (P6 version)
12414 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12415 //  predicate(VM_Version::supports_cmov() );
12416 //  match(Set op2 (MaxI op1 op2));
12417 //  ins_cost(200);
12418 //  expand %{
12419 //    eFlagsReg cr;
12420 //    compI_eReg(cr,op1,op2);
12421 //    cmovI_reg_gt(op2,op1,cr);
12422 //  %}
12423 //%}
12424 
12425 // Max Register with Register (generic version)
// Integer maximum, two-address form: dst = MaxI(dst, src).  The flags are
// killed and the code is produced by the max_enc encoding class (defined
// elsewhere in this file).  Costed at 300.
// NOTE(review): the declared opcode 0xCC is not obviously consumed by
// max_enc -- confirm whether it is only a placeholder.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX    $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}
12436 
12437 // ============================================================================
12438 // Counted Loop limit node which represents exact final iterator value.
12439 // Note: the resulting value should fit into integer range since
12440 // counted loops have limit check on overflow.
12441 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12442   match(Set limit (LoopLimit (Binary init limit) stride));
12443   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12444   ins_cost(300);
12445 
12446   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12447   ins_encode %{
12448     int strd = (int)$stride$$constant;
12449     assert(strd != 1 && strd != -1, "sanity");
12450     int m1 = (strd > 0) ? 1 : -1;
12451     // Convert limit to long (EAX:EDX)
12452     __ cdql();
12453     // Convert init to long (init:tmp)
12454     __ movl($tmp$$Register, $init$$Register);
12455     __ sarl($tmp$$Register, 31);
12456     // $limit - $init
12457     __ subl($limit$$Register, $init$$Register);
12458     __ sbbl($limit_hi$$Register, $tmp$$Register);
12459     // + ($stride - 1)
12460     if (strd > 0) {
12461       __ addl($limit$$Register, (strd - 1));
12462       __ adcl($limit_hi$$Register, 0);
12463       __ movl($tmp$$Register, strd);
12464     } else {
12465       __ addl($limit$$Register, (strd + 1));
12466       __ adcl($limit_hi$$Register, -1);
12467       __ lneg($limit_hi$$Register, $limit$$Register);
12468       __ movl($tmp$$Register, -strd);
12469     }
12470     // signed division: (EAX:EDX) / pos_stride
12471     __ idivl($tmp$$Register);
12472     if (strd < 0) {
12473       // restore sign
12474       __ negl($tmp$$Register);
12475     }
12476     // (EAX) * stride
12477     __ mull($tmp$$Register);
12478     // + init (ignore upper bits)
12479     __ addl($limit$$Register, $init$$Register);
12480   %}
12481   ins_pipe( pipe_slow );
12482 %}
12483 
12484 // ============================================================================
12485 // Branch Instructions
12486 // Jump Table
// Table-driven indirect jump for the Jump node: jumps through the entry at
// $constantaddress indexed by switch_val with scale factor 1.  Costed at 350.
// NOTE(review): a scale of times_1 implies switch_val is already a byte
// offset into the table -- confirm against the code that builds the Jump.
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
12498 
12499 // Jump Direct - Label defines a relative address from JMP+1
12500 instruct jmpDir(label labl) %{
12501   match(Goto);
12502   effect(USE labl);
12503 
12504   ins_cost(300);
12505   format %{ "JMP    $labl" %}
12506   size(5);
12507   ins_encode %{
12508     Label* L = $labl$$label;
12509     __ jmp(*L, false); // Always long jump
12510   %}
12511   ins_pipe( pipe_jmp );
12512 %}
12513 
12514 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12515 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12516   match(If cop cr);
12517   effect(USE labl);
12518 
12519   ins_cost(300);
12520   format %{ "J$cop    $labl" %}
12521   size(6);
12522   ins_encode %{
12523     Label* L = $labl$$label;
12524     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12525   %}
12526   ins_pipe( pipe_jcc );
12527 %}
12528 
12529 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12530 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12531   match(CountedLoopEnd cop cr);
12532   effect(USE labl);
12533 
12534   ins_cost(300);
12535   format %{ "J$cop    $labl\t# Loop end" %}
12536   size(6);
12537   ins_encode %{
12538     Label* L = $labl$$label;
12539     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12540   %}
12541   ins_pipe( pipe_jcc );
12542 %}
12543 
12544 // Jump Direct Conditional - using unsigned comparison
12545 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12546   match(If cop cmp);
12547   effect(USE labl);
12548 
12549   ins_cost(300);
12550   format %{ "J$cop,u  $labl" %}
12551   size(6);
12552   ins_encode %{
12553     Label* L = $labl$$label;
12554     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12555   %}
12556   ins_pipe(pipe_jcc);
12557 %}
12558 
12559 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12560   match(If cop cmp);
12561   effect(USE labl);
12562 
12563   ins_cost(200);
12564   format %{ "J$cop,u  $labl" %}
12565   size(6);
12566   ins_encode %{
12567     Label* L = $labl$$label;
12568     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12569   %}
12570   ins_pipe(pipe_jcc);
12571 %}
12572 
12573 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12574   match(If cop cmp);
12575   effect(USE labl);
12576 
12577   ins_cost(200);
12578   format %{ $$template
12579     if ($cop$$cmpcode == Assembler::notEqual) {
12580       $$emit$$"JP,u   $labl\n\t"
12581       $$emit$$"J$cop,u   $labl"
12582     } else {
12583       $$emit$$"JP,u   done\n\t"
12584       $$emit$$"J$cop,u   $labl\n\t"
12585       $$emit$$"done:"
12586     }
12587   %}
12588   ins_encode %{
12589     Label* l = $labl$$label;
12590     if ($cop$$cmpcode == Assembler::notEqual) {
12591       __ jcc(Assembler::parity, *l, false);
12592       __ jcc(Assembler::notEqual, *l, false);
12593     } else if ($cop$$cmpcode == Assembler::equal) {
12594       Label done;
12595       __ jccb(Assembler::parity, done);
12596       __ jcc(Assembler::equal, *l, false);
12597       __ bind(done);
12598     } else {
12599        ShouldNotReachHere();
12600     }
12601   %}
12602   ins_pipe(pipe_jcc);
12603 %}
12604 
12605 // ============================================================================
12606 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12607 // array for an instance of the superklass.  Set a hidden internal cache on a
12608 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12609 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12610 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12611   match(Set result (PartialSubtypeCheck sub super));
12612   effect( KILL rcx, KILL cr );
12613 
12614   ins_cost(1100);  // slightly larger than the next version
12615   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12616             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12617             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12618             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12619             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12620             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12621             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12622      "miss:\t" %}
12623 
12624   opcode(0x1); // Force a XOR of EDI
12625   ins_encode( enc_PartialSubtypeCheck() );
12626   ins_pipe( pipe_slow );
12627 %}
12628 
// Flags-only form of the secondary-supers scan: matches a
// PartialSubtypeCheck whose result is immediately compared against null, so
// the rule defines the flags (Z on hit, NZ on miss per the format text) and
// treats the EDI 'result' operand and ECX as killed.  Costed at 1000,
// slightly below the value-producing partialSubtypeCheck rule.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s  miss\t\t# Missed: flags NZ\n\t"
            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
     "miss:\t" %}

  opcode(0x0);  // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}
12646 
12647 // ============================================================================
12648 // Branch Instructions -- short offset versions
12649 //
12650 // These instructions are used to replace jumps of a long offset (the default
12651 // match) with jumps of a shorter offset.  These instructions are all tagged
12652 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12653 // match rules in general matching.  Instead, the ADLC generates a conversion
12654 // method in the MachNode which can be used to do in-place replacement of the
12655 // long variant with the shorter variant.  The compiler will determine if a
12656 // branch can be taken by the is_short_branch_offset() predicate in the machine
12657 // specific code section of the file.
12658 
12659 // Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  // Short-offset form of the unconditional jump for Goto. It is tagged with
  // ins_short_branch and has a fixed size of 2; the jump is emitted via jmpb.
  match(Goto);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "JMP,s  $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    __ jmpb(*target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jmp);
%}
12674 
12675 // Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  // Short-offset conditional jump for If nodes using signed condition codes.
  // Emitted with jccb; fixed size of 2 and tagged as a short branch.
  match(If cop cr);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "J$cop,s  $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
12690 
12691 // Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  // Short-offset conditional jump closing a counted loop (CountedLoopEnd).
  // Emitted with jccb; fixed size of 2 and tagged as a short branch.
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "J$cop,s  $labl\t# Loop end" %}
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
12706 
12707 // Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  // Short-offset conditional jump for If nodes fed by an unsigned comparison
  // (cmpOpU / eFlagsRegU operands). Emitted with jccb; fixed size of 2.
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
12722 
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  // Short-offset conditional jump for If nodes whose operands are of the
  // cmpOpUCF / eFlagsRegUCF kinds. A single jccb is emitted; fixed size of 2.
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  ins_encode %{
    Label* target = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cond, *target);
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
12737 
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  // Short-offset conditional jump that needs two jccb instructions (fixed
  // size of 4). Only the equal and notEqual condition codes are supported;
  // the parity flag is checked first in both cases.
  match(If cop cmp);
  effect(USE labl);

  size(4);
  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s   $labl\n\t"
      $$emit$$"J$cop,u,s   $labl"
    } else {
      $$emit$$"JP,u,s   done\n\t"
      $$emit$$"J$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* target = $labl$$label;
    const int cond = $cop$$cmpcode;
    if (cond == Assembler::equal) {
      // Parity set: skip the JE so the branch is not taken.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *target);
      __ bind(done);
    } else if (cond == Assembler::notEqual) {
      // Branch to the target when parity is set or when not-equal.
      __ jccb(Assembler::parity, *target);
      __ jccb(Assembler::notEqual, *target);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_short_branch(1);
  ins_pipe(pipe_jcc);
%}
12771 
12772 // ============================================================================
12773 // Long Compare
12774 //
12775 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12776 // is tricky.  The flavor of compare used depends on whether we are testing
12777 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12778 // The GE test is the negated LT test.  The LE test can be had by commuting
12779 // the operands (yielding a GE test) and then negating; negate again for the
12780 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12781 // NE test is negated from that.
12782 
12783 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12784 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12785 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12786 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12787 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12788 // foo match ends up with the wrong leaf.  One fix is to not match both
12789 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12790 // both forms beat the trinary form of long-compare and both are very useful
12791 // on Intel which has so few registers.
12792 
12793 // Manifest a CmpL result in an integer register.  Very painful.
12794 // This is the test to avoid.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  // Three-way long compare producing -1, 0 or +1 in dst. The high halves
  // are compared signed; if they are equal, the low halves are compared
  // unsigned. The flags register is clobbered (KILL).
  match(Set dst (CmpL3 src1 src2));
  ins_cost(1000);
  effect(KILL flags);
  format %{ "XOR    $dst,$dst\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
            "JLT,s  m_one\n\t"
            "JGT,s  p_one\n\t"
            "CMP    $src1.lo,$src2.lo\n\t"
            "JB,s   m_one\n\t"
            "JEQ,s  done\n"
    "p_one:\tINC    $dst\n\t"
            "JMP,s  done\n"
    "m_one:\tDEC    $dst\n"
     "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    Register result = $dst$$Register;
    Register lhs_lo = $src1$$Register;
    Register rhs_lo = $src2$$Register;

    // Start from 0; it becomes +1 or -1 below unless the longs are equal.
    __ xorptr(result, result);

    // Signed compare of the high words decides first.
    __ cmpl(HIGH_FROM_LOW(lhs_lo), HIGH_FROM_LOW(rhs_lo));
    __ jccb(Assembler::less,    m_one);
    __ jccb(Assembler::greater, p_one);

    // High words equal: unsigned compare of the low words.
    __ cmpl(lhs_lo, rhs_lo);
    __ jccb(Assembler::below,   m_one);
    __ jccb(Assembler::equal,   done);

    __ bind(p_one);
    __ incrementl(result);
    __ jmpb(done);

    __ bind(m_one);
    __ decrementl(result);

    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}
12828 
12829 //======
12830 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12831 // compares.  Can be used for LE or GT compares by reversing arguments.
12832 // NOT GOOD FOR EQ/NE tests.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  // Long-versus-zero compare for LT/GE consumers: only the high half of
  // src is tested against itself (TEST hi,hi).
  match( Set flags (CmpL src zero ));
  format %{ "TEST   $src.hi,$src.hi" %}
  ins_cost(100);
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}
12841 
12842 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12843 // compares.  Can be used for LE or GT compares by reversing arguments.
12844 // NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  // Register-register long compare for LT/GE consumers. Per the format text:
  // compare the low halves, copy src1.hi into the scratch register tmp, then
  // subtract src2.hi with borrow so the flags describe the whole compare.
  match( Set flags (CmpL src1 src2 ));
  ins_cost(300);
  effect(TEMP tmp);
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV    $tmp,$src1.hi\n\t"
            "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
12855 
// Long compares reg < zero/reg OR reg >= zero/reg.
12857 // Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  // Branch on a long compare; accepted only when the Bool test is lt or ge
  // (see predicate). Expands to the ordinary conditional jump.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(If cmp flags);
  effect(USE labl);
  expand %{
    jmpCon(cmp, flags, labl);  // JLT or JGE
  %}
%}
12866 
12867 //======
12868 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12869 // compares.  Can be used for LE or GT compares by reversing arguments.
12870 // NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  // Unsigned-long-versus-zero compare for LT/GE consumers: the high half of
  // src is tested against itself (TEST hi,hi).
  match(Set flags (CmpUL src zero));
  format %{ "TEST   $src.hi,$src.hi" %}
  ins_cost(100);
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}
12879 
12880 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12881 // compares.  Can be used for LE or GT compares by reversing arguments.
12882 // NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  // Register-register unsigned long compare for LT/GE consumers. Per the
  // format text: compare the low halves, copy src1.hi into the scratch
  // register tmp, then subtract src2.hi with borrow.
  match(Set flags (CmpUL src1 src2));
  ins_cost(300);
  effect( TEMP tmp );
  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV    $tmp,$src1.hi\n\t"
            "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
12893 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12895 // Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  // Branch on an unsigned long compare; accepted only when the Bool test is
  // lt or ge (see predicate). Expands to the ordinary conditional jump.
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  match(If cmp flags);
  effect(USE labl);
  expand %{
    jmpCon(cmp,flags,labl);  // JLT or JGE
  %}
%}
12904 
12905 // Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  // Conditional move of a long register pair, driven by a long compare
  // whose Bool test is lt or ge. Needs CMOV support (see predicate). Two
  // CMOVs are encoded: the low halves first, then the high halves.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  ins_cost(400);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src), enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
12916 
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  // Conditional move of a long loaded from memory (LoadL), driven by a long
  // compare whose Bool test is lt or ge. Needs CMOV support. The two CMOVs
  // (RegMem, then RegMem_Hi) are bracketed by SetInstMark / ClearInstMark.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  ins_cost(500);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  ins_encode(SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark);
  ins_pipe(pipe_cmov_reg_long);
%}
12927 
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  // Unsigned-compare flavour (cmpOpU / flagsReg_ulong_LTGE operands) of the
  // long register CMOVE for lt/ge tests; expands to cmovLL_reg_LTGE.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp,flags,dst,src);
  %}
%}
12936 
instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  // Unsigned-compare flavour (cmpOpU / flagsReg_ulong_LTGE operands) of the
  // long memory CMOVE for lt/ge tests; expands to cmovLL_mem_LTGE.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp,flags,dst,src);
  %}
%}
12945 
12946 // Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  // Conditional move of an int register, driven by a long compare whose
  // Bool test is lt or ge. Needs CMOV support (see predicate).
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(200);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst,$src" %}
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12956 
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  // Conditional move of an int loaded from memory (LoadI), driven by a long
  // compare whose Bool test is lt or ge. Needs CMOV support. The encoding is
  // bracketed by SetInstMark / ClearInstMark.
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(250);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst,$src" %}
  ins_encode(SetInstMark, enc_cmov(cmp), RegMem(dst, src), ClearInstMark);
  ins_pipe(pipe_cmov_mem);
%}
12966 
instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  // Unsigned-compare flavour (cmpOpU / flagsReg_ulong_LTGE operands) of the
  // int register CMOVE for lt/ge tests; expands to cmovII_reg_LTGE.
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp,flags,dst,src);
  %}
%}
12975 
instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  // Unsigned-compare flavour (cmpOpU / flagsReg_ulong_LTGE operands) of the
  // int memory CMOVE for lt/ge tests; expands to cmovII_mem_LTGE.
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp,flags,dst,src);
  %}
%}
12984 
12985 // Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  // Conditional move of a pointer register, driven by a long compare whose
  // Bool test is lt or ge. Needs CMOV support (see predicate).
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(200);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst,$src" %}
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12995 
12996 // Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  // Unsigned-compare flavour (cmpOpU / flagsReg_ulong_LTGE operands) of the
  // pointer CMOVE for lt/ge tests; expands to cmovPP_reg_LTGE.
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp, flags, dst, src);
  %}
%}
13005 
13006 // Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  // Conditional move of a double (regDPR operands) on a long compare with an
  // lt or ge test. Selected when UseSSE<=1; expands to fcmovDPR_regS.
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp, flags, dst, src);
  %}
%}
13015 
13016 // Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  // Conditional move of a double (regD operands) on a long compare with an
  // lt or ge test. Selected when UseSSE>=2; expands to fcmovD_regS.
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp, flags, dst, src);
  %}
%}
13025 
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  // Conditional move of a float (regFPR operands) on a long compare with an
  // lt or ge test. Selected when UseSSE==0; expands to fcmovFPR_regS.
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp, flags, dst, src);
  %}
%}
13034 
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  // Conditional move of a float (regF operands) on a long compare with an
  // lt or ge test. Selected when UseSSE>=1; expands to fcmovF_regS.
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp, flags, dst, src);
  %}
%}
13043 
13044 //======
13045 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  // EQ/NE-only test of a long against zero. Per the format text: the low
  // half is copied into the scratch register tmp and OR-ed with the high
  // half.
  match( Set flags (CmpL src zero ));
  ins_cost(200);
  effect( TEMP tmp );
  format %{ "MOV    $tmp,$src.lo\n\t"
            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
13055 
13056 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  // EQ/NE-only compare of two long register pairs. Per the format text: the
  // low halves are compared first; if they differ the high compare is
  // skipped, otherwise the high halves are compared.
  match( Set flags (CmpL src1 src2 ));
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s  skip\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
     "skip:\t" %}
  ins_cost(200+300);
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}
13067 
13068 // Long compare reg == zero/reg OR reg != zero/reg
13069 // Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  // Branch on a long compare; accepted only when the Bool test is eq or ne
  // (see predicate). Expands to the ordinary conditional jump.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(If cmp flags);
  effect(USE labl);
  expand %{
    jmpCon(cmp, flags, labl);  // JEQ or JNE
  %}
%}
13078 
13079 //======
13080 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  // EQ/NE-only test of an unsigned long against zero. Per the format text:
  // the low half is copied into the scratch register tmp and OR-ed with the
  // high half.
  match(Set flags (CmpUL src zero));
  ins_cost(200);
  effect( TEMP tmp );
  format %{ "MOV    $tmp,$src.lo\n\t"
            "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}
13090 
13091 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  // EQ/NE-only compare of two unsigned long register pairs. Per the format
  // text: the low halves are compared first; if they differ the high
  // compare is skipped, otherwise the high halves are compared.
  match(Set flags (CmpUL src1 src2));
  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s  skip\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
     "skip:\t" %}
  ins_cost(200+300);
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13102 
13103 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13104 // Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  // Branch on an unsigned long compare; accepted only when the Bool test is
  // eq or ne (see predicate). Expands to the ordinary conditional jump.
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  match(If cmp flags);
  effect(USE labl);
  expand %{
    jmpCon(cmp,flags,labl);  // JEQ or JNE
  %}
%}
13113 
13114 // Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  // Conditional move of a long register pair, driven by a long compare
  // whose Bool test is eq or ne. Needs CMOV support (see predicate). Two
  // CMOVs are encoded: the low halves first, then the high halves.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  ins_cost(400);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src), enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
13125 
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  // Conditional move of a long loaded from memory (LoadL), driven by a long
  // compare whose Bool test is eq or ne. Needs CMOV support. The two CMOVs
  // (RegMem, then RegMem_Hi) are bracketed by SetInstMark / ClearInstMark.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  ins_cost(500);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  ins_encode(SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark);
  ins_pipe(pipe_cmov_reg_long);
%}
13136 
13137 // Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  // Conditional move of an int register, driven by a long compare whose
  // Bool test is eq or ne. Needs CMOV support (see predicate).
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(200);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst,$src" %}
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
13147 
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  // Conditional move of an int loaded from memory (LoadI), driven by a long
  // compare whose Bool test is eq or ne. Needs CMOV support. The encoding is
  // bracketed by SetInstMark / ClearInstMark.
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(250);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst,$src" %}
  ins_encode(SetInstMark, enc_cmov(cmp), RegMem(dst, src), ClearInstMark);
  ins_pipe(pipe_cmov_mem);
%}
13157 
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  // Unsigned-compare flavour (cmpOpU / flagsReg_ulong_EQNE operands) of the
  // int register CMOVE for eq/ne tests; expands to cmovII_reg_EQNE.
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp,flags,dst,src);
  %}
%}
13166 
instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  // Unsigned-compare flavour (cmpOpU / flagsReg_ulong_EQNE operands) of the
  // int memory CMOVE for eq/ne tests; expands to cmovII_mem_EQNE.
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp,flags,dst,src);
  %}
%}
13175 
13176 // Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  // Conditional move of a pointer register, driven by a long compare whose
  // Bool test is eq or ne. Needs CMOV support (see predicate).
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(200);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst,$src" %}
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
13186 
13187 // Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  // Unsigned-compare flavour (cmpOpU / flagsReg_ulong_EQNE operands) of the
  // pointer CMOVE for eq/ne tests; expands to cmovPP_reg_EQNE.
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp, flags, dst, src);
  %}
%}
13196 
13197 // Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  // Conditional move of a double (regDPR operands) on a long compare with an
  // eq or ne test. Selected when UseSSE<=1; expands to fcmovDPR_regS.
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp, flags, dst, src);
  %}
%}
13206 
13207 // Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // Conditional move of a double (regD operands) on a long compare with an
  // eq or ne test. Selected when UseSSE>=2; expands to fcmovD_regS.
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp, flags, dst, src);
  %}
%}
13216 
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  // Conditional move of a float (regFPR operands) on a long compare with an
  // eq or ne test. Selected when UseSSE==0; expands to fcmovFPR_regS.
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp, flags, dst, src);
  %}
%}
13225 
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // Conditional move of a float (regF operands) on a long compare with an
  // eq or ne test. Selected when UseSSE>=1; expands to fcmovF_regS.
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp, flags, dst, src);
  %}
%}
13234 
13235 //======
13236 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13237 // Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  // Long-versus-zero compare for LE/GT consumers, to be paired with a
  // commuted test. Per the format text: tmp is zeroed, compared with the low
  // half of src, then the high half is subtracted with borrow.
  match( Set flags (CmpL src zero ));
  ins_cost(300);
  effect(TEMP tmp);
  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
13248 
13249 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13250 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13251 // requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  // Register-register long compare for LE/GT consumers. The operands are
  // passed to long_cmp_flags2 in swapped order (src2, src1), so it must be
  // paired with a commuted test. tmp is a scratch register.
  match( Set flags (CmpL src1 src2 ));
  ins_cost(300);
  effect(TEMP tmp);
  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode(long_cmp_flags2(src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
13262 
// Long compares reg <= zero/reg OR reg > zero/reg.
13264 // Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  // Branch on a long compare; accepted only when the Bool test is gt or le
  // (see predicate). Takes a cmpOp_commute operand and expands to the
  // ordinary conditional jump.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  match(If cmp flags);
  effect(USE labl);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);  // JGT or JLE
  %}
%}
13274 
13275 //======
13276 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13277 // Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  // Unsigned-long-versus-zero compare for LE/GT consumers, to be paired with
  // a commuted test. Per the format text: tmp is zeroed, compared with the
  // low half of src, then the high half is subtracted with borrow.
  match(Set flags (CmpUL src zero));
  ins_cost(300);
  effect( TEMP tmp );
  format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}
13288 
13289 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13290 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13291 // requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  // Register-register unsigned long compare for LE/GT consumers. The
  // operands are passed to long_cmp_flags2 in swapped order (src2, src1), so
  // it must be paired with a commuted test. tmp is a scratch register.
  match(Set flags (CmpUL src1 src2));
  ins_cost(300);
  effect( TEMP tmp );
  format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13302 
// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13304 // Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  // Branch on an unsigned long compare; accepted only when the Bool test is
  // gt or le (see predicate). Takes a cmpOpU_commute operand and expands to
  // the ordinary conditional jump.
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  match(If cmp flags);
  effect(USE labl);
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);  // JGT or JLE
  %}
%}
13314 
13315 // Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  // Conditional move of a long register pair, driven by a long compare
  // whose Bool test is le or gt (cmpOp_commute operand). Needs CMOV support.
  // Two CMOVs are encoded: the low halves first, then the high halves.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  ins_cost(400);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src), enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
13326 
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  // Conditional move of a long loaded from memory (LoadL), driven by a long
  // compare whose Bool test is le or gt (cmpOp_commute operand). Needs CMOV
  // support. The two CMOVs (RegMem, then RegMem_Hi) are bracketed by
  // SetInstMark / ClearInstMark.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  ins_cost(500);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  ins_encode(SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark);
  ins_pipe(pipe_cmov_reg_long);
%}
13337 
13338 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13339   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); // same tree as cmovLL_reg_LEGT, but with the cmpOpU_commute / flagsReg_ulong_LEGT operands
13340   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13341   ins_cost(400);
13342   expand %{
13343     cmovLL_reg_LEGT(cmp, flags, dst, src); // delegates entirely to cmovLL_reg_LEGT
13344   %}
13345 %}
13346 
13347 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13348   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); // same tree as cmovLL_mem_LEGT, but with the cmpOpU_commute / flagsReg_ulong_LEGT operands
13349   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13350   ins_cost(500);
13351   expand %{
13352     cmovLL_mem_LEGT(cmp, flags, dst, src); // delegates entirely to cmovLL_mem_LEGT
13353   %}
13354 %}
13355 
13356 // Compare 2 longs and CMOVE ints (register source; le/gt tests only; needs CMOV support).
13357 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13358   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13359   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13360   ins_cost(200);
13361   format %{ "CMOV$cmp $dst,$src" %}
13362   opcode(0x0F,0x40);
13363   ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); // a single CMOV, register to register
13364   ins_pipe( pipe_cmov_reg );
13365 %}
13366 
13367 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13368   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13369   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); // memory-source form: the LoadI of src is part of the matched tree
13370   ins_cost(250);
13371   format %{ "CMOV$cmp $dst,$src" %}
13372   opcode(0x0F,0x40);
13373   ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark ); // a single CMOV with a memory operand, bracketed by SetInstMark/ClearInstMark
13374   ins_pipe( pipe_cmov_mem );
13375 %}
13376 
13377 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13378   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13379   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); // same tree as cmovII_reg_LEGT, but with the cmpOpU_commute / flagsReg_ulong_LEGT operands
13380   ins_cost(200);
13381   expand %{
13382     cmovII_reg_LEGT(cmp, flags, dst, src); // delegates entirely to cmovII_reg_LEGT
13383   %}
13384 %}
13385 
13386 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13387   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13388   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); // same tree as cmovII_mem_LEGT, but with the cmpOpU_commute / flagsReg_ulong_LEGT operands
13389   ins_cost(250);
13390   expand %{
13391     cmovII_mem_LEGT(cmp, flags, dst, src); // delegates entirely to cmovII_mem_LEGT
13392   %}
13393 %}
13394 
13395 // Compare 2 longs and CMOVE ptrs (register source; le/gt tests only; needs CMOV support).
13396 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13397   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13398   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13399   ins_cost(200);
13400   format %{ "CMOV$cmp $dst,$src" %}
13401   opcode(0x0F,0x40);
13402   ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); // a single CMOV, register to register
13403   ins_pipe( pipe_cmov_reg );
13404 %}
13405 
13406 // Compare 2 unsigned longs and CMOVE ptrs (le/gt tests only; needs CMOV support).
13407 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13408   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13409   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13410   ins_cost(200);
13411   expand %{
13412     cmovPP_reg_LEGT(cmp,flags,dst,src); // delegates entirely to cmovPP_reg_LEGT
13413   %}
13414 %}
13415 
13416 // Compare 2 longs and CMOVE doubles (regDPR operands; selected when UseSSE<=1; le/gt tests only)
13417 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13418   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13419   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13420   ins_cost(200);
13421   expand %{
13422     fcmovDPR_regS(cmp,flags,dst,src); // delegates entirely to fcmovDPR_regS
13423   %}
13424 %}
13425 
13426 // Compare 2 longs and CMOVE doubles (regD operands; selected when UseSSE>=2; le/gt tests only)
13427 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13428   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13429   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13430   ins_cost(200);
13431   expand %{
13432     fcmovD_regS(cmp,flags,dst,src); // delegates entirely to fcmovD_regS
13433   %}
13434 %}
13435 
13436 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13437   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13438   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); // compare 2 longs and CMOVE floats; regFPR operands, selected when UseSSE==0
13439   ins_cost(200);
13440   expand %{
13441     fcmovFPR_regS(cmp,flags,dst,src); // delegates entirely to fcmovFPR_regS
13442   %}
13443 %}
13444 
13445 
13446 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13447   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13448   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); // compare 2 longs and CMOVE floats; regF operands, selected when UseSSE>=1
13449   ins_cost(200);
13450   expand %{
13451     fcmovF_regS(cmp,flags,dst,src); // delegates entirely to fcmovF_regS
13452   %}
13453 %}
13454 
13455 
13456 // ============================================================================
13457 // Procedure Call/Return Instructions
13458 // Call Java Static Instruction
13459 // Note: If this code changes, the corresponding ret_addr_offset() and
13460 //       compute_padding() functions will have to be adjusted.
13461 instruct CallStaticJavaDirect(method meth) %{
13462   match(CallStaticJava); // selected for CallStaticJava nodes
13463   effect(USE meth);
13464 
13465   ins_cost(300);
13466   format %{ "CALL,static " %}
13467   opcode(0xE8); /* E8 cd */
13468   ins_encode( pre_call_resets,
13469               Java_Static_Call( meth ),
13470               call_epilog,
13471               post_call_FPU );
13472   ins_pipe( pipe_slow );
13473   ins_alignment(4); // NOTE(review): presumably tied to the compute_padding() remark above - confirm
13474 %}
13475 
13476 // Call Java Dynamic Instruction
13477 // Note: If this code changes, the corresponding ret_addr_offset() and
13478 //       compute_padding() functions will have to be adjusted.
13479 instruct CallDynamicJavaDirect(method meth) %{
13480   match(CallDynamicJava); // selected for CallDynamicJava nodes
13481   effect(USE meth);
13482 
13483   ins_cost(300);
13484   format %{ "MOV    EAX,(oop)-1\n\t"
13485             "CALL,dynamic" %}
13486   opcode(0xE8); /* E8 cd */
13487   ins_encode( pre_call_resets,
13488               Java_Dynamic_Call( meth ),
13489               call_epilog,
13490               post_call_FPU );
13491   ins_pipe( pipe_slow );
13492   ins_alignment(4); // NOTE(review): presumably tied to the compute_padding() remark above - confirm
13493 %}
13494 
13495 // Call Runtime Instruction
13496 instruct CallRuntimeDirect(method meth) %{
13497   match(CallRuntime ); // selected for CallRuntime nodes
13498   effect(USE meth);
13499 
13500   ins_cost(300);
13501   format %{ "CALL,runtime " %}
13502   opcode(0xE8); /* E8 cd */
13503   // Use FFREEs to clear entries in float stack; unlike the Java call rules, no call_epilog step is listed
13504   ins_encode( pre_call_resets,
13505               FFree_Float_Stack_All,
13506               Java_To_Runtime( meth ),
13507               post_call_FPU );
13508   ins_pipe( pipe_slow );
13509 %}
13510 
13511 // Call runtime without safepoint
13512 instruct CallLeafDirect(method meth) %{
13513   match(CallLeaf); // selected for CallLeaf nodes
13514   effect(USE meth);
13515 
13516   ins_cost(300);
13517   format %{ "CALL_LEAF,runtime " %}
13518   opcode(0xE8); /* E8 cd */
13519   ins_encode( pre_call_resets,
13520               FFree_Float_Stack_All,
13521               Java_To_Runtime( meth ),
13522               Verify_FPU_For_Leaf, post_call_FPU );
13523   ins_pipe( pipe_slow ); // encoding above matches CallRuntimeDirect plus a Verify_FPU_For_Leaf step
13524 %}
13525 
13526 instruct CallLeafNoFPDirect(method meth) %{
13527   match(CallLeafNoFP); // selected for CallLeafNoFP nodes
13528   effect(USE meth);
13529 
13530   ins_cost(300);
13531   format %{ "CALL_LEAF_NOFP,runtime " %}
13532   opcode(0xE8); /* E8 cd */
13533   ins_encode(pre_call_resets, Java_To_Runtime(meth)); // unlike CallLeafDirect: no FFree_Float_Stack_All, Verify_FPU_For_Leaf or post_call_FPU steps
13534   ins_pipe( pipe_slow );
13535 %}
13536 
13537 
13538 // Return Instruction
13539 // Remove the return address & jump to it.
13540 instruct Ret() %{
13541   match(Return); // selected for Return nodes; takes no operands
13542   format %{ "RET" %}
13543   opcode(0xC3);
13544   ins_encode(OpcP); // only the primary opcode (0xC3) is emitted
13545   ins_pipe( pipe_jmp );
13546 %}
13547 
13548 // Tail Call; Jump from runtime stub to Java code.
13549 // Also known as an 'interprocedural jump'.
13550 // Target of jump will eventually return to caller.
13551 // TailJump below removes the return address.
13552 // Don't use ebp for 'jump_target' because a MachEpilogNode has already been
13553 // emitted just above the TailCall which has reset ebp to the caller state.
13554 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13555   match(TailCall jump_target method_ptr);
13556   ins_cost(300);
13557   format %{ "JMP    $jump_target \t# EBX holds method" %}
13558   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13559   ins_encode( OpcP, RegOpc(jump_target) ); // only jump_target is encoded; method_ptr appears solely in the match rule
13560   ins_pipe( pipe_jmp );
13561 %}
13562 
13563 
13564 // Tail Jump; remove the return address; jump to target.
13565 // TailCall above leaves the return address around.
13566 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13567   match( TailJump jump_target ex_oop ); // ex_oop is part of the match only; the encoding below does not reference it
13568   ins_cost(300);
13569   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13570             "JMP    $jump_target " %}
13571   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13572   ins_encode( enc_pop_rdx,
13573               OpcP, RegOpc(jump_target) );
13574   ins_pipe( pipe_jmp ); // sequence above: enc_pop_rdx first, then the FF /4 jump through jump_target
13575 %}
13576 
13577 // Forward exception: jump (not call) to StubRoutines::forward_exception_entry().
13578 instruct ForwardExceptionjmp()
13579 %{
13580   match(ForwardException);
13581 
13582   format %{ "JMP    forward_exception_stub" %}
13583   ins_encode %{
13584     __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg); // NOTE(review): noreg is presumably the scratch-register argument - confirm
13585   %}
13586   ins_pipe(pipe_jmp);
13587 %}
13588 
13589 // Create exception oop: created by stack-crawling runtime code.
13590 // Created exception is now available to this handler, and is setup
13591 // just prior to jumping to this handler.  No code emitted.
13592 instruct CreateException( eAXRegP ex_oop )
13593 %{
13594   match(Set ex_oop (CreateEx));
13595 
13596   size(0); // zero-length instruction, consistent with the empty encoding below
13597   // use the following format syntax
13598   format %{ "# exception oop is in EAX; no code emitted" %}
13599   ins_encode(); // intentionally empty
13600   ins_pipe( empty );
13601 %}
13602 
13603 
13604 // Rethrow exception:
13605 // The exception oop will come in the first argument position.
13606 // Then JUMP (not call) to the rethrow stub code.
13607 instruct RethrowException()
13608 %{
13609   match(Rethrow); // selected for Rethrow nodes; takes no operands
13610 
13611   // use the following format syntax
13612   format %{ "JMP    rethrow_stub" %}
13613   ins_encode(enc_rethrow); // all emitted code comes from the enc_rethrow encoding class
13614   ins_pipe( pipe_jmp );
13615 %}
13616 
13617 // inlined locking and unlocking
13618 
13619 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
13620   predicate(LockingMode != LM_LIGHTWEIGHT); // any locking mode other than LM_LIGHTWEIGHT; see cmpFastLockLightweight for that one
13621   match(Set cr (FastLock object box)); // the result of the FastLock node is the flags register cr
13622   effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread); // thread is a TEMP too, although the format text below does not list it
13623   ins_cost(300);
13624   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13625   ins_encode %{
13626     __ get_thread($thread$$Register); // load the current thread into the TEMP register before calling fast_lock
13627     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13628                  $scr$$Register, noreg, noreg, $thread$$Register, nullptr);
13629   %}
13630   ins_pipe(pipe_slow);
13631 %}
13632 
13633 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13634   predicate(LockingMode != LM_LIGHTWEIGHT); // any locking mode other than LM_LIGHTWEIGHT; see cmpFastUnlockLightweight for that one
13635   match(Set cr (FastUnlock object box)); // the result of the FastUnlock node is the flags register cr
13636   effect(TEMP tmp, USE_KILL box);
13637   ins_cost(300);
13638   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13639   ins_encode %{
13640     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register); // no thread register is passed here, unlike the lock rule
13641   %}
13642   ins_pipe(pipe_slow);
13643 %}
13644 
13645 instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
13646   predicate(LockingMode == LM_LIGHTWEIGHT); // counterpart of cmpFastLock for LM_LIGHTWEIGHT
13647   match(Set cr (FastLock object box)); // the result of the FastLock node is the flags register cr
13648   effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread); // thread is a TEMP too, although the format text below does not list it
13649   ins_cost(300);
13650   format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
13651   ins_encode %{
13652     __ get_thread($thread$$Register); // load the current thread into the TEMP register before calling fast_lock_lightweight
13653     __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
13654   %}
13655   ins_pipe(pipe_slow);
13656 %}
13657 
13658 instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
13659   predicate(LockingMode == LM_LIGHTWEIGHT); // counterpart of cmpFastUnlock for LM_LIGHTWEIGHT
13660   match(Set cr (FastUnlock object eax_reg)); // the second FastUnlock input is bound to the eAXRegP operand
13661   effect(TEMP tmp, USE_KILL eax_reg, TEMP thread); // thread is a TEMP too, although the format text below does not list it
13662   ins_cost(300);
13663   format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
13664   ins_encode %{
13665     __ get_thread($thread$$Register); // load the current thread into the TEMP register before calling fast_unlock_lightweight
13666     __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
13667   %}
13668   ins_pipe(pipe_slow);
13669 %}
13670 
13671 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13672   predicate(Matcher::vector_length(n) <= 32); // note '<=': a length of exactly 32 is handled here despite LT32 in the rule name (the format text says LE32)
13673   match(Set dst (MaskAll src)); // MaskAll from a long source into a kReg destination
13674   format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
13675   ins_encode %{
13676     int mask_len = Matcher::vector_length(this);
13677     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); // no temporary k-register needed, unlike the GT32 rules
13678   %}
13679   ins_pipe( pipe_slow );
13680 %}
13681 
13682 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13683   predicate(Matcher::vector_length(n) > 32); // complements mask_all_evexL_LT32, which takes lengths <= 32
13684   match(Set dst (MaskAll src)); // MaskAll from a long source into a kReg destination
13685   effect(TEMP ktmp);
13686   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13687   ins_encode %{
13688     int mask_len = Matcher::vector_length(this);
13689     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len); // the "32" helper variant takes the extra k-register temp
13690   %}
13691   ins_pipe( pipe_slow );
13692 %}
13693 
13694 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13695   predicate(Matcher::vector_length(n) > 32);
13696   match(Set dst (MaskAll src)); // same as mask_all_evexL_GT32 but with an int (rRegI) source
13697   effect(TEMP ktmp);
13698   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13699   ins_encode %{
13700     int mask_len = Matcher::vector_length(this);
13701     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len); // the "32" helper variant takes the extra k-register temp
13702   %}
13703   ins_pipe( pipe_slow );
13704 %}
13705 
13706 // ============================================================================
13707 // Safepoint Instruction
13708 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13709   match(SafePoint poll); // safepoint poll: a TEST against the address held in the poll register
13710   effect(KILL cr, USE poll);
13711 
13712   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13713   ins_cost(125);
13714   // EBP would need size(3)
13715   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13716   ins_encode %{
13717     __ set_inst_mark();
13718     __ relocate(relocInfo::poll_type); // record a poll_type relocation while the instruction mark is set
13719     __ clear_inst_mark();
13720     address pre_pc = __ pc(); // address of the first byte of the TEST emitted next
13721     __ testl(rax, Address($poll$$Register, 0));
13722     address post_pc = __ pc(); // NOTE(review): post_pc is never read in this block
13723     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]"); // checks the first emitted byte is opcode 0x85
13724   %}
13725   ins_pipe(ialu_reg_mem);
13726 %}
13727 
13728 
13729 // ============================================================================
13730 // This name is KNOWN by the ADLC and cannot be changed.
13731 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13732 // for this guy.
13733 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13734   match(Set dst (ThreadLocal)); // produces the current thread pointer in dst
13735   effect(DEF dst, KILL cr);
13736 
13737   format %{ "MOV    $dst, Thread::current()" %}
13738   ins_encode %{
13739     Register dstReg = as_Register($dst$$reg);
13740     __ get_thread(dstReg); // all emitted code comes from MacroAssembler get_thread
13741   %}
13742   ins_pipe( ialu_reg_fat );
13743 %}
13744 
13745 
13746 
13747 //----------PEEPHOLE RULES-----------------------------------------------------
13748 // These must follow all instruction definitions as they use the names
13749 // defined in the instruction definitions.
13750 //
13751 // peepmatch ( root_instr_name [preceding_instruction]* );
13752 //
13753 // peepconstraint %{
13754 // (instruction_number.operand_name relational_op instruction_number.operand_name
13755 //  [, ...] );
13756 // // instruction numbers are zero-based using left to right order in peepmatch
13757 //
13758 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13759 // // provide an instruction_number.operand_name for each operand that appears
13760 // // in the replacement instruction's match rule
13761 //
13762 // ---------VM FLAGS---------------------------------------------------------
13763 //
13764 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13765 //
13766 // Each peephole rule is given an identifying number starting with zero and
13767 // increasing by one in the order seen by the parser.  An individual peephole
13768 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13769 // on the command-line.
13770 //
13771 // ---------CURRENT LIMITATIONS----------------------------------------------
13772 //
13773 // Only match adjacent instructions in same basic block
13774 // Only equality constraints
13775 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13776 // Only one replacement instruction
13777 //
13778 // ---------EXAMPLE----------------------------------------------------------
13779 //
13780 // // pertinent parts of existing instructions in architecture description
13781 // instruct movI(rRegI dst, rRegI src) %{
13782 //   match(Set dst (CopyI src));
13783 // %}
13784 //
13785 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13786 //   match(Set dst (AddI dst src));
13787 //   effect(KILL cr);
13788 // %}
13789 //
13790 // // Change (inc mov) to lea
13791 // peephole %{
13792 //   // increment preceded by register-register move
13793 //   peepmatch ( incI_eReg movI );
13794 //   // require that the destination register of the increment
13795 //   // match the destination register of the move
13796 //   peepconstraint ( 0.dst == 1.dst );
13797 //   // construct a replacement instruction that sets
13798 //   // the destination to ( move's source register + one )
13799 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13800 // %}
13801 //
13802 // Implementation no longer uses movX instructions since
13803 // machine-independent system no longer uses CopyX nodes.
13804 //
13805 // peephole %{
13806 //   peepmatch ( incI_eReg movI );
13807 //   peepconstraint ( 0.dst == 1.dst );
13808 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13809 // %}
13810 //
13811 // peephole %{
13812 //   peepmatch ( decI_eReg movI );
13813 //   peepconstraint ( 0.dst == 1.dst );
13814 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13815 // %}
13816 //
13817 // peephole %{
13818 //   peepmatch ( addI_eReg_imm movI );
13819 //   peepconstraint ( 0.dst == 1.dst );
13820 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13821 // %}
13822 //
13823 // peephole %{
13824 //   peepmatch ( addP_eReg_imm movP );
13825 //   peepconstraint ( 0.dst == 1.dst );
13826 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13827 // %}
13828 
13829 // // Change load of spilled value to only a spill
13830 // instruct storeI(memory mem, rRegI src) %{
13831 //   match(Set mem (StoreI mem src));
13832 // %}
13833 //
13834 // instruct loadI(rRegI dst, memory mem) %{
13835 //   match(Set dst (LoadI mem));
13836 // %}
13837 // Active rule. Per the numbering described above, 0 = loadI (root) and 1 = storeI (the preceding instruction).
13838 peephole %{
13839   peepmatch ( loadI storeI ); // a loadI immediately preceded by a storeI
13840   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); // same register and same memory operand on both instructions
13841   peepreplace ( storeI( 1.mem 1.mem 1.src ) ); // the pair is replaced by the store alone
13842 %}
13843 
13844 //----------SMARTSPILL RULES---------------------------------------------------
13845 // These must follow all instruction definitions as they use the names
13846 // defined in the instruction definitions.