1 //
    2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
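      // For example, the EBX definition below,
      //   reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
      // reads (illustrative gloss of the fields above): save-on-call for the
      // register allocator, save-on-entry under the C calling convention,
      // spilled and reloaded as an integer (Op_RegI), hardware encoding 3.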
   61 
   62 // General Registers
   63 // EBX, ESI, and EDI were previously set as save-on-entry for Java code.
   64 // SOE was then turned off in Java code due to frequent use of uncommon traps.
   65 // Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   81 // Here is the trick: FPR1 is really st(0), except in the midst of
   82 // emitting assembly for a machnode. During emission the FPU stack is
   83 // pushed, making FPR1 == st(1) temporarily. However, at any safepoint
   84 // the stack will not have this element, so FPR1 == st(0) from the
   85 // oopMap viewpoint. This same weirdness in numbering forces the
   86 // instruction encoding to play games with the register encode to
   87 // correct for this 0/1 issue. See MachSpillCopyNode::implementation,
   88 // where it does flt->flt moves, for an example.
   89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
  143 // Dynamic register class that selects at runtime between register classes
  144 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
  145 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
  208 // Class of integer register pairs (excluding EBP and EDI).
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Classes of integer register pairs that align with the calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  220 // Not AX or DX (nor EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  222 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
  225 // Floating point registers.  Notice FPR0 is not a choice.
  226 // FPR0 is never allocated; we use clever encodings to fake
  227 // 2-address instructions out of Intel's FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
  247 // Must be visible to the DFA in dfa_x86_32.cpp
  248 extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
  252 #define   RELOC_IMM32    Assembler::imm_operand
  253 #define   RELOC_DISP32   Assembler::disp32_operand
  254 
  255 #define __ _masm.
  256 
  257 // How to find the high register of a Long pair, given the low register
  258 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
  259 #define   HIGH_FROM_LOW_ENC(x) ((x)+2)
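      // Illustrative examples, using the integer encodings from the register
      // block above (EAX=0, ECX=1, EDX=2, EBX=3, EBP=5, EDI=7):
      //   HIGH_FROM_LOW_ENC(0 /* EAX */) == 2   // EDX, high half of EDX:EAX
      //   HIGH_FROM_LOW_ENC(1 /* ECX */) == 3   // EBX, high half of EBX:ECX
      //   HIGH_FROM_LOW_ENC(5 /* EBP */) == 7   // EDI, high half of EDI:EBP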
  260 
  261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  262 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  263 // fast versions of NegF/NegD and AbsF/AbsD.
  264 
  265 void reg_mask_init() {}
  266 
  267 // Note: 'double' and 'long long' have 32-bit alignment on x86.
  268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  269   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  270   // for the 128-bit operands of SSE instructions.
  271   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  272   // Store the value to a 128-bit operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
  277 
  278 // Buffer for 128-bit masks used by SSE instructions.
  279 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  280 
  281 // Static initialization during VM startup.
  282 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  284 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
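      // Illustrative note on the alignment trick above: &fp_signmask_pool[2] lies
      // 16 bytes past the start of the pool, and double_quadword rounds that
      // address down with &~0xF by at most 15 bytes, so the resulting 16-byte
      // slot still falls inside the pool; the extra 128 bits declared above
      // exist to absorb exactly this rounding.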
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
  299 
  300 // !!!!! Special hack to get all types of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
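      // Illustrative layout (assuming no pre-call resets are needed): a static
      // call is a single 5-byte CALL rel32, so the return address is 5 bytes
      // past the start of the call; a dynamic call is a 5-byte MOV (the
      // inline-cache load) followed by the 5-byte CALL, hence 10 bytes. Each
      // pre-call reset (6-byte FLDCW, 3-byte vzeroupper) shifts these by the
      // same amount.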
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned so that
  323 // it does not span a cache line and can be safely patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned so that
  331 // it does not span a cache line and can be safely patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
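      // Worked example (illustrative, assuming alignment_required() is 4 here):
      // if a CallStaticJavaDirect node would start at offset 10 with no
      // pre-call resets, the 4-byte call displacement begins at 10 + 1 = 11,
      // so the node gets align_up(11, 4) - 11 = 1 byte of padding; the dynamic
      // variant also skips the 5-byte MOV before doing the same calculation.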
  338 
  339 // EMIT_RM()
  340 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   cbuf.insts()->emit_int8(c);
  343 }
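      // Illustrative example: emit_rm(cbuf, 0x3, 0x00, ESP_enc) packs
      // (0x3 << 6) | (0x0 << 3) | 0x4 == 0xC4, the ModR/M byte paired with
      // opcode 0x81/0x83 in the epilog below to form "ADD ESP, #framesize".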
  344 
  345 // EMIT_CC()
  346 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   cbuf.insts()->emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(CodeBuffer &cbuf, int code) {
  353   cbuf.insts()->emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  358   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  359   emit_opcode(cbuf, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(CodeBuffer &cbuf, int d8) {
  364   cbuf.insts()->emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(CodeBuffer &cbuf, int d16) {
  369   cbuf.insts()->emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(CodeBuffer &cbuf, int d32) {
  374   cbuf.insts()->emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  381   cbuf.insts()->emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  393   cbuf.insts()->emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  398   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  402     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  403   } else {
  404     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  406     emit_d32(cbuf, disp);     // Displacement  // R/M byte
  407   }
  408 }
  409 
  410 // Encode a register + memory operand: reg_encoding, [base + index*scale + displace]
  411 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  412   // If there is no index and no scale, use the form without a SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(cbuf, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(cbuf, 0x1, reg_encoding, base);
  423         emit_d8(cbuf, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (cbuf, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(cbuf, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (cbuf, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  450       emit_rm(cbuf, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  456         emit_rm(cbuf, scale, index, base);
  457         emit_d8(cbuf, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  462           emit_rm(cbuf, scale, index, 0x04);
  463         } else {
  464           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  465           emit_rm(cbuf, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (cbuf, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
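      // Worked example (illustrative): encoding a register with encoding 0
      // (EAX) against [ESP + 8] takes the SIB path above and emits
      // mod=01 reg=000 rm=100 (0x44), the SIB byte 0x24 (scale=0, index=100
      // meaning "none", base=ESP), and the 8-bit displacement 0x08.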
  476 
  477 
  478 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( cbuf, 0x8B );
  483     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
  486 
  487 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss instructions set ZF,PF,CF flags and
  493   // zero OF,AF,SF for NaN values.
  494   // Fixup flags by zeroing ZF,PF so that compare of NaN
  495   // values returns 'less than' result (CF is set).
  496   // Leave the rest of flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
  506 
  507 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
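      // Result mapping (illustrative): after a floating-point compare that
      // sets the flags (e.g. UCOMISS), emit_cmpfp3 leaves dst == -1 when the
      // operands are unordered (NaN) or the first is below the second, 0 when
      // they are equal, and 1 when the first is above -- the usual three-way
      // compare shape.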
  516 
  517 
  518 //=============================================================================
  519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612   C2_MacroAssembler _masm(&cbuf);
  613 
  614   __ verified_entry(C);
  615 
  616   C->output()->set_frame_complete(cbuf.insts_size());
  617 
  618   if (C->has_mach_constant_base_node()) {
  619     // NOTE: We set the table base offset here because code that uses the
  620     // constant table might be emitted before MachConstantBaseNode.
  621     ConstantTable& constant_table = C->output()->constant_table();
  622     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  623   }
  624 }
  625 
  626 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  627   return MachNode::size(ra_); // too many variables; just compute it the hard way
  628 }
  629 
  630 int MachPrologNode::reloc() const {
  631   return 0; // a large enough number
  632 }
  633 
  634 //=============================================================================
  635 #ifndef PRODUCT
  636 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  637   Compile *C = ra_->C;
  638   int framesize = C->output()->frame_size_in_bytes();
  639   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  640   // Remove two words for the return addr and rbp.
  641   framesize -= 2*wordSize;
  642 
  643   if (C->max_vector_size() > 16) {
  644     st->print("VZEROUPPER");
  645     st->cr(); st->print("\t");
  646   }
  647   if (C->in_24_bit_fp_mode()) {
  648     st->print("FLDCW  standard control word");
  649     st->cr(); st->print("\t");
  650   }
  651   if (framesize) {
  652     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  653     st->cr(); st->print("\t");
  654   }
  655   st->print_cr("POPL   EBP"); st->print("\t");
  656   if (do_polling() && C->is_method_compilation()) {
  657     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  658               "JA      #safepoint_stub\t"
  659               "# Safepoint: poll for GC");
  660   }
  661 }
  662 #endif
  663 
  664 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  665   Compile *C = ra_->C;
  666   MacroAssembler _masm(&cbuf);
  667 
  668   if (C->max_vector_size() > 16) {
  669     // Clear upper bits of YMM registers when current compiled code uses
  670     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  671     _masm.vzeroupper();
  672   }
  673   // If method set FPU control word, restore to standard control word
  674   if (C->in_24_bit_fp_mode()) {
  675     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  676   }
  677 
  678   int framesize = C->output()->frame_size_in_bytes();
  679   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  680   // Remove two words for the return addr and rbp.
  681   framesize -= 2*wordSize;
  682 
  683   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  684 
  685   if (framesize >= 128) {
  686     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  687     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  688     emit_d32(cbuf, framesize);
  689   } else if (framesize) {
  690     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  691     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  692     emit_d8(cbuf, framesize);
  693   }
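        // (Illustrative) 0x83 is the sign-extended 8-bit immediate form of ADD,
        // so it covers framesize values below 128; larger frames need the
        // 4-byte immediate form 0x81.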
  694 
  695   emit_opcode(cbuf, 0x58 | EBP_enc);
  696 
  697   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  698     __ reserved_stack_check();
  699   }
  700 
  701   if (do_polling() && C->is_method_compilation()) {
  702     Register thread = as_Register(EBX_enc);
  703     MacroAssembler masm(&cbuf);
  704     __ get_thread(thread);
  705     Label dummy_label;
  706     Label* code_stub = &dummy_label;
  707     if (!C->output()->in_scratch_emit_size()) {
  708       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  709       C->output()->add_stub(stub);
  710       code_stub = &stub->entry();
  711     }
  712     __ relocate(relocInfo::poll_return_type);
  713     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  714   }
  715 }
  716 
  717 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  718   return MachNode::size(ra_); // too many variables; just compute it
  719                               // the hard way
  720 }
  721 
  722 int MachEpilogNode::reloc() const {
  723   return 0; // a large enough number
  724 }
  725 
  726 const Pipeline * MachEpilogNode::pipeline() const {
  727   return MachNode::pipeline_class();
  728 }
  729 
  730 //=============================================================================
  731 
  732 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  733 static enum RC rc_class( OptoReg::Name reg ) {
  734 
  735   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  736   if (OptoReg::is_stack(reg)) return rc_stack;
  737 
  738   VMReg r = OptoReg::as_VMReg(reg);
  739   if (r->is_Register()) return rc_int;
  740   if (r->is_FloatRegister()) {
  741     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  742     return rc_float;
  743   }
  744   if (r->is_KRegister()) return rc_kreg;
  745   assert(r->is_XMMRegister(), "must be");
  746   return rc_xmm;
  747 }
  748 
  749 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  750                         int opcode, const char *op_str, int size, outputStream* st ) {
  751   if( cbuf ) {
  752     emit_opcode  (*cbuf, opcode );
  753     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  754 #ifndef PRODUCT
  755   } else if( !do_size ) {
  756     if( size != 0 ) st->print("\n\t");
  757     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  758       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  759       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  760     } else { // FLD, FST, PUSH, POP
  761       st->print("%s [ESP + #%d]",op_str,offset);
  762     }
  763 #endif
  764   }
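        // Size accounting (illustrative): the fixed 3 bytes below are the
        // opcode, ModR/M, and SIB bytes of the [ESP + disp] form; the
        // displacement adds 0, 1, or 4 more bytes.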
  765   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  766   return size+3+offset_size;
  767 }
  768 
  769 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  770 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  771                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  772   int in_size_in_bits = Assembler::EVEX_32bit;
  773   int evex_encoding = 0;
  774   if (reg_lo+1 == reg_hi) {
  775     in_size_in_bits = Assembler::EVEX_64bit;
  776     evex_encoding = Assembler::VEX_W;
  777   }
  778   if (cbuf) {
  779     MacroAssembler _masm(cbuf);
  780     // EVEX spills remain EVEX: compressed displacement is better than AVX for spill memory operations,
  781     //                          since it maps more cases to a single-byte displacement
  782     _masm.set_managed();
  783     if (reg_lo+1 == reg_hi) { // double move?
  784       if (is_load) {
  785         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  786       } else {
  787         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  788       }
  789     } else {
  790       if (is_load) {
  791         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  792       } else {
  793         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  794       }
  795     }
  796 #ifndef PRODUCT
  797   } else if (!do_size) {
  798     if (size != 0) st->print("\n\t");
  799     if (reg_lo+1 == reg_hi) { // double move?
  800       if (is_load) st->print("%s %s,[ESP + #%d]",
  801                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  802                               Matcher::regName[reg_lo], offset);
  803       else         st->print("MOVSD  [ESP + #%d],%s",
  804                               offset, Matcher::regName[reg_lo]);
  805     } else {
  806       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  807                               Matcher::regName[reg_lo], offset);
  808       else         st->print("MOVSS  [ESP + #%d],%s",
  809                               offset, Matcher::regName[reg_lo]);
  810     }
  811 #endif
  812   }
  813   bool is_single_byte = false;
  814   if ((UseAVX > 2) && (offset != 0)) {
  815     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  816   }
  817   int offset_size = 0;
  818   if (UseAVX > 2 ) {
  819     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  820   } else {
  821     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  822   }
  823   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  824   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  825   return size+5+offset_size;
  826 }
  827 
  828 
  829 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  830                             int src_hi, int dst_hi, int size, outputStream* st ) {
  831   if (cbuf) {
  832     MacroAssembler _masm(cbuf);
  833     // EVEX spills remain EVEX: the choice between full EVEX, partial EVEX, and AVX is complex, so manage EVEX spill code one way.
  834     _masm.set_managed();
  835     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  836       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  837                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  838     } else {
  839       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  840                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  841     }
  842 #ifndef PRODUCT
  843   } else if (!do_size) {
  844     if (size != 0) st->print("\n\t");
  845     if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
  846       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  847         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  848       } else {
  849         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  850       }
  851     } else {
  852       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  853         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  854       } else {
  855         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  856       }
  857     }
  858 #endif
  859   }
  860   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  861   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  862   int sz = (UseAVX > 2) ? 6 : 4;
  863   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  864       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  865   return size + sz;
  866 }
  867 
  868 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  869                             int src_hi, int dst_hi, int size, outputStream* st ) {
  870   // 32-bit
  871   if (cbuf) {
  872     MacroAssembler _masm(cbuf);
  873     // EVEX spills remain EVEX: the choice between full EVEX, partial EVEX, and AVX is complex, so manage EVEX spill code one way.
  874     _masm.set_managed();
  875     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  876              as_Register(Matcher::_regEncode[src_lo]));
  877 #ifndef PRODUCT
  878   } else if (!do_size) {
  879     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  880 #endif
  881   }
  882   return (UseAVX> 2) ? 6 : 4;
  883 }
  884 
  885 
  886 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  887                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  888   // 32-bit
  889   if (cbuf) {
  890     MacroAssembler _masm(cbuf);
  891     // EVEX spills remain EVEX: the choice between full EVEX, partial EVEX, and AVX is complex, so manage EVEX spill code one way.
  892     _masm.set_managed();
  893     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  894              as_XMMRegister(Matcher::_regEncode[src_lo]));
  895 #ifndef PRODUCT
  896   } else if (!do_size) {
  897     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  898 #endif
  899   }
  900   return (UseAVX> 2) ? 6 : 4;
  901 }
  902 
  903 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  904   if( cbuf ) {
  905     emit_opcode(*cbuf, 0x8B );
  906     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  907 #ifndef PRODUCT
  908   } else if( !do_size ) {
  909     if( size != 0 ) st->print("\n\t");
  910     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  911 #endif
  912   }
  913   return size+2;
  914 }
  915 
  916 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  917                                  int offset, int size, outputStream* st ) {
  918   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  919     if( cbuf ) {
  920       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  921       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  922 #ifndef PRODUCT
  923     } else if( !do_size ) {
  924       if( size != 0 ) st->print("\n\t");
  925       st->print("FLD    %s",Matcher::regName[src_lo]);
  926 #endif
  927     }
  928     size += 2;
  929   }
  930 
  931   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  932   const char *op_str;
  933   int op;
  934   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  935     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  936     op = 0xDD;
  937   } else {                   // 32-bit store
  938     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  939     op = 0xD9;
  940     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  941   }
  942 
  943   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  944 }
  945 
  946 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  947 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  948                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  949 
  950 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  951                             int stack_offset, int reg, uint ireg, outputStream* st);
  952 
  953 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  954                                      int dst_offset, uint ireg, outputStream* st) {
  955   if (cbuf) {
  956     MacroAssembler _masm(cbuf);
  957     switch (ireg) {
  958     case Op_VecS:
  959       __ pushl(Address(rsp, src_offset));
  960       __ popl (Address(rsp, dst_offset));
  961       break;
  962     case Op_VecD:
  963       __ pushl(Address(rsp, src_offset));
  964       __ popl (Address(rsp, dst_offset));
  965       __ pushl(Address(rsp, src_offset+4));
  966       __ popl (Address(rsp, dst_offset+4));
  967       break;
  968     case Op_VecX:
  969       __ movdqu(Address(rsp, -16), xmm0);
  970       __ movdqu(xmm0, Address(rsp, src_offset));
  971       __ movdqu(Address(rsp, dst_offset), xmm0);
  972       __ movdqu(xmm0, Address(rsp, -16));
  973       break;
  974     case Op_VecY:
  975       __ vmovdqu(Address(rsp, -32), xmm0);
  976       __ vmovdqu(xmm0, Address(rsp, src_offset));
  977       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  978       __ vmovdqu(xmm0, Address(rsp, -32));
  979       break;
  980     case Op_VecZ:
  981       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  982       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  983       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  984       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  985       break;
  986     default:
  987       ShouldNotReachHere();
  988     }
  989 #ifndef PRODUCT
  990   } else {
  991     switch (ireg) {
  992     case Op_VecS:
  993       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  994                 "popl    [rsp + #%d]",
  995                 src_offset, dst_offset);
  996       break;
  997     case Op_VecD:
  998       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
  999                 "popq    [rsp + #%d]\n\t"
 1000                 "pushl   [rsp + #%d]\n\t"
 1001                 "popq    [rsp + #%d]",
 1002                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1003       break;
 1004      case Op_VecX:
 1005       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1006                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1007                 "movdqu  [rsp + #%d], xmm0\n\t"
 1008                 "movdqu  xmm0, [rsp - #16]",
 1009                 src_offset, dst_offset);
 1010       break;
 1011     case Op_VecY:
 1012       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1013                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1014                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1015                 "vmovdqu xmm0, [rsp - #32]",
 1016                 src_offset, dst_offset);
 1017       break;
 1018     case Op_VecZ:
 1019       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1020                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1021                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1022                 "vmovdqu xmm0, [rsp - #64]",
 1023                 src_offset, dst_offset);
 1024       break;
 1025     default:
 1026       ShouldNotReachHere();
 1027     }
 1028 #endif
 1029   }
 1030 }
 1031 
 1032 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1033   // Get registers to move
 1034   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1035   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1036   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1037   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1038 
 1039   enum RC src_second_rc = rc_class(src_second);
 1040   enum RC src_first_rc = rc_class(src_first);
 1041   enum RC dst_second_rc = rc_class(dst_second);
 1042   enum RC dst_first_rc = rc_class(dst_first);
 1043 
 1044   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1045 
 1046   // Generate spill code!
 1047   int size = 0;
 1048 
 1049   if( src_first == dst_first && src_second == dst_second )
 1050     return size;            // Self copy, no move
 1051 
 1052   if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
 1053     uint ireg = ideal_reg();
 1054     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1055     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1056     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1057     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1058       // mem -> mem
 1059       int src_offset = ra_->reg2offset(src_first);
 1060       int dst_offset = ra_->reg2offset(dst_first);
 1061       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1062     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1063       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1064     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1065       int stack_offset = ra_->reg2offset(dst_first);
 1066       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1067     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1068       int stack_offset = ra_->reg2offset(src_first);
 1069       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1070     } else {
 1071       ShouldNotReachHere();
 1072     }
 1073     return 0;
 1074   }
 1075 
 1076   // --------------------------------------
 1077   // Check for mem-mem move.  push/pop to move.
 1078   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1079     if( src_second == dst_first ) { // overlapping stack copy ranges
 1080       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1081       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1082       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1083       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1084     }
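          // Illustrative example of the overlap handled above: copying a long
          // from [ESP+4]/[ESP+8] to [ESP+8]/[ESP+12] would clobber the
          // source's high word if the low word were copied first, so the high
          // word is moved first and flagged rc_bad so the "mov second bits"
          // step below skips it.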
 1085     // move low bits
 1086     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1087     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1088     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1089       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1090       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1091     }
 1092     return size;
 1093   }
 1094 
 1095   // --------------------------------------
 1096   // Check for integer reg-reg copy
 1097   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1098     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1099 
 1100   // Check for integer store
 1101   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1102     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1103 
 1104   // Check for integer load
 1105   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1106     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1107 
 1108   // Check for integer reg-xmm reg copy
 1109   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1110     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1111             "no 64 bit integer-float reg moves" );
 1112     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1113   }
 1114   // --------------------------------------
 1115   // Check for float reg-reg copy
 1116   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1117     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1118             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1119     if( cbuf ) {
 1120 
 1121       // Note the mucking with the register encode to compensate for the 0/1
 1122       // indexing issue mentioned in a comment in the reg_def sections
 1123       // for FPR registers many lines above here.
 1124 
 1125       if( src_first != FPR1L_num ) {
 1126         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1127         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1128         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1129         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1130      } else {
 1131         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1132         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1133      }
 1134 #ifndef PRODUCT
 1135     } else if( !do_size ) {
 1136       if( size != 0 ) st->print("\n\t");
 1137       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1138       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1139 #endif
 1140     }
 1141     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1142   }
 1143 
 1144   // Check for float store
 1145   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1146     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1147   }
 1148 
 1149   // Check for float load
 1150   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1151     int offset = ra_->reg2offset(src_first);
 1152     const char *op_str;
 1153     int op;
 1154     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1155       op_str = "FLD_D";
 1156       op = 0xDD;
 1157     } else {                   // 32-bit load
 1158       op_str = "FLD_S";
 1159       op = 0xD9;
 1160       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1161     }
 1162     if( cbuf ) {
 1163       emit_opcode  (*cbuf, op );
 1164       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1165       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1166       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1167 #ifndef PRODUCT
 1168     } else if( !do_size ) {
 1169       if( size != 0 ) st->print("\n\t");
 1170       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1171 #endif
 1172     }
 1173     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1174     return size + 3+offset_size+2;
 1175   }
 1176 
 1177   // Check for xmm reg-reg copy
 1178   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1179     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1180             (src_first+1 == src_second && dst_first+1 == dst_second),
 1181             "no non-adjacent float-moves" );
 1182     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1183   }
 1184 
 1185   // Check for xmm reg-integer reg copy
 1186   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1187     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1188             "no 64 bit float-integer reg moves" );
 1189     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1190   }
 1191 
 1192   // Check for xmm store
 1193   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1194     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1195   }
 1196 
 1197   // Check for float xmm load
 1198   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1199     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1200   }
 1201 
 1202   // Copy from float reg to xmm reg
 1203   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1204     // copy to the top of stack from floating point reg
 1205     // and use LEA to preserve flags
 1206     if( cbuf ) {
 1207       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1208       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1209       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1210       emit_d8(*cbuf,0xF8);
 1211 #ifndef PRODUCT
 1212     } else if( !do_size ) {
 1213       if( size != 0 ) st->print("\n\t");
 1214       st->print("LEA    ESP,[ESP-8]");
 1215 #endif
 1216     }
 1217     size += 4;
 1218 
 1219     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1220 
 1221     // Copy from the temp memory to the xmm reg.
 1222     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1223 
 1224     if( cbuf ) {
 1225       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1226       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1227       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1228       emit_d8(*cbuf,0x08);
 1229 #ifndef PRODUCT
 1230     } else if( !do_size ) {
 1231       if( size != 0 ) st->print("\n\t");
 1232       st->print("LEA    ESP,[ESP+8]");
 1233 #endif
 1234     }
 1235     size += 4;
 1236     return size;
 1237   }
 1238 
 1239   // AVX-512 opmask specific spilling.
 1240   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1241     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1242     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1243     int offset = ra_->reg2offset(src_first);
 1244     if (cbuf != nullptr) {
 1245       MacroAssembler _masm(cbuf);
 1246       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1247 #ifndef PRODUCT
 1248     } else {
 1249       st->print("KMOV    %s, [ESP + %d]", Matcher::regName[dst_first], offset);
 1250 #endif
 1251     }
 1252     return 0;
 1253   }
 1254 
 1255   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1256     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1257     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1258     int offset = ra_->reg2offset(dst_first);
 1259     if (cbuf != nullptr) {
 1260       MacroAssembler _masm(cbuf);
 1261       __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1262 #ifndef PRODUCT
 1263     } else {
 1264       st->print("KMOV    [ESP + %d], %s", offset, Matcher::regName[src_first]);
 1265 #endif
 1266     }
 1267     return 0;
 1268   }
 1269 
 1270   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1271     Unimplemented();
 1272     return 0;
 1273   }
 1274 
 1275   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1276     Unimplemented();
 1277     return 0;
 1278   }
 1279 
 1280   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1281     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1282     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1283     if (cbuf != nullptr) {
 1284       MacroAssembler _masm(cbuf);
 1285       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1286 #ifndef PRODUCT
 1287     } else {
 1288       st->print("KMOV    %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
 1289 #endif
 1290     }
 1291     return 0;
 1292   }
 1293 
 1294   assert( size > 0, "missed a case" );
 1295 
 1296   // --------------------------------------------------------------------
  // Check for second word still needing moving.
 1298   if( src_second == dst_second )
 1299     return size;               // Self copy; no move
 1300   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1301 
 1302   // Check for second word int-int move
 1303   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1304     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1305 
 1306   // Check for second word integer store
 1307   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1308     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1309 
 1310   // Check for second word integer load
 1311   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1312     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1313 
 1314   Unimplemented();
 1315   return 0; // Mute compiler
 1316 }
 1317 
 1318 #ifndef PRODUCT
 1319 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1320   implementation( nullptr, ra_, false, st );
 1321 }
 1322 #endif
 1323 
 1324 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1325   implementation( &cbuf, ra_, false, nullptr );
 1326 }
 1327 
 1328 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1329   return MachNode::size(ra_);
 1330 }
 1331 
 1332 
 1333 //=============================================================================
 1334 #ifndef PRODUCT
 1335 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1336   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1337   int reg = ra_->get_reg_first(this);
 1338   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1339 }
 1340 #endif
 1341 
 1342 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1343   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1344   int reg = ra_->get_encode(this);
 1345   if( offset >= 128 ) {
 1346     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1347     emit_rm(cbuf, 0x2, reg, 0x04);
 1348     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1349     emit_d32(cbuf, offset);
 1350   }
 1351   else {
 1352     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1353     emit_rm(cbuf, 0x1, reg, 0x04);
 1354     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1355     emit_d8(cbuf, offset);
 1356   }
 1357 }
 1358 
 1359 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1360   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1361   if( offset >= 128 ) {
 1362     return 7;
 1363   }
 1364   else {
 1365     return 4;
 1366   }
 1367 }
 1368 
 1369 //=============================================================================
 1370 #ifndef PRODUCT
 1371 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1372   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1373   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1374   st->print_cr("\tNOP");
 1375   st->print_cr("\tNOP");
 1376   if( !OptoBreakpoint )
 1377     st->print_cr("\tNOP");
 1378 }
 1379 #endif
 1380 
 1381 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1382   MacroAssembler masm(&cbuf);
 1383 #ifdef ASSERT
 1384   uint insts_size = cbuf.insts_size();
 1385 #endif
 1386   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1387   masm.jump_cc(Assembler::notEqual,
 1388                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING: these NOPs are critical so that the verified entry point is
     properly aligned for patching by NativeJump::patch_verified_entry() */
 1391   int nops_cnt = 2;
 1392   if( !OptoBreakpoint ) // Leave space for int3
 1393      nops_cnt += 1;
 1394   masm.nop(nops_cnt);
 1395 
 1396   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1397 }
 1398 
 1399 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1400   return OptoBreakpoint ? 11 : 12;
 1401 }
 1402 
 1403 
 1404 //=============================================================================
 1405 
 1406 // Vector calling convention not supported.
 1407 bool Matcher::supports_vector_calling_convention() {
 1408   return false;
 1409 }
 1410 
 1411 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1412   Unimplemented();
 1413   return OptoRegPair(0, 0);
 1414 }
 1415 
 1416 // Is this branch offset short enough that a short branch can be used?
 1417 //
 1418 // NOTE: If the platform does not provide any short branch variants, then
 1419 //       this method should return false for offset 0.
 1420 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1421   // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 1424   offset -= br_size;
 1425 
 1426   // the short version of jmpConUCF2 contains multiple branches,
 1427   // making the reach slightly less
 1428   if (rule == jmpConUCF2_rule)
 1429     return (-126 <= offset && offset <= 125);
 1430   return (-128 <= offset && offset <= 127);
 1431 }
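
// Illustrative example (assumed values, not taken from a real compilation):
// a short Jcc is 2 bytes, so a branch whose target lies 100 bytes past the
// start of the branch arrives here with offset == 100; after subtracting
// br_size the displacement is 98, which fits in [-128, 127] and permits the
// short form.  A target 200 bytes away gives a displacement of 198 and
// forces the long form.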
 1432 
 1433 // Return whether or not this register is ever used as an argument.  This
 1434 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1435 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1437 bool Matcher::can_be_java_arg( int reg ) {
 1438   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1439   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1440   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1441   return false;
 1442 }
 1443 
 1444 bool Matcher::is_spillable_arg( int reg ) {
 1445   return can_be_java_arg(reg);
 1446 }
 1447 
 1448 uint Matcher::int_pressure_limit()
 1449 {
 1450   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1451 }
 1452 
 1453 uint Matcher::float_pressure_limit()
 1454 {
 1455   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1456 }
 1457 
 1458 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
 1464   return VM_Version::has_fast_idiv() &&
 1465          (divisor == (int)divisor && divisor != min_jint);
 1466 }
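
// Illustrative example (assumed values): divisor == 7 passes the check
// (7 == (int)7 and 7 != min_jint), so the IDIV-based rules can be used when
// the CPU has fast IDIV; divisor == 0x100000000LL fails because it does not
// fit in 32 bits ((int)divisor == 0), and divisor == min_jint is rejected
// explicitly, so those cases fall back to the multiply-based transformation.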
 1467 
 1468 // Register for DIVI projection of divmodI
 1469 RegMask Matcher::divI_proj_mask() {
 1470   return EAX_REG_mask();
 1471 }
 1472 
 1473 // Register for MODI projection of divmodI
 1474 RegMask Matcher::modI_proj_mask() {
 1475   return EDX_REG_mask();
 1476 }
 1477 
 1478 // Register for DIVL projection of divmodL
 1479 RegMask Matcher::divL_proj_mask() {
 1480   ShouldNotReachHere();
 1481   return RegMask();
 1482 }
 1483 
 1484 // Register for MODL projection of divmodL
 1485 RegMask Matcher::modL_proj_mask() {
 1486   ShouldNotReachHere();
 1487   return RegMask();
 1488 }
 1489 
 1490 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1491   return NO_REG_mask();
 1492 }
 1493 
// Returns true if the high 32 bits of the value are known to be zero.
 1495 bool is_operand_hi32_zero(Node* n) {
 1496   int opc = n->Opcode();
 1497   if (opc == Op_AndL) {
 1498     Node* o2 = n->in(2);
 1499     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1500       return true;
 1501     }
 1502   }
 1503   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1504     return true;
 1505   }
 1506   return false;
 1507 }
 1508 
 1509 %}
 1510 
 1511 //----------ENCODING BLOCK-----------------------------------------------------
 1512 // This block specifies the encoding classes used by the compiler to output
 1513 // byte streams.  Encoding classes generate functions which are called by
 1514 // Machine Instruction Nodes in order to generate the bit encoding of the
 1515 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.  REG_INTER causes an
 1518 // operand to generate a function which returns its register number when
 1519 // queried.   CONST_INTER causes an operand to generate a function which
 1520 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1521 // operand to generate four functions which return the Base Register, the
 1522 // Index Register, the Scale Value, and the Offset Value of the operand when
 1523 // queried.  COND_INTER causes an operand to generate six functions which
 1524 // return the encoding code (ie - encoding bits for the instruction)
 1525 // associated with each basic boolean condition for a conditional instruction.
 1526 // Instructions specify two basic values for encoding.  They use the
 1527 // ins_encode keyword to specify their encoding class (which must be one of
 1528 // the class names specified in the encoding block), and they use the
 1529 // opcode keyword to specify, in order, their primary, secondary, and
 1530 // tertiary opcode.  Only the opcode sections which a particular instruction
 1531 // needs for encoding need to be specified.
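//
// As a minimal illustrative sketch (hypothetical instruct name; OpcP and
// RegReg are enc classes defined in the block below, and a real rule would
// also declare its effect on eFlagsReg), an instruction might combine the
// two keywords like this:
//
//   instruct addI_example(rRegI dst, rRegI src) %{
//     match(Set dst (AddI dst src));
//     opcode(0x03);                            // primary opcode: ADD r32,r/m32
//     ins_encode( OpcP, RegReg( dst, src ) );  // primary opcode, then ModRM byte
//   %}
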
 1532 encode %{
 1533   // Build emit functions for each basic byte or larger field in the intel
 1534   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1535   // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In the future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically.
 1539 
 1540   // Emit primary opcode
 1541   enc_class OpcP %{
 1542     emit_opcode(cbuf, $primary);
 1543   %}
 1544 
 1545   // Emit secondary opcode
 1546   enc_class OpcS %{
 1547     emit_opcode(cbuf, $secondary);
 1548   %}
 1549 
 1550   // Emit opcode directly
 1551   enc_class Opcode(immI d8) %{
 1552     emit_opcode(cbuf, $d8$$constant);
 1553   %}
 1554 
 1555   enc_class SizePrefix %{
 1556     emit_opcode(cbuf,0x66);
 1557   %}
 1558 
 1559   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1560     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1561   %}
 1562 
 1563   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1564     emit_opcode(cbuf,$opcode$$constant);
 1565     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1566   %}
 1567 
 1568   enc_class mov_r32_imm0( rRegI dst ) %{
 1569     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1570     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1571   %}
 1572 
 1573   enc_class cdq_enc %{
 1574     // Full implementation of Java idiv and irem; checks for
 1575     // special case as described in JVM spec., p.243 & p.271.
 1576     //
 1577     //         normal case                           special case
 1578     //
    // input : eax: dividend                          min_int
    //         reg: divisor                          -1
    //
    // output: eax: quotient  (= eax idiv reg)         min_int
    //         edx: remainder (= eax irem reg)         0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        ecx
 1595     //                  done:
 1596     //
 1597     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1598     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1599     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1600     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1601     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1602     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1603     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
 1604     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
 1605     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1606     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1607     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1608     // normal_case:
 1609     emit_opcode(cbuf,0x99);                                         // cdq
 1610     // idiv (note: must be emitted by the user of this rule)
 1611     // normal:
 1612   %}
 1613 
 1614   // Dense encoding for older common ops
 1615   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1616     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1617   %}
 1618 
 1619 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
 1621   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1622     // Check for 8-bit immediate, and set sign extend bit in opcode
 1623     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1624       emit_opcode(cbuf, $primary | 0x02);
 1625     }
 1626     else {                          // If 32-bit immediate
 1627       emit_opcode(cbuf, $primary);
 1628     }
 1629   %}
 1630 
 1631   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1632     // Emit primary opcode and set sign-extend bit
 1633     // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
 1637       emit_opcode(cbuf, $primary);
 1638     }
 1639     // Emit r/m byte with secondary opcode, after primary opcode.
 1640     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1641   %}
 1642 
 1643   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1644     // Check for 8-bit immediate, and set sign extend bit in opcode
 1645     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1646       $$$emit8$imm$$constant;
 1647     }
 1648     else {                          // If 32-bit immediate
 1649       // Output immediate
 1650       $$$emit32$imm$$constant;
 1651     }
 1652   %}
 1653 
 1654   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1655     // Emit primary opcode and set sign-extend bit
 1656     // Check for 8-bit immediate, and set sign extend bit in opcode
 1657     int con = (int)$imm$$constant; // Throw away top bits
 1658     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1659     // Emit r/m byte with secondary opcode, after primary opcode.
 1660     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1661     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1662     else                               emit_d32(cbuf,con);
 1663   %}
 1664 
 1665   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1666     // Emit primary opcode and set sign-extend bit
 1667     // Check for 8-bit immediate, and set sign extend bit in opcode
 1668     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1669     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1670     // Emit r/m byte with tertiary opcode, after primary opcode.
 1671     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1672     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1673     else                               emit_d32(cbuf,con);
 1674   %}
 1675 
 1676   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1677     emit_cc(cbuf, $secondary, $dst$$reg );
 1678   %}
 1679 
 1680   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1681     int destlo = $dst$$reg;
 1682     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1683     // bswap lo
 1684     emit_opcode(cbuf, 0x0F);
 1685     emit_cc(cbuf, 0xC8, destlo);
 1686     // bswap hi
 1687     emit_opcode(cbuf, 0x0F);
 1688     emit_cc(cbuf, 0xC8, desthi);
 1689     // xchg lo and hi
 1690     emit_opcode(cbuf, 0x87);
 1691     emit_rm(cbuf, 0x3, destlo, desthi);
 1692   %}
 1693 
 1694   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1695     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1696   %}
 1697 
 1698   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1699     $$$emit8$primary;
 1700     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1701   %}
 1702 
 1703   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1704     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1705     emit_d8(cbuf, op >> 8 );
 1706     emit_d8(cbuf, op & 255);
 1707   %}
 1708 
 1709   // emulate a CMOV with a conditional branch around a MOV
 1710   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1711     // Invert sense of branch from sense of CMOV
 1712     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1713     emit_d8( cbuf, $brOffs$$constant );
 1714   %}
 1715 
 1716   enc_class enc_PartialSubtypeCheck( ) %{
 1717     Register Redi = as_Register(EDI_enc); // result register
 1718     Register Reax = as_Register(EAX_enc); // super class
 1719     Register Recx = as_Register(ECX_enc); // killed
 1720     Register Resi = as_Register(ESI_enc); // sub class
 1721     Label miss;
 1722 
 1723     MacroAssembler _masm(&cbuf);
 1724     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1725                                      nullptr, &miss,
 1726                                      /*set_cond_codes:*/ true);
 1727     if ($primary) {
 1728       __ xorptr(Redi, Redi);
 1729     }
 1730     __ bind(miss);
 1731   %}
 1732 
 1733   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1734     MacroAssembler masm(&cbuf);
 1735     int start = masm.offset();
 1736     if (UseSSE >= 2) {
 1737       if (VerifyFPU) {
 1738         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1739       }
 1740     } else {
 1741       // External c_calling_convention expects the FPU stack to be 'clean'.
 1742       // Compiled code leaves it dirty.  Do cleanup now.
 1743       masm.empty_FPU_stack();
 1744     }
 1745     if (sizeof_FFree_Float_Stack_All == -1) {
 1746       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1747     } else {
 1748       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1749     }
 1750   %}
 1751 
 1752   enc_class Verify_FPU_For_Leaf %{
 1753     if( VerifyFPU ) {
 1754       MacroAssembler masm(&cbuf);
 1755       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1756     }
 1757   %}
 1758 
 1759   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1760     // This is the instruction starting address for relocation info.
 1761     MacroAssembler _masm(&cbuf);
 1762     cbuf.set_insts_mark();
 1763     $$$emit8$primary;
 1764     // CALL directly to the runtime
 1765     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1766                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1767     __ post_call_nop();
 1768 
 1769     if (UseSSE >= 2) {
 1770       MacroAssembler _masm(&cbuf);
 1771       BasicType rt = tf()->return_type();
 1772 
 1773       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1774         // A C runtime call where the return value is unused.  In SSE2+
 1775         // mode the result needs to be removed from the FPU stack.  It's
 1776         // likely that this function call could be removed by the
 1777         // optimizer if the C function is a pure function.
 1778         __ ffree(0);
 1779       } else if (rt == T_FLOAT) {
 1780         __ lea(rsp, Address(rsp, -4));
 1781         __ fstp_s(Address(rsp, 0));
 1782         __ movflt(xmm0, Address(rsp, 0));
 1783         __ lea(rsp, Address(rsp,  4));
 1784       } else if (rt == T_DOUBLE) {
 1785         __ lea(rsp, Address(rsp, -8));
 1786         __ fstp_d(Address(rsp, 0));
 1787         __ movdbl(xmm0, Address(rsp, 0));
 1788         __ lea(rsp, Address(rsp,  8));
 1789       }
 1790     }
 1791   %}
 1792 
 1793   enc_class pre_call_resets %{
 1794     // If method sets FPU control word restore it here
 1795     debug_only(int off0 = cbuf.insts_size());
 1796     if (ra_->C->in_24_bit_fp_mode()) {
 1797       MacroAssembler _masm(&cbuf);
 1798       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1799     }
 1800     // Clear upper bits of YMM registers when current compiled code uses
 1801     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1802     MacroAssembler _masm(&cbuf);
 1803     __ vzeroupper();
 1804     debug_only(int off1 = cbuf.insts_size());
 1805     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1806   %}
 1807 
 1808   enc_class post_call_FPU %{
 1809     // If method sets FPU control word do it here also
 1810     if (Compile::current()->in_24_bit_fp_mode()) {
 1811       MacroAssembler masm(&cbuf);
 1812       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1813     }
 1814   %}
 1815 
 1816   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1817     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1818     // who we intended to call.
 1819     MacroAssembler _masm(&cbuf);
 1820     cbuf.set_insts_mark();
 1821     $$$emit8$primary;
 1822 
 1823     if (!_method) {
 1824       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1825                      runtime_call_Relocation::spec(),
 1826                      RELOC_IMM32);
 1827       __ post_call_nop();
 1828     } else {
 1829       int method_index = resolved_method_index(cbuf);
 1830       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1831                                                   : static_call_Relocation::spec(method_index);
 1832       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1833                      rspec, RELOC_DISP32);
 1834       __ post_call_nop();
 1835       address mark = cbuf.insts_mark();
 1836       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1837         // Calls of the same statically bound method can share
 1838         // a stub to the interpreter.
 1839         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 1840       } else {
 1841         // Emit stubs for static call.
 1842         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 1843         if (stub == nullptr) {
 1844           ciEnv::current()->record_failure("CodeCache is full");
 1845           return;
 1846         }
 1847       }
 1848     }
 1849   %}
 1850 
 1851   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1852     MacroAssembler _masm(&cbuf);
 1853     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1854     __ post_call_nop();
 1855   %}
 1856 
 1857   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1858     int disp = in_bytes(Method::from_compiled_offset());
 1859     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1860 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
 1862     MacroAssembler _masm(&cbuf);
 1863     cbuf.set_insts_mark();
 1864     $$$emit8$primary;
 1865     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1866     emit_d8(cbuf, disp);             // Displacement
 1867     __ post_call_nop();
 1868   %}
 1869 
 1870 //   Following encoding is no longer used, but may be restored if calling
 1871 //   convention changes significantly.
 1872 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1873 //
 1874 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1875 //     // int ic_reg     = Matcher::inline_cache_reg();
 1876 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1877 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1878 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1879 //
 1880 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1881 //     // // so we load it immediately before the call
 1882 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1883 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1884 //
 1885 //     // xor rbp,ebp
 1886 //     emit_opcode(cbuf, 0x33);
 1887 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1888 //
 1889 //     // CALL to interpreter.
 1890 //     cbuf.set_insts_mark();
 1891 //     $$$emit8$primary;
 1892 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1893 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1894 //   %}
 1895 
 1896   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1897     $$$emit8$primary;
 1898     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1899     $$$emit8$shift$$constant;
 1900   %}
 1901 
 1902   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1903     // Load immediate does not have a zero or sign extended version
 1904     // for 8-bit immediates
 1905     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1906     $$$emit32$src$$constant;
 1907   %}
 1908 
 1909   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1910     // Load immediate does not have a zero or sign extended version
 1911     // for 8-bit immediates
 1912     emit_opcode(cbuf, $primary + $dst$$reg);
 1913     $$$emit32$src$$constant;
 1914   %}
 1915 
 1916   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1917     // Load immediate does not have a zero or sign extended version
 1918     // for 8-bit immediates
 1919     int dst_enc = $dst$$reg;
 1920     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1921     if (src_con == 0) {
 1922       // xor dst, dst
 1923       emit_opcode(cbuf, 0x33);
 1924       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1925     } else {
 1926       emit_opcode(cbuf, $primary + dst_enc);
 1927       emit_d32(cbuf, src_con);
 1928     }
 1929   %}
 1930 
 1931   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1932     // Load immediate does not have a zero or sign extended version
 1933     // for 8-bit immediates
 1934     int dst_enc = $dst$$reg + 2;
 1935     int src_con = ((julong)($src$$constant)) >> 32;
 1936     if (src_con == 0) {
 1937       // xor dst, dst
 1938       emit_opcode(cbuf, 0x33);
 1939       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1940     } else {
 1941       emit_opcode(cbuf, $primary + dst_enc);
 1942       emit_d32(cbuf, src_con);
 1943     }
 1944   %}
 1945 
 1946 
 1947   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1948   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1949     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1950   %}
 1951 
 1952   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1953     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1954   %}
 1955 
 1956   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1957     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1958   %}
 1959 
 1960   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1961     $$$emit8$primary;
 1962     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1963   %}
 1964 
 1965   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1966     $$$emit8$secondary;
 1967     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1968   %}
 1969 
 1970   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1971     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1972   %}
 1973 
 1974   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1975     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1976   %}
 1977 
 1978   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1979     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1980   %}
 1981 
 1982   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1983     // Output immediate
 1984     $$$emit32$src$$constant;
 1985   %}
 1986 
 1987   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1988     // Output Float immediate bits
 1989     jfloat jf = $src$$constant;
 1990     int    jf_as_bits = jint_cast( jf );
 1991     emit_d32(cbuf, jf_as_bits);
 1992   %}
 1993 
 1994   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1995     // Output Float immediate bits
 1996     jfloat jf = $src$$constant;
 1997     int    jf_as_bits = jint_cast( jf );
 1998     emit_d32(cbuf, jf_as_bits);
 1999   %}
 2000 
 2001   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 2002     // Output immediate
 2003     $$$emit16$src$$constant;
 2004   %}
 2005 
 2006   enc_class Con_d32(immI src) %{
 2007     emit_d32(cbuf,$src$$constant);
 2008   %}
 2009 
 2010   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 2011     // Output immediate memory reference
 2012     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 2013     emit_d32(cbuf, 0x00);
 2014   %}
 2015 
 2016   enc_class lock_prefix( ) %{
 2017     emit_opcode(cbuf,0xF0);         // [Lock]
 2018   %}
 2019 
 2020   // Cmp-xchg long value.
  // Note: we need to swap ebx and ecx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       ecx as the high order word of the new value to store but
  //       our register encoding uses ebx.
 2025   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2026 
    // XCHG  ebx,ecx
 2028     emit_opcode(cbuf,0x87);
 2029     emit_opcode(cbuf,0xD9);
 2030     // [Lock]
 2031     emit_opcode(cbuf,0xF0);
 2032     // CMPXCHG8 [Eptr]
 2033     emit_opcode(cbuf,0x0F);
 2034     emit_opcode(cbuf,0xC7);
 2035     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  ebx,ecx
 2037     emit_opcode(cbuf,0x87);
 2038     emit_opcode(cbuf,0xD9);
 2039   %}
 2040 
 2041   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2042     // [Lock]
 2043     emit_opcode(cbuf,0xF0);
 2044 
 2045     // CMPXCHG [Eptr]
 2046     emit_opcode(cbuf,0x0F);
 2047     emit_opcode(cbuf,0xB1);
 2048     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2049   %}
 2050 
 2051   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2052     // [Lock]
 2053     emit_opcode(cbuf,0xF0);
 2054 
 2055     // CMPXCHGB [Eptr]
 2056     emit_opcode(cbuf,0x0F);
 2057     emit_opcode(cbuf,0xB0);
 2058     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2059   %}
 2060 
 2061   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2062     // [Lock]
 2063     emit_opcode(cbuf,0xF0);
 2064 
 2065     // 16-bit mode
 2066     emit_opcode(cbuf, 0x66);
 2067 
 2068     // CMPXCHGW [Eptr]
 2069     emit_opcode(cbuf,0x0F);
 2070     emit_opcode(cbuf,0xB1);
 2071     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2072   %}
 2073 
 2074   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2075     int res_encoding = $res$$reg;
 2076 
 2077     // MOV  res,0
 2078     emit_opcode( cbuf, 0xB8 + res_encoding);
 2079     emit_d32( cbuf, 0 );
 2080     // JNE,s  fail
 2081     emit_opcode(cbuf,0x75);
 2082     emit_d8(cbuf, 5 );
 2083     // MOV  res,1
 2084     emit_opcode( cbuf, 0xB8 + res_encoding);
 2085     emit_d32( cbuf, 1 );
 2086     // fail:
 2087   %}
 2088 
 2089   enc_class set_instruction_start( ) %{
 2090     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2091   %}
 2092 
 2093   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2094     int reg_encoding = $ereg$$reg;
 2095     int base  = $mem$$base;
 2096     int index = $mem$$index;
 2097     int scale = $mem$$scale;
 2098     int displace = $mem$$disp;
 2099     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2100     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2101   %}
 2102 
 2103   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2104     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2105     int base  = $mem$$base;
 2106     int index = $mem$$index;
 2107     int scale = $mem$$scale;
 2108     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2109     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2110     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2111   %}
 2112 
 2113   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2114     int r1, r2;
 2115     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2116     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2117     emit_opcode(cbuf,0x0F);
 2118     emit_opcode(cbuf,$tertiary);
 2119     emit_rm(cbuf, 0x3, r1, r2);
 2120     emit_d8(cbuf,$cnt$$constant);
 2121     emit_d8(cbuf,$primary);
 2122     emit_rm(cbuf, 0x3, $secondary, r1);
 2123     emit_d8(cbuf,$cnt$$constant);
 2124   %}
 2125 
 2126   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2127     emit_opcode( cbuf, 0x8B ); // Move
 2128     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2129     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2130       emit_d8(cbuf,$primary);
 2131       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2132       emit_d8(cbuf,$cnt$$constant-32);
 2133     }
 2134     emit_d8(cbuf,$primary);
 2135     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2136     emit_d8(cbuf,31);
 2137   %}
 2138 
 2139   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2140     int r1, r2;
 2141     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2142     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2143 
 2144     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2145     emit_rm(cbuf, 0x3, r1, r2);
 2146     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2147       emit_opcode(cbuf,$primary);
 2148       emit_rm(cbuf, 0x3, $secondary, r1);
 2149       emit_d8(cbuf,$cnt$$constant-32);
 2150     }
 2151     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2152     emit_rm(cbuf, 0x3, r2, r2);
 2153   %}
 2154 
 2155   // Clone of RegMem but accepts an extra parameter to access each
 2156   // half of a double in memory; it never needs relocation info.
 2157   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2158     emit_opcode(cbuf,$opcode$$constant);
 2159     int reg_encoding = $rm_reg$$reg;
 2160     int base     = $mem$$base;
 2161     int index    = $mem$$index;
 2162     int scale    = $mem$$scale;
 2163     int displace = $mem$$disp + $disp_for_half$$constant;
 2164     relocInfo::relocType disp_reloc = relocInfo::none;
 2165     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2166   %}
 2167 
 2168   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2169   //
 2170   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2171   // and it never needs relocation information.
 2172   // Frequently used to move data between FPU's Stack Top and memory.
 2173   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2174     int rm_byte_opcode = $rm_opcode$$constant;
 2175     int base     = $mem$$base;
 2176     int index    = $mem$$index;
 2177     int scale    = $mem$$scale;
 2178     int displace = $mem$$disp;
 2179     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2180     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2181   %}
 2182 
 2183   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2184     int rm_byte_opcode = $rm_opcode$$constant;
 2185     int base     = $mem$$base;
 2186     int index    = $mem$$index;
 2187     int scale    = $mem$$scale;
 2188     int displace = $mem$$disp;
 2189     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2190     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2191   %}
 2192 
 2193   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2194     int reg_encoding = $dst$$reg;
 2195     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2196     int index        = 0x04;            // 0x04 indicates no index
 2197     int scale        = 0x00;            // 0x00 indicates no scale
 2198     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2199     relocInfo::relocType disp_reloc = relocInfo::none;
 2200     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2201   %}
 2202 
 2203   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2204     // Compare dst,src
 2205     emit_opcode(cbuf,0x3B);
 2206     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2207     // jmp dst < src around move
 2208     emit_opcode(cbuf,0x7C);
 2209     emit_d8(cbuf,2);
 2210     // move dst,src
 2211     emit_opcode(cbuf,0x8B);
 2212     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2213   %}
 2214 
 2215   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2216     // Compare dst,src
 2217     emit_opcode(cbuf,0x3B);
 2218     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2219     // jmp dst > src around move
 2220     emit_opcode(cbuf,0x7F);
 2221     emit_d8(cbuf,2);
 2222     // move dst,src
 2223     emit_opcode(cbuf,0x8B);
 2224     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2225   %}
 2226 
 2227   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2228     // If src is FPR1, we can just FST to store it.
 2229     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2230     int reg_encoding = 0x2; // Just store
 2231     int base  = $mem$$base;
 2232     int index = $mem$$index;
 2233     int scale = $mem$$scale;
 2234     int displace = $mem$$disp;
 2235     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2236     if( $src$$reg != FPR1L_enc ) {
 2237       reg_encoding = 0x3;  // Store & pop
 2238       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2239       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2240     }
 2241     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2242     emit_opcode(cbuf,$primary);
 2243     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2244   %}
 2245 
 2246   enc_class neg_reg(rRegI dst) %{
 2247     // NEG $dst
 2248     emit_opcode(cbuf,0xF7);
 2249     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2250   %}
 2251 
 2252   enc_class setLT_reg(eCXRegI dst) %{
 2253     // SETLT $dst
 2254     emit_opcode(cbuf,0x0F);
 2255     emit_opcode(cbuf,0x9C);
 2256     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2257   %}
 2258 
 2259   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2260     int tmpReg = $tmp$$reg;
 2261 
 2262     // SUB $p,$q
 2263     emit_opcode(cbuf,0x2B);
 2264     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2265     // SBB $tmp,$tmp
 2266     emit_opcode(cbuf,0x1B);
 2267     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2268     // AND $tmp,$y
 2269     emit_opcode(cbuf,0x23);
 2270     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2271     // ADD $p,$tmp
 2272     emit_opcode(cbuf,0x03);
 2273     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2274   %}
 2275 
 2276   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2277     // TEST shift,32
 2278     emit_opcode(cbuf,0xF7);
 2279     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2280     emit_d32(cbuf,0x20);
 2281     // JEQ,s small
 2282     emit_opcode(cbuf, 0x74);
 2283     emit_d8(cbuf, 0x04);
 2284     // MOV    $dst.hi,$dst.lo
 2285     emit_opcode( cbuf, 0x8B );
 2286     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2287     // CLR    $dst.lo
 2288     emit_opcode(cbuf, 0x33);
 2289     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2290 // small:
 2291     // SHLD   $dst.hi,$dst.lo,$shift
 2292     emit_opcode(cbuf,0x0F);
 2293     emit_opcode(cbuf,0xA5);
 2294     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL    $dst.lo,$shift
 2296     emit_opcode(cbuf,0xD3);
 2297     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2298   %}
 2299 
 2300   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2301     // TEST shift,32
 2302     emit_opcode(cbuf,0xF7);
 2303     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2304     emit_d32(cbuf,0x20);
 2305     // JEQ,s small
 2306     emit_opcode(cbuf, 0x74);
 2307     emit_d8(cbuf, 0x04);
 2308     // MOV    $dst.lo,$dst.hi
 2309     emit_opcode( cbuf, 0x8B );
 2310     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2311     // CLR    $dst.hi
 2312     emit_opcode(cbuf, 0x33);
 2313     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2314 // small:
 2315     // SHRD   $dst.lo,$dst.hi,$shift
 2316     emit_opcode(cbuf,0x0F);
 2317     emit_opcode(cbuf,0xAD);
 2318     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
 2320     emit_opcode(cbuf,0xD3);
 2321     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2322   %}
 2323 
 2324   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2325     // TEST shift,32
 2326     emit_opcode(cbuf,0xF7);
 2327     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2328     emit_d32(cbuf,0x20);
 2329     // JEQ,s small
 2330     emit_opcode(cbuf, 0x74);
 2331     emit_d8(cbuf, 0x05);
 2332     // MOV    $dst.lo,$dst.hi
 2333     emit_opcode( cbuf, 0x8B );
 2334     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2335     // SAR    $dst.hi,31
 2336     emit_opcode(cbuf, 0xC1);
 2337     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2338     emit_d8(cbuf, 0x1F );
 2339 // small:
 2340     // SHRD   $dst.lo,$dst.hi,$shift
 2341     emit_opcode(cbuf,0x0F);
 2342     emit_opcode(cbuf,0xAD);
 2343     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
 2345     emit_opcode(cbuf,0xD3);
 2346     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2347   %}
 2348 
 2349 
 2350   // ----------------- Encodings for floating point unit -----------------
 2351   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2352   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2353     $$$emit8$primary;
 2354     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2355   %}
 2356 
 2357   // Pop argument in FPR0 with FSTP ST(0)
 2358   enc_class PopFPU() %{
 2359     emit_opcode( cbuf, 0xDD );
 2360     emit_d8( cbuf, 0xD8 );
 2361   %}
 2362 
 2363   // !!!!! equivalent to Pop_Reg_F
 2364   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2365     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2366     emit_d8( cbuf, 0xD8+$dst$$reg );
 2367   %}
 2368 
 2369   enc_class Push_Reg_DPR( regDPR dst ) %{
 2370     emit_opcode( cbuf, 0xD9 );
 2371     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2372   %}
 2373 
 2374   enc_class strictfp_bias1( regDPR dst ) %{
 2375     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2376     emit_opcode( cbuf, 0x2D );
 2377     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2378     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2379     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2380   %}
 2381 
 2382   enc_class strictfp_bias2( regDPR dst ) %{
 2383     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2384     emit_opcode( cbuf, 0x2D );
 2385     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2386     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2387     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2388   %}
 2389 
 2390   // Special case for moving an integer register to a stack slot.
 2391   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2392     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2393   %}
 2394 
 2395   // Special case for moving a register to a stack slot.
 2396   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2397     // Opcode already emitted
 2398     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2399     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2400     emit_d32(cbuf, $dst$$disp);   // Displacement
 2401   %}
 2402 
 2403   // Push the integer in stackSlot 'src' onto FP-stack
 2404   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2405     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2406   %}
 2407 
 2408   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2409   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2410     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2411   %}
 2412 
 2413   // Same as Pop_Mem_F except for opcode
 2414   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2415   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2416     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2417   %}
 2418 
 2419   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2420     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2421     emit_d8( cbuf, 0xD8+$dst$$reg );
 2422   %}
 2423 
 2424   enc_class Push_Reg_FPR( regFPR dst ) %{
 2425     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2426     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2427   %}
 2428 
 2429   // Push FPU's float to a stack-slot, and pop FPU-stack
 2430   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2431     int pop = 0x02;
 2432     if ($src$$reg != FPR1L_enc) {
 2433       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2434       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2435       pop = 0x03;
 2436     }
 2437     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2438   %}
 2439 
 2440   // Push FPU's double to a stack-slot, and pop FPU-stack
 2441   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2442     int pop = 0x02;
 2443     if ($src$$reg != FPR1L_enc) {
 2444       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2445       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2446       pop = 0x03;
 2447     }
 2448     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2449   %}
 2450 
 2451   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2452   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2453     int pop = 0xD0 - 1; // -1 since we skip FLD
 2454     if ($src$$reg != FPR1L_enc) {
 2455       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2456       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2457       pop = 0xD8;
 2458     }
 2459     emit_opcode( cbuf, 0xDD );
 2460     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2461   %}
 2462 
 2463 
 2464   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2465     // load dst in FPR0
 2466     emit_opcode( cbuf, 0xD9 );
 2467     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2468     if ($src$$reg != FPR1L_enc) {
 2469       // fincstp
 2470       emit_opcode (cbuf, 0xD9);
 2471       emit_opcode (cbuf, 0xF7);
 2472       // swap src with FPR1:
 2473       // FXCH FPR1 with src
 2474       emit_opcode(cbuf, 0xD9);
 2475       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2476       // fdecstp
 2477       emit_opcode (cbuf, 0xD9);
 2478       emit_opcode (cbuf, 0xF6);
 2479     }
 2480   %}
 2481 
 2482   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2483     MacroAssembler _masm(&cbuf);
 2484     __ subptr(rsp, 8);
 2485     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2486     __ fld_d(Address(rsp, 0));
 2487     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2488     __ fld_d(Address(rsp, 0));
 2489   %}
 2490 
 2491   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2492     MacroAssembler _masm(&cbuf);
 2493     __ subptr(rsp, 4);
 2494     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2495     __ fld_s(Address(rsp, 0));
 2496     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2497     __ fld_s(Address(rsp, 0));
 2498   %}
 2499 
 2500   enc_class Push_ResultD(regD dst) %{
 2501     MacroAssembler _masm(&cbuf);
 2502     __ fstp_d(Address(rsp, 0));
 2503     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2504     __ addptr(rsp, 8);
 2505   %}
 2506 
 2507   enc_class Push_ResultF(regF dst, immI d8) %{
 2508     MacroAssembler _masm(&cbuf);
 2509     __ fstp_s(Address(rsp, 0));
 2510     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2511     __ addptr(rsp, $d8$$constant);
 2512   %}
 2513 
 2514   enc_class Push_SrcD(regD src) %{
 2515     MacroAssembler _masm(&cbuf);
 2516     __ subptr(rsp, 8);
 2517     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2518     __ fld_d(Address(rsp, 0));
 2519   %}
 2520 
 2521   enc_class push_stack_temp_qword() %{
 2522     MacroAssembler _masm(&cbuf);
 2523     __ subptr(rsp, 8);
 2524   %}
 2525 
 2526   enc_class pop_stack_temp_qword() %{
 2527     MacroAssembler _masm(&cbuf);
 2528     __ addptr(rsp, 8);
 2529   %}
 2530 
 2531   enc_class push_xmm_to_fpr1(regD src) %{
 2532     MacroAssembler _masm(&cbuf);
 2533     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2534     __ fld_d(Address(rsp, 0));
 2535   %}
 2536 
 2537   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2538     if ($src$$reg != FPR1L_enc) {
 2539       // fincstp
 2540       emit_opcode (cbuf, 0xD9);
 2541       emit_opcode (cbuf, 0xF7);
 2542       // FXCH FPR1 with src
 2543       emit_opcode(cbuf, 0xD9);
 2544       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2545       // fdecstp
 2546       emit_opcode (cbuf, 0xD9);
 2547       emit_opcode (cbuf, 0xF6);
 2548     }
 2549     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2550     // // FSTP   FPR$dst$$reg
 2551     // emit_opcode( cbuf, 0xDD );
 2552     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2553   %}
 2554 
 2555   enc_class fnstsw_sahf_skip_parity() %{
 2556     // fnstsw ax
 2557     emit_opcode( cbuf, 0xDF );
 2558     emit_opcode( cbuf, 0xE0 );
 2559     // sahf
 2560     emit_opcode( cbuf, 0x9E );
 2561     // jnp  ::skip
 2562     emit_opcode( cbuf, 0x7B );
 2563     emit_opcode( cbuf, 0x05 );
 2564   %}
 2565 
 2566   enc_class emitModDPR() %{
 2567     // fprem must be iterative
 2568     // :: loop
 2569     // fprem
 2570     emit_opcode( cbuf, 0xD9 );
 2571     emit_opcode( cbuf, 0xF8 );
 2572     // wait
 2573     emit_opcode( cbuf, 0x9b );
 2574     // fnstsw ax
 2575     emit_opcode( cbuf, 0xDF );
 2576     emit_opcode( cbuf, 0xE0 );
 2577     // sahf
 2578     emit_opcode( cbuf, 0x9E );
 2579     // jp  ::loop
 2580     emit_opcode( cbuf, 0x0F );
 2581     emit_opcode( cbuf, 0x8A );
 2582     emit_opcode( cbuf, 0xF4 );
 2583     emit_opcode( cbuf, 0xFF );
 2584     emit_opcode( cbuf, 0xFF );
 2585     emit_opcode( cbuf, 0xFF );
 2586   %}
 2587 
 2588   enc_class fpu_flags() %{
 2589     // fnstsw_ax
 2590     emit_opcode( cbuf, 0xDF);
 2591     emit_opcode( cbuf, 0xE0);
 2592     // test ax,0x0400
 2593     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2594     emit_opcode( cbuf, 0xA9 );
 2595     emit_d16   ( cbuf, 0x0400 );
 2596     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2597     // // test rax,0x0400
 2598     // emit_opcode( cbuf, 0xA9 );
 2599     // emit_d32   ( cbuf, 0x00000400 );
 2600     //
 2601     // jz exit (no unordered comparison)
 2602     emit_opcode( cbuf, 0x74 );
 2603     emit_d8    ( cbuf, 0x02 );
 2604     // mov ah,1 - treat as LT case (set carry flag)
 2605     emit_opcode( cbuf, 0xB4 );
 2606     emit_d8    ( cbuf, 0x01 );
 2607     // sahf
 2608     emit_opcode( cbuf, 0x9E);
 2609   %}
 2610 
 2611   enc_class cmpF_P6_fixup() %{
 2612     // Fixup the integer flags in case comparison involved a NaN
 2613     //
 2614     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2615     emit_opcode( cbuf, 0x7B );
 2616     emit_d8    ( cbuf, 0x03 );
 2617     // MOV AH,1 - treat as LT case (set carry flag)
 2618     emit_opcode( cbuf, 0xB4 );
 2619     emit_d8    ( cbuf, 0x01 );
 2620     // SAHF
 2621     emit_opcode( cbuf, 0x9E);
 2622     // NOP     // target for branch to avoid branch to branch
 2623     emit_opcode( cbuf, 0x90);
 2624   %}
 2625 
 2626 //     fnstsw_ax();
 2627 //     sahf();
 2628 //     movl(dst, nan_result);
 2629 //     jcc(Assembler::parity, exit);
 2630 //     movl(dst, less_result);
 2631 //     jcc(Assembler::below, exit);
 2632 //     movl(dst, equal_result);
 2633 //     jcc(Assembler::equal, exit);
 2634 //     movl(dst, greater_result);
 2635 
 2636 // less_result     =  1;
 2637 // greater_result  = -1;
 2638 // equal_result    = 0;
 2639 // nan_result      = -1;
 2640 
 2641   enc_class CmpF_Result(rRegI dst) %{
 2642     // fnstsw_ax();
 2643     emit_opcode( cbuf, 0xDF);
 2644     emit_opcode( cbuf, 0xE0);
 2645     // sahf
 2646     emit_opcode( cbuf, 0x9E);
 2647     // movl(dst, nan_result);
 2648     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2649     emit_d32( cbuf, -1 );
 2650     // jcc(Assembler::parity, exit);
 2651     emit_opcode( cbuf, 0x7A );
 2652     emit_d8    ( cbuf, 0x13 );
 2653     // movl(dst, less_result);
 2654     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2655     emit_d32( cbuf, -1 );
 2656     // jcc(Assembler::below, exit);
 2657     emit_opcode( cbuf, 0x72 );
 2658     emit_d8    ( cbuf, 0x0C );
 2659     // movl(dst, equal_result);
 2660     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2661     emit_d32( cbuf, 0 );
 2662     // jcc(Assembler::equal, exit);
 2663     emit_opcode( cbuf, 0x74 );
 2664     emit_d8    ( cbuf, 0x05 );
 2665     // movl(dst, greater_result);
 2666     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2667     emit_d32( cbuf, 1 );
 2668   %}
 2669 
 2670 
 2671   // Compare the longs and set flags
 2672   // BROKEN!  Do Not use as-is
 2673   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2674     // CMP    $src1.hi,$src2.hi
 2675     emit_opcode( cbuf, 0x3B );
 2676     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2677     // JNE,s  done
 2678     emit_opcode(cbuf,0x75);
 2679     emit_d8(cbuf, 2 );
 2680     // CMP    $src1.lo,$src2.lo
 2681     emit_opcode( cbuf, 0x3B );
 2682     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2683 // done:
 2684   %}
 2685 
 2686   enc_class convert_int_long( regL dst, rRegI src ) %{
 2687     // mov $dst.lo,$src
 2688     int dst_encoding = $dst$$reg;
 2689     int src_encoding = $src$$reg;
 2690     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2691     // mov $dst.hi,$src
 2692     encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2693     // sar $dst.hi,31
 2694     emit_opcode( cbuf, 0xC1 );
 2695     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2696     emit_d8(cbuf, 0x1F );
 2697   %}
 2698 
 2699   enc_class convert_long_double( eRegL src ) %{
 2700     // push $src.hi
 2701     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2702     // push $src.lo
 2703     emit_opcode(cbuf, 0x50+$src$$reg  );
 2704     // fild 64-bits at [SP]
 2705     emit_opcode(cbuf,0xdf);
 2706     emit_d8(cbuf, 0x6C);
 2707     emit_d8(cbuf, 0x24);
 2708     emit_d8(cbuf, 0x00);
 2709     // pop stack
 2710     emit_opcode(cbuf, 0x83); // add  SP, #8
 2711     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2712     emit_d8(cbuf, 0x8);
 2713   %}
 2714 
 2715   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
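    // High-bits multiply: the one-operand IMUL leaves the signed 64-bit
    // product in EDX:EAX, and the SAR then leaves (product >> $cnt) in EDX.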
 2716     // IMUL   EDX:EAX,$src1
 2717     emit_opcode( cbuf, 0xF7 );
 2718     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2719     // SAR    EDX,$cnt-32
 2720     int shift_count = ((int)$cnt$$constant) - 32;
 2721     if (shift_count > 0) {
 2722       emit_opcode(cbuf, 0xC1);
 2723       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2724       emit_d8(cbuf, shift_count);
 2725     }
 2726   %}
 2727 
  // Same as convert_long_double above, but without the trailing ADD ESP, 8
 2729   enc_class convert_long_double2( eRegL src ) %{
 2730     // push $src.hi
 2731     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2732     // push $src.lo
 2733     emit_opcode(cbuf, 0x50+$src$$reg  );
 2734     // fild 64-bits at [SP]
 2735     emit_opcode(cbuf,0xdf);
 2736     emit_d8(cbuf, 0x6C);
 2737     emit_d8(cbuf, 0x24);
 2738     emit_d8(cbuf, 0x00);
 2739   %}
 2740 
 2741   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2742     // Basic idea: long = (long)int * (long)int
 2743     // IMUL EDX:EAX, src
 2744     emit_opcode( cbuf, 0xF7 );
 2745     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2746   %}
 2747 
 2748   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2749     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2750     // MUL EDX:EAX, src
 2751     emit_opcode( cbuf, 0xF7 );
 2752     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2753   %}
 2754 
 2755   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2756     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2757     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
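    //             Since (x_hi*2^32 + x_lo) * (y_hi*2^32 + y_lo)
    //               = x_lo*y_lo + 2^32*(x_hi*y_lo + x_lo*y_hi) + 2^64*(x_hi*y_hi),
    //             the x_hi*y_hi term overflows 64 bits and is dropped.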
 2758     // MOV    $tmp,$src.lo
 2759     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2760     // IMUL   $tmp,EDX
 2761     emit_opcode( cbuf, 0x0F );
 2762     emit_opcode( cbuf, 0xAF );
 2763     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2764     // MOV    EDX,$src.hi
 2765     encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2766     // IMUL   EDX,EAX
 2767     emit_opcode( cbuf, 0x0F );
 2768     emit_opcode( cbuf, 0xAF );
 2769     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2770     // ADD    $tmp,EDX
 2771     emit_opcode( cbuf, 0x03 );
 2772     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2773     // MUL   EDX:EAX,$src.lo
 2774     emit_opcode( cbuf, 0xF7 );
 2775     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
 2777     emit_opcode( cbuf, 0x03 );
 2778     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2779   %}
 2780 
 2781   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2782     // Basic idea: lo(result) = lo(src * y_lo)
 2783     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2784     // IMUL   $tmp,EDX,$src
 2785     emit_opcode( cbuf, 0x6B );
 2786     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2787     emit_d8( cbuf, (int)$src$$constant );
 2788     // MOV    EDX,$src
 2789     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2790     emit_d32( cbuf, (int)$src$$constant );
 2791     // MUL   EDX:EAX,EDX
 2792     emit_opcode( cbuf, 0xF7 );
 2793     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
 2795     emit_opcode( cbuf, 0x03 );
 2796     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2797   %}
 2798 
 2799   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2800     // PUSH src1.hi
 2801     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2802     // PUSH src1.lo
 2803     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2804     // PUSH src2.hi
 2805     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2806     // PUSH src2.lo
 2807     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2808     // CALL directly to the runtime
 2809     MacroAssembler _masm(&cbuf);
 2810     cbuf.set_insts_mark();
 2811     emit_opcode(cbuf,0xE8);       // Call into runtime
 2812     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2813     __ post_call_nop();
 2814     // Restore stack
 2815     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2816     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2817     emit_d8(cbuf, 4*4);
 2818   %}
 2819 
 2820   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2821     // PUSH src1.hi
 2822     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2823     // PUSH src1.lo
 2824     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2825     // PUSH src2.hi
 2826     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2827     // PUSH src2.lo
 2828     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2829     // CALL directly to the runtime
 2830     MacroAssembler _masm(&cbuf);
 2831     cbuf.set_insts_mark();
 2832     emit_opcode(cbuf,0xE8);       // Call into runtime
 2833     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2834     __ post_call_nop();
 2835     // Restore stack
 2836     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2837     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2838     emit_d8(cbuf, 4*4);
 2839   %}
 2840 
 2841   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
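    // Zero-test a long: OR the two halves together so ZF is set only when all
    // 64 bits are zero.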
 2842     // MOV   $tmp,$src.lo
 2843     emit_opcode(cbuf, 0x8B);
 2844     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2845     // OR    $tmp,$src.hi
 2846     emit_opcode(cbuf, 0x0B);
 2847     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2848   %}
 2849 
 2850   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
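    // 64-bit equality test: if the low halves differ, ZF is already clear and
    // the high-half compare is skipped; otherwise ZF comes from the high halves.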
 2851     // CMP    $src1.lo,$src2.lo
 2852     emit_opcode( cbuf, 0x3B );
 2853     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2854     // JNE,s  skip
 2855     emit_cc(cbuf, 0x70, 0x5);
 2856     emit_d8(cbuf,2);
 2857     // CMP    $src1.hi,$src2.hi
 2858     emit_opcode( cbuf, 0x3B );
 2859     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2860   %}
 2861 
 2862   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
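    // Signed 64-bit compare: compare the low halves, then subtract the high
    // halves with borrow into $tmp so the final flags match a full 64-bit SUB.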
 2863     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2864     emit_opcode( cbuf, 0x3B );
 2865     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2866     // MOV    $tmp,$src1.hi
 2867     emit_opcode( cbuf, 0x8B );
 2868     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
 2869     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2870     emit_opcode( cbuf, 0x1B );
 2871     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2872   %}
 2873 
 2874   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
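    // Compare a long against zero: compute 0 - $src across both halves so the
    // flags reflect the full 64-bit comparison.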
 2875     // XOR    $tmp,$tmp
 2876     emit_opcode(cbuf,0x33);  // XOR
 2877     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2878     // CMP    $tmp,$src.lo
 2879     emit_opcode( cbuf, 0x3B );
 2880     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2881     // SBB    $tmp,$src.hi
 2882     emit_opcode( cbuf, 0x1B );
 2883     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2884   %}
 2885 
 2886  // Sniff, sniff... smells like Gnu Superoptimizer
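  // Negate a 64-bit value in a register pair: NEG both halves, then subtract
  // (SBB) the borrow produced by the low-half NEG out of the high half.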
 2887   enc_class neg_long( eRegL dst ) %{
 2888     emit_opcode(cbuf,0xF7);    // NEG hi
 2889     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2890     emit_opcode(cbuf,0xF7);    // NEG lo
 2891     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2892     emit_opcode(cbuf,0x83);    // SBB hi,0
 2893     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2894     emit_d8    (cbuf,0 );
 2895   %}
 2896 
 2897   enc_class enc_pop_rdx() %{
 2898     emit_opcode(cbuf,0x5A);
 2899   %}
 2900 
 2901   enc_class enc_rethrow() %{
 2902     MacroAssembler _masm(&cbuf);
 2903     cbuf.set_insts_mark();
 2904     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2905     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2906                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2907     __ post_call_nop();
 2908   %}
 2909 
 2910 
 2911   // Convert a double to an int.  Java semantics require we do complex
 2912   // manglelations in the corner cases.  So we set the rounding mode to
 2913   // 'zero', store the darned double down as an int, and reset the
 2914   // rounding mode to 'nearest'.  The hardware throws an exception which
 2915   // patches up the correct value directly to the stack.
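  // In this encoding the corner cases are caught without exceptions: FIST
  // stores the "integer indefinite" value 0x80000000 for NaN and out-of-range
  // inputs, so the fast path compares the popped result against that sentinel
  // and only calls the runtime wrapper when it is seen.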
 2916   enc_class DPR2I_encoding( regDPR src ) %{
 2917     // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
 2920     // However, I2C adapters and other float-stack manglers leave pending
 2921     // invalid-op exceptions hanging.  We would have to clear them before
 2922     // enabling them and that is more expensive than just testing for the
 2923     // invalid value Intel stores down in the corner cases.
 2924     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2925     emit_opcode(cbuf,0x2D);
 2926     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2927     // Allocate a word
 2928     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2929     emit_opcode(cbuf,0xEC);
 2930     emit_d8(cbuf,0x04);
 2931     // Encoding assumes a double has been pushed into FPR0.
 2932     // Store down the double as an int, popping the FPU stack
 2933     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2934     emit_opcode(cbuf,0x1C);
 2935     emit_d8(cbuf,0x24);
 2936     // Restore the rounding mode; mask the exception
 2937     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2938     emit_opcode(cbuf,0x2D);
 2939     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2940         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2941         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2942 
 2943     // Load the converted int; adjust CPU stack
 2944     emit_opcode(cbuf,0x58);       // POP EAX
 2945     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2946     emit_d32   (cbuf,0x80000000); //         0x80000000
 2947     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2948     emit_d8    (cbuf,0x07);       // Size of slow_call
 2949     // Push src onto stack slow-path
 2950     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2951     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2952     // CALL directly to the runtime
 2953     MacroAssembler _masm(&cbuf);
 2954     cbuf.set_insts_mark();
 2955     emit_opcode(cbuf,0xE8);       // Call into runtime
 2956     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2957     __ post_call_nop();
 2958     // Carry on here...
 2959   %}
 2960 
 2961   enc_class DPR2L_encoding( regDPR src ) %{
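    // Same scheme as DPR2I_encoding, but producing a long: the slow path is
    // taken only when the stored value is the 64-bit integer indefinite
    // 0x8000000000000000 (EDX == 0x80000000 and EAX == 0).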
 2962     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2963     emit_opcode(cbuf,0x2D);
 2964     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes)
 2966     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2967     emit_opcode(cbuf,0xEC);
 2968     emit_d8(cbuf,0x08);
 2969     // Encoding assumes a double has been pushed into FPR0.
 2970     // Store down the double as a long, popping the FPU stack
 2971     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2972     emit_opcode(cbuf,0x3C);
 2973     emit_d8(cbuf,0x24);
 2974     // Restore the rounding mode; mask the exception
 2975     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2976     emit_opcode(cbuf,0x2D);
 2977     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2978         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2979         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2980 
    // Load the converted long; adjust CPU stack
 2982     emit_opcode(cbuf,0x58);       // POP EAX
 2983     emit_opcode(cbuf,0x5A);       // POP EDX
 2984     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2985     emit_d8    (cbuf,0xFA);       // rdx
 2986     emit_d32   (cbuf,0x80000000); //         0x80000000
 2987     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2988     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2989     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2990     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2991     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2992     emit_d8    (cbuf,0x07);       // Size of slow_call
 2993     // Push src onto stack slow-path
 2994     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2995     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2996     // CALL directly to the runtime
 2997     MacroAssembler _masm(&cbuf);
 2998     cbuf.set_insts_mark();
 2999     emit_opcode(cbuf,0xE8);       // Call into runtime
 3000     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 3001     __ post_call_nop();
 3002     // Carry on here...
 3003   %}
 3004 
 3005   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 3006     // Operand was loaded from memory into fp ST (stack top)
 3007     // FMUL   ST,$src  /* D8 C8+i */
 3008     emit_opcode(cbuf, 0xD8);
 3009     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 3010   %}
 3011 
 3012   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
 3014     emit_opcode(cbuf, 0xD8);
 3015     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3016     //could use FADDP  src2,fpST  /* DE C0+i */
 3017   %}
 3018 
 3019   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3020     // FADDP  src2,ST  /* DE C0+i */
 3021     emit_opcode(cbuf, 0xDE);
 3022     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3023   %}
 3024 
 3025   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 3026     // Operand has been loaded into fp ST (stack top)
 3027       // FSUB   ST,$src1
 3028       emit_opcode(cbuf, 0xD8);
 3029       emit_opcode(cbuf, 0xE0 + $src1$$reg);
 3030 
 3031       // FDIV
 3032       emit_opcode(cbuf, 0xD8);
 3033       emit_opcode(cbuf, 0xF0 + $src2$$reg);
 3034   %}
 3035 
 3036   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3037     // Operand was loaded from memory into fp ST (stack top)
 3038     // FADD   ST,$src  /* D8 C0+i */
 3039     emit_opcode(cbuf, 0xD8);
 3040     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3041 
    // FMUL   ST,src2  /* D8 C8+i */
 3043     emit_opcode(cbuf, 0xD8);
 3044     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3045   %}
 3046 
 3047 
 3048   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3049     // Operand was loaded from memory into fp ST (stack top)
 3050     // FADD   ST,$src  /* D8 C0+i */
 3051     emit_opcode(cbuf, 0xD8);
 3052     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3053 
 3054     // FMULP  src2,ST  /* DE C8+i */
 3055     emit_opcode(cbuf, 0xDE);
 3056     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3057   %}
 3058 
 3059   // Atomically load the volatile long
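  // (FILD performs a single 64-bit read from memory, so a racing store cannot
  //  tear the value; the result is then spilled to the destination stack slot
  //  with FISTP.)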
 3060   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3061     emit_opcode(cbuf,0xDF);
 3062     int rm_byte_opcode = 0x05;
 3063     int base     = $mem$$base;
 3064     int index    = $mem$$index;
 3065     int scale    = $mem$$scale;
 3066     int displace = $mem$$disp;
 3067     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3068     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3069     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3070   %}
 3071 
 3072   // Volatile Store Long.  Must be atomic, so move it into
 3073   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3074   // target address before the store (for null-ptr checks)
 3075   // so the memory operand is used twice in the encoding.
 3076   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3077     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3078     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3079     emit_opcode(cbuf,0xDF);
 3080     int rm_byte_opcode = 0x07;
 3081     int base     = $mem$$base;
 3082     int index    = $mem$$index;
 3083     int scale    = $mem$$scale;
 3084     int displace = $mem$$disp;
 3085     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3086     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3087   %}
 3088 
 3089 %}
 3090 
 3091 
 3092 //----------FRAME--------------------------------------------------------------
 3093 // Definition of frame structure and management information.
 3094 //
 3095 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3096 //                             |   (to get allocators register number
 3097 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3098 //  r   CALLER     |        |
 3099 //  o     |        +--------+      pad to even-align allocators stack-slot
 3100 //  w     V        |  pad0  |        numbers; owned by CALLER
 3101 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3102 //  h     ^        |   in   |  5
 3103 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3104 //  |     |        |        |  3
 3105 //  |     |        +--------+
 3106 //  V     |        | old out|      Empty on Intel, window on Sparc
 3107 //        |    old |preserve|      Must be even aligned.
 3108 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3109 //        |        |   in   |  3   area for Intel ret address
 3110 //     Owned by    |preserve|      Empty on Sparc.
 3111 //       SELF      +--------+
 3112 //        |        |  pad2  |  2   pad to align old SP
 3113 //        |        +--------+  1
 3114 //        |        | locks  |  0
 3115 //        |        +--------+----> OptoReg::stack0(), even aligned
 3116 //        |        |  pad1  | 11   pad to align new SP
 3117 //        |        +--------+
 3118 //        |        |        | 10
 3119 //        |        | spills |  9   spills
 3120 //        V        |        |  8   (pad0 slot for callee)
 3121 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3122 //        ^        |  out   |  7
 3123 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3124 //     Owned by    +--------+
 3125 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3126 //        |    new |preserve|      Must be even-aligned.
 3127 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3128 //        |        |        |
 3129 //
 3130 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3131 //         known from SELF's arguments and the Java calling convention.
 3132 //         Region 6-7 is determined per call site.
 3133 // Note 2: If the calling convention leaves holes in the incoming argument
 3134 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3135 //         are owned by the CALLEE.  Holes should not be necessary in the
 3136 //         incoming area, as the Java calling convention is completely under
 3137 //         the control of the AD file.  Doubles can be sorted and packed to
 3138 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3139 //         varargs C calling conventions.
 3140 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3141 //         even aligned with pad0 as needed.
 3142 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3143 //         region 6-11 is even aligned; it may be padded out more so that
 3144 //         the region from SP to FP meets the minimum stack alignment.
 3145 
 3146 frame %{
 3147   // These three registers define part of the calling convention
 3148   // between compiled code and the interpreter.
 3149   inline_cache_reg(EAX);                // Inline Cache Register
 3150 
 3151   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3152   cisc_spilling_operand_name(indOffset32);
 3153 
 3154   // Number of stack slots consumed by locking an object
 3155   sync_stack_slots(1);
 3156 
 3157   // Compiled code's Frame Pointer
 3158   frame_pointer(ESP);
 3159   // Interpreter stores its frame pointer in a register which is
 3160   // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted Java to compiled Java.
 3162   interpreter_frame_pointer(EBP);
 3163 
 3164   // Stack alignment requirement
 3165   // Alignment size in bytes (128-bit -> 16 bytes)
 3166   stack_alignment(StackAlignmentInBytes);
 3167 
 3168   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3169   // for calls to C.  Supports the var-args backing area for register parms.
 3170   varargs_C_out_slots_killed(0);
 3171 
 3172   // The after-PROLOG location of the return address.  Location of
 3173   // return address specifies a type (REG or STACK) and a number
 3174   // representing the register number (i.e. - use a register name) or
 3175   // stack slot.
 3176   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3177   // Otherwise, it is above the locks and verification slot and alignment word
 3178   return_addr(STACK - 1 +
 3179               align_up((Compile::current()->in_preserve_stack_slots() +
 3180                         Compile::current()->fixed_slots()),
 3181                        stack_alignment_in_slots()));
 3182 
 3183   // Location of C & interpreter return values
 3184   c_return_value %{
 3185     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3186     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3187     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3188 
 3189     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3190     // that C functions return float and double results in XMM0.
 3191     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3192       return OptoRegPair(XMM0b_num,XMM0_num);
 3193     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3194       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3195 
 3196     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3197   %}
 3198 
 3199   // Location of return values
 3200   return_value %{
 3201     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3202     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3203     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3204     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3205       return OptoRegPair(XMM0b_num,XMM0_num);
 3206     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3207       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3208     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3209   %}
 3210 
 3211 %}
 3212 
 3213 //----------ATTRIBUTES---------------------------------------------------------
 3214 //----------Operand Attributes-------------------------------------------------
 3215 op_attrib op_cost(0);        // Required cost attribute
 3216 
 3217 //----------Instruction Attributes---------------------------------------------
 3218 ins_attrib ins_cost(100);       // Required cost attribute
 3219 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3220 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3221                                 // non-matching short branch variant of some
                                // long branch?
 3223 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3224                                 // specifies the alignment that some part of the instruction (not
 3225                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3226                                 // function must be provided for the instruction
 3227 
 3228 //----------OPERANDS-----------------------------------------------------------
 3229 // Operand definitions must precede instruction definitions for correct parsing
 3230 // in the ADLC because operands constitute user defined types which are used in
 3231 // instruction definitions.
 3232 
 3233 //----------Simple Operands----------------------------------------------------
 3234 // Immediate Operands
 3235 // Integer Immediate
 3236 operand immI() %{
 3237   match(ConI);
 3238 
 3239   op_cost(10);
 3240   format %{ %}
 3241   interface(CONST_INTER);
 3242 %}
 3243 
 3244 // Constant for test vs zero
 3245 operand immI_0() %{
 3246   predicate(n->get_int() == 0);
 3247   match(ConI);
 3248 
 3249   op_cost(0);
 3250   format %{ %}
 3251   interface(CONST_INTER);
 3252 %}
 3253 
 3254 // Constant for increment
 3255 operand immI_1() %{
 3256   predicate(n->get_int() == 1);
 3257   match(ConI);
 3258 
 3259   op_cost(0);
 3260   format %{ %}
 3261   interface(CONST_INTER);
 3262 %}
 3263 
 3264 // Constant for decrement
 3265 operand immI_M1() %{
 3266   predicate(n->get_int() == -1);
 3267   match(ConI);
 3268 
 3269   op_cost(0);
 3270   format %{ %}
 3271   interface(CONST_INTER);
 3272 %}
 3273 
 3274 // Valid scale values for addressing modes
 3275 operand immI2() %{
 3276   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3277   match(ConI);
 3278 
 3279   format %{ %}
 3280   interface(CONST_INTER);
 3281 %}
 3282 
 3283 operand immI8() %{
 3284   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3285   match(ConI);
 3286 
 3287   op_cost(5);
 3288   format %{ %}
 3289   interface(CONST_INTER);
 3290 %}
 3291 
 3292 operand immU8() %{
 3293   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3294   match(ConI);
 3295 
 3296   op_cost(5);
 3297   format %{ %}
 3298   interface(CONST_INTER);
 3299 %}
 3300 
 3301 operand immI16() %{
 3302   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3303   match(ConI);
 3304 
 3305   op_cost(10);
 3306   format %{ %}
 3307   interface(CONST_INTER);
 3308 %}
 3309 
 3310 // Int Immediate non-negative
 3311 operand immU31()
 3312 %{
 3313   predicate(n->get_int() >= 0);
 3314   match(ConI);
 3315 
 3316   op_cost(0);
 3317   format %{ %}
 3318   interface(CONST_INTER);
 3319 %}
 3320 
 3321 // Constant for long shifts
 3322 operand immI_32() %{
 3323   predicate( n->get_int() == 32 );
 3324   match(ConI);
 3325 
 3326   op_cost(0);
 3327   format %{ %}
 3328   interface(CONST_INTER);
 3329 %}
 3330 
 3331 operand immI_1_31() %{
 3332   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3333   match(ConI);
 3334 
 3335   op_cost(0);
 3336   format %{ %}
 3337   interface(CONST_INTER);
 3338 %}
 3339 
 3340 operand immI_32_63() %{
 3341   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3342   match(ConI);
 3343   op_cost(0);
 3344 
 3345   format %{ %}
 3346   interface(CONST_INTER);
 3347 %}
 3348 
 3349 operand immI_2() %{
 3350   predicate( n->get_int() == 2 );
 3351   match(ConI);
 3352 
 3353   op_cost(0);
 3354   format %{ %}
 3355   interface(CONST_INTER);
 3356 %}
 3357 
 3358 operand immI_3() %{
 3359   predicate( n->get_int() == 3 );
 3360   match(ConI);
 3361 
 3362   op_cost(0);
 3363   format %{ %}
 3364   interface(CONST_INTER);
 3365 %}
 3366 
 3367 operand immI_4()
 3368 %{
 3369   predicate(n->get_int() == 4);
 3370   match(ConI);
 3371 
 3372   op_cost(0);
 3373   format %{ %}
 3374   interface(CONST_INTER);
 3375 %}
 3376 
 3377 operand immI_8()
 3378 %{
 3379   predicate(n->get_int() == 8);
 3380   match(ConI);
 3381 
 3382   op_cost(0);
 3383   format %{ %}
 3384   interface(CONST_INTER);
 3385 %}
 3386 
 3387 // Pointer Immediate
 3388 operand immP() %{
 3389   match(ConP);
 3390 
 3391   op_cost(10);
 3392   format %{ %}
 3393   interface(CONST_INTER);
 3394 %}
 3395 
 3396 // nullptr Pointer Immediate
 3397 operand immP0() %{
 3398   predicate( n->get_ptr() == 0 );
 3399   match(ConP);
 3400   op_cost(0);
 3401 
 3402   format %{ %}
 3403   interface(CONST_INTER);
 3404 %}
 3405 
 3406 // Long Immediate
 3407 operand immL() %{
 3408   match(ConL);
 3409 
 3410   op_cost(20);
 3411   format %{ %}
 3412   interface(CONST_INTER);
 3413 %}
 3414 
 3415 // Long Immediate zero
 3416 operand immL0() %{
 3417   predicate( n->get_long() == 0L );
 3418   match(ConL);
 3419   op_cost(0);
 3420 
 3421   format %{ %}
 3422   interface(CONST_INTER);
 3423 %}
 3424 
// Long Immediate minus one
 3426 operand immL_M1() %{
 3427   predicate( n->get_long() == -1L );
 3428   match(ConL);
 3429   op_cost(0);
 3430 
 3431   format %{ %}
 3432   interface(CONST_INTER);
 3433 %}
 3434 
 3435 // Long immediate from 0 to 127.
 3436 // Used for a shorter form of long mul by 10.
 3437 operand immL_127() %{
 3438   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3439   match(ConL);
 3440   op_cost(0);
 3441 
 3442   format %{ %}
 3443   interface(CONST_INTER);
 3444 %}
 3445 
 3446 // Long Immediate: low 32-bit mask
 3447 operand immL_32bits() %{
 3448   predicate(n->get_long() == 0xFFFFFFFFL);
 3449   match(ConL);
 3450   op_cost(0);
 3451 
 3452   format %{ %}
 3453   interface(CONST_INTER);
 3454 %}
 3455 
// Long Immediate: value fits in a signed 32-bit int
 3457 operand immL32() %{
 3458   predicate(n->get_long() == (int)(n->get_long()));
 3459   match(ConL);
 3460   op_cost(20);
 3461 
 3462   format %{ %}
 3463   interface(CONST_INTER);
 3464 %}
 3465 
// Double Immediate zero
 3467 operand immDPR0() %{
 3468   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3469   // bug that generates code such that NaNs compare equal to 0.0
 3470   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3471   match(ConD);
 3472 
 3473   op_cost(5);
 3474   format %{ %}
 3475   interface(CONST_INTER);
 3476 %}
 3477 
 3478 // Double Immediate one
 3479 operand immDPR1() %{
 3480   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3481   match(ConD);
 3482 
 3483   op_cost(5);
 3484   format %{ %}
 3485   interface(CONST_INTER);
 3486 %}
 3487 
 3488 // Double Immediate
 3489 operand immDPR() %{
 3490   predicate(UseSSE<=1);
 3491   match(ConD);
 3492 
 3493   op_cost(5);
 3494   format %{ %}
 3495   interface(CONST_INTER);
 3496 %}
 3497 
 3498 operand immD() %{
 3499   predicate(UseSSE>=2);
 3500   match(ConD);
 3501 
 3502   op_cost(5);
 3503   format %{ %}
 3504   interface(CONST_INTER);
 3505 %}
 3506 
 3507 // Double Immediate zero
 3508 operand immD0() %{
 3509   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3510   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3511   // compare equal to -0.0.
 3512   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3513   match(ConD);
 3514 
 3515   format %{ %}
 3516   interface(CONST_INTER);
 3517 %}
 3518 
 3519 // Float Immediate zero
 3520 operand immFPR0() %{
 3521   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3522   match(ConF);
 3523 
 3524   op_cost(5);
 3525   format %{ %}
 3526   interface(CONST_INTER);
 3527 %}
 3528 
 3529 // Float Immediate one
 3530 operand immFPR1() %{
 3531   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3532   match(ConF);
 3533 
 3534   op_cost(5);
 3535   format %{ %}
 3536   interface(CONST_INTER);
 3537 %}
 3538 
 3539 // Float Immediate
 3540 operand immFPR() %{
 3541   predicate( UseSSE == 0 );
 3542   match(ConF);
 3543 
 3544   op_cost(5);
 3545   format %{ %}
 3546   interface(CONST_INTER);
 3547 %}
 3548 
 3549 // Float Immediate
 3550 operand immF() %{
 3551   predicate(UseSSE >= 1);
 3552   match(ConF);
 3553 
 3554   op_cost(5);
 3555   format %{ %}
 3556   interface(CONST_INTER);
 3557 %}
 3558 
 3559 // Float Immediate zero.  Zero and not -0.0
 3560 operand immF0() %{
 3561   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3562   match(ConF);
 3563 
 3564   op_cost(5);
 3565   format %{ %}
 3566   interface(CONST_INTER);
 3567 %}
 3568 
 3569 // Immediates for special shifts (sign extend)
 3570 
// Constants for sign-extending shifts
 3572 operand immI_16() %{
 3573   predicate( n->get_int() == 16 );
 3574   match(ConI);
 3575 
 3576   format %{ %}
 3577   interface(CONST_INTER);
 3578 %}
 3579 
 3580 operand immI_24() %{
 3581   predicate( n->get_int() == 24 );
 3582   match(ConI);
 3583 
 3584   format %{ %}
 3585   interface(CONST_INTER);
 3586 %}
 3587 
 3588 // Constant for byte-wide masking
 3589 operand immI_255() %{
 3590   predicate( n->get_int() == 255 );
 3591   match(ConI);
 3592 
 3593   format %{ %}
 3594   interface(CONST_INTER);
 3595 %}
 3596 
 3597 // Constant for short-wide masking
 3598 operand immI_65535() %{
 3599   predicate(n->get_int() == 65535);
 3600   match(ConI);
 3601 
 3602   format %{ %}
 3603   interface(CONST_INTER);
 3604 %}
 3605 
 3606 operand kReg()
 3607 %{
 3608   constraint(ALLOC_IN_RC(vectmask_reg));
 3609   match(RegVectMask);
 3610   format %{%}
 3611   interface(REG_INTER);
 3612 %}
 3613 
 3614 operand kReg_K1()
 3615 %{
 3616   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3617   match(RegVectMask);
 3618   format %{%}
 3619   interface(REG_INTER);
 3620 %}
 3621 
 3622 operand kReg_K2()
 3623 %{
 3624   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3625   match(RegVectMask);
 3626   format %{%}
 3627   interface(REG_INTER);
 3628 %}
 3629 
 3630 // Special Registers
 3631 operand kReg_K3()
 3632 %{
 3633   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3634   match(RegVectMask);
 3635   format %{%}
 3636   interface(REG_INTER);
 3637 %}
 3638 
 3639 operand kReg_K4()
 3640 %{
 3641   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3642   match(RegVectMask);
 3643   format %{%}
 3644   interface(REG_INTER);
 3645 %}
 3646 
 3647 operand kReg_K5()
 3648 %{
 3649   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3650   match(RegVectMask);
 3651   format %{%}
 3652   interface(REG_INTER);
 3653 %}
 3654 
 3655 operand kReg_K6()
 3656 %{
 3657   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3658   match(RegVectMask);
 3659   format %{%}
 3660   interface(REG_INTER);
 3661 %}
 3662 
 3663 // Special Registers
 3664 operand kReg_K7()
 3665 %{
 3666   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3667   match(RegVectMask);
 3668   format %{%}
 3669   interface(REG_INTER);
 3670 %}
 3671 
 3672 // Register Operands
 3673 // Integer Register
 3674 operand rRegI() %{
 3675   constraint(ALLOC_IN_RC(int_reg));
 3676   match(RegI);
 3677   match(xRegI);
 3678   match(eAXRegI);
 3679   match(eBXRegI);
 3680   match(eCXRegI);
 3681   match(eDXRegI);
 3682   match(eDIRegI);
 3683   match(eSIRegI);
 3684 
 3685   format %{ %}
 3686   interface(REG_INTER);
 3687 %}
 3688 
 3689 // Subset of Integer Register
 3690 operand xRegI(rRegI reg) %{
 3691   constraint(ALLOC_IN_RC(int_x_reg));
 3692   match(reg);
 3693   match(eAXRegI);
 3694   match(eBXRegI);
 3695   match(eCXRegI);
 3696   match(eDXRegI);
 3697 
 3698   format %{ %}
 3699   interface(REG_INTER);
 3700 %}
 3701 
 3702 // Special Registers
 3703 operand eAXRegI(xRegI reg) %{
 3704   constraint(ALLOC_IN_RC(eax_reg));
 3705   match(reg);
 3706   match(rRegI);
 3707 
 3708   format %{ "EAX" %}
 3709   interface(REG_INTER);
 3710 %}
 3711 
 3712 // Special Registers
 3713 operand eBXRegI(xRegI reg) %{
 3714   constraint(ALLOC_IN_RC(ebx_reg));
 3715   match(reg);
 3716   match(rRegI);
 3717 
 3718   format %{ "EBX" %}
 3719   interface(REG_INTER);
 3720 %}
 3721 
 3722 operand eCXRegI(xRegI reg) %{
 3723   constraint(ALLOC_IN_RC(ecx_reg));
 3724   match(reg);
 3725   match(rRegI);
 3726 
 3727   format %{ "ECX" %}
 3728   interface(REG_INTER);
 3729 %}
 3730 
 3731 operand eDXRegI(xRegI reg) %{
 3732   constraint(ALLOC_IN_RC(edx_reg));
 3733   match(reg);
 3734   match(rRegI);
 3735 
 3736   format %{ "EDX" %}
 3737   interface(REG_INTER);
 3738 %}
 3739 
 3740 operand eDIRegI(xRegI reg) %{
 3741   constraint(ALLOC_IN_RC(edi_reg));
 3742   match(reg);
 3743   match(rRegI);
 3744 
 3745   format %{ "EDI" %}
 3746   interface(REG_INTER);
 3747 %}
 3748 
 3749 operand naxRegI() %{
 3750   constraint(ALLOC_IN_RC(nax_reg));
 3751   match(RegI);
 3752   match(eCXRegI);
 3753   match(eDXRegI);
 3754   match(eSIRegI);
 3755   match(eDIRegI);
 3756 
 3757   format %{ %}
 3758   interface(REG_INTER);
 3759 %}
 3760 
 3761 operand nadxRegI() %{
 3762   constraint(ALLOC_IN_RC(nadx_reg));
 3763   match(RegI);
 3764   match(eBXRegI);
 3765   match(eCXRegI);
 3766   match(eSIRegI);
 3767   match(eDIRegI);
 3768 
 3769   format %{ %}
 3770   interface(REG_INTER);
 3771 %}
 3772 
 3773 operand ncxRegI() %{
 3774   constraint(ALLOC_IN_RC(ncx_reg));
 3775   match(RegI);
 3776   match(eAXRegI);
 3777   match(eDXRegI);
 3778   match(eSIRegI);
 3779   match(eDIRegI);
 3780 
 3781   format %{ %}
 3782   interface(REG_INTER);
 3783 %}
 3784 
 3785 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3786 // //
 3787 operand eSIRegI(xRegI reg) %{
 3788    constraint(ALLOC_IN_RC(esi_reg));
 3789    match(reg);
 3790    match(rRegI);
 3791 
 3792    format %{ "ESI" %}
 3793    interface(REG_INTER);
 3794 %}
 3795 
 3796 // Pointer Register
 3797 operand anyRegP() %{
 3798   constraint(ALLOC_IN_RC(any_reg));
 3799   match(RegP);
 3800   match(eAXRegP);
 3801   match(eBXRegP);
 3802   match(eCXRegP);
 3803   match(eDIRegP);
 3804   match(eRegP);
 3805 
 3806   format %{ %}
 3807   interface(REG_INTER);
 3808 %}
 3809 
 3810 operand eRegP() %{
 3811   constraint(ALLOC_IN_RC(int_reg));
 3812   match(RegP);
 3813   match(eAXRegP);
 3814   match(eBXRegP);
 3815   match(eCXRegP);
 3816   match(eDIRegP);
 3817 
 3818   format %{ %}
 3819   interface(REG_INTER);
 3820 %}
 3821 
 3822 operand rRegP() %{
 3823   constraint(ALLOC_IN_RC(int_reg));
 3824   match(RegP);
 3825   match(eAXRegP);
 3826   match(eBXRegP);
 3827   match(eCXRegP);
 3828   match(eDIRegP);
 3829 
 3830   format %{ %}
 3831   interface(REG_INTER);
 3832 %}
 3833 
// On Windows 95, EBP is not safe to use for implicit null tests.
 3835 operand eRegP_no_EBP() %{
 3836   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3837   match(RegP);
 3838   match(eAXRegP);
 3839   match(eBXRegP);
 3840   match(eCXRegP);
 3841   match(eDIRegP);
 3842 
 3843   op_cost(100);
 3844   format %{ %}
 3845   interface(REG_INTER);
 3846 %}
 3847 
 3848 operand naxRegP() %{
 3849   constraint(ALLOC_IN_RC(nax_reg));
 3850   match(RegP);
 3851   match(eBXRegP);
 3852   match(eDXRegP);
 3853   match(eCXRegP);
 3854   match(eSIRegP);
 3855   match(eDIRegP);
 3856 
 3857   format %{ %}
 3858   interface(REG_INTER);
 3859 %}
 3860 
 3861 operand nabxRegP() %{
 3862   constraint(ALLOC_IN_RC(nabx_reg));
 3863   match(RegP);
 3864   match(eCXRegP);
 3865   match(eDXRegP);
 3866   match(eSIRegP);
 3867   match(eDIRegP);
 3868 
 3869   format %{ %}
 3870   interface(REG_INTER);
 3871 %}
 3872 
 3873 operand pRegP() %{
 3874   constraint(ALLOC_IN_RC(p_reg));
 3875   match(RegP);
 3876   match(eBXRegP);
 3877   match(eDXRegP);
 3878   match(eSIRegP);
 3879   match(eDIRegP);
 3880 
 3881   format %{ %}
 3882   interface(REG_INTER);
 3883 %}
 3884 
 3885 // Special Registers
 3886 // Return a pointer value
 3887 operand eAXRegP(eRegP reg) %{
 3888   constraint(ALLOC_IN_RC(eax_reg));
 3889   match(reg);
 3890   format %{ "EAX" %}
 3891   interface(REG_INTER);
 3892 %}
 3893 
 3894 // Used in AtomicAdd
 3895 operand eBXRegP(eRegP reg) %{
 3896   constraint(ALLOC_IN_RC(ebx_reg));
 3897   match(reg);
 3898   format %{ "EBX" %}
 3899   interface(REG_INTER);
 3900 %}
 3901 
 3902 // Tail-call (interprocedural jump) to interpreter
 3903 operand eCXRegP(eRegP reg) %{
 3904   constraint(ALLOC_IN_RC(ecx_reg));
 3905   match(reg);
 3906   format %{ "ECX" %}
 3907   interface(REG_INTER);
 3908 %}
 3909 
 3910 operand eDXRegP(eRegP reg) %{
 3911   constraint(ALLOC_IN_RC(edx_reg));
 3912   match(reg);
 3913   format %{ "EDX" %}
 3914   interface(REG_INTER);
 3915 %}
 3916 
 3917 operand eSIRegP(eRegP reg) %{
 3918   constraint(ALLOC_IN_RC(esi_reg));
 3919   match(reg);
 3920   format %{ "ESI" %}
 3921   interface(REG_INTER);
 3922 %}
 3923 
 3924 // Used in rep stosw
 3925 operand eDIRegP(eRegP reg) %{
 3926   constraint(ALLOC_IN_RC(edi_reg));
 3927   match(reg);
 3928   format %{ "EDI" %}
 3929   interface(REG_INTER);
 3930 %}
 3931 
 3932 operand eRegL() %{
 3933   constraint(ALLOC_IN_RC(long_reg));
 3934   match(RegL);
 3935   match(eADXRegL);
 3936 
 3937   format %{ %}
 3938   interface(REG_INTER);
 3939 %}
 3940 
 3941 operand eADXRegL( eRegL reg ) %{
 3942   constraint(ALLOC_IN_RC(eadx_reg));
 3943   match(reg);
 3944 
 3945   format %{ "EDX:EAX" %}
 3946   interface(REG_INTER);
 3947 %}
 3948 
 3949 operand eBCXRegL( eRegL reg ) %{
 3950   constraint(ALLOC_IN_RC(ebcx_reg));
 3951   match(reg);
 3952 
 3953   format %{ "EBX:ECX" %}
 3954   interface(REG_INTER);
 3955 %}
 3956 
 3957 operand eBDPRegL( eRegL reg ) %{
 3958   constraint(ALLOC_IN_RC(ebpd_reg));
 3959   match(reg);
 3960 
 3961   format %{ "EBP:EDI" %}
 3962   interface(REG_INTER);
 3963 %}
 3964 // Special case for integer high multiply
 3965 operand eADXRegL_low_only() %{
 3966   constraint(ALLOC_IN_RC(eadx_reg));
 3967   match(RegL);
 3968 
 3969   format %{ "EAX" %}
 3970   interface(REG_INTER);
 3971 %}
 3972 
 3973 // Flags register, used as output of compare instructions
 3974 operand rFlagsReg() %{
 3975   constraint(ALLOC_IN_RC(int_flags));
 3976   match(RegFlags);
 3977 
 3978   format %{ "EFLAGS" %}
 3979   interface(REG_INTER);
 3980 %}
 3981 
 3982 // Flags register, used as output of compare instructions
 3983 operand eFlagsReg() %{
 3984   constraint(ALLOC_IN_RC(int_flags));
 3985   match(RegFlags);
 3986 
 3987   format %{ "EFLAGS" %}
 3988   interface(REG_INTER);
 3989 %}
 3990 
 3991 // Flags register, used as output of FLOATING POINT compare instructions
 3992 operand eFlagsRegU() %{
 3993   constraint(ALLOC_IN_RC(int_flags));
 3994   match(RegFlags);
 3995 
 3996   format %{ "EFLAGS_U" %}
 3997   interface(REG_INTER);
 3998 %}
 3999 
 4000 operand eFlagsRegUCF() %{
 4001   constraint(ALLOC_IN_RC(int_flags));
 4002   match(RegFlags);
 4003   predicate(false);
 4004 
 4005   format %{ "EFLAGS_U_CF" %}
 4006   interface(REG_INTER);
 4007 %}
 4008 
 4009 // Condition Code Register used by long compare
 4010 operand flagsReg_long_LTGE() %{
 4011   constraint(ALLOC_IN_RC(int_flags));
 4012   match(RegFlags);
 4013   format %{ "FLAGS_LTGE" %}
 4014   interface(REG_INTER);
 4015 %}
 4016 operand flagsReg_long_EQNE() %{
 4017   constraint(ALLOC_IN_RC(int_flags));
 4018   match(RegFlags);
 4019   format %{ "FLAGS_EQNE" %}
 4020   interface(REG_INTER);
 4021 %}
 4022 operand flagsReg_long_LEGT() %{
 4023   constraint(ALLOC_IN_RC(int_flags));
 4024   match(RegFlags);
 4025   format %{ "FLAGS_LEGT" %}
 4026   interface(REG_INTER);
 4027 %}
 4028 
 4029 // Condition Code Register used by unsigned long compare
 4030 operand flagsReg_ulong_LTGE() %{
 4031   constraint(ALLOC_IN_RC(int_flags));
 4032   match(RegFlags);
 4033   format %{ "FLAGS_U_LTGE" %}
 4034   interface(REG_INTER);
 4035 %}
 4036 operand flagsReg_ulong_EQNE() %{
 4037   constraint(ALLOC_IN_RC(int_flags));
 4038   match(RegFlags);
 4039   format %{ "FLAGS_U_EQNE" %}
 4040   interface(REG_INTER);
 4041 %}
 4042 operand flagsReg_ulong_LEGT() %{
 4043   constraint(ALLOC_IN_RC(int_flags));
 4044   match(RegFlags);
 4045   format %{ "FLAGS_U_LEGT" %}
 4046   interface(REG_INTER);
 4047 %}
 4048 
 4049 // Float register operands
 4050 operand regDPR() %{
 4051   predicate( UseSSE < 2 );
 4052   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4053   match(RegD);
 4054   match(regDPR1);
 4055   match(regDPR2);
 4056   format %{ %}
 4057   interface(REG_INTER);
 4058 %}
 4059 
 4060 operand regDPR1(regDPR reg) %{
 4061   predicate( UseSSE < 2 );
 4062   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4063   match(reg);
 4064   format %{ "FPR1" %}
 4065   interface(REG_INTER);
 4066 %}
 4067 
 4068 operand regDPR2(regDPR reg) %{
 4069   predicate( UseSSE < 2 );
 4070   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4071   match(reg);
 4072   format %{ "FPR2" %}
 4073   interface(REG_INTER);
 4074 %}
 4075 
 4076 operand regnotDPR1(regDPR reg) %{
 4077   predicate( UseSSE < 2 );
 4078   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4079   match(reg);
 4080   format %{ %}
 4081   interface(REG_INTER);
 4082 %}
 4083 
 4084 // Float register operands
 4085 operand regFPR() %{
 4086   predicate( UseSSE < 2 );
 4087   constraint(ALLOC_IN_RC(fp_flt_reg));
 4088   match(RegF);
 4089   match(regFPR1);
 4090   format %{ %}
 4091   interface(REG_INTER);
 4092 %}
 4093 
 4094 // Float register operands
 4095 operand regFPR1(regFPR reg) %{
 4096   predicate( UseSSE < 2 );
 4097   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4098   match(reg);
 4099   format %{ "FPR1" %}
 4100   interface(REG_INTER);
 4101 %}
 4102 
 4103 // XMM Float register operands
 4104 operand regF() %{
 4105   predicate( UseSSE>=1 );
 4106   constraint(ALLOC_IN_RC(float_reg_legacy));
 4107   match(RegF);
 4108   format %{ %}
 4109   interface(REG_INTER);
 4110 %}
 4111 
 4112 operand legRegF() %{
 4113   predicate( UseSSE>=1 );
 4114   constraint(ALLOC_IN_RC(float_reg_legacy));
 4115   match(RegF);
 4116   format %{ %}
 4117   interface(REG_INTER);
 4118 %}
 4119 
 4120 // Float register operands
 4121 operand vlRegF() %{
 4122    constraint(ALLOC_IN_RC(float_reg_vl));
 4123    match(RegF);
 4124 
 4125    format %{ %}
 4126    interface(REG_INTER);
 4127 %}
 4128 
 4129 // XMM Double register operands
 4130 operand regD() %{
 4131   predicate( UseSSE>=2 );
 4132   constraint(ALLOC_IN_RC(double_reg_legacy));
 4133   match(RegD);
 4134   format %{ %}
 4135   interface(REG_INTER);
 4136 %}
 4137 
 4138 // Double register operands
 4139 operand legRegD() %{
 4140   predicate( UseSSE>=2 );
 4141   constraint(ALLOC_IN_RC(double_reg_legacy));
 4142   match(RegD);
 4143   format %{ %}
 4144   interface(REG_INTER);
 4145 %}
 4146 
 4147 operand vlRegD() %{
 4148    constraint(ALLOC_IN_RC(double_reg_vl));
 4149    match(RegD);
 4150 
 4151    format %{ %}
 4152    interface(REG_INTER);
 4153 %}
 4154 
 4155 //----------Memory Operands----------------------------------------------------
 4156 // Direct Memory Operand
 4157 operand direct(immP addr) %{
 4158   match(addr);
 4159 
 4160   format %{ "[$addr]" %}
 4161   interface(MEMORY_INTER) %{
 4162     base(0xFFFFFFFF);
 4163     index(0x4);
 4164     scale(0x0);
 4165     disp($addr);
 4166   %}
 4167 %}
 4168 
 4169 // Indirect Memory Operand
 4170 operand indirect(eRegP reg) %{
 4171   constraint(ALLOC_IN_RC(int_reg));
 4172   match(reg);
 4173 
 4174   format %{ "[$reg]" %}
 4175   interface(MEMORY_INTER) %{
 4176     base($reg);
 4177     index(0x4);
 4178     scale(0x0);
 4179     disp(0x0);
 4180   %}
 4181 %}
 4182 
 4183 // Indirect Memory Plus Short Offset Operand
 4184 operand indOffset8(eRegP reg, immI8 off) %{
 4185   match(AddP reg off);
 4186 
 4187   format %{ "[$reg + $off]" %}
 4188   interface(MEMORY_INTER) %{
 4189     base($reg);
 4190     index(0x4);
 4191     scale(0x0);
 4192     disp($off);
 4193   %}
 4194 %}
 4195 
 4196 // Indirect Memory Plus Long Offset Operand
 4197 operand indOffset32(eRegP reg, immI off) %{
 4198   match(AddP reg off);
 4199 
 4200   format %{ "[$reg + $off]" %}
 4201   interface(MEMORY_INTER) %{
 4202     base($reg);
 4203     index(0x4);
 4204     scale(0x0);
 4205     disp($off);
 4206   %}
 4207 %}
 4208 
 4209 // Indirect Memory Plus Long Offset Operand
 4210 operand indOffset32X(rRegI reg, immP off) %{
 4211   match(AddP off reg);
 4212 
 4213   format %{ "[$reg + $off]" %}
 4214   interface(MEMORY_INTER) %{
 4215     base($reg);
 4216     index(0x4);
 4217     scale(0x0);
 4218     disp($off);
 4219   %}
 4220 %}
 4221 
 4222 // Indirect Memory Plus Index Register Plus Offset Operand
 4223 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4224   match(AddP (AddP reg ireg) off);
 4225 
 4226   op_cost(10);
 4227   format %{"[$reg + $off + $ireg]" %}
 4228   interface(MEMORY_INTER) %{
 4229     base($reg);
 4230     index($ireg);
 4231     scale(0x0);
 4232     disp($off);
 4233   %}
 4234 %}
 4235 
 4236 // Indirect Memory Plus Index Register Plus Offset Operand
 4237 operand indIndex(eRegP reg, rRegI ireg) %{
 4238   match(AddP reg ireg);
 4239 
 4240   op_cost(10);
 4241   format %{"[$reg + $ireg]" %}
 4242   interface(MEMORY_INTER) %{
 4243     base($reg);
 4244     index($ireg);
 4245     scale(0x0);
 4246     disp(0x0);
 4247   %}
 4248 %}
 4249 
 4250 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
 4252 // // -------------------------------------------------------------------------
 4253 // // Scaled Memory Operands
 4254 // // Indirect Memory Times Scale Plus Offset Operand
 4255 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4256 //   match(AddP off (LShiftI ireg scale));
 4257 //
 4258 //   op_cost(10);
 4259 //   format %{"[$off + $ireg << $scale]" %}
 4260 //   interface(MEMORY_INTER) %{
 4261 //     base(0x4);
 4262 //     index($ireg);
 4263 //     scale($scale);
 4264 //     disp($off);
 4265 //   %}
 4266 // %}
 4267 
 4268 // Indirect Memory Times Scale Plus Index Register
 4269 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4270   match(AddP reg (LShiftI ireg scale));
 4271 
 4272   op_cost(10);
 4273   format %{"[$reg + $ireg << $scale]" %}
 4274   interface(MEMORY_INTER) %{
 4275     base($reg);
 4276     index($ireg);
 4277     scale($scale);
 4278     disp(0x0);
 4279   %}
 4280 %}
 4281 
 4282 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4283 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4284   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4285 
 4286   op_cost(10);
 4287   format %{"[$reg + $off + $ireg << $scale]" %}
 4288   interface(MEMORY_INTER) %{
 4289     base($reg);
 4290     index($ireg);
 4291     scale($scale);
 4292     disp($off);
 4293   %}
 4294 %}
 4295 
 4296 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
 4298 // the first word of the long.  If the load-long destination overlaps with
 4299 // registers used in the addressing expression, the 2nd half will be loaded
 4300 // from a clobbered address.  Fix this by requiring that load-long use
 4301 // address registers that do not overlap with the load-long target.
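// For example, if the base register were also one of the destination halves --
// say a (hypothetical) "MOV EAX,[EAX]; MOV EDX,[EAX+4]" sequence -- the second
// word would be read through a base the first load has already overwritten.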
 4302 
 4303 // load-long support
 4304 operand load_long_RegP() %{
 4305   constraint(ALLOC_IN_RC(esi_reg));
 4306   match(RegP);
 4307   match(eSIRegP);
 4308   op_cost(100);
 4309   format %{  %}
 4310   interface(REG_INTER);
 4311 %}
 4312 
 4313 // Indirect Memory Operand Long
 4314 operand load_long_indirect(load_long_RegP reg) %{
 4315   constraint(ALLOC_IN_RC(esi_reg));
 4316   match(reg);
 4317 
 4318   format %{ "[$reg]" %}
 4319   interface(MEMORY_INTER) %{
 4320     base($reg);
 4321     index(0x4);
 4322     scale(0x0);
 4323     disp(0x0);
 4324   %}
 4325 %}
 4326 
 4327 // Indirect Memory Plus Long Offset Operand
 4328 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4329   match(AddP reg off);
 4330 
 4331   format %{ "[$reg + $off]" %}
 4332   interface(MEMORY_INTER) %{
 4333     base($reg);
 4334     index(0x4);
 4335     scale(0x0);
 4336     disp($off);
 4337   %}
 4338 %}
 4339 
 4340 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4341 
 4342 
 4343 //----------Special Memory Operands--------------------------------------------
 4344 // Stack Slot Operand - This operand is used for loading and storing temporary
 4345 //                      values on the stack where a match requires a value to
 4346 //                      flow through memory.
 4347 operand stackSlotP(sRegP reg) %{
 4348   constraint(ALLOC_IN_RC(stack_slots));
 4349   // No match rule because this operand is only generated in matching
 4350   format %{ "[$reg]" %}
 4351   interface(MEMORY_INTER) %{
 4352     base(0x4);   // ESP
 4353     index(0x4);  // No Index
 4354     scale(0x0);  // No Scale
 4355     disp($reg);  // Stack Offset
 4356   %}
 4357 %}
 4358 
 4359 operand stackSlotI(sRegI reg) %{
 4360   constraint(ALLOC_IN_RC(stack_slots));
 4361   // No match rule because this operand is only generated in matching
 4362   format %{ "[$reg]" %}
 4363   interface(MEMORY_INTER) %{
 4364     base(0x4);   // ESP
 4365     index(0x4);  // No Index
 4366     scale(0x0);  // No Scale
 4367     disp($reg);  // Stack Offset
 4368   %}
 4369 %}
 4370 
 4371 operand stackSlotF(sRegF reg) %{
 4372   constraint(ALLOC_IN_RC(stack_slots));
 4373   // No match rule because this operand is only generated in matching
 4374   format %{ "[$reg]" %}
 4375   interface(MEMORY_INTER) %{
 4376     base(0x4);   // ESP
 4377     index(0x4);  // No Index
 4378     scale(0x0);  // No Scale
 4379     disp($reg);  // Stack Offset
 4380   %}
 4381 %}
 4382 
 4383 operand stackSlotD(sRegD reg) %{
 4384   constraint(ALLOC_IN_RC(stack_slots));
 4385   // No match rule because this operand is only generated in matching
 4386   format %{ "[$reg]" %}
 4387   interface(MEMORY_INTER) %{
 4388     base(0x4);   // ESP
 4389     index(0x4);  // No Index
 4390     scale(0x0);  // No Scale
 4391     disp($reg);  // Stack Offset
 4392   %}
 4393 %}
 4394 
 4395 operand stackSlotL(sRegL reg) %{
 4396   constraint(ALLOC_IN_RC(stack_slots));
 4397   // No match rule because this operand is only generated in matching
 4398   format %{ "[$reg]" %}
 4399   interface(MEMORY_INTER) %{
 4400     base(0x4);   // ESP
 4401     index(0x4);  // No Index
 4402     scale(0x0);  // No Scale
 4403     disp($reg);  // Stack Offset
 4404   %}
 4405 %}
 4406 
 4407 //----------Conditional Branch Operands----------------------------------------
 4408 // Comparison Op  - This is the operation of the comparison, and is limited to
 4409 //                  the following set of codes:
 4410 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4411 //
 4412 // Other attributes of the comparison, such as unsignedness, are specified
 4413 // by the comparison instruction that sets a condition code flags register.
 4414 // That result is represented by a flags operand whose subtype is appropriate
 4415 // to the unsignedness (etc.) of the comparison.
 4416 //
 4417 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4418 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4419 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4420 
 4421 // Comparison Code
 4422 operand cmpOp() %{
 4423   match(Bool);
 4424 
 4425   format %{ "" %}
 4426   interface(COND_INTER) %{
 4427     equal(0x4, "e");
 4428     not_equal(0x5, "ne");
 4429     less(0xC, "l");
 4430     greater_equal(0xD, "ge");
 4431     less_equal(0xE, "le");
 4432     greater(0xF, "g");
 4433     overflow(0x0, "o");
 4434     no_overflow(0x1, "no");
 4435   %}
 4436 %}
 4437 
 4438 // Comparison Code, unsigned compare.  Used by FP also, with
 4439 // C2 (unordered) turned into GT or LT already.  The other bits
 4440 // C0 and C3 are turned into Carry & Zero flags.
 4441 operand cmpOpU() %{
 4442   match(Bool);
 4443 
 4444   format %{ "" %}
 4445   interface(COND_INTER) %{
 4446     equal(0x4, "e");
 4447     not_equal(0x5, "ne");
 4448     less(0x2, "b");
 4449     greater_equal(0x3, "nb");
 4450     less_equal(0x6, "be");
 4451     greater(0x7, "nbe");
 4452     overflow(0x0, "o");
 4453     no_overflow(0x1, "no");
 4454   %}
 4455 %}
 4456 
 4457 // Floating comparisons that don't require any fixup for the unordered case
 4458 operand cmpOpUCF() %{
 4459   match(Bool);
 4460   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4461             n->as_Bool()->_test._test == BoolTest::ge ||
 4462             n->as_Bool()->_test._test == BoolTest::le ||
 4463             n->as_Bool()->_test._test == BoolTest::gt);
 4464   format %{ "" %}
 4465   interface(COND_INTER) %{
 4466     equal(0x4, "e");
 4467     not_equal(0x5, "ne");
 4468     less(0x2, "b");
 4469     greater_equal(0x3, "nb");
 4470     less_equal(0x6, "be");
 4471     greater(0x7, "nbe");
 4472     overflow(0x0, "o");
 4473     no_overflow(0x1, "no");
 4474   %}
 4475 %}
 4476 
 4477 
 4478 // Floating comparisons that can be fixed up with extra conditional jumps
 4479 operand cmpOpUCF2() %{
 4480   match(Bool);
 4481   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4482             n->as_Bool()->_test._test == BoolTest::eq);
 4483   format %{ "" %}
 4484   interface(COND_INTER) %{
 4485     equal(0x4, "e");
 4486     not_equal(0x5, "ne");
 4487     less(0x2, "b");
 4488     greater_equal(0x3, "nb");
 4489     less_equal(0x6, "be");
 4490     greater(0x7, "nbe");
 4491     overflow(0x0, "o");
 4492     no_overflow(0x1, "no");
 4493   %}
 4494 %}
 4495 
 4496 // Comparison Code for FP conditional move
 4497 operand cmpOp_fcmov() %{
 4498   match(Bool);
 4499 
 4500   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4501             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4502   format %{ "" %}
 4503   interface(COND_INTER) %{
 4504     equal        (0x0C8);
 4505     not_equal    (0x1C8);
 4506     less         (0x0C0);
 4507     greater_equal(0x1C0);
 4508     less_equal   (0x0D0);
 4509     greater      (0x1D0);
 4510     overflow(0x0, "o"); // not really supported by the instruction
 4511     no_overflow(0x1, "no"); // not really supported by the instruction
 4512   %}
 4513 %}
 4514 
 4515 // Comparison Code used in long compares
 4516 operand cmpOp_commute() %{
 4517   match(Bool);
 4518 
 4519   format %{ "" %}
 4520   interface(COND_INTER) %{
 4521     equal(0x4, "e");
 4522     not_equal(0x5, "ne");
 4523     less(0xF, "g");
 4524     greater_equal(0xE, "le");
 4525     less_equal(0xD, "ge");
 4526     greater(0xC, "l");
 4527     overflow(0x0, "o");
 4528     no_overflow(0x1, "no");
 4529   %}
 4530 %}
 4531 
 4532 // Comparison Code used in unsigned long compares
 4533 operand cmpOpU_commute() %{
 4534   match(Bool);
 4535 
 4536   format %{ "" %}
 4537   interface(COND_INTER) %{
 4538     equal(0x4, "e");
 4539     not_equal(0x5, "ne");
 4540     less(0x7, "nbe");
 4541     greater_equal(0x6, "be");
 4542     less_equal(0x3, "nb");
 4543     greater(0x2, "b");
 4544     overflow(0x0, "o");
 4545     no_overflow(0x1, "no");
 4546   %}
 4547 %}
 4548 
 4549 //----------OPERAND CLASSES----------------------------------------------------
 4550 // Operand Classes are groups of operands that are used to simplify
 4551 // instruction definitions by not requiring the AD writer to specify separate
 4552 // instructions for every form of operand when the instruction accepts
 4553 // multiple operand types with the same basic encoding and format.  The classic
 4554 // case of this is memory operands.
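      //
      // For example, the loadB rule further below takes a single 'memory'
      // operand, so one instruct covers every addressing form named in the
      // opclass rather than requiring one rule per form:
      //
      //   instruct loadB(xRegI dst, memory mem) %{ match(Set dst (LoadB mem)); ... %}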
 4555 
 4556 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4557                indIndex, indIndexScale, indIndexScaleOffset);
 4558 
 4559 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4560 // This means some kind of offset is always required and you cannot use
 4561 // an oop as the offset (as is done when working on static globals).
 4562 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4563                     indIndex, indIndexScale, indIndexScaleOffset);
 4564 
 4565 
 4566 //----------PIPELINE-----------------------------------------------------------
 4567 // Rules which define the behavior of the target architecture's pipeline.
 4568 pipeline %{
 4569 
 4570 //----------ATTRIBUTES---------------------------------------------------------
 4571 attributes %{
 4572   variable_size_instructions;        // Variable size instructions
 4573   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4574   instruction_unit_size = 1;         // An instruction is 1 byte long
 4575   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4576   instruction_fetch_units = 1;       // of 16 bytes
 4577 
 4578   // List of nop instructions
 4579   nops( MachNop );
 4580 %}
 4581 
 4582 //----------RESOURCES----------------------------------------------------------
 4583 // Resources are the functional units available to the machine
 4584 
 4585 // Generic P2/P3 pipeline
 4586 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4587 // 3 instructions decoded per cycle.
 4588 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4589 // 2 ALU ops, only ALU0 handles mul/div instructions.
 4590 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4591            MS0, MS1, MEM = MS0 | MS1,
 4592            BR, FPU,
 4593            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4594 
 4595 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4596 // Pipeline Description specifies the stages in the machine's pipeline
 4597 
 4598 // Generic P2/P3 pipeline
 4599 pipe_desc(S0, S1, S2, S3, S4, S5);
 4600 
 4601 //----------PIPELINE CLASSES---------------------------------------------------
 4602 // Pipeline Classes describe the stages in which input and output are
 4603 // referenced by the hardware pipeline.
 4604 
 4605 // Naming convention: ialu or fpu
 4606 // Then: _reg
 4607 // Then: _reg if there is a 2nd register
 4608 // Then: _long if it's a pair of instructions implementing a long operation
 4609 // Then: _fat if it requires the big decoder
 4610 //   Or: _mem if it requires the big decoder and a memory unit.
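      //
      // For example, ialu_reg_mem below names an integer ALU operation with a
      // register destination and a memory source; per the _mem suffix it
      // claims the big decoder (D0) and a memory unit (MEM).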
 4611 
 4612 // Integer ALU reg operation
 4613 pipe_class ialu_reg(rRegI dst) %{
 4614     single_instruction;
 4615     dst    : S4(write);
 4616     dst    : S3(read);
 4617     DECODE : S0;        // any decoder
 4618     ALU    : S3;        // any alu
 4619 %}
 4620 
 4621 // Long ALU reg operation
 4622 pipe_class ialu_reg_long(eRegL dst) %{
 4623     instruction_count(2);
 4624     dst    : S4(write);
 4625     dst    : S3(read);
 4626     DECODE : S0(2);     // any 2 decoders
 4627     ALU    : S3(2);     // both alus
 4628 %}
 4629 
 4630 // Integer ALU reg operation using big decoder
 4631 pipe_class ialu_reg_fat(rRegI dst) %{
 4632     single_instruction;
 4633     dst    : S4(write);
 4634     dst    : S3(read);
 4635     D0     : S0;        // big decoder only
 4636     ALU    : S3;        // any alu
 4637 %}
 4638 
 4639 // Long ALU reg operation using big decoder
 4640 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4641     instruction_count(2);
 4642     dst    : S4(write);
 4643     dst    : S3(read);
 4644     D0     : S0(2);     // big decoder only; twice
 4645     ALU    : S3(2);     // any 2 alus
 4646 %}
 4647 
 4648 // Integer ALU reg-reg operation
 4649 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4650     single_instruction;
 4651     dst    : S4(write);
 4652     src    : S3(read);
 4653     DECODE : S0;        // any decoder
 4654     ALU    : S3;        // any alu
 4655 %}
 4656 
 4657 // Long ALU reg-reg operation
 4658 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4659     instruction_count(2);
 4660     dst    : S4(write);
 4661     src    : S3(read);
 4662     DECODE : S0(2);     // any 2 decoders
 4663     ALU    : S3(2);     // both alus
 4664 %}
 4665 
 4666 // Integer ALU reg-reg operation using big decoder
 4667 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4668     single_instruction;
 4669     dst    : S4(write);
 4670     src    : S3(read);
 4671     D0     : S0;        // big decoder only
 4672     ALU    : S3;        // any alu
 4673 %}
 4674 
 4675 // Long ALU reg-reg operation using big decoder
 4676 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4677     instruction_count(2);
 4678     dst    : S4(write);
 4679     src    : S3(read);
 4680     D0     : S0(2);     // big decoder only; twice
 4681     ALU    : S3(2);     // both alus
 4682 %}
 4683 
 4684 // Integer ALU reg-mem operation
 4685 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4686     single_instruction;
 4687     dst    : S5(write);
 4688     mem    : S3(read);
 4689     D0     : S0;        // big decoder only
 4690     ALU    : S4;        // any alu
 4691     MEM    : S3;        // any mem
 4692 %}
 4693 
 4694 // Long ALU reg-mem operation
 4695 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4696     instruction_count(2);
 4697     dst    : S5(write);
 4698     mem    : S3(read);
 4699     D0     : S0(2);     // big decoder only; twice
 4700     ALU    : S4(2);     // any 2 alus
 4701     MEM    : S3(2);     // both mems
 4702 %}
 4703 
 4704 // Integer mem operation (prefetch)
 4705 pipe_class ialu_mem(memory mem)
 4706 %{
 4707     single_instruction;
 4708     mem    : S3(read);
 4709     D0     : S0;        // big decoder only
 4710     MEM    : S3;        // any mem
 4711 %}
 4712 
 4713 // Integer Store to Memory
 4714 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4715     single_instruction;
 4716     mem    : S3(read);
 4717     src    : S5(read);
 4718     D0     : S0;        // big decoder only
 4719     ALU    : S4;        // any alu
 4720     MEM    : S3;
 4721 %}
 4722 
 4723 // Long Store to Memory
 4724 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4725     instruction_count(2);
 4726     mem    : S3(read);
 4727     src    : S5(read);
 4728     D0     : S0(2);     // big decoder only; twice
 4729     ALU    : S4(2);     // any 2 alus
 4730     MEM    : S3(2);     // Both mems
 4731 %}
 4732 
 4733 // Integer Store immediate to Memory
 4734 pipe_class ialu_mem_imm(memory mem) %{
 4735     single_instruction;
 4736     mem    : S3(read);
 4737     D0     : S0;        // big decoder only
 4738     ALU    : S4;        // any alu
 4739     MEM    : S3;
 4740 %}
 4741 
 4742 // Integer ALU0 reg-reg operation
 4743 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4744     single_instruction;
 4745     dst    : S4(write);
 4746     src    : S3(read);
 4747     D0     : S0;        // Big decoder only
 4748     ALU0   : S3;        // only alu0
 4749 %}
 4750 
 4751 // Integer ALU0 reg-mem operation
 4752 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4753     single_instruction;
 4754     dst    : S5(write);
 4755     mem    : S3(read);
 4756     D0     : S0;        // big decoder only
 4757     ALU0   : S4;        // ALU0 only
 4758     MEM    : S3;        // any mem
 4759 %}
 4760 
 4761 // Integer ALU reg-reg operation
 4762 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4763     single_instruction;
 4764     cr     : S4(write);
 4765     src1   : S3(read);
 4766     src2   : S3(read);
 4767     DECODE : S0;        // any decoder
 4768     ALU    : S3;        // any alu
 4769 %}
 4770 
 4771 // Integer ALU reg-imm operation
 4772 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4773     single_instruction;
 4774     cr     : S4(write);
 4775     src1   : S3(read);
 4776     DECODE : S0;        // any decoder
 4777     ALU    : S3;        // any alu
 4778 %}
 4779 
 4780 // Integer ALU reg-mem operation
 4781 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4782     single_instruction;
 4783     cr     : S4(write);
 4784     src1   : S3(read);
 4785     src2   : S3(read);
 4786     D0     : S0;        // big decoder only
 4787     ALU    : S4;        // any alu
 4788     MEM    : S3;
 4789 %}
 4790 
 4791 // Conditional move reg-reg
 4792 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4793     instruction_count(4);
 4794     y      : S4(read);
 4795     q      : S3(read);
 4796     p      : S3(read);
 4797     DECODE : S0(4);     // any decoder
 4798 %}
 4799 
 4800 // Conditional move reg-reg
 4801 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4802     single_instruction;
 4803     dst    : S4(write);
 4804     src    : S3(read);
 4805     cr     : S3(read);
 4806     DECODE : S0;        // any decoder
 4807 %}
 4808 
 4809 // Conditional move reg-mem
 4810 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4811     single_instruction;
 4812     dst    : S4(write);
 4813     src    : S3(read);
 4814     cr     : S3(read);
 4815     DECODE : S0;        // any decoder
 4816     MEM    : S3;
 4817 %}
 4818 
 4819 // Conditional move reg-reg long
 4820 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4821     single_instruction;
 4822     dst    : S4(write);
 4823     src    : S3(read);
 4824     cr     : S3(read);
 4825     DECODE : S0(2);     // any 2 decoders
 4826 %}
 4827 
 4828 // Conditional move double reg-reg
 4829 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4830     single_instruction;
 4831     dst    : S4(write);
 4832     src    : S3(read);
 4833     cr     : S3(read);
 4834     DECODE : S0;        // any decoder
 4835 %}
 4836 
 4837 // Float reg-reg operation
 4838 pipe_class fpu_reg(regDPR dst) %{
 4839     instruction_count(2);
 4840     dst    : S3(read);
 4841     DECODE : S0(2);     // any 2 decoders
 4842     FPU    : S3;
 4843 %}
 4844 
 4845 // Float reg-reg operation
 4846 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4847     instruction_count(2);
 4848     dst    : S4(write);
 4849     src    : S3(read);
 4850     DECODE : S0(2);     // any 2 decoders
 4851     FPU    : S3;
 4852 %}
 4853 
 4854 // Float reg-reg operation
 4855 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4856     instruction_count(3);
 4857     dst    : S4(write);
 4858     src1   : S3(read);
 4859     src2   : S3(read);
 4860     DECODE : S0(3);     // any 3 decoders
 4861     FPU    : S3(2);
 4862 %}
 4863 
 4864 // Float reg-reg operation
 4865 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4866     instruction_count(4);
 4867     dst    : S4(write);
 4868     src1   : S3(read);
 4869     src2   : S3(read);
 4870     src3   : S3(read);
 4871     DECODE : S0(4);     // any 4 decoders
 4872     FPU    : S3(2);
 4873 %}
 4874 
 4875 // Float reg-mem operation
 4876 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4877     instruction_count(4);
 4878     dst    : S4(write);
 4879     src1   : S3(read);
 4880     src2   : S3(read);
 4881     src3   : S3(read);
 4882     DECODE : S1(3);     // any 3 decoders
 4883     D0     : S0;        // Big decoder only
 4884     FPU    : S3(2);
 4885     MEM    : S3;
 4886 %}
 4887 
 4888 // Float reg-mem operation
 4889 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4890     instruction_count(2);
 4891     dst    : S5(write);
 4892     mem    : S3(read);
 4893     D0     : S0;        // big decoder only
 4894     DECODE : S1;        // any decoder for FPU POP
 4895     FPU    : S4;
 4896     MEM    : S3;        // any mem
 4897 %}
 4898 
 4899 // Float reg-mem operation
 4900 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4901     instruction_count(3);
 4902     dst    : S5(write);
 4903     src1   : S3(read);
 4904     mem    : S3(read);
 4905     D0     : S0;        // big decoder only
 4906     DECODE : S1(2);     // any decoder for FPU POP
 4907     FPU    : S4;
 4908     MEM    : S3;        // any mem
 4909 %}
 4910 
 4911 // Float mem-reg operation
 4912 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4913     instruction_count(2);
 4914     src    : S5(read);
 4915     mem    : S3(read);
 4916     DECODE : S0;        // any decoder for FPU PUSH
 4917     D0     : S1;        // big decoder only
 4918     FPU    : S4;
 4919     MEM    : S3;        // any mem
 4920 %}
 4921 
 4922 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4923     instruction_count(3);
 4924     src1   : S3(read);
 4925     src2   : S3(read);
 4926     mem    : S3(read);
 4927     DECODE : S0(2);     // any decoder for FPU PUSH
 4928     D0     : S1;        // big decoder only
 4929     FPU    : S4;
 4930     MEM    : S3;        // any mem
 4931 %}
 4932 
 4933 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4934     instruction_count(3);
 4935     src1   : S3(read);
 4936     src2   : S3(read);
 4937     mem    : S4(read);
 4938     DECODE : S0;        // any decoder for FPU PUSH
 4939     D0     : S0(2);     // big decoder only
 4940     FPU    : S4;
 4941     MEM    : S3(2);     // any mem
 4942 %}
 4943 
 4944 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4945     instruction_count(2);
 4946     src1   : S3(read);
 4947     dst    : S4(read);
 4948     D0     : S0(2);     // big decoder only
 4949     MEM    : S3(2);     // any mem
 4950 %}
 4951 
 4952 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4953     instruction_count(3);
 4954     src1   : S3(read);
 4955     src2   : S3(read);
 4956     dst    : S4(read);
 4957     D0     : S0(3);     // big decoder only
 4958     FPU    : S4;
 4959     MEM    : S3(3);     // any mem
 4960 %}
 4961 
 4962 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4963     instruction_count(3);
 4964     src1   : S4(read);
 4965     mem    : S4(read);
 4966     DECODE : S0;        // any decoder for FPU PUSH
 4967     D0     : S0(2);     // big decoder only
 4968     FPU    : S4;
 4969     MEM    : S3(2);     // any mem
 4970 %}
 4971 
 4972 // Float load constant
 4973 pipe_class fpu_reg_con(regDPR dst) %{
 4974     instruction_count(2);
 4975     dst    : S5(write);
 4976     D0     : S0;        // big decoder only for the load
 4977     DECODE : S1;        // any decoder for FPU POP
 4978     FPU    : S4;
 4979     MEM    : S3;        // any mem
 4980 %}
 4981 
 4982 // Float load constant
 4983 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4984     instruction_count(3);
 4985     dst    : S5(write);
 4986     src    : S3(read);
 4987     D0     : S0;        // big decoder only for the load
 4988     DECODE : S1(2);     // any decoder for FPU POP
 4989     FPU    : S4;
 4990     MEM    : S3;        // any mem
 4991 %}
 4992 
 4993 // UnConditional branch
 4994 pipe_class pipe_jmp( label labl ) %{
 4995     single_instruction;
 4996     BR   : S3;
 4997 %}
 4998 
 4999 // Conditional branch
 5000 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 5001     single_instruction;
 5002     cr    : S1(read);
 5003     BR    : S3;
 5004 %}
 5005 
 5006 // Allocation idiom
 5007 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 5008     instruction_count(1); force_serialization;
 5009     fixed_latency(6);
 5010     heap_ptr : S3(read);
 5011     DECODE   : S0(3);
 5012     D0       : S2;
 5013     MEM      : S3;
 5014     ALU      : S3(2);
 5015     dst      : S5(write);
 5016     BR       : S5;
 5017 %}
 5018 
 5019 // Generic big/slow expanded idiom
 5020 pipe_class pipe_slow(  ) %{
 5021     instruction_count(10); multiple_bundles; force_serialization;
 5022     fixed_latency(100);
 5023     D0  : S0(2);
 5024     MEM : S3(2);
 5025 %}
 5026 
 5027 // The real do-nothing guy
 5028 pipe_class empty( ) %{
 5029     instruction_count(0);
 5030 %}
 5031 
 5032 // Define the class for the Nop node
 5033 define %{
 5034    MachNop = empty;
 5035 %}
 5036 
 5037 %}
 5038 
 5039 //----------INSTRUCTIONS-------------------------------------------------------
 5040 //
 5041 // match      -- States which machine-independent subtree may be replaced
 5042 //               by this instruction.
 5043 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5044 //               selection to identify a minimum cost tree of machine
 5045 //               instructions that matches a tree of machine-independent
 5046 //               instructions.
 5047 // format     -- A string providing the disassembly for this instruction.
 5048 //               The value of an instruction's operand may be inserted
 5049 //               by referring to it with a '$' prefix.
 5050 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5051 //               to within an encode class as $primary, $secondary, and $tertiary
 5052 //               respectively.  The primary opcode is commonly used to
 5053 //               indicate the type of machine instruction, while secondary
 5054 //               and tertiary are often used for prefix options or addressing
 5055 //               modes.
 5056 // ins_encode -- A list of encode classes with parameters. The encode class
 5057 //               name must have been defined in an 'enc_class' specification
 5058 //               in the encode section of the architecture description.
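      //
      // A minimal sketch (illustrative only, not an actual rule in this file)
      // showing how the attributes above fit together for a hypothetical
      // reg-reg integer add:
      //
      //   instruct addI_sketch(rRegI dst, rRegI src, eFlagsReg cr) %{
      //     match(Set dst (AddI dst src));      // ideal subtree replaced by this rule
      //     effect(KILL cr);                    // ADD clobbers the flags register
      //     ins_cost(150);                      // cost seen by instruction selection
      //     format %{ "ADD    $dst,$src" %}     // disassembly string
      //     opcode(0x03);                       // $primary opcode (ADD r32, r/m32)
      //     ins_encode(OpcP, RegReg(dst, src)); // encode classes from the encode section
      //     ins_pipe(ialu_reg_reg);
      //   %}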
 5059 
 5060 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5061 // Load Float
 5062 instruct MoveF2LEG(legRegF dst, regF src) %{
 5063   match(Set dst src);
 5064   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5065   ins_encode %{
 5066     ShouldNotReachHere();
 5067   %}
 5068   ins_pipe( fpu_reg_reg );
 5069 %}
 5070 
 5071 // Load Float
 5072 instruct MoveLEG2F(regF dst, legRegF src) %{
 5073   match(Set dst src);
 5074   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5075   ins_encode %{
 5076     ShouldNotReachHere();
 5077   %}
 5078   ins_pipe( fpu_reg_reg );
 5079 %}
 5080 
 5081 // Load Float
 5082 instruct MoveF2VL(vlRegF dst, regF src) %{
 5083   match(Set dst src);
 5084   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5085   ins_encode %{
 5086     ShouldNotReachHere();
 5087   %}
 5088   ins_pipe( fpu_reg_reg );
 5089 %}
 5090 
 5091 // Load Float
 5092 instruct MoveVL2F(regF dst, vlRegF src) %{
 5093   match(Set dst src);
 5094   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5095   ins_encode %{
 5096     ShouldNotReachHere();
 5097   %}
 5098   ins_pipe( fpu_reg_reg );
 5099 %}
 5100 
 5101 
 5102 
 5103 // Load Double
 5104 instruct MoveD2LEG(legRegD dst, regD src) %{
 5105   match(Set dst src);
 5106   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5107   ins_encode %{
 5108     ShouldNotReachHere();
 5109   %}
 5110   ins_pipe( fpu_reg_reg );
 5111 %}
 5112 
 5113 // Load Double
 5114 instruct MoveLEG2D(regD dst, legRegD src) %{
 5115   match(Set dst src);
 5116   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5117   ins_encode %{
 5118     ShouldNotReachHere();
 5119   %}
 5120   ins_pipe( fpu_reg_reg );
 5121 %}
 5122 
 5123 // Load Double
 5124 instruct MoveD2VL(vlRegD dst, regD src) %{
 5125   match(Set dst src);
 5126   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5127   ins_encode %{
 5128     ShouldNotReachHere();
 5129   %}
 5130   ins_pipe( fpu_reg_reg );
 5131 %}
 5132 
 5133 // Load Double
 5134 instruct MoveVL2D(regD dst, vlRegD src) %{
 5135   match(Set dst src);
 5136   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5137   ins_encode %{
 5138     ShouldNotReachHere();
 5139   %}
 5140   ins_pipe( fpu_reg_reg );
 5141 %}
 5142 
 5143 //----------BSWAP-Instruction--------------------------------------------------
 5144 instruct bytes_reverse_int(rRegI dst) %{
 5145   match(Set dst (ReverseBytesI dst));
 5146 
 5147   format %{ "BSWAP  $dst" %}
 5148   opcode(0x0F, 0xC8);
 5149   ins_encode( OpcP, OpcSReg(dst) );
 5150   ins_pipe( ialu_reg );
 5151 %}
 5152 
 5153 instruct bytes_reverse_long(eRegL dst) %{
 5154   match(Set dst (ReverseBytesL dst));
 5155 
 5156   format %{ "BSWAP  $dst.lo\n\t"
 5157             "BSWAP  $dst.hi\n\t"
 5158             "XCHG   $dst.lo $dst.hi" %}
 5159 
 5160   ins_cost(125);
 5161   ins_encode( bswap_long_bytes(dst) );
 5162   ins_pipe( ialu_reg_reg);
 5163 %}
 5164 
 5165 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5166   match(Set dst (ReverseBytesUS dst));
 5167   effect(KILL cr);
 5168 
 5169   format %{ "BSWAP  $dst\n\t"
 5170             "SHR    $dst,16\n\t" %}
 5171   ins_encode %{
 5172     __ bswapl($dst$$Register);
 5173     __ shrl($dst$$Register, 16);
 5174   %}
 5175   ins_pipe( ialu_reg );
 5176 %}
 5177 
 5178 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5179   match(Set dst (ReverseBytesS dst));
 5180   effect(KILL cr);
 5181 
 5182   format %{ "BSWAP  $dst\n\t"
 5183             "SAR    $dst,16\n\t" %}
 5184   ins_encode %{
 5185     __ bswapl($dst$$Register);
 5186     __ sarl($dst$$Register, 16);
 5187   %}
 5188   ins_pipe( ialu_reg );
 5189 %}
 5190 
 5191 
 5192 //---------- Zeros Count Instructions ------------------------------------------
 5193 
 5194 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5195   predicate(UseCountLeadingZerosInstruction);
 5196   match(Set dst (CountLeadingZerosI src));
 5197   effect(KILL cr);
 5198 
 5199   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5200   ins_encode %{
 5201     __ lzcntl($dst$$Register, $src$$Register);
 5202   %}
 5203   ins_pipe(ialu_reg);
 5204 %}
 5205 
 5206 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5207   predicate(!UseCountLeadingZerosInstruction);
 5208   match(Set dst (CountLeadingZerosI src));
 5209   effect(KILL cr);
 5210 
 5211   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5212             "JNZ    skip\n\t"
 5213             "MOV    $dst, -1\n"
 5214       "skip:\n\t"
 5215             "NEG    $dst\n\t"
 5216             "ADD    $dst, 31" %}
 5217   ins_encode %{
 5218     Register Rdst = $dst$$Register;
 5219     Register Rsrc = $src$$Register;
 5220     Label skip;
 5221     __ bsrl(Rdst, Rsrc);
 5222     __ jccb(Assembler::notZero, skip);
 5223     __ movl(Rdst, -1);
 5224     __ bind(skip);
 5225     __ negl(Rdst);
 5226     __ addl(Rdst, BitsPerInt - 1);
 5227   %}
 5228   ins_pipe(ialu_reg);
 5229 %}
 5230 
 5231 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5232   predicate(UseCountLeadingZerosInstruction);
 5233   match(Set dst (CountLeadingZerosL src));
 5234   effect(TEMP dst, KILL cr);
 5235 
 5236   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5237             "JNC    done\n\t"
 5238             "LZCNT  $dst, $src.lo\n\t"
 5239             "ADD    $dst, 32\n"
 5240       "done:" %}
 5241   ins_encode %{
 5242     Register Rdst = $dst$$Register;
 5243     Register Rsrc = $src$$Register;
 5244     Label done;
 5245     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5246     __ jccb(Assembler::carryClear, done);
 5247     __ lzcntl(Rdst, Rsrc);
 5248     __ addl(Rdst, BitsPerInt);
 5249     __ bind(done);
 5250   %}
 5251   ins_pipe(ialu_reg);
 5252 %}
 5253 
 5254 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5255   predicate(!UseCountLeadingZerosInstruction);
 5256   match(Set dst (CountLeadingZerosL src));
 5257   effect(TEMP dst, KILL cr);
 5258 
 5259   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5260             "JZ     msw_is_zero\n\t"
 5261             "ADD    $dst, 32\n\t"
 5262             "JMP    not_zero\n"
 5263       "msw_is_zero:\n\t"
 5264             "BSR    $dst, $src.lo\n\t"
 5265             "JNZ    not_zero\n\t"
 5266             "MOV    $dst, -1\n"
 5267       "not_zero:\n\t"
 5268             "NEG    $dst\n\t"
 5269             "ADD    $dst, 63\n" %}
 5270   ins_encode %{
 5271     Register Rdst = $dst$$Register;
 5272     Register Rsrc = $src$$Register;
 5273     Label msw_is_zero;
 5274     Label not_zero;
 5275     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5276     __ jccb(Assembler::zero, msw_is_zero);
 5277     __ addl(Rdst, BitsPerInt);
 5278     __ jmpb(not_zero);
 5279     __ bind(msw_is_zero);
 5280     __ bsrl(Rdst, Rsrc);
 5281     __ jccb(Assembler::notZero, not_zero);
 5282     __ movl(Rdst, -1);
 5283     __ bind(not_zero);
 5284     __ negl(Rdst);
 5285     __ addl(Rdst, BitsPerLong - 1);
 5286   %}
 5287   ins_pipe(ialu_reg);
 5288 %}
 5289 
 5290 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5291   predicate(UseCountTrailingZerosInstruction);
 5292   match(Set dst (CountTrailingZerosI src));
 5293   effect(KILL cr);
 5294 
 5295   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5296   ins_encode %{
 5297     __ tzcntl($dst$$Register, $src$$Register);
 5298   %}
 5299   ins_pipe(ialu_reg);
 5300 %}
 5301 
 5302 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5303   predicate(!UseCountTrailingZerosInstruction);
 5304   match(Set dst (CountTrailingZerosI src));
 5305   effect(KILL cr);
 5306 
 5307   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5308             "JNZ    done\n\t"
 5309             "MOV    $dst, 32\n"
 5310       "done:" %}
 5311   ins_encode %{
 5312     Register Rdst = $dst$$Register;
 5313     Label done;
 5314     __ bsfl(Rdst, $src$$Register);
 5315     __ jccb(Assembler::notZero, done);
 5316     __ movl(Rdst, BitsPerInt);
 5317     __ bind(done);
 5318   %}
 5319   ins_pipe(ialu_reg);
 5320 %}
 5321 
 5322 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5323   predicate(UseCountTrailingZerosInstruction);
 5324   match(Set dst (CountTrailingZerosL src));
 5325   effect(TEMP dst, KILL cr);
 5326 
 5327   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5328             "JNC    done\n\t"
 5329             "TZCNT  $dst, $src.hi\n\t"
 5330             "ADD    $dst, 32\n"
 5331             "done:" %}
 5332   ins_encode %{
 5333     Register Rdst = $dst$$Register;
 5334     Register Rsrc = $src$$Register;
 5335     Label done;
 5336     __ tzcntl(Rdst, Rsrc);
 5337     __ jccb(Assembler::carryClear, done);
 5338     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5339     __ addl(Rdst, BitsPerInt);
 5340     __ bind(done);
 5341   %}
 5342   ins_pipe(ialu_reg);
 5343 %}
 5344 
 5345 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5346   predicate(!UseCountTrailingZerosInstruction);
 5347   match(Set dst (CountTrailingZerosL src));
 5348   effect(TEMP dst, KILL cr);
 5349 
 5350   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5351             "JNZ    done\n\t"
 5352             "BSF    $dst, $src.hi\n\t"
 5353             "JNZ    msw_not_zero\n\t"
 5354             "MOV    $dst, 32\n"
 5355       "msw_not_zero:\n\t"
 5356             "ADD    $dst, 32\n"
 5357       "done:" %}
 5358   ins_encode %{
 5359     Register Rdst = $dst$$Register;
 5360     Register Rsrc = $src$$Register;
 5361     Label msw_not_zero;
 5362     Label done;
 5363     __ bsfl(Rdst, Rsrc);
 5364     __ jccb(Assembler::notZero, done);
 5365     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5366     __ jccb(Assembler::notZero, msw_not_zero);
 5367     __ movl(Rdst, BitsPerInt);
 5368     __ bind(msw_not_zero);
 5369     __ addl(Rdst, BitsPerInt);
 5370     __ bind(done);
 5371   %}
 5372   ins_pipe(ialu_reg);
 5373 %}
 5374 
 5375 
 5376 //---------- Population Count Instructions -------------------------------------
 5377 
 5378 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5379   predicate(UsePopCountInstruction);
 5380   match(Set dst (PopCountI src));
 5381   effect(KILL cr);
 5382 
 5383   format %{ "POPCNT $dst, $src" %}
 5384   ins_encode %{
 5385     __ popcntl($dst$$Register, $src$$Register);
 5386   %}
 5387   ins_pipe(ialu_reg);
 5388 %}
 5389 
 5390 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5391   predicate(UsePopCountInstruction);
 5392   match(Set dst (PopCountI (LoadI mem)));
 5393   effect(KILL cr);
 5394 
 5395   format %{ "POPCNT $dst, $mem" %}
 5396   ins_encode %{
 5397     __ popcntl($dst$$Register, $mem$$Address);
 5398   %}
 5399   ins_pipe(ialu_reg);
 5400 %}
 5401 
 5402 // Note: Long.bitCount(long) returns an int.
 5403 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5404   predicate(UsePopCountInstruction);
 5405   match(Set dst (PopCountL src));
 5406   effect(KILL cr, TEMP tmp, TEMP dst);
 5407 
 5408   format %{ "POPCNT $dst, $src.lo\n\t"
 5409             "POPCNT $tmp, $src.hi\n\t"
 5410             "ADD    $dst, $tmp" %}
 5411   ins_encode %{
 5412     __ popcntl($dst$$Register, $src$$Register);
 5413     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5414     __ addl($dst$$Register, $tmp$$Register);
 5415   %}
 5416   ins_pipe(ialu_reg);
 5417 %}
 5418 
 5419 // Note: Long.bitCount(long) returns an int.
 5420 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5421   predicate(UsePopCountInstruction);
 5422   match(Set dst (PopCountL (LoadL mem)));
 5423   effect(KILL cr, TEMP tmp, TEMP dst);
 5424 
 5425   format %{ "POPCNT $dst, $mem\n\t"
 5426             "POPCNT $tmp, $mem+4\n\t"
 5427             "ADD    $dst, $tmp" %}
 5428   ins_encode %{
 5429     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5430     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5431     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5432     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5433     __ addl($dst$$Register, $tmp$$Register);
 5434   %}
 5435   ins_pipe(ialu_reg);
 5436 %}
 5437 
 5438 
 5439 //----------Load/Store/Move Instructions---------------------------------------
 5440 //----------Load Instructions--------------------------------------------------
 5441 // Load Byte (8bit signed)
 5442 instruct loadB(xRegI dst, memory mem) %{
 5443   match(Set dst (LoadB mem));
 5444 
 5445   ins_cost(125);
 5446   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5447 
 5448   ins_encode %{
 5449     __ movsbl($dst$$Register, $mem$$Address);
 5450   %}
 5451 
 5452   ins_pipe(ialu_reg_mem);
 5453 %}
 5454 
 5455 // Load Byte (8bit signed) into Long Register
 5456 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5457   match(Set dst (ConvI2L (LoadB mem)));
 5458   effect(KILL cr);
 5459 
 5460   ins_cost(375);
 5461   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5462             "MOV    $dst.hi,$dst.lo\n\t"
 5463             "SAR    $dst.hi,7" %}
 5464 
 5465   ins_encode %{
 5466     __ movsbl($dst$$Register, $mem$$Address);
 5467     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5468     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign extended.
 5469   %}
 5470 
 5471   ins_pipe(ialu_reg_mem);
 5472 %}
 5473 
 5474 // Load Unsigned Byte (8bit UNsigned)
 5475 instruct loadUB(xRegI dst, memory mem) %{
 5476   match(Set dst (LoadUB mem));
 5477 
 5478   ins_cost(125);
 5479   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5480 
 5481   ins_encode %{
 5482     __ movzbl($dst$$Register, $mem$$Address);
 5483   %}
 5484 
 5485   ins_pipe(ialu_reg_mem);
 5486 %}
 5487 
 5488 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5489 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5490   match(Set dst (ConvI2L (LoadUB mem)));
 5491   effect(KILL cr);
 5492 
 5493   ins_cost(250);
 5494   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5495             "XOR    $dst.hi,$dst.hi" %}
 5496 
 5497   ins_encode %{
 5498     Register Rdst = $dst$$Register;
 5499     __ movzbl(Rdst, $mem$$Address);
 5500     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5501   %}
 5502 
 5503   ins_pipe(ialu_reg_mem);
 5504 %}
 5505 
 5506 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5507 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5508   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5509   effect(KILL cr);
 5510 
 5511   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5512             "XOR    $dst.hi,$dst.hi\n\t"
 5513             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5514   ins_encode %{
 5515     Register Rdst = $dst$$Register;
 5516     __ movzbl(Rdst, $mem$$Address);
 5517     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5518     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5519   %}
 5520   ins_pipe(ialu_reg_mem);
 5521 %}
 5522 
 5523 // Load Short (16bit signed)
 5524 instruct loadS(rRegI dst, memory mem) %{
 5525   match(Set dst (LoadS mem));
 5526 
 5527   ins_cost(125);
 5528   format %{ "MOVSX  $dst,$mem\t# short" %}
 5529 
 5530   ins_encode %{
 5531     __ movswl($dst$$Register, $mem$$Address);
 5532   %}
 5533 
 5534   ins_pipe(ialu_reg_mem);
 5535 %}
 5536 
 5537 // Load Short (16 bit signed) to Byte (8 bit signed)
 5538 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5539   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5540 
 5541   ins_cost(125);
 5542   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5543   ins_encode %{
 5544     __ movsbl($dst$$Register, $mem$$Address);
 5545   %}
 5546   ins_pipe(ialu_reg_mem);
 5547 %}
 5548 
 5549 // Load Short (16bit signed) into Long Register
 5550 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5551   match(Set dst (ConvI2L (LoadS mem)));
 5552   effect(KILL cr);
 5553 
 5554   ins_cost(375);
 5555   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5556             "MOV    $dst.hi,$dst.lo\n\t"
 5557             "SAR    $dst.hi,15" %}
 5558 
 5559   ins_encode %{
 5560     __ movswl($dst$$Register, $mem$$Address);
 5561     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5562     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign extended.
 5563   %}
 5564 
 5565   ins_pipe(ialu_reg_mem);
 5566 %}
 5567 
 5568 // Load Unsigned Short/Char (16bit unsigned)
 5569 instruct loadUS(rRegI dst, memory mem) %{
 5570   match(Set dst (LoadUS mem));
 5571 
 5572   ins_cost(125);
 5573   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5574 
 5575   ins_encode %{
 5576     __ movzwl($dst$$Register, $mem$$Address);
 5577   %}
 5578 
 5579   ins_pipe(ialu_reg_mem);
 5580 %}
 5581 
 5582 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5583 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5584   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5585 
 5586   ins_cost(125);
 5587   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5588   ins_encode %{
 5589     __ movsbl($dst$$Register, $mem$$Address);
 5590   %}
 5591   ins_pipe(ialu_reg_mem);
 5592 %}
 5593 
 5594 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5595 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5596   match(Set dst (ConvI2L (LoadUS mem)));
 5597   effect(KILL cr);
 5598 
 5599   ins_cost(250);
 5600   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5601             "XOR    $dst.hi,$dst.hi" %}
 5602 
 5603   ins_encode %{
 5604     __ movzwl($dst$$Register, $mem$$Address);
 5605     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5606   %}
 5607 
 5608   ins_pipe(ialu_reg_mem);
 5609 %}
 5610 
 5611 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5612 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5613   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5614   effect(KILL cr);
 5615 
 5616   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5617             "XOR    $dst.hi,$dst.hi" %}
 5618   ins_encode %{
 5619     Register Rdst = $dst$$Register;
 5620     __ movzbl(Rdst, $mem$$Address);
 5621     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5622   %}
 5623   ins_pipe(ialu_reg_mem);
 5624 %}
 5625 
 5626 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5627 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5628   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5629   effect(KILL cr);
 5630 
 5631   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5632             "XOR    $dst.hi,$dst.hi\n\t"
 5633             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5634   ins_encode %{
 5635     Register Rdst = $dst$$Register;
 5636     __ movzwl(Rdst, $mem$$Address);
 5637     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5638     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5639   %}
 5640   ins_pipe(ialu_reg_mem);
 5641 %}
 5642 
 5643 // Load Integer
 5644 instruct loadI(rRegI dst, memory mem) %{
 5645   match(Set dst (LoadI mem));
 5646 
 5647   ins_cost(125);
 5648   format %{ "MOV    $dst,$mem\t# int" %}
 5649 
 5650   ins_encode %{
 5651     __ movl($dst$$Register, $mem$$Address);
 5652   %}
 5653 
 5654   ins_pipe(ialu_reg_mem);
 5655 %}
 5656 
 5657 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5658 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5659   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5660 
 5661   ins_cost(125);
 5662   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5663   ins_encode %{
 5664     __ movsbl($dst$$Register, $mem$$Address);
 5665   %}
 5666   ins_pipe(ialu_reg_mem);
 5667 %}
 5668 
 5669 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5670 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5671   match(Set dst (AndI (LoadI mem) mask));
 5672 
 5673   ins_cost(125);
 5674   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5675   ins_encode %{
 5676     __ movzbl($dst$$Register, $mem$$Address);
 5677   %}
 5678   ins_pipe(ialu_reg_mem);
 5679 %}
 5680 
 5681 // Load Integer (32 bit signed) to Short (16 bit signed)
 5682 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5683   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5684 
 5685   ins_cost(125);
 5686   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5687   ins_encode %{
 5688     __ movswl($dst$$Register, $mem$$Address);
 5689   %}
 5690   ins_pipe(ialu_reg_mem);
 5691 %}
 5692 
 5693 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5694 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5695   match(Set dst (AndI (LoadI mem) mask));
 5696 
 5697   ins_cost(125);
 5698   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5699   ins_encode %{
 5700     __ movzwl($dst$$Register, $mem$$Address);
 5701   %}
 5702   ins_pipe(ialu_reg_mem);
 5703 %}
 5704 
 5705 // Load Integer into Long Register
 5706 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5707   match(Set dst (ConvI2L (LoadI mem)));
 5708   effect(KILL cr);
 5709 
 5710   ins_cost(375);
 5711   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5712             "MOV    $dst.hi,$dst.lo\n\t"
 5713             "SAR    $dst.hi,31" %}
 5714 
 5715   ins_encode %{
 5716     __ movl($dst$$Register, $mem$$Address);
 5717     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5718     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5719   %}
 5720 
 5721   ins_pipe(ialu_reg_mem);
 5722 %}
 5723 
 5724 // Load Integer with mask 0xFF into Long Register
 5725 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5726   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5727   effect(KILL cr);
 5728 
 5729   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5730             "XOR    $dst.hi,$dst.hi" %}
 5731   ins_encode %{
 5732     Register Rdst = $dst$$Register;
 5733     __ movzbl(Rdst, $mem$$Address);
 5734     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5735   %}
 5736   ins_pipe(ialu_reg_mem);
 5737 %}
 5738 
 5739 // Load Integer with mask 0xFFFF into Long Register
 5740 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5741   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5742   effect(KILL cr);
 5743 
 5744   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5745             "XOR    $dst.hi,$dst.hi" %}
 5746   ins_encode %{
 5747     Register Rdst = $dst$$Register;
 5748     __ movzwl(Rdst, $mem$$Address);
 5749     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5750   %}
 5751   ins_pipe(ialu_reg_mem);
 5752 %}
 5753 
 5754 // Load Integer with 31-bit mask into Long Register
 5755 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5756   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5757   effect(KILL cr);
 5758 
 5759   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5760             "XOR    $dst.hi,$dst.hi\n\t"
 5761             "AND    $dst.lo,$mask" %}
 5762   ins_encode %{
 5763     Register Rdst = $dst$$Register;
 5764     __ movl(Rdst, $mem$$Address);
 5765     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5766     __ andl(Rdst, $mask$$constant);
 5767   %}
 5768   ins_pipe(ialu_reg_mem);
 5769 %}
 5770 
 5771 // Load Unsigned Integer into Long Register
 5772 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5773   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5774   effect(KILL cr);
 5775 
 5776   ins_cost(250);
 5777   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5778             "XOR    $dst.hi,$dst.hi" %}
 5779 
 5780   ins_encode %{
 5781     __ movl($dst$$Register, $mem$$Address);
 5782     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5783   %}
 5784 
 5785   ins_pipe(ialu_reg_mem);
 5786 %}
 5787 
 5788 // Load Long.  Cannot clobber address while loading, so restrict address
 5789 // register to ESI
 5790 instruct loadL(eRegL dst, load_long_memory mem) %{
 5791   predicate(!((LoadLNode*)n)->require_atomic_access());
 5792   match(Set dst (LoadL mem));
 5793 
 5794   ins_cost(250);
 5795   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5796             "MOV    $dst.hi,$mem+4" %}
 5797 
 5798   ins_encode %{
 5799     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5800     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5801     __ movl($dst$$Register, Amemlo);
 5802     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5803   %}
 5804 
 5805   ins_pipe(ialu_reg_long_mem);
 5806 %}
 5807 
 5808 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5809 // then store it down to the stack and reload on the int
 5810 // side.
 5811 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5812   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5813   match(Set dst (LoadL mem));
 5814 
 5815   ins_cost(200);
 5816   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5817             "FISTp  $dst" %}
 5818   ins_encode(enc_loadL_volatile(mem,dst));
 5819   ins_pipe( fpu_reg_mem );
 5820 %}
 5821 
 5822 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5823   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5824   match(Set dst (LoadL mem));
 5825   effect(TEMP tmp);
 5826   ins_cost(180);
 5827   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5828             "MOVSD  $dst,$tmp" %}
 5829   ins_encode %{
 5830     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5831     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5832   %}
 5833   ins_pipe( pipe_slow );
 5834 %}
 5835 
 5836 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5837   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5838   match(Set dst (LoadL mem));
 5839   effect(TEMP tmp);
 5840   ins_cost(160);
 5841   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5842             "MOVD   $dst.lo,$tmp\n\t"
 5843             "PSRLQ  $tmp,32\n\t"
 5844             "MOVD   $dst.hi,$tmp" %}
 5845   ins_encode %{
 5846     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5847     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5848     __ psrlq($tmp$$XMMRegister, 32);
 5849     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5850   %}
 5851   ins_pipe( pipe_slow );
 5852 %}
 5853 
 5854 // Load Range
 5855 instruct loadRange(rRegI dst, memory mem) %{
 5856   match(Set dst (LoadRange mem));
 5857 
 5858   ins_cost(125);
 5859   format %{ "MOV    $dst,$mem" %}
 5860   opcode(0x8B);
 5861   ins_encode( OpcP, RegMem(dst,mem));
 5862   ins_pipe( ialu_reg_mem );
 5863 %}
 5864 
 5865 
 5866 // Load Pointer
 5867 instruct loadP(eRegP dst, memory mem) %{
 5868   match(Set dst (LoadP mem));
 5869 
 5870   ins_cost(125);
 5871   format %{ "MOV    $dst,$mem" %}
 5872   opcode(0x8B);
 5873   ins_encode( OpcP, RegMem(dst,mem));
 5874   ins_pipe( ialu_reg_mem );
 5875 %}
 5876 
 5877 // Load Klass Pointer
 5878 instruct loadKlass(eRegP dst, memory mem) %{
 5879   match(Set dst (LoadKlass mem));
 5880 
 5881   ins_cost(125);
 5882   format %{ "MOV    $dst,$mem" %}
 5883   opcode(0x8B);
 5884   ins_encode( OpcP, RegMem(dst,mem));
 5885   ins_pipe( ialu_reg_mem );
 5886 %}
 5887 
 5888 // Load Double
 5889 instruct loadDPR(regDPR dst, memory mem) %{
 5890   predicate(UseSSE<=1);
 5891   match(Set dst (LoadD mem));
 5892 
 5893   ins_cost(150);
 5894   format %{ "FLD_D  ST,$mem\n\t"
 5895             "FSTP   $dst" %}
 5896   opcode(0xDD);               /* DD /0 */
 5897   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5898               Pop_Reg_DPR(dst) );
 5899   ins_pipe( fpu_reg_mem );
 5900 %}
 5901 
 5902 // Load Double to XMM
 5903 instruct loadD(regD dst, memory mem) %{
 5904   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5905   match(Set dst (LoadD mem));
 5906   ins_cost(145);
 5907   format %{ "MOVSD  $dst,$mem" %}
 5908   ins_encode %{
 5909     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5910   %}
 5911   ins_pipe( pipe_slow );
 5912 %}
 5913 
 5914 instruct loadD_partial(regD dst, memory mem) %{
 5915   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5916   match(Set dst (LoadD mem));
 5917   ins_cost(145);
 5918   format %{ "MOVLPD $dst,$mem" %}
 5919   ins_encode %{
 5920     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5921   %}
 5922   ins_pipe( pipe_slow );
 5923 %}
 5924 
 5925 // Load to XMM register (single-precision floating point)
 5926 // MOVSS instruction
 5927 instruct loadF(regF dst, memory mem) %{
 5928   predicate(UseSSE>=1);
 5929   match(Set dst (LoadF mem));
 5930   ins_cost(145);
 5931   format %{ "MOVSS  $dst,$mem" %}
 5932   ins_encode %{
 5933     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5934   %}
 5935   ins_pipe( pipe_slow );
 5936 %}
 5937 
 5938 // Load Float
 5939 instruct loadFPR(regFPR dst, memory mem) %{
 5940   predicate(UseSSE==0);
 5941   match(Set dst (LoadF mem));
 5942 
 5943   ins_cost(150);
 5944   format %{ "FLD_S  ST,$mem\n\t"
 5945             "FSTP   $dst" %}
 5946   opcode(0xD9);               /* D9 /0 */
 5947   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5948               Pop_Reg_FPR(dst) );
 5949   ins_pipe( fpu_reg_mem );
 5950 %}
 5951 
 5952 // Load Effective Address
 5953 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5954   match(Set dst mem);
 5955 
 5956   ins_cost(110);
 5957   format %{ "LEA    $dst,$mem" %}
 5958   opcode(0x8D);
 5959   ins_encode( OpcP, RegMem(dst,mem));
 5960   ins_pipe( ialu_reg_reg_fat );
 5961 %}
 5962 
 5963 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5964   match(Set dst mem);
 5965 
 5966   ins_cost(110);
 5967   format %{ "LEA    $dst,$mem" %}
 5968   opcode(0x8D);
 5969   ins_encode( OpcP, RegMem(dst,mem));
 5970   ins_pipe( ialu_reg_reg_fat );
 5971 %}
 5972 
 5973 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5974   match(Set dst mem);
 5975 
 5976   ins_cost(110);
 5977   format %{ "LEA    $dst,$mem" %}
 5978   opcode(0x8D);
 5979   ins_encode( OpcP, RegMem(dst,mem));
 5980   ins_pipe( ialu_reg_reg_fat );
 5981 %}
 5982 
 5983 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5984   match(Set dst mem);
 5985 
 5986   ins_cost(110);
 5987   format %{ "LEA    $dst,$mem" %}
 5988   opcode(0x8D);
 5989   ins_encode( OpcP, RegMem(dst,mem));
 5990   ins_pipe( ialu_reg_reg_fat );
 5991 %}
 5992 
 5993 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5994   match(Set dst mem);
 5995 
 5996   ins_cost(110);
 5997   format %{ "LEA    $dst,$mem" %}
 5998   opcode(0x8D);
 5999   ins_encode( OpcP, RegMem(dst,mem));
 6000   ins_pipe( ialu_reg_reg_fat );
 6001 %}
 6002 
 6003 // Load Constant
 6004 instruct loadConI(rRegI dst, immI src) %{
 6005   match(Set dst src);
 6006 
 6007   format %{ "MOV    $dst,$src" %}
 6008   ins_encode( LdImmI(dst, src) );
 6009   ins_pipe( ialu_reg_fat );
 6010 %}
 6011 
 6012 // Load Constant zero
 6013 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 6014   match(Set dst src);
 6015   effect(KILL cr);
 6016 
 6017   ins_cost(50);
 6018   format %{ "XOR    $dst,$dst" %}
 6019   opcode(0x33);  /* + rd */
 6020   ins_encode( OpcP, RegReg( dst, dst ) );
 6021   ins_pipe( ialu_reg );
 6022 %}
 6023 
 6024 instruct loadConP(eRegP dst, immP src) %{
 6025   match(Set dst src);
 6026 
 6027   format %{ "MOV    $dst,$src" %}
 6028   opcode(0xB8);  /* + rd */
 6029   ins_encode( LdImmP(dst, src) );
 6030   ins_pipe( ialu_reg_fat );
 6031 %}
 6032 
 6033 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 6034   match(Set dst src);
 6035   effect(KILL cr);
 6036   ins_cost(200);
 6037   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6038             "MOV    $dst.hi,$src.hi" %}
 6039   opcode(0xB8);
 6040   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6041   ins_pipe( ialu_reg_long_fat );
 6042 %}
 6043 
 6044 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6045   match(Set dst src);
 6046   effect(KILL cr);
 6047   ins_cost(150);
 6048   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6049             "XOR    $dst.hi,$dst.hi" %}
 6050   opcode(0x33,0x33);
 6051   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6052   ins_pipe( ialu_reg_long );
 6053 %}
 6054 
 6055 // The instruction usage is guarded by predicate in operand immFPR().
 6056 instruct loadConFPR(regFPR dst, immFPR con) %{
 6057   match(Set dst con);
 6058   ins_cost(125);
 6059   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6060             "FSTP   $dst" %}
 6061   ins_encode %{
 6062     __ fld_s($constantaddress($con));
 6063     __ fstp_d($dst$$reg);
 6064   %}
 6065   ins_pipe(fpu_reg_con);
 6066 %}
 6067 
 6068 // The instruction usage is guarded by predicate in operand immFPR0().
 6069 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6070   match(Set dst con);
 6071   ins_cost(125);
 6072   format %{ "FLDZ   ST\n\t"
 6073             "FSTP   $dst" %}
 6074   ins_encode %{
 6075     __ fldz();
 6076     __ fstp_d($dst$$reg);
 6077   %}
 6078   ins_pipe(fpu_reg_con);
 6079 %}
 6080 
 6081 // The instruction usage is guarded by predicate in operand immFPR1().
 6082 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6083   match(Set dst con);
 6084   ins_cost(125);
 6085   format %{ "FLD1   ST\n\t"
 6086             "FSTP   $dst" %}
 6087   ins_encode %{
 6088     __ fld1();
 6089     __ fstp_d($dst$$reg);
 6090   %}
 6091   ins_pipe(fpu_reg_con);
 6092 %}
 6093 
 6094 // The instruction usage is guarded by predicate in operand immF().
 6095 instruct loadConF(regF dst, immF con) %{
 6096   match(Set dst con);
 6097   ins_cost(125);
 6098   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6099   ins_encode %{
 6100     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6101   %}
 6102   ins_pipe(pipe_slow);
 6103 %}
 6104 
 6105 // The instruction usage is guarded by predicate in operand immF0().
 6106 instruct loadConF0(regF dst, immF0 src) %{
 6107   match(Set dst src);
 6108   ins_cost(100);
 6109   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6110   ins_encode %{
 6111     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6112   %}
 6113   ins_pipe(pipe_slow);
 6114 %}
 6115 
 6116 // The instruction usage is guarded by predicate in operand immDPR().
 6117 instruct loadConDPR(regDPR dst, immDPR con) %{
 6118   match(Set dst con);
 6119   ins_cost(125);
 6120 
 6121   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6122             "FSTP   $dst" %}
 6123   ins_encode %{
 6124     __ fld_d($constantaddress($con));
 6125     __ fstp_d($dst$$reg);
 6126   %}
 6127   ins_pipe(fpu_reg_con);
 6128 %}
 6129 
 6130 // The instruction usage is guarded by predicate in operand immDPR0().
 6131 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6132   match(Set dst con);
 6133   ins_cost(125);
 6134 
 6135   format %{ "FLDZ   ST\n\t"
 6136             "FSTP   $dst" %}
 6137   ins_encode %{
 6138     __ fldz();
 6139     __ fstp_d($dst$$reg);
 6140   %}
 6141   ins_pipe(fpu_reg_con);
 6142 %}
 6143 
 6144 // The instruction usage is guarded by predicate in operand immDPR1().
 6145 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6146   match(Set dst con);
 6147   ins_cost(125);
 6148 
 6149   format %{ "FLD1   ST\n\t"
 6150             "FSTP   $dst" %}
 6151   ins_encode %{
 6152     __ fld1();
 6153     __ fstp_d($dst$$reg);
 6154   %}
 6155   ins_pipe(fpu_reg_con);
 6156 %}
 6157 
 6158 // The instruction usage is guarded by predicate in operand immD().
 6159 instruct loadConD(regD dst, immD con) %{
 6160   match(Set dst con);
 6161   ins_cost(125);
 6162   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6163   ins_encode %{
 6164     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6165   %}
 6166   ins_pipe(pipe_slow);
 6167 %}
 6168 
 6169 // The instruction usage is guarded by predicate in operand immD0().
 6170 instruct loadConD0(regD dst, immD0 src) %{
 6171   match(Set dst src);
 6172   ins_cost(100);
 6173   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6174   ins_encode %{
 6175     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6176   %}
 6177   ins_pipe( pipe_slow );
 6178 %}
 6179 
 6180 // Load Stack Slot
 6181 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6182   match(Set dst src);
 6183   ins_cost(125);
 6184 
 6185   format %{ "MOV    $dst,$src" %}
 6186   opcode(0x8B);
 6187   ins_encode( OpcP, RegMem(dst,src));
 6188   ins_pipe( ialu_reg_mem );
 6189 %}
 6190 
 6191 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6192   match(Set dst src);
 6193 
 6194   ins_cost(200);
 6195   format %{ "MOV    $dst,$src.lo\n\t"
 6196             "MOV    $dst+4,$src.hi" %}
 6197   opcode(0x8B, 0x8B);
 6198   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6199   ins_pipe( ialu_mem_long_reg );
 6200 %}
 6201 
 6202 // Load Stack Slot
 6203 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6204   match(Set dst src);
 6205   ins_cost(125);
 6206 
 6207   format %{ "MOV    $dst,$src" %}
 6208   opcode(0x8B);
 6209   ins_encode( OpcP, RegMem(dst,src));
 6210   ins_pipe( ialu_reg_mem );
 6211 %}
 6212 
 6213 // Load Stack Slot
 6214 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6215   match(Set dst src);
 6216   ins_cost(125);
 6217 
 6218   format %{ "FLD_S  $src\n\t"
 6219             "FSTP   $dst" %}
 6220   opcode(0xD9);               /* D9 /0, FLD m32real */
 6221   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6222               Pop_Reg_FPR(dst) );
 6223   ins_pipe( fpu_reg_mem );
 6224 %}
 6225 
 6226 // Load Stack Slot
 6227 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6228   match(Set dst src);
 6229   ins_cost(125);
 6230 
 6231   format %{ "FLD_D  $src\n\t"
 6232             "FSTP   $dst" %}
 6233   opcode(0xDD);               /* DD /0, FLD m64real */
 6234   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6235               Pop_Reg_DPR(dst) );
 6236   ins_pipe( fpu_reg_mem );
 6237 %}
 6238 
 6239 // Prefetch instructions for allocation.
 6240 // Must be safe to execute with invalid address (cannot fault).
 6241 
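// The AllocatePrefetchInstr flag selects among the variants below:
//   0 -> PREFETCHNTA, 1 -> PREFETCHT0, 2 -> PREFETCHT2, 3 -> PREFETCHW.
// With UseSSE==0 (and AllocatePrefetchInstr!=3) the prefetch is a no-op.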
 6242 instruct prefetchAlloc0( memory mem ) %{
 6243   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6244   match(PrefetchAllocation mem);
 6245   ins_cost(0);
 6246   size(0);
 6247   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6248   ins_encode();
 6249   ins_pipe(empty);
 6250 %}
 6251 
 6252 instruct prefetchAlloc( memory mem ) %{
 6253   predicate(AllocatePrefetchInstr==3);
 6254   match( PrefetchAllocation mem );
 6255   ins_cost(100);
 6256 
 6257   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6258   ins_encode %{
 6259     __ prefetchw($mem$$Address);
 6260   %}
 6261   ins_pipe(ialu_mem);
 6262 %}
 6263 
 6264 instruct prefetchAllocNTA( memory mem ) %{
 6265   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6266   match(PrefetchAllocation mem);
 6267   ins_cost(100);
 6268 
 6269   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6270   ins_encode %{
 6271     __ prefetchnta($mem$$Address);
 6272   %}
 6273   ins_pipe(ialu_mem);
 6274 %}
 6275 
 6276 instruct prefetchAllocT0( memory mem ) %{
 6277   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6278   match(PrefetchAllocation mem);
 6279   ins_cost(100);
 6280 
 6281   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6282   ins_encode %{
 6283     __ prefetcht0($mem$$Address);
 6284   %}
 6285   ins_pipe(ialu_mem);
 6286 %}
 6287 
 6288 instruct prefetchAllocT2( memory mem ) %{
 6289   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6290   match(PrefetchAllocation mem);
 6291   ins_cost(100);
 6292 
 6293   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6294   ins_encode %{
 6295     __ prefetcht2($mem$$Address);
 6296   %}
 6297   ins_pipe(ialu_mem);
 6298 %}
 6299 
 6300 //----------Store Instructions-------------------------------------------------
 6301 
 6302 // Store Byte
 6303 instruct storeB(memory mem, xRegI src) %{
 6304   match(Set mem (StoreB mem src));
 6305 
 6306   ins_cost(125);
 6307   format %{ "MOV8   $mem,$src" %}
 6308   opcode(0x88);
 6309   ins_encode( OpcP, RegMem( src, mem ) );
 6310   ins_pipe( ialu_mem_reg );
 6311 %}
 6312 
 6313 // Store Char/Short
 6314 instruct storeC(memory mem, rRegI src) %{
 6315   match(Set mem (StoreC mem src));
 6316 
 6317   ins_cost(125);
 6318   format %{ "MOV16  $mem,$src" %}
 6319   opcode(0x89, 0x66);
 6320   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6321   ins_pipe( ialu_mem_reg );
 6322 %}
 6323 
 6324 // Store Integer
 6325 instruct storeI(memory mem, rRegI src) %{
 6326   match(Set mem (StoreI mem src));
 6327 
 6328   ins_cost(125);
 6329   format %{ "MOV    $mem,$src" %}
 6330   opcode(0x89);
 6331   ins_encode( OpcP, RegMem( src, mem ) );
 6332   ins_pipe( ialu_mem_reg );
 6333 %}
 6334 
 6335 // Store Long
 6336 instruct storeL(long_memory mem, eRegL src) %{
 6337   predicate(!((StoreLNode*)n)->require_atomic_access());
 6338   match(Set mem (StoreL mem src));
 6339 
 6340   ins_cost(200);
 6341   format %{ "MOV    $mem,$src.lo\n\t"
 6342             "MOV    $mem+4,$src.hi" %}
 6343   opcode(0x89, 0x89);
 6344   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6345   ins_pipe( ialu_mem_long_reg );
 6346 %}
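// A plain (non-volatile) long store is split into two 32-bit MOVs; the
// require_atomic_access() predicate routes volatile longs to the atomic
// forms further below.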
 6347 
 6348 // Store Long to Integer
 6349 instruct storeL2I(memory mem, eRegL src) %{
 6350   match(Set mem (StoreI mem (ConvL2I src)));
 6351 
 6352   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6353   ins_encode %{
 6354     __ movl($mem$$Address, $src$$Register);
 6355   %}
 6356   ins_pipe(ialu_mem_reg);
 6357 %}
 6358 
 6359 // Volatile Store Long.  Must be atomic, so move it into
 6360 // the FP TOS and then do a 64-bit FISTP.  Has to probe the
 6361 // target address before the store (for null-ptr checks)
 6362 // so the memory operand is used twice in the encoding.
 6363 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6364   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6365   match(Set mem (StoreL mem src));
 6366   effect( KILL cr );
 6367   ins_cost(400);
 6368   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6369             "FILD   $src\n\t"
 6370             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6371   opcode(0x3B);
 6372   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6373   ins_pipe( fpu_reg_mem );
 6374 %}
 6375 
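// With SSE2 an aligned 64-bit MOVSD store is a single atomic access, so the
// value is staged in an XMM register instead of going through the FPU stack.
// The address is still probed first (CMP) to get the implicit null check.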
 6376 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6377   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6378   match(Set mem (StoreL mem src));
 6379   effect( TEMP tmp, KILL cr );
 6380   ins_cost(380);
 6381   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6382             "MOVSD  $tmp,$src\n\t"
 6383             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6384   ins_encode %{
 6385     __ cmpl(rax, $mem$$Address);
 6386     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6387     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6388   %}
 6389   ins_pipe( pipe_slow );
 6390 %}
 6391 
 6392 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6393   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6394   match(Set mem (StoreL mem src));
 6395   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6396   ins_cost(360);
 6397   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6398             "MOVD   $tmp,$src.lo\n\t"
 6399             "MOVD   $tmp2,$src.hi\n\t"
 6400             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6401             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6402   ins_encode %{
 6403     __ cmpl(rax, $mem$$Address);
 6404     __ movdl($tmp$$XMMRegister, $src$$Register);
 6405     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6406     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6407     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6408   %}
 6409   ins_pipe( pipe_slow );
 6410 %}
 6411 
 6412 // Store Pointer; for storing unknown oops and raw pointers
 6413 instruct storeP(memory mem, anyRegP src) %{
 6414   match(Set mem (StoreP mem src));
 6415 
 6416   ins_cost(125);
 6417   format %{ "MOV    $mem,$src" %}
 6418   opcode(0x89);
 6419   ins_encode( OpcP, RegMem( src, mem ) );
 6420   ins_pipe( ialu_mem_reg );
 6421 %}
 6422 
 6423 // Store Integer Immediate
 6424 instruct storeImmI(memory mem, immI src) %{
 6425   match(Set mem (StoreI mem src));
 6426 
 6427   ins_cost(150);
 6428   format %{ "MOV    $mem,$src" %}
 6429   opcode(0xC7);               /* C7 /0 */
 6430   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6431   ins_pipe( ialu_mem_imm );
 6432 %}
 6433 
 6434 // Store Short/Char Immediate
 6435 instruct storeImmI16(memory mem, immI16 src) %{
 6436   predicate(UseStoreImmI16);
 6437   match(Set mem (StoreC mem src));
 6438 
 6439   ins_cost(150);
 6440   format %{ "MOV16  $mem,$src" %}
 6441   opcode(0xC7);     /* C7 /0, same as the 32-bit store immediate but with an operand-size prefix */
 6442   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6443   ins_pipe( ialu_mem_imm );
 6444 %}
 6445 
 6446 // Store Pointer Immediate; null pointers or constant oops that do not
 6447 // need card-mark barriers.
 6448 instruct storeImmP(memory mem, immP src) %{
 6449   match(Set mem (StoreP mem src));
 6450 
 6451   ins_cost(150);
 6452   format %{ "MOV    $mem,$src" %}
 6453   opcode(0xC7);               /* C7 /0 */
 6454   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6455   ins_pipe( ialu_mem_imm );
 6456 %}
 6457 
 6458 // Store Byte Immediate
 6459 instruct storeImmB(memory mem, immI8 src) %{
 6460   match(Set mem (StoreB mem src));
 6461 
 6462   ins_cost(150);
 6463   format %{ "MOV8   $mem,$src" %}
 6464   opcode(0xC6);               /* C6 /0 */
 6465   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6466   ins_pipe( ialu_mem_imm );
 6467 %}
 6468 
 6469 // Store CMS card-mark Immediate
 6470 instruct storeImmCM(memory mem, immI8 src) %{
 6471   match(Set mem (StoreCM mem src));
 6472 
 6473   ins_cost(150);
 6474   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6475   opcode(0xC6);               /* C6 /0 */
 6476   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6477   ins_pipe( ialu_mem_imm );
 6478 %}
 6479 
 6480 // Store Double
 6481 instruct storeDPR( memory mem, regDPR1 src) %{
 6482   predicate(UseSSE<=1);
 6483   match(Set mem (StoreD mem src));
 6484 
 6485   ins_cost(100);
 6486   format %{ "FST_D  $mem,$src" %}
 6487   opcode(0xDD);       /* DD /2 */
 6488   ins_encode( enc_FPR_store(mem,src) );
 6489   ins_pipe( fpu_mem_reg );
 6490 %}
 6491 
 6492 // Store double does rounding on x86
 6493 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6494   predicate(UseSSE<=1);
 6495   match(Set mem (StoreD mem (RoundDouble src)));
 6496 
 6497   ins_cost(100);
 6498   format %{ "FST_D  $mem,$src\t# round" %}
 6499   opcode(0xDD);       /* DD /2 */
 6500   ins_encode( enc_FPR_store(mem,src) );
 6501   ins_pipe( fpu_mem_reg );
 6502 %}
 6503 
 6504 // Store XMM register to memory (double-precision floating point)
 6505 // MOVSD instruction
 6506 instruct storeD(memory mem, regD src) %{
 6507   predicate(UseSSE>=2);
 6508   match(Set mem (StoreD mem src));
 6509   ins_cost(95);
 6510   format %{ "MOVSD  $mem,$src" %}
 6511   ins_encode %{
 6512     __ movdbl($mem$$Address, $src$$XMMRegister);
 6513   %}
 6514   ins_pipe( pipe_slow );
 6515 %}
 6516 
 6517 // Store XMM register to memory (single-precision floating point)
 6518 // MOVSS instruction
 6519 instruct storeF(memory mem, regF src) %{
 6520   predicate(UseSSE>=1);
 6521   match(Set mem (StoreF mem src));
 6522   ins_cost(95);
 6523   format %{ "MOVSS  $mem,$src" %}
 6524   ins_encode %{
 6525     __ movflt($mem$$Address, $src$$XMMRegister);
 6526   %}
 6527   ins_pipe( pipe_slow );
 6528 %}
 6529 
 6530 
 6531 // Store Float
 6532 instruct storeFPR( memory mem, regFPR1 src) %{
 6533   predicate(UseSSE==0);
 6534   match(Set mem (StoreF mem src));
 6535 
 6536   ins_cost(100);
 6537   format %{ "FST_S  $mem,$src" %}
 6538   opcode(0xD9);       /* D9 /2 */
 6539   ins_encode( enc_FPR_store(mem,src) );
 6540   ins_pipe( fpu_mem_reg );
 6541 %}
 6542 
 6543 // Store Float does rounding on x86
 6544 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6545   predicate(UseSSE==0);
 6546   match(Set mem (StoreF mem (RoundFloat src)));
 6547 
 6548   ins_cost(100);
 6549   format %{ "FST_S  $mem,$src\t# round" %}
 6550   opcode(0xD9);       /* D9 /2 */
 6551   ins_encode( enc_FPR_store(mem,src) );
 6552   ins_pipe( fpu_mem_reg );
 6553 %}
 6554 
 6555 // Storing a double as a float rounds to single precision on x86
 6556 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6557   predicate(UseSSE<=1);
 6558   match(Set mem (StoreF mem (ConvD2F src)));
 6559 
 6560   ins_cost(100);
 6561   format %{ "FST_S  $mem,$src\t# D-round" %}
 6562   opcode(0xD9);       /* D9 /2 */
 6563   ins_encode( enc_FPR_store(mem,src) );
 6564   ins_pipe( fpu_mem_reg );
 6565 %}
 6566 
 6567 // Store immediate Float value (faster than storing from an FPU register)
 6568 // The instruction usage is guarded by predicate in operand immFPR().
 6569 instruct storeFPR_imm( memory mem, immFPR src) %{
 6570   match(Set mem (StoreF mem src));
 6571 
 6572   ins_cost(50);
 6573   format %{ "MOV    $mem,$src\t# store float" %}
 6574   opcode(0xC7);               /* C7 /0 */
 6575   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6576   ins_pipe( ialu_mem_imm );
 6577 %}
 6578 
 6579 // Store immediate Float value (faster than storing from an XMM register)
 6580 // The instruction usage is guarded by predicate in operand immF().
 6581 instruct storeF_imm( memory mem, immF src) %{
 6582   match(Set mem (StoreF mem src));
 6583 
 6584   ins_cost(50);
 6585   format %{ "MOV    $mem,$src\t# store float" %}
 6586   opcode(0xC7);               /* C7 /0 */
 6587   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6588   ins_pipe( ialu_mem_imm );
 6589 %}
 6590 
 6591 // Store Integer to stack slot
 6592 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6593   match(Set dst src);
 6594 
 6595   ins_cost(100);
 6596   format %{ "MOV    $dst,$src" %}
 6597   opcode(0x89);
 6598   ins_encode( OpcPRegSS( dst, src ) );
 6599   ins_pipe( ialu_mem_reg );
 6600 %}
 6601 
 6602 // Store Integer to stack slot
 6603 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6604   match(Set dst src);
 6605 
 6606   ins_cost(100);
 6607   format %{ "MOV    $dst,$src" %}
 6608   opcode(0x89);
 6609   ins_encode( OpcPRegSS( dst, src ) );
 6610   ins_pipe( ialu_mem_reg );
 6611 %}
 6612 
 6613 // Store Long to stack slot
 6614 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6615   match(Set dst src);
 6616 
 6617   ins_cost(200);
 6618   format %{ "MOV    $dst,$src.lo\n\t"
 6619             "MOV    $dst+4,$src.hi" %}
 6620   opcode(0x89, 0x89);
 6621   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6622   ins_pipe( ialu_mem_long_reg );
 6623 %}
 6624 
 6625 //----------MemBar Instructions-----------------------------------------------
 6626 // Memory barrier flavors
 6627 
 6628 instruct membar_acquire() %{
 6629   match(MemBarAcquire);
 6630   match(LoadFence);
 6631   ins_cost(400);
 6632 
 6633   size(0);
 6634   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6635   ins_encode();
 6636   ins_pipe(empty);
 6637 %}
 6638 
 6639 instruct membar_acquire_lock() %{
 6640   match(MemBarAcquireLock);
 6641   ins_cost(0);
 6642 
 6643   size(0);
 6644   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6645   ins_encode( );
 6646   ins_pipe(empty);
 6647 %}
 6648 
 6649 instruct membar_release() %{
 6650   match(MemBarRelease);
 6651   match(StoreFence);
 6652   ins_cost(400);
 6653 
 6654   size(0);
 6655   format %{ "MEMBAR-release ! (empty encoding)" %}
 6656   ins_encode( );
 6657   ins_pipe(empty);
 6658 %}
 6659 
 6660 instruct membar_release_lock() %{
 6661   match(MemBarReleaseLock);
 6662   ins_cost(0);
 6663 
 6664   size(0);
 6665   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6666   ins_encode( );
 6667   ins_pipe(empty);
 6668 %}
 6669 
 6670 instruct membar_volatile(eFlagsReg cr) %{
 6671   match(MemBarVolatile);
 6672   effect(KILL cr);
 6673   ins_cost(400);
 6674 
 6675   format %{
 6676     $$template
 6677     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6678   %}
 6679   ins_encode %{
 6680     __ membar(Assembler::StoreLoad);
 6681   %}
 6682   ins_pipe(pipe_slow);
 6683 %}
 6684 
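// If the matcher can prove that a preceding locked instruction (e.g. a
// CMPXCHG) already orders the store against later loads, the volatile
// barrier is elided entirely.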
 6685 instruct unnecessary_membar_volatile() %{
 6686   match(MemBarVolatile);
 6687   predicate(Matcher::post_store_load_barrier(n));
 6688   ins_cost(0);
 6689 
 6690   size(0);
 6691   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6692   ins_encode( );
 6693   ins_pipe(empty);
 6694 %}
 6695 
 6696 instruct membar_storestore() %{
 6697   match(MemBarStoreStore);
 6698   match(StoreStoreFence);
 6699   ins_cost(0);
 6700 
 6701   size(0);
 6702   format %{ "MEMBAR-storestore (empty encoding)" %}
 6703   ins_encode( );
 6704   ins_pipe(empty);
 6705 %}
 6706 
 6707 //----------Move Instructions--------------------------------------------------
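// CastX2P is free here: the operand classes pin both src and dst to EAX, so
// the node just re-types the same register and emits no code.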
 6708 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6709   match(Set dst (CastX2P src));
 6710   format %{ "# X2P  $dst, $src" %}
 6711   ins_encode( /*empty encoding*/ );
 6712   ins_cost(0);
 6713   ins_pipe(empty);
 6714 %}
 6715 
 6716 instruct castP2X(rRegI dst, eRegP src ) %{
 6717   match(Set dst (CastP2X src));
 6718   ins_cost(50);
 6719   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6720   ins_encode( enc_Copy( dst, src) );
 6721   ins_pipe( ialu_reg_reg );
 6722 %}
 6723 
 6724 //----------Conditional Move---------------------------------------------------
 6725 // Conditional move
 6726 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6727   predicate(!VM_Version::supports_cmov() );
 6728   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6729   ins_cost(200);
 6730   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6731             "MOV    $dst,$src\n"
 6732       "skip:" %}
 6733   ins_encode %{
 6734     Label Lskip;
 6735     // Invert sense of branch from sense of CMOV
 6736     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6737     __ movl($dst$$Register, $src$$Register);
 6738     __ bind(Lskip);
 6739   %}
 6740   ins_pipe( pipe_cmov_reg );
 6741 %}
 6742 
 6743 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6744   predicate(!VM_Version::supports_cmov() );
 6745   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6746   ins_cost(200);
 6747   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6748             "MOV    $dst,$src\n"
 6749       "skip:" %}
 6750   ins_encode %{
 6751     Label Lskip;
 6752     // Invert sense of branch from sense of CMOV
 6753     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6754     __ movl($dst$$Register, $src$$Register);
 6755     __ bind(Lskip);
 6756   %}
 6757   ins_pipe( pipe_cmov_reg );
 6758 %}
 6759 
 6760 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6761   predicate(VM_Version::supports_cmov() );
 6762   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6763   ins_cost(200);
 6764   format %{ "CMOV$cop $dst,$src" %}
 6765   opcode(0x0F,0x40);
 6766   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6767   ins_pipe( pipe_cmov_reg );
 6768 %}
 6769 
 6770 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6771   predicate(VM_Version::supports_cmov() );
 6772   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6773   ins_cost(200);
 6774   format %{ "CMOV$cop $dst,$src" %}
 6775   opcode(0x0F,0x40);
 6776   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6777   ins_pipe( pipe_cmov_reg );
 6778 %}
 6779 
 6780 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6781   predicate(VM_Version::supports_cmov() );
 6782   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6783   ins_cost(200);
 6784   expand %{
 6785     cmovI_regU(cop, cr, dst, src);
 6786   %}
 6787 %}
 6788 
 6789 // Conditional move
 6790 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6791   predicate(VM_Version::supports_cmov() );
 6792   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6793   ins_cost(250);
 6794   format %{ "CMOV$cop $dst,$src" %}
 6795   opcode(0x0F,0x40);
 6796   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6797   ins_pipe( pipe_cmov_mem );
 6798 %}
 6799 
 6800 // Conditional move
 6801 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6802   predicate(VM_Version::supports_cmov() );
 6803   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6804   ins_cost(250);
 6805   format %{ "CMOV$cop $dst,$src" %}
 6806   opcode(0x0F,0x40);
 6807   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6808   ins_pipe( pipe_cmov_mem );
 6809 %}
 6810 
 6811 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6812   predicate(VM_Version::supports_cmov() );
 6813   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6814   ins_cost(250);
 6815   expand %{
 6816     cmovI_memU(cop, cr, dst, src);
 6817   %}
 6818 %}
 6819 
 6820 // Conditional move
 6821 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6822   predicate(VM_Version::supports_cmov() );
 6823   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6824   ins_cost(200);
 6825   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6826   opcode(0x0F,0x40);
 6827   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6828   ins_pipe( pipe_cmov_reg );
 6829 %}
 6830 
 6831 // Conditional move (non-P6 version)
 6832 // Note:  a CMoveP is generated for  stubs and native wrappers
 6833 //        regardless of whether we are on a P6, so we
 6834 //        emulate a cmov here
 6835 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6836   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6837   ins_cost(300);
 6838   format %{ "Jn$cop   skip\n\t"
 6839           "MOV    $dst,$src\t# pointer\n"
 6840       "skip:" %}
 6841   opcode(0x8b);
 6842   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6843   ins_pipe( pipe_cmov_reg );
 6844 %}
 6845 
 6846 // Conditional move
 6847 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6848   predicate(VM_Version::supports_cmov() );
 6849   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6850   ins_cost(200);
 6851   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6852   opcode(0x0F,0x40);
 6853   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6854   ins_pipe( pipe_cmov_reg );
 6855 %}
 6856 
 6857 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6858   predicate(VM_Version::supports_cmov() );
 6859   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6860   ins_cost(200);
 6861   expand %{
 6862     cmovP_regU(cop, cr, dst, src);
 6863   %}
 6864 %}
 6865 
 6866 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6867 // correctly meets the two pointer arguments; one is an incoming
 6868 // register but the other is a memory operand.  ALSO appears to
 6869 // be buggy with implicit null checks.
 6870 //
 6871 //// Conditional move
 6872 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6873 //  predicate(VM_Version::supports_cmov() );
 6874 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6875 //  ins_cost(250);
 6876 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6877 //  opcode(0x0F,0x40);
 6878 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6879 //  ins_pipe( pipe_cmov_mem );
 6880 //%}
 6881 //
 6882 //// Conditional move
 6883 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6884 //  predicate(VM_Version::supports_cmov() );
 6885 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6886 //  ins_cost(250);
 6887 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6888 //  opcode(0x0F,0x40);
 6889 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6890 //  ins_pipe( pipe_cmov_mem );
 6891 //%}
 6892 
 6893 // Conditional move
 6894 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6895   predicate(UseSSE<=1);
 6896   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6897   ins_cost(200);
 6898   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6899   opcode(0xDA);
 6900   ins_encode( enc_cmov_dpr(cop,src) );
 6901   ins_pipe( pipe_cmovDPR_reg );
 6902 %}
 6903 
 6904 // Conditional move
 6905 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6906   predicate(UseSSE==0);
 6907   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6908   ins_cost(200);
 6909   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6910   opcode(0xDA);
 6911   ins_encode( enc_cmov_dpr(cop,src) );
 6912   ins_pipe( pipe_cmovDPR_reg );
 6913 %}
 6914 
 6915 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6916 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6917   predicate(UseSSE<=1);
 6918   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6919   ins_cost(200);
 6920   format %{ "Jn$cop   skip\n\t"
 6921             "MOV    $dst,$src\t# double\n"
 6922       "skip:" %}
 6923   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6924   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6925   ins_pipe( pipe_cmovDPR_reg );
 6926 %}
 6927 
 6928 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6929 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6930   predicate(UseSSE==0);
 6931   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6932   ins_cost(200);
 6933   format %{ "Jn$cop    skip\n\t"
 6934             "MOV    $dst,$src\t# float\n"
 6935       "skip:" %}
 6936   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6937   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6938   ins_pipe( pipe_cmovDPR_reg );
 6939 %}
 6940 
 6941 // There is no FP conditional move for XMM registers, so emulate it with a short branch
 6942 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6943   predicate (UseSSE>=1);
 6944   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6945   ins_cost(200);
 6946   format %{ "Jn$cop   skip\n\t"
 6947             "MOVSS  $dst,$src\t# float\n"
 6948       "skip:" %}
 6949   ins_encode %{
 6950     Label skip;
 6951     // Invert sense of branch from sense of CMOV
 6952     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6953     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6954     __ bind(skip);
 6955   %}
 6956   ins_pipe( pipe_slow );
 6957 %}
 6958 
 6959 // There is no FP conditional move for XMM registers, so emulate it with a short branch
 6960 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6961   predicate (UseSSE>=2);
 6962   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6963   ins_cost(200);
 6964   format %{ "Jn$cop   skip\n\t"
 6965             "MOVSD  $dst,$src\t# double\n"
 6966       "skip:" %}
 6967   ins_encode %{
 6968     Label skip;
 6969     // Invert sense of branch from sense of CMOV
 6970     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6971     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6972     __ bind(skip);
 6973   %}
 6974   ins_pipe( pipe_slow );
 6975 %}
 6976 
 6977 // unsigned version
 6978 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6979   predicate (UseSSE>=1);
 6980   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6981   ins_cost(200);
 6982   format %{ "Jn$cop   skip\n\t"
 6983             "MOVSS  $dst,$src\t# float\n"
 6984       "skip:" %}
 6985   ins_encode %{
 6986     Label skip;
 6987     // Invert sense of branch from sense of CMOV
 6988     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6989     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6990     __ bind(skip);
 6991   %}
 6992   ins_pipe( pipe_slow );
 6993 %}
 6994 
 6995 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6996   predicate (UseSSE>=1);
 6997   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6998   ins_cost(200);
 6999   expand %{
 7000     fcmovF_regU(cop, cr, dst, src);
 7001   %}
 7002 %}
 7003 
 7004 // unsigned version
 7005 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 7006   predicate (UseSSE>=2);
 7007   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7008   ins_cost(200);
 7009   format %{ "Jn$cop   skip\n\t"
 7010             "MOVSD  $dst,$src\t# double\n"
 7011       "skip:" %}
 7012   ins_encode %{
 7013     Label skip;
 7014     // Invert sense of branch from sense of CMOV
 7015     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 7016     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7017     __ bind(skip);
 7018   %}
 7019   ins_pipe( pipe_slow );
 7020 %}
 7021 
 7022 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 7023   predicate (UseSSE>=2);
 7024   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7025   ins_cost(200);
 7026   expand %{
 7027     fcmovD_regU(cop, cr, dst, src);
 7028   %}
 7029 %}
 7030 
 7031 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 7032   predicate(VM_Version::supports_cmov() );
 7033   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7034   ins_cost(200);
 7035   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7036             "CMOV$cop $dst.hi,$src.hi" %}
 7037   opcode(0x0F,0x40);
 7038   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7039   ins_pipe( pipe_cmov_reg_long );
 7040 %}
 7041 
 7042 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7043   predicate(VM_Version::supports_cmov() );
 7044   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7045   ins_cost(200);
 7046   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7047             "CMOV$cop $dst.hi,$src.hi" %}
 7048   opcode(0x0F,0x40);
 7049   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7050   ins_pipe( pipe_cmov_reg_long );
 7051 %}
 7052 
 7053 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7054   predicate(VM_Version::supports_cmov() );
 7055   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7056   ins_cost(200);
 7057   expand %{
 7058     cmovL_regU(cop, cr, dst, src);
 7059   %}
 7060 %}
 7061 
 7062 //----------Arithmetic Instructions--------------------------------------------
 7063 //----------Addition Instructions----------------------------------------------
 7064 
 7065 // Integer Addition Instructions
 7066 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7067   match(Set dst (AddI dst src));
 7068   effect(KILL cr);
 7069 
 7070   size(2);
 7071   format %{ "ADD    $dst,$src" %}
 7072   opcode(0x03);
 7073   ins_encode( OpcP, RegReg( dst, src) );
 7074   ins_pipe( ialu_reg_reg );
 7075 %}
 7076 
 7077 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7078   match(Set dst (AddI dst src));
 7079   effect(KILL cr);
 7080 
 7081   format %{ "ADD    $dst,$src" %}
 7082   opcode(0x81, 0x00); /* /0 id */
 7083   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7084   ins_pipe( ialu_reg );
 7085 %}
 7086 
 7087 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7088   predicate(UseIncDec);
 7089   match(Set dst (AddI dst src));
 7090   effect(KILL cr);
 7091 
 7092   size(1);
 7093   format %{ "INC    $dst" %}
 7094   opcode(0x40); /* 40 + rd : INC r32 */
 7095   ins_encode( Opc_plus( primary, dst ) );
 7096   ins_pipe( ialu_reg );
 7097 %}
 7098 
 7099 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7100   match(Set dst (AddI src0 src1));
 7101   ins_cost(110);
 7102 
 7103   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7104   opcode(0x8D); /* 0x8D /r */
 7105   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7106   ins_pipe( ialu_reg_reg );
 7107 %}
 7108 
 7109 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7110   match(Set dst (AddP src0 src1));
 7111   ins_cost(110);
 7112 
 7113   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7114   opcode(0x8D); /* 0x8D /r */
 7115   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7116   ins_pipe( ialu_reg_reg );
 7117 %}
 7118 
 7119 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7120   predicate(UseIncDec);
 7121   match(Set dst (AddI dst src));
 7122   effect(KILL cr);
 7123 
 7124   size(1);
 7125   format %{ "DEC    $dst" %}
 7126   opcode(0x48); /* 48 + rd : DEC r32 */
 7127   ins_encode( Opc_plus( primary, dst ) );
 7128   ins_pipe( ialu_reg );
 7129 %}
 7130 
 7131 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7132   match(Set dst (AddP dst src));
 7133   effect(KILL cr);
 7134 
 7135   size(2);
 7136   format %{ "ADD    $dst,$src" %}
 7137   opcode(0x03);
 7138   ins_encode( OpcP, RegReg( dst, src) );
 7139   ins_pipe( ialu_reg_reg );
 7140 %}
 7141 
 7142 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7143   match(Set dst (AddP dst src));
 7144   effect(KILL cr);
 7145 
 7146   format %{ "ADD    $dst,$src" %}
 7147   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7148   // ins_encode( RegImm( dst, src) );
 7149   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7150   ins_pipe( ialu_reg );
 7151 %}
 7152 
 7153 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7154   match(Set dst (AddI dst (LoadI src)));
 7155   effect(KILL cr);
 7156 
 7157   ins_cost(150);
 7158   format %{ "ADD    $dst,$src" %}
 7159   opcode(0x03);
 7160   ins_encode( OpcP, RegMem( dst, src) );
 7161   ins_pipe( ialu_reg_mem );
 7162 %}
 7163 
 7164 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7165   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7166   effect(KILL cr);
 7167 
 7168   ins_cost(150);
 7169   format %{ "ADD    $dst,$src" %}
 7170   opcode(0x01);  /* Opcode 01 /r */
 7171   ins_encode( OpcP, RegMem( src, dst ) );
 7172   ins_pipe( ialu_mem_reg );
 7173 %}
 7174 
 7175 // Add Memory with Immediate
 7176 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7177   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7178   effect(KILL cr);
 7179 
 7180   ins_cost(125);
 7181   format %{ "ADD    $dst,$src" %}
 7182   opcode(0x81);               /* Opcode 81 /0 id */
 7183   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7184   ins_pipe( ialu_mem_imm );
 7185 %}
 7186 
 7187 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7188   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7189   effect(KILL cr);
 7190 
 7191   ins_cost(125);
 7192   format %{ "INC    $dst" %}
 7193   opcode(0xFF);               /* Opcode FF /0 */
 7194   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7195   ins_pipe( ialu_mem_imm );
 7196 %}
 7197 
 7198 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7199   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7200   effect(KILL cr);
 7201 
 7202   ins_cost(125);
 7203   format %{ "DEC    $dst" %}
 7204   opcode(0xFF);               /* Opcode FF /1 */
 7205   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7206   ins_pipe( ialu_mem_imm );
 7207 %}
 7208 
 7209 
 7210 instruct checkCastPP( eRegP dst ) %{
 7211   match(Set dst (CheckCastPP dst));
 7212 
 7213   size(0);
 7214   format %{ "#checkcastPP of $dst" %}
 7215   ins_encode( /*empty encoding*/ );
 7216   ins_pipe( empty );
 7217 %}
 7218 
 7219 instruct castPP( eRegP dst ) %{
 7220   match(Set dst (CastPP dst));
 7221   format %{ "#castPP of $dst" %}
 7222   ins_encode( /*empty encoding*/ );
 7223   ins_pipe( empty );
 7224 %}
 7225 
 7226 instruct castII( rRegI dst ) %{
 7227   match(Set dst (CastII dst));
 7228   format %{ "#castII of $dst" %}
 7229   ins_encode( /*empty encoding*/ );
 7230   ins_cost(0);
 7231   ins_pipe( empty );
 7232 %}
 7233 
 7234 instruct castLL( eRegL dst ) %{
 7235   match(Set dst (CastLL dst));
 7236   format %{ "#castLL of $dst" %}
 7237   ins_encode( /*empty encoding*/ );
 7238   ins_cost(0);
 7239   ins_pipe( empty );
 7240 %}
 7241 
 7242 instruct castFF( regF dst ) %{
 7243   predicate(UseSSE >= 1);
 7244   match(Set dst (CastFF dst));
 7245   format %{ "#castFF of $dst" %}
 7246   ins_encode( /*empty encoding*/ );
 7247   ins_cost(0);
 7248   ins_pipe( empty );
 7249 %}
 7250 
 7251 instruct castDD( regD dst ) %{
 7252   predicate(UseSSE >= 2);
 7253   match(Set dst (CastDD dst));
 7254   format %{ "#castDD of $dst" %}
 7255   ins_encode( /*empty encoding*/ );
 7256   ins_cost(0);
 7257   ins_pipe( empty );
 7258 %}
 7259 
 7260 instruct castFF_PR( regFPR dst ) %{
 7261   predicate(UseSSE < 1);
 7262   match(Set dst (CastFF dst));
 7263   format %{ "#castFF of $dst" %}
 7264   ins_encode( /*empty encoding*/ );
 7265   ins_cost(0);
 7266   ins_pipe( empty );
 7267 %}
 7268 
 7269 instruct castDD_PR( regDPR dst ) %{
 7270   predicate(UseSSE < 2);
 7271   match(Set dst (CastDD dst));
 7272   format %{ "#castDD of $dst" %}
 7273   ins_encode( /*empty encoding*/ );
 7274   ins_cost(0);
 7275   ins_pipe( empty );
 7276 %}
 7277 
 7278 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7279 
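// CMPXCHG8B has fixed register requirements: the expected value in EDX:EAX
// and the new value in ECX:EBX, hence the eADXRegL/eBCXRegL operand classes
// and the supports_cx8() predicate.  On success ZF is set; the flags are then
// converted into a boolean result in $res.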
 7280 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7281   predicate(VM_Version::supports_cx8());
 7282   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7283   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7284   effect(KILL cr, KILL oldval);
 7285   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7286             "MOV    $res,0\n\t"
 7287             "JNE,s  fail\n\t"
 7288             "MOV    $res,1\n"
 7289           "fail:" %}
 7290   ins_encode( enc_cmpxchg8(mem_ptr),
 7291               enc_flags_ne_to_boolean(res) );
 7292   ins_pipe( pipe_cmpxchg );
 7293 %}
 7294 
 7295 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7296   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7297   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7298   effect(KILL cr, KILL oldval);
 7299   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7300             "MOV    $res,0\n\t"
 7301             "JNE,s  fail\n\t"
 7302             "MOV    $res,1\n"
 7303           "fail:" %}
 7304   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7305   ins_pipe( pipe_cmpxchg );
 7306 %}
 7307 
 7308 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7309   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7310   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7311   effect(KILL cr, KILL oldval);
 7312   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7313             "MOV    $res,0\n\t"
 7314             "JNE,s  fail\n\t"
 7315             "MOV    $res,1\n"
 7316           "fail:" %}
 7317   ins_encode( enc_cmpxchgb(mem_ptr),
 7318               enc_flags_ne_to_boolean(res) );
 7319   ins_pipe( pipe_cmpxchg );
 7320 %}
 7321 
 7322 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7323   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7324   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7325   effect(KILL cr, KILL oldval);
 7326   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7327             "MOV    $res,0\n\t"
 7328             "JNE,s  fail\n\t"
 7329             "MOV    $res,1\n"
 7330           "fail:" %}
 7331   ins_encode( enc_cmpxchgw(mem_ptr),
 7332               enc_flags_ne_to_boolean(res) );
 7333   ins_pipe( pipe_cmpxchg );
 7334 %}
 7335 
 7336 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7337   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7338   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7339   effect(KILL cr, KILL oldval);
 7340   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7341             "MOV    $res,0\n\t"
 7342             "JNE,s  fail\n\t"
 7343             "MOV    $res,1\n"
 7344           "fail:" %}
 7345   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7346   ins_pipe( pipe_cmpxchg );
 7347 %}
 7348 
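// CompareAndExchange differs from CompareAndSwap only in its result: the old
// value is left in the fixed register (EAX, or EDX:EAX for longs) instead of
// being converted into a boolean.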
 7349 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7350   predicate(VM_Version::supports_cx8());
 7351   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7352   effect(KILL cr);
 7353   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7354   ins_encode( enc_cmpxchg8(mem_ptr) );
 7355   ins_pipe( pipe_cmpxchg );
 7356 %}
 7357 
 7358 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7359   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7360   effect(KILL cr);
 7361   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7362   ins_encode( enc_cmpxchg(mem_ptr) );
 7363   ins_pipe( pipe_cmpxchg );
 7364 %}
 7365 
 7366 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7367   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7368   effect(KILL cr);
 7369   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7370   ins_encode( enc_cmpxchgb(mem_ptr) );
 7371   ins_pipe( pipe_cmpxchg );
 7372 %}
 7373 
 7374 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7375   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7376   effect(KILL cr);
 7377   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7378   ins_encode( enc_cmpxchgw(mem_ptr) );
 7379   ins_pipe( pipe_cmpxchg );
 7380 %}
 7381 
 7382 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7383   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7384   effect(KILL cr);
 7385   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7386   ins_encode( enc_cmpxchg(mem_ptr) );
 7387   ins_pipe( pipe_cmpxchg );
 7388 %}
 7389 
 7390 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7391   predicate(n->as_LoadStore()->result_not_used());
 7392   match(Set dummy (GetAndAddB mem add));
 7393   effect(KILL cr);
 7394   format %{ "ADDB  [$mem],$add" %}
 7395   ins_encode %{
 7396     __ lock();
 7397     __ addb($mem$$Address, $add$$constant);
 7398   %}
 7399   ins_pipe( pipe_cmpxchg );
 7400 %}
 7401 
 7402 // Important to match to xRegI: only 8-bit regs.
 7403 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7404   match(Set newval (GetAndAddB mem newval));
 7405   effect(KILL cr);
 7406   format %{ "XADDB  [$mem],$newval" %}
 7407   ins_encode %{
 7408     __ lock();
 7409     __ xaddb($mem$$Address, $newval$$Register);
 7410   %}
 7411   ins_pipe( pipe_cmpxchg );
 7412 %}
 7413 
 7414 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7415   predicate(n->as_LoadStore()->result_not_used());
 7416   match(Set dummy (GetAndAddS mem add));
 7417   effect(KILL cr);
 7418   format %{ "ADDS  [$mem],$add" %}
 7419   ins_encode %{
 7420     __ lock();
 7421     __ addw($mem$$Address, $add$$constant);
 7422   %}
 7423   ins_pipe( pipe_cmpxchg );
 7424 %}
 7425 
 7426 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7427   match(Set newval (GetAndAddS mem newval));
 7428   effect(KILL cr);
 7429   format %{ "XADDS  [$mem],$newval" %}
 7430   ins_encode %{
 7431     __ lock();
 7432     __ xaddw($mem$$Address, $newval$$Register);
 7433   %}
 7434   ins_pipe( pipe_cmpxchg );
 7435 %}
 7436 
 7437 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7438   predicate(n->as_LoadStore()->result_not_used());
 7439   match(Set dummy (GetAndAddI mem add));
 7440   effect(KILL cr);
 7441   format %{ "ADDL  [$mem],$add" %}
 7442   ins_encode %{
 7443     __ lock();
 7444     __ addl($mem$$Address, $add$$constant);
 7445   %}
 7446   ins_pipe( pipe_cmpxchg );
 7447 %}
 7448 
 7449 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7450   match(Set newval (GetAndAddI mem newval));
 7451   effect(KILL cr);
 7452   format %{ "XADDL  [$mem],$newval" %}
 7453   ins_encode %{
 7454     __ lock();
 7455     __ xaddl($mem$$Address, $newval$$Register);
 7456   %}
 7457   ins_pipe( pipe_cmpxchg );
 7458 %}
 7459 
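// GetAndSet maps to XCHG; with a memory operand XCHG is implicitly locked,
// so unlike the XADD forms above no explicit LOCK prefix is emitted.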
 7460 // Important to match to xRegI: only 8-bit regs.
 7461 instruct xchgB( memory mem, xRegI newval) %{
 7462   match(Set newval (GetAndSetB mem newval));
 7463   format %{ "XCHGB  $newval,[$mem]" %}
 7464   ins_encode %{
 7465     __ xchgb($newval$$Register, $mem$$Address);
 7466   %}
 7467   ins_pipe( pipe_cmpxchg );
 7468 %}
 7469 
 7470 instruct xchgS( memory mem, rRegI newval) %{
 7471   match(Set newval (GetAndSetS mem newval));
 7472   format %{ "XCHGW  $newval,[$mem]" %}
 7473   ins_encode %{
 7474     __ xchgw($newval$$Register, $mem$$Address);
 7475   %}
 7476   ins_pipe( pipe_cmpxchg );
 7477 %}
 7478 
 7479 instruct xchgI( memory mem, rRegI newval) %{
 7480   match(Set newval (GetAndSetI mem newval));
 7481   format %{ "XCHGL  $newval,[$mem]" %}
 7482   ins_encode %{
 7483     __ xchgl($newval$$Register, $mem$$Address);
 7484   %}
 7485   ins_pipe( pipe_cmpxchg );
 7486 %}
 7487 
 7488 instruct xchgP( memory mem, pRegP newval) %{
 7489   match(Set newval (GetAndSetP mem newval));
 7490   format %{ "XCHGL  $newval,[$mem]" %}
 7491   ins_encode %{
 7492     __ xchgl($newval$$Register, $mem$$Address);
 7493   %}
 7494   ins_pipe( pipe_cmpxchg );
 7495 %}
 7496 
 7497 //----------Subtraction Instructions-------------------------------------------
 7498 
 7499 // Integer Subtraction Instructions
 7500 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7501   match(Set dst (SubI dst src));
 7502   effect(KILL cr);
 7503 
 7504   size(2);
 7505   format %{ "SUB    $dst,$src" %}
 7506   opcode(0x2B);
 7507   ins_encode( OpcP, RegReg( dst, src) );
 7508   ins_pipe( ialu_reg_reg );
 7509 %}
 7510 
 7511 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7512   match(Set dst (SubI dst src));
 7513   effect(KILL cr);
 7514 
 7515   format %{ "SUB    $dst,$src" %}
 7516   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7517   // ins_encode( RegImm( dst, src) );
 7518   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7519   ins_pipe( ialu_reg );
 7520 %}
 7521 
 7522 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7523   match(Set dst (SubI dst (LoadI src)));
 7524   effect(KILL cr);
 7525 
 7526   ins_cost(150);
 7527   format %{ "SUB    $dst,$src" %}
 7528   opcode(0x2B);
 7529   ins_encode( OpcP, RegMem( dst, src) );
 7530   ins_pipe( ialu_reg_mem );
 7531 %}
 7532 
 7533 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7534   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7535   effect(KILL cr);
 7536 
 7537   ins_cost(150);
 7538   format %{ "SUB    $dst,$src" %}
 7539   opcode(0x29);  /* Opcode 29 /r */
 7540   ins_encode( OpcP, RegMem( src, dst ) );
 7541   ins_pipe( ialu_mem_reg );
 7542 %}
 7543 
 7544 // Subtract from a pointer
 7545 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7546   match(Set dst (AddP dst (SubI zero src)));
 7547   effect(KILL cr);
 7548 
 7549   size(2);
 7550   format %{ "SUB    $dst,$src" %}
 7551   opcode(0x2B);
 7552   ins_encode( OpcP, RegReg( dst, src) );
 7553   ins_pipe( ialu_reg_reg );
 7554 %}
 7555 
 7556 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7557   match(Set dst (SubI zero dst));
 7558   effect(KILL cr);
 7559 
 7560   size(2);
 7561   format %{ "NEG    $dst" %}
 7562   opcode(0xF7,0x03);  // Opcode F7 /3
 7563   ins_encode( OpcP, RegOpc( dst ) );
 7564   ins_pipe( ialu_reg );
 7565 %}
 7566 
 7567 //----------Multiplication/Division Instructions-------------------------------
 7568 // Integer Multiplication Instructions
 7569 // Multiply Register
 7570 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7571   match(Set dst (MulI dst src));
 7572   effect(KILL cr);
 7573 
 7574   size(3);
 7575   ins_cost(300);
 7576   format %{ "IMUL   $dst,$src" %}
 7577   opcode(0xAF, 0x0F);
 7578   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7579   ins_pipe( ialu_reg_reg_alu0 );
 7580 %}
 7581 
 7582 // Multiply 32-bit Immediate
 7583 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7584   match(Set dst (MulI src imm));
 7585   effect(KILL cr);
 7586 
 7587   ins_cost(300);
 7588   format %{ "IMUL   $dst,$src,$imm" %}
 7589   opcode(0x69);  /* 69 /r id */
 7590   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7591   ins_pipe( ialu_reg_reg_alu0 );
 7592 %}
 7593 
 7594 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7595   match(Set dst src);
 7596   effect(KILL cr);
 7597 
 7598   // Note that this is artificially increased to make it more expensive than loadConL
 7599   ins_cost(250);
 7600   format %{ "MOV    EAX,$src\t// low word only" %}
 7601   opcode(0xB8);
 7602   ins_encode( LdImmL_Lo(dst, src) );
 7603   ins_pipe( ialu_reg_fat );
 7604 %}
 7605 
 7606 // Multiply by 32-bit Immediate, taking the shifted high order results
 7607 //  (special case for shift by 32)
 7608 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7609   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7610   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7611              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7612              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7613   effect(USE src1, KILL cr);
 7614 
 7615   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7616   ins_cost(0*100 + 1*400 - 150);
 7617   format %{ "IMUL   EDX:EAX,$src1" %}
 7618   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7619   ins_pipe( pipe_slow );
 7620 %}
 7621 
 7622 // Multiply by 32-bit Immediate, taking the shifted high order results
 7623 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7624   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7625   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7626              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7627              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7628   effect(USE src1, KILL cr);
 7629 
 7630   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7631   ins_cost(1*100 + 1*400 - 150);
 7632   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7633             "SAR    EDX,$cnt-32" %}
 7634   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7635   ins_pipe( pipe_slow );
 7636 %}
 7637 
 7638 // Multiply Memory 32-bit Immediate
 7639 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7640   match(Set dst (MulI (LoadI src) imm));
 7641   effect(KILL cr);
 7642 
 7643   ins_cost(300);
 7644   format %{ "IMUL   $dst,$src,$imm" %}
 7645   opcode(0x69);  /* 69 /r id */
 7646   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7647   ins_pipe( ialu_reg_mem_alu0 );
 7648 %}
 7649 
 7650 // Multiply Memory
 7651 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7652   match(Set dst (MulI dst (LoadI src)));
 7653   effect(KILL cr);
 7654 
 7655   ins_cost(350);
 7656   format %{ "IMUL   $dst,$src" %}
 7657   opcode(0xAF, 0x0F);
 7658   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7659   ins_pipe( ialu_reg_mem_alu0 );
 7660 %}
 7661 
 7662 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7663 %{
 7664   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7665   effect(KILL cr, KILL src2);
 7666 
 7667   expand %{ mulI_eReg(dst, src1, cr);
 7668            mulI_eReg(src2, src3, cr);
 7669            addI_eReg(dst, src2, cr); %}
 7670 %}
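// MulAddS2I has no dedicated encoding here; the expand above simply reuses
// mulI_eReg twice and addI_eReg for the final sum.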
 7671 
 7672 // Multiply Register Int to Long
 7673 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7674   // Basic Idea: long = (long)int * (long)int
 7675   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7676   effect(DEF dst, USE src, USE src1, KILL flags);
 7677 
 7678   ins_cost(300);
 7679   format %{ "IMUL   $dst,$src1" %}
 7680 
 7681   ins_encode( long_int_multiply( dst, src1 ) );
 7682   ins_pipe( ialu_reg_reg_alu0 );
 7683 %}
 7684 
 7685 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7686   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7687   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7688   effect(KILL flags);
 7689 
 7690   ins_cost(300);
 7691   format %{ "MUL    $dst,$src1" %}
 7692 
 7693   ins_encode( long_uint_multiply(dst, src1) );
 7694   ins_pipe( ialu_reg_reg_alu0 );
 7695 %}
 7696 
 7697 // Multiply Register Long
 7698 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7699   match(Set dst (MulL dst src));
 7700   effect(KILL cr, TEMP tmp);
 7701   ins_cost(4*100+3*400);
 7702 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7703 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7704   format %{ "MOV    $tmp,$src.lo\n\t"
 7705             "IMUL   $tmp,EDX\n\t"
 7706             "MOV    EDX,$src.hi\n\t"
 7707             "IMUL   EDX,EAX\n\t"
 7708             "ADD    $tmp,EDX\n\t"
 7709             "MUL    EDX:EAX,$src.lo\n\t"
 7710             "ADD    EDX,$tmp" %}
 7711   ins_encode( long_multiply( dst, src, tmp ) );
 7712   ins_pipe( pipe_slow );
 7713 %}
 7714 
 7715 // Multiply Register Long where the left operand's high 32 bits are zero
 7716 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7717   predicate(is_operand_hi32_zero(n->in(1)));
 7718   match(Set dst (MulL dst src));
 7719   effect(KILL cr, TEMP tmp);
 7720   ins_cost(2*100+2*400);
 7721 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7722 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7723   format %{ "MOV    $tmp,$src.hi\n\t"
 7724             "IMUL   $tmp,EAX\n\t"
 7725             "MUL    EDX:EAX,$src.lo\n\t"
 7726             "ADD    EDX,$tmp" %}
 7727   ins_encode %{
 7728     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7729     __ imull($tmp$$Register, rax);
 7730     __ mull($src$$Register);
 7731     __ addl(rdx, $tmp$$Register);
 7732   %}
 7733   ins_pipe( pipe_slow );
 7734 %}
 7735 
 7736 // Multiply Register Long where the right operand's high 32 bits are zero
 7737 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7738   predicate(is_operand_hi32_zero(n->in(2)));
 7739   match(Set dst (MulL dst src));
 7740   effect(KILL cr, TEMP tmp);
 7741   ins_cost(2*100+2*400);
 7742 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7743 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7744   format %{ "MOV    $tmp,$src.lo\n\t"
 7745             "IMUL   $tmp,EDX\n\t"
 7746             "MUL    EDX:EAX,$src.lo\n\t"
 7747             "ADD    EDX,$tmp" %}
 7748   ins_encode %{
 7749     __ movl($tmp$$Register, $src$$Register);
 7750     __ imull($tmp$$Register, rdx);
 7751     __ mull($src$$Register);
 7752     __ addl(rdx, $tmp$$Register);
 7753   %}
 7754   ins_pipe( pipe_slow );
 7755 %}
 7756 
 7757 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7758 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7759   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7760   match(Set dst (MulL dst src));
 7761   effect(KILL cr);
 7762   ins_cost(1*400);
 7763 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7764 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7765   format %{ "MUL    EDX:EAX,$src.lo" %}
 7766   ins_encode %{
 7767     __ mull($src$$Register);
 7768   %}
 7769   ins_pipe( pipe_slow );
 7770 %}
 7771 
 7772 // Multiply Register Long by small constant
 7773 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7774   match(Set dst (MulL dst src));
 7775   effect(KILL cr, TEMP tmp);
 7776   ins_cost(2*100+2*400);
 7777   size(12);
 7778 // Basic idea: lo(result) = lo(src * EAX)
 7779 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7780   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7781             "MOV    EDX,$src\n\t"
 7782             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7783             "ADD    EDX,$tmp" %}
 7784   ins_encode( long_multiply_con( dst, src, tmp ) );
 7785   ins_pipe( pipe_slow );
 7786 %}
 7787 
 7788 // Integer DIV with Register
 7789 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7790   match(Set rax (DivI rax div));
 7791   effect(KILL rdx, KILL cr);
 7792   size(26);
 7793   ins_cost(30*100+10*100);
 7794   format %{ "CMP    EAX,0x80000000\n\t"
 7795             "JNE,s  normal\n\t"
 7796             "XOR    EDX,EDX\n\t"
 7797             "CMP    ECX,-1\n\t"
 7798             "JE,s   done\n"
 7799     "normal: CDQ\n\t"
 7800             "IDIV   $div\n\t"
 7801     "done:"        %}
 7802   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7803   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7804   ins_pipe( ialu_reg_reg_alu0 );
 7805 %}
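      // Note on the special case above: IDIV raises a hardware divide fault when
      // the quotient does not fit, and min_jint / -1 is exactly that case
      // (-2147483648 / -1 = +2147483648, one more than max_jint). Java instead
      // defines the result as min_jint with remainder 0, so the sequence tests
      // EAX == 0x80000000 and ECX == -1 and skips the IDIV (EDX already zeroed
      // by the XOR) when both hold. The DIVMOD rule below uses the same guard.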
 7806 
 7807 // Divide Register Long
 7808 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7809   match(Set dst (DivL src1 src2));
 7810   effect(CALL);
 7811   ins_cost(10000);
 7812   format %{ "PUSH   $src1.hi\n\t"
 7813             "PUSH   $src1.lo\n\t"
 7814             "PUSH   $src2.hi\n\t"
 7815             "PUSH   $src2.lo\n\t"
 7816             "CALL   SharedRuntime::ldiv\n\t"
 7817             "ADD    ESP,16" %}
 7818   ins_encode( long_div(src1,src2) );
 7819   ins_pipe( pipe_slow );
 7820 %}
 7821 
 7822 // Integer DIVMOD with Register, both quotient and mod results
 7823 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7824   match(DivModI rax div);
 7825   effect(KILL cr);
 7826   size(26);
 7827   ins_cost(30*100+10*100);
 7828   format %{ "CMP    EAX,0x80000000\n\t"
 7829             "JNE,s  normal\n\t"
 7830             "XOR    EDX,EDX\n\t"
 7831             "CMP    ECX,-1\n\t"
 7832             "JE,s   done\n"
 7833     "normal: CDQ\n\t"
 7834             "IDIV   $div\n\t"
 7835     "done:"        %}
 7836   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7837   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7838   ins_pipe( pipe_slow );
 7839 %}
 7840 
 7841 // Integer MOD with Register
 7842 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7843   match(Set rdx (ModI rax div));
 7844   effect(KILL rax, KILL cr);
 7845 
 7846   size(26);
 7847   ins_cost(300);
 7848   format %{ "CDQ\n\t"
 7849             "IDIV   $div" %}
 7850   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7851   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7852   ins_pipe( ialu_reg_reg_alu0 );
 7853 %}
 7854 
 7855 // Remainder Register Long
 7856 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7857   match(Set dst (ModL src1 src2));
 7858   effect(CALL);
 7859   ins_cost(10000);
 7860   format %{ "PUSH   $src1.hi\n\t"
 7861             "PUSH   $src1.lo\n\t"
 7862             "PUSH   $src2.hi\n\t"
 7863             "PUSH   $src2.lo\n\t"
 7864             "CALL   SharedRuntime::lrem\n\t"
 7865             "ADD    ESP,16" %}
 7866   ins_encode( long_mod(src1,src2) );
 7867   ins_pipe( pipe_slow );
 7868 %}
 7869 
 7870 // Divide Register Long (no special case since divisor != -1)
 7871 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7872   match(Set dst (DivL dst imm));
 7873   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7874   ins_cost(1000);
 7875   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7876             "XOR    $tmp2,$tmp2\n\t"
 7877             "CMP    $tmp,EDX\n\t"
 7878             "JA,s   fast\n\t"
 7879             "MOV    $tmp2,EAX\n\t"
 7880             "MOV    EAX,EDX\n\t"
 7881             "MOV    EDX,0\n\t"
 7882             "JLE,s  pos\n\t"
 7883             "LNEG   EAX : $tmp2\n\t"
 7884             "DIV    $tmp # unsigned division\n\t"
 7885             "XCHG   EAX,$tmp2\n\t"
 7886             "DIV    $tmp\n\t"
 7887             "LNEG   $tmp2 : EAX\n\t"
 7888             "JMP,s  done\n"
 7889     "pos:\n\t"
 7890             "DIV    $tmp\n\t"
 7891             "XCHG   EAX,$tmp2\n"
 7892     "fast:\n\t"
 7893             "DIV    $tmp\n"
 7894     "done:\n\t"
 7895             "MOV    EDX,$tmp2\n\t"
 7896             "NEG    EDX:EAX # if $imm < 0" %}
 7897   ins_encode %{
 7898     int con = (int)$imm$$constant;
 7899     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7900     int pcon = (con > 0) ? con : -con;
 7901     Label Lfast, Lpos, Ldone;
 7902 
 7903     __ movl($tmp$$Register, pcon);
 7904     __ xorl($tmp2$$Register,$tmp2$$Register);
 7905     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7906     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7907 
 7908     __ movl($tmp2$$Register, $dst$$Register); // save
 7909     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7910     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7911     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7912 
 7913     // Negative dividend.
 7914     // convert value to positive to use unsigned division
 7915     __ lneg($dst$$Register, $tmp2$$Register);
 7916     __ divl($tmp$$Register);
 7917     __ xchgl($dst$$Register, $tmp2$$Register);
 7918     __ divl($tmp$$Register);
 7919     // revert result back to negative
 7920     __ lneg($tmp2$$Register, $dst$$Register);
 7921     __ jmpb(Ldone);
 7922 
 7923     __ bind(Lpos);
 7924     __ divl($tmp$$Register); // Use unsigned division
 7925     __ xchgl($dst$$Register, $tmp2$$Register);
 7926     // Fallthrough to the final divide; tmp2 has the 32-bit hi result
 7927 
 7928     __ bind(Lfast);
 7929     // fast path: src is positive
 7930     __ divl($tmp$$Register); // Use unsigned division
 7931 
 7932     __ bind(Ldone);
 7933     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7934     if (con < 0) {
 7935       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7936     }
 7937   %}
 7938   ins_pipe( pipe_slow );
 7939 %}
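      // The rule above is essentially schoolbook division of a two-"digit"
      // (32-bit digits) dividend by a one-digit divisor, done with unsigned DIV
      // after stripping signs. A rough C sketch of the unsigned core
      // (illustrative only; the helper name udiv64_by_u32 is made up):
      //
      //   #include <stdint.h>
      //   static uint64_t udiv64_by_u32(uint32_t hi, uint32_t lo, uint32_t d) {
      //     if (hi < d) {                                   // fast path: quotient fits in 32 bits
      //       return (((uint64_t)hi << 32) | lo) / d;       // single DIV
      //     }
      //     uint32_t q_hi = hi / d;                         // first DIV
      //     uint32_t r    = hi % d;                         // r < d, so the second DIV cannot overflow
      //     uint32_t q_lo = (uint32_t)((((uint64_t)r << 32) | lo) / d);
      //     return ((uint64_t)q_hi << 32) | q_lo;
      //   }
      //
      // Negative dividends are negated first and the quotient negated afterwards;
      // a negative divisor only flips the sign of the final result (NEG EDX:EAX).
      // The lrem-by-constant rule below runs the same two DIVs but keeps the
      // remainder instead of the quotient.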
 7940 
 7941 // Remainder Register Long (remainder fit into 32 bits)
 7942 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7943   match(Set dst (ModL dst imm));
 7944   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7945   ins_cost(1000);
 7946   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7947             "CMP    $tmp,EDX\n\t"
 7948             "JA,s   fast\n\t"
 7949             "MOV    $tmp2,EAX\n\t"
 7950             "MOV    EAX,EDX\n\t"
 7951             "MOV    EDX,0\n\t"
 7952             "JLE,s  pos\n\t"
 7953             "LNEG   EAX : $tmp2\n\t"
 7954             "DIV    $tmp # unsigned division\n\t"
 7955             "MOV    EAX,$tmp2\n\t"
 7956             "DIV    $tmp\n\t"
 7957             "NEG    EDX\n\t"
 7958             "JMP,s  done\n"
 7959     "pos:\n\t"
 7960             "DIV    $tmp\n\t"
 7961             "MOV    EAX,$tmp2\n"
 7962     "fast:\n\t"
 7963             "DIV    $tmp\n"
 7964     "done:\n\t"
 7965             "MOV    EAX,EDX\n\t"
 7966             "SAR    EDX,31\n\t" %}
 7967   ins_encode %{
 7968     int con = (int)$imm$$constant;
 7969     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7970     int pcon = (con > 0) ? con : -con;
 7971     Label  Lfast, Lpos, Ldone;
 7972 
 7973     __ movl($tmp$$Register, pcon);
 7974     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7975     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7976 
 7977     __ movl($tmp2$$Register, $dst$$Register); // save
 7978     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7979     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7980     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7981 
 7982     // Negative dividend.
 7983     // convert value to positive to use unsigned division
 7984     __ lneg($dst$$Register, $tmp2$$Register);
 7985     __ divl($tmp$$Register);
 7986     __ movl($dst$$Register, $tmp2$$Register);
 7987     __ divl($tmp$$Register);
 7988     // revert remainder back to negative
 7989     __ negl(HIGH_FROM_LOW($dst$$Register));
 7990     __ jmpb(Ldone);
 7991 
 7992     __ bind(Lpos);
 7993     __ divl($tmp$$Register);
 7994     __ movl($dst$$Register, $tmp2$$Register);
 7995 
 7996     __ bind(Lfast);
 7997     // fast path: src is positive
 7998     __ divl($tmp$$Register);
 7999 
 8000     __ bind(Ldone);
 8001     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 8002     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 8003 
 8004   %}
 8005   ins_pipe( pipe_slow );
 8006 %}
 8007 
 8008 // Integer Shift Instructions
 8009 // Shift Left by one
 8010 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8011   match(Set dst (LShiftI dst shift));
 8012   effect(KILL cr);
 8013 
 8014   size(2);
 8015   format %{ "SHL    $dst,$shift" %}
 8016   opcode(0xD1, 0x4);  /* D1 /4 */
 8017   ins_encode( OpcP, RegOpc( dst ) );
 8018   ins_pipe( ialu_reg );
 8019 %}
 8020 
 8021 // Shift Left by 8-bit immediate
 8022 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8023   match(Set dst (LShiftI dst shift));
 8024   effect(KILL cr);
 8025 
 8026   size(3);
 8027   format %{ "SHL    $dst,$shift" %}
 8028   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8029   ins_encode( RegOpcImm( dst, shift) );
 8030   ins_pipe( ialu_reg );
 8031 %}
 8032 
 8033 // Shift Left by variable
 8034 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8035   match(Set dst (LShiftI dst shift));
 8036   effect(KILL cr);
 8037 
 8038   size(2);
 8039   format %{ "SHL    $dst,$shift" %}
 8040   opcode(0xD3, 0x4);  /* D3 /4 */
 8041   ins_encode( OpcP, RegOpc( dst ) );
 8042   ins_pipe( ialu_reg_reg );
 8043 %}
 8044 
 8045 // Arithmetic shift right by one
 8046 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8047   match(Set dst (RShiftI dst shift));
 8048   effect(KILL cr);
 8049 
 8050   size(2);
 8051   format %{ "SAR    $dst,$shift" %}
 8052   opcode(0xD1, 0x7);  /* D1 /7 */
 8053   ins_encode( OpcP, RegOpc( dst ) );
 8054   ins_pipe( ialu_reg );
 8055 %}
 8056 
 8057 // Arithmetic shift right memory operand by one
 8058 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8059   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8060   effect(KILL cr);
 8061   format %{ "SAR    $dst,$shift" %}
 8062   opcode(0xD1, 0x7);  /* D1 /7 */
 8063   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8064   ins_pipe( ialu_mem_imm );
 8065 %}
 8066 
 8067 // Arithmetic Shift Right by 8-bit immediate
 8068 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8069   match(Set dst (RShiftI dst shift));
 8070   effect(KILL cr);
 8071 
 8072   size(3);
 8073   format %{ "SAR    $dst,$shift" %}
 8074   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8075   ins_encode( RegOpcImm( dst, shift ) );
 8076   ins_pipe( ialu_mem_imm );
 8077 %}
 8078 
 8079 // Arithmetic Shift Right Memory by 8-bit immediate
 8080 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8081   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8082   effect(KILL cr);
 8083 
 8084   format %{ "SAR    $dst,$shift" %}
 8085   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8086   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8087   ins_pipe( ialu_mem_imm );
 8088 %}
 8089 
 8090 // Arithmetic Shift Right by variable
 8091 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8092   match(Set dst (RShiftI dst shift));
 8093   effect(KILL cr);
 8094 
 8095   size(2);
 8096   format %{ "SAR    $dst,$shift" %}
 8097   opcode(0xD3, 0x7);  /* D3 /7 */
 8098   ins_encode( OpcP, RegOpc( dst ) );
 8099   ins_pipe( ialu_reg_reg );
 8100 %}
 8101 
 8102 // Logical shift right by one
 8103 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8104   match(Set dst (URShiftI dst shift));
 8105   effect(KILL cr);
 8106 
 8107   size(2);
 8108   format %{ "SHR    $dst,$shift" %}
 8109   opcode(0xD1, 0x5);  /* D1 /5 */
 8110   ins_encode( OpcP, RegOpc( dst ) );
 8111   ins_pipe( ialu_reg );
 8112 %}
 8113 
 8114 // Logical Shift Right by 8-bit immediate
 8115 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8116   match(Set dst (URShiftI dst shift));
 8117   effect(KILL cr);
 8118 
 8119   size(3);
 8120   format %{ "SHR    $dst,$shift" %}
 8121   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8122   ins_encode( RegOpcImm( dst, shift) );
 8123   ins_pipe( ialu_reg );
 8124 %}
 8125 
 8126 
 8127 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8128 // This idiom is used by the compiler for the i2b bytecode (sign-extend the low byte).
 8129 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8130   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8131 
 8132   size(3);
 8133   format %{ "MOVSX  $dst,$src :8" %}
 8134   ins_encode %{
 8135     __ movsbl($dst$$Register, $src$$Register);
 8136   %}
 8137   ins_pipe(ialu_reg_reg);
 8138 %}
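      // In other words, the matcher recognizes the Java-level idiom
      // (x << 24) >> 24, which sign-extends the low byte (e.g. 0x12345680
      // becomes 0xFFFFFF80, i.e. -128), and strength-reduces the two shifts to
      // a single MOVSX.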
 8139 
 8140 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 8141 // This idiom is used by the compiler for the i2s bytecode (sign-extend the low 16 bits).
 8142 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8143   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8144 
 8145   size(3);
 8146   format %{ "MOVSX  $dst,$src :16" %}
 8147   ins_encode %{
 8148     __ movswl($dst$$Register, $src$$Register);
 8149   %}
 8150   ins_pipe(ialu_reg_reg);
 8151 %}
 8152 
 8153 
 8154 // Logical Shift Right by variable
 8155 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8156   match(Set dst (URShiftI dst shift));
 8157   effect(KILL cr);
 8158 
 8159   size(2);
 8160   format %{ "SHR    $dst,$shift" %}
 8161   opcode(0xD3, 0x5);  /* D3 /5 */
 8162   ins_encode( OpcP, RegOpc( dst ) );
 8163   ins_pipe( ialu_reg_reg );
 8164 %}
 8165 
 8166 
 8167 //----------Logical Instructions-----------------------------------------------
 8168 //----------Integer Logical Instructions---------------------------------------
 8169 // And Instructions
 8170 // And Register with Register
 8171 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8172   match(Set dst (AndI dst src));
 8173   effect(KILL cr);
 8174 
 8175   size(2);
 8176   format %{ "AND    $dst,$src" %}
 8177   opcode(0x23);
 8178   ins_encode( OpcP, RegReg( dst, src) );
 8179   ins_pipe( ialu_reg_reg );
 8180 %}
 8181 
 8182 // And Register with Immediate
 8183 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8184   match(Set dst (AndI dst src));
 8185   effect(KILL cr);
 8186 
 8187   format %{ "AND    $dst,$src" %}
 8188   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8189   // ins_encode( RegImm( dst, src) );
 8190   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8191   ins_pipe( ialu_reg );
 8192 %}
 8193 
 8194 // And Register with Memory
 8195 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8196   match(Set dst (AndI dst (LoadI src)));
 8197   effect(KILL cr);
 8198 
 8199   ins_cost(150);
 8200   format %{ "AND    $dst,$src" %}
 8201   opcode(0x23);
 8202   ins_encode( OpcP, RegMem( dst, src) );
 8203   ins_pipe( ialu_reg_mem );
 8204 %}
 8205 
 8206 // And Memory with Register
 8207 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8208   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8209   effect(KILL cr);
 8210 
 8211   ins_cost(150);
 8212   format %{ "AND    $dst,$src" %}
 8213   opcode(0x21);  /* Opcode 21 /r */
 8214   ins_encode( OpcP, RegMem( src, dst ) );
 8215   ins_pipe( ialu_mem_reg );
 8216 %}
 8217 
 8218 // And Memory with Immediate
 8219 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8220   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8221   effect(KILL cr);
 8222 
 8223   ins_cost(125);
 8224   format %{ "AND    $dst,$src" %}
 8225   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8226   // ins_encode( MemImm( dst, src) );
 8227   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8228   ins_pipe( ialu_mem_imm );
 8229 %}
 8230 
 8231 // BMI1 instructions
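      // The match rules below recognize the standard bit-trick forms of the
      // BMI1 operations (quick illustration with x = 0b01101000):
      //
      //   ~x & y        -> ANDNL   (written as (x ^ -1) & y in the ideal graph)
      //   x & -x        -> BLSIL   isolate lowest set bit      -> 0b00001000
      //   x ^ (x - 1)   -> BLSMSKL mask up to lowest set bit   -> 0b00001111
      //   x & (x - 1)   -> BLSRL   clear lowest set bit        -> 0b01100000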
 8232 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8233   match(Set dst (AndI (XorI src1 minus_1) src2));
 8234   predicate(UseBMI1Instructions);
 8235   effect(KILL cr);
 8236 
 8237   format %{ "ANDNL  $dst, $src1, $src2" %}
 8238 
 8239   ins_encode %{
 8240     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8241   %}
 8242   ins_pipe(ialu_reg);
 8243 %}
 8244 
 8245 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8246   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8247   predicate(UseBMI1Instructions);
 8248   effect(KILL cr);
 8249 
 8250   ins_cost(125);
 8251   format %{ "ANDNL  $dst, $src1, $src2" %}
 8252 
 8253   ins_encode %{
 8254     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8255   %}
 8256   ins_pipe(ialu_reg_mem);
 8257 %}
 8258 
 8259 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8260   match(Set dst (AndI (SubI imm_zero src) src));
 8261   predicate(UseBMI1Instructions);
 8262   effect(KILL cr);
 8263 
 8264   format %{ "BLSIL  $dst, $src" %}
 8265 
 8266   ins_encode %{
 8267     __ blsil($dst$$Register, $src$$Register);
 8268   %}
 8269   ins_pipe(ialu_reg);
 8270 %}
 8271 
 8272 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8273   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8274   predicate(UseBMI1Instructions);
 8275   effect(KILL cr);
 8276 
 8277   ins_cost(125);
 8278   format %{ "BLSIL  $dst, $src" %}
 8279 
 8280   ins_encode %{
 8281     __ blsil($dst$$Register, $src$$Address);
 8282   %}
 8283   ins_pipe(ialu_reg_mem);
 8284 %}
 8285 
 8286 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8287 %{
 8288   match(Set dst (XorI (AddI src minus_1) src));
 8289   predicate(UseBMI1Instructions);
 8290   effect(KILL cr);
 8291 
 8292   format %{ "BLSMSKL $dst, $src" %}
 8293 
 8294   ins_encode %{
 8295     __ blsmskl($dst$$Register, $src$$Register);
 8296   %}
 8297 
 8298   ins_pipe(ialu_reg);
 8299 %}
 8300 
 8301 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8302 %{
 8303   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8304   predicate(UseBMI1Instructions);
 8305   effect(KILL cr);
 8306 
 8307   ins_cost(125);
 8308   format %{ "BLSMSKL $dst, $src" %}
 8309 
 8310   ins_encode %{
 8311     __ blsmskl($dst$$Register, $src$$Address);
 8312   %}
 8313 
 8314   ins_pipe(ialu_reg_mem);
 8315 %}
 8316 
 8317 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8318 %{
 8319   match(Set dst (AndI (AddI src minus_1) src) );
 8320   predicate(UseBMI1Instructions);
 8321   effect(KILL cr);
 8322 
 8323   format %{ "BLSRL  $dst, $src" %}
 8324 
 8325   ins_encode %{
 8326     __ blsrl($dst$$Register, $src$$Register);
 8327   %}
 8328 
 8329   ins_pipe(ialu_reg);
 8330 %}
 8331 
 8332 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8333 %{
 8334   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8335   predicate(UseBMI1Instructions);
 8336   effect(KILL cr);
 8337 
 8338   ins_cost(125);
 8339   format %{ "BLSRL  $dst, $src" %}
 8340 
 8341   ins_encode %{
 8342     __ blsrl($dst$$Register, $src$$Address);
 8343   %}
 8344 
 8345   ins_pipe(ialu_reg_mem);
 8346 %}
 8347 
 8348 // Or Instructions
 8349 // Or Register with Register
 8350 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8351   match(Set dst (OrI dst src));
 8352   effect(KILL cr);
 8353 
 8354   size(2);
 8355   format %{ "OR     $dst,$src" %}
 8356   opcode(0x0B);
 8357   ins_encode( OpcP, RegReg( dst, src) );
 8358   ins_pipe( ialu_reg_reg );
 8359 %}
 8360 
 8361 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8362   match(Set dst (OrI dst (CastP2X src)));
 8363   effect(KILL cr);
 8364 
 8365   size(2);
 8366   format %{ "OR     $dst,$src" %}
 8367   opcode(0x0B);
 8368   ins_encode( OpcP, RegReg( dst, src) );
 8369   ins_pipe( ialu_reg_reg );
 8370 %}
 8371 
 8372 
 8373 // Or Register with Immediate
 8374 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8375   match(Set dst (OrI dst src));
 8376   effect(KILL cr);
 8377 
 8378   format %{ "OR     $dst,$src" %}
 8379   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8380   // ins_encode( RegImm( dst, src) );
 8381   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8382   ins_pipe( ialu_reg );
 8383 %}
 8384 
 8385 // Or Register with Memory
 8386 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8387   match(Set dst (OrI dst (LoadI src)));
 8388   effect(KILL cr);
 8389 
 8390   ins_cost(150);
 8391   format %{ "OR     $dst,$src" %}
 8392   opcode(0x0B);
 8393   ins_encode( OpcP, RegMem( dst, src) );
 8394   ins_pipe( ialu_reg_mem );
 8395 %}
 8396 
 8397 // Or Memory with Register
 8398 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8399   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8400   effect(KILL cr);
 8401 
 8402   ins_cost(150);
 8403   format %{ "OR     $dst,$src" %}
 8404   opcode(0x09);  /* Opcode 09 /r */
 8405   ins_encode( OpcP, RegMem( src, dst ) );
 8406   ins_pipe( ialu_mem_reg );
 8407 %}
 8408 
 8409 // Or Memory with Immediate
 8410 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8411   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8412   effect(KILL cr);
 8413 
 8414   ins_cost(125);
 8415   format %{ "OR     $dst,$src" %}
 8416   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8417   // ins_encode( MemImm( dst, src) );
 8418   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8419   ins_pipe( ialu_mem_imm );
 8420 %}
 8421 
 8422 // ROL/ROR
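      // The rotate rules below recognize the usual rotate-by-shifts idiom.
      // A minimal C sketch of the 32-bit left rotate they strength-reduce
      // (illustrative only; rotl32 is a made-up helper name):
      //
      //   #include <stdint.h>
      //   static uint32_t rotl32(uint32_t x, unsigned n) {
      //     n &= 31;                                   // x86 masks shift counts to 5 bits
      //     return (x << n) | (x >> ((32 - n) & 31));  // the OrI(LShiftI, URShiftI) pattern
      //   }
      //
      // The imm8 rules require (lshift + rshift) & 0x1f == 0, i.e. the two shift
      // amounts really describe a single rotate; the variable-count rules accept
      // the complements "0 - shift" and "32 - shift", which are equal modulo 32.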
 8423 // ROL expand
 8424 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8425   effect(USE_DEF dst, USE shift, KILL cr);
 8426 
 8427   format %{ "ROL    $dst, $shift" %}
 8428   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8429   ins_encode( OpcP, RegOpc( dst ));
 8430   ins_pipe( ialu_reg );
 8431 %}
 8432 
 8433 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8434   effect(USE_DEF dst, USE shift, KILL cr);
 8435 
 8436   format %{ "ROL    $dst, $shift" %}
 8437   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
 8438   ins_encode( RegOpcImm(dst, shift) );
 8439   ins_pipe(ialu_reg);
 8440 %}
 8441 
 8442 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8443   effect(USE_DEF dst, USE shift, KILL cr);
 8444 
 8445   format %{ "ROL    $dst, $shift" %}
 8446   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8447   ins_encode(OpcP, RegOpc(dst));
 8448   ins_pipe( ialu_reg_reg );
 8449 %}
 8450 // end of ROL expand
 8451 
 8452 // ROL 32bit by one once
 8453 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8454   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8455 
 8456   expand %{
 8457     rolI_eReg_imm1(dst, lshift, cr);
 8458   %}
 8459 %}
 8460 
 8461 // ROL 32bit var by imm8 once
 8462 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8463   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8464   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8465 
 8466   expand %{
 8467     rolI_eReg_imm8(dst, lshift, cr);
 8468   %}
 8469 %}
 8470 
 8471 // ROL 32bit var by var once
 8472 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8473   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8474 
 8475   expand %{
 8476     rolI_eReg_CL(dst, shift, cr);
 8477   %}
 8478 %}
 8479 
 8480 // ROL 32bit var by var once
 8481 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8482   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8483 
 8484   expand %{
 8485     rolI_eReg_CL(dst, shift, cr);
 8486   %}
 8487 %}
 8488 
 8489 // ROR expand
 8490 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8491   effect(USE_DEF dst, USE shift, KILL cr);
 8492 
 8493   format %{ "ROR    $dst, $shift" %}
 8494   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8495   ins_encode( OpcP, RegOpc( dst ) );
 8496   ins_pipe( ialu_reg );
 8497 %}
 8498 
 8499 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8500   effect(USE_DEF dst, USE shift, KILL cr);
 8501 
 8502   format %{ "ROR    $dst, $shift" %}
 8503   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
 8504   ins_encode( RegOpcImm(dst, shift) );
 8505   ins_pipe( ialu_reg );
 8506 %}
 8507 
 8508 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8509   effect(USE_DEF dst, USE shift, KILL cr);
 8510 
 8511   format %{ "ROR    $dst, $shift" %}
 8512   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8513   ins_encode(OpcP, RegOpc(dst));
 8514   ins_pipe( ialu_reg_reg );
 8515 %}
 8516 // end of ROR expand
 8517 
 8518 // ROR right once
 8519 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8520   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8521 
 8522   expand %{
 8523     rorI_eReg_imm1(dst, rshift, cr);
 8524   %}
 8525 %}
 8526 
 8527 // ROR 32bit by immI8 once
 8528 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8529   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8530   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8531 
 8532   expand %{
 8533     rorI_eReg_imm8(dst, rshift, cr);
 8534   %}
 8535 %}
 8536 
 8537 // ROR 32bit var by var once
 8538 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8539   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8540 
 8541   expand %{
 8542     rorI_eReg_CL(dst, shift, cr);
 8543   %}
 8544 %}
 8545 
 8546 // ROR 32bit var by var once
 8547 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8548   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8549 
 8550   expand %{
 8551     rorI_eReg_CL(dst, shift, cr);
 8552   %}
 8553 %}
 8554 
 8555 // Xor Instructions
 8556 // Xor Register with Register
 8557 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8558   match(Set dst (XorI dst src));
 8559   effect(KILL cr);
 8560 
 8561   size(2);
 8562   format %{ "XOR    $dst,$src" %}
 8563   opcode(0x33);
 8564   ins_encode( OpcP, RegReg( dst, src) );
 8565   ins_pipe( ialu_reg_reg );
 8566 %}
 8567 
 8568 // Xor Register with Immediate -1
 8569 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8570   match(Set dst (XorI dst imm));
 8571 
 8572   size(2);
 8573   format %{ "NOT    $dst" %}
 8574   ins_encode %{
 8575      __ notl($dst$$Register);
 8576   %}
 8577   ins_pipe( ialu_reg );
 8578 %}
 8579 
 8580 // Xor Register with Immediate
 8581 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8582   match(Set dst (XorI dst src));
 8583   effect(KILL cr);
 8584 
 8585   format %{ "XOR    $dst,$src" %}
 8586   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8587   // ins_encode( RegImm( dst, src) );
 8588   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8589   ins_pipe( ialu_reg );
 8590 %}
 8591 
 8592 // Xor Register with Memory
 8593 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8594   match(Set dst (XorI dst (LoadI src)));
 8595   effect(KILL cr);
 8596 
 8597   ins_cost(150);
 8598   format %{ "XOR    $dst,$src" %}
 8599   opcode(0x33);
 8600   ins_encode( OpcP, RegMem(dst, src) );
 8601   ins_pipe( ialu_reg_mem );
 8602 %}
 8603 
 8604 // Xor Memory with Register
 8605 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8606   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8607   effect(KILL cr);
 8608 
 8609   ins_cost(150);
 8610   format %{ "XOR    $dst,$src" %}
 8611   opcode(0x31);  /* Opcode 31 /r */
 8612   ins_encode( OpcP, RegMem( src, dst ) );
 8613   ins_pipe( ialu_mem_reg );
 8614 %}
 8615 
 8616 // Xor Memory with Immediate
 8617 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8618   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8619   effect(KILL cr);
 8620 
 8621   ins_cost(125);
 8622   format %{ "XOR    $dst,$src" %}
 8623   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8624   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8625   ins_pipe( ialu_mem_imm );
 8626 %}
 8627 
 8628 //----------Convert Int to Boolean---------------------------------------------
 8629 
 8630 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8631   effect( DEF dst, USE src );
 8632   format %{ "MOV    $dst,$src" %}
 8633   ins_encode( enc_Copy( dst, src) );
 8634   ins_pipe( ialu_reg_reg );
 8635 %}
 8636 
 8637 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8638   effect( USE_DEF dst, USE src, KILL cr );
 8639 
 8640   size(4);
 8641   format %{ "NEG    $dst\n\t"
 8642             "ADC    $dst,$src" %}
 8643   ins_encode( neg_reg(dst),
 8644               OpcRegReg(0x13,dst,src) );
 8645   ins_pipe( ialu_reg_reg_long );
 8646 %}
 8647 
 8648 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8649   match(Set dst (Conv2B src));
 8650 
 8651   expand %{
 8652     movI_nocopy(dst,src);
 8653     ci2b(dst,src,cr);
 8654   %}
 8655 %}
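      // How the expansion above works: dst starts as a copy of src; NEG sets CF
      // exactly when its operand was nonzero and leaves dst = -src; ADC dst,src
      // then computes -src + src + CF = CF. So dst ends up 1 when src != 0 and
      // 0 when src == 0, which is the Conv2B result (e.g. src = 5: NEG gives -5
      // with CF = 1, ADC gives -5 + 5 + 1 = 1). The pointer variant convP2B
      // below uses the same NEG/ADC trick.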
 8656 
 8657 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8658   effect( DEF dst, USE src );
 8659   format %{ "MOV    $dst,$src" %}
 8660   ins_encode( enc_Copy( dst, src) );
 8661   ins_pipe( ialu_reg_reg );
 8662 %}
 8663 
 8664 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8665   effect( USE_DEF dst, USE src, KILL cr );
 8666   format %{ "NEG    $dst\n\t"
 8667             "ADC    $dst,$src" %}
 8668   ins_encode( neg_reg(dst),
 8669               OpcRegReg(0x13,dst,src) );
 8670   ins_pipe( ialu_reg_reg_long );
 8671 %}
 8672 
 8673 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8674   match(Set dst (Conv2B src));
 8675 
 8676   expand %{
 8677     movP_nocopy(dst,src);
 8678     cp2b(dst,src,cr);
 8679   %}
 8680 %}
 8681 
 8682 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8683   match(Set dst (CmpLTMask p q));
 8684   effect(KILL cr);
 8685   ins_cost(400);
 8686 
 8687   // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
 8688   format %{ "XOR    $dst,$dst\n\t"
 8689             "CMP    $p,$q\n\t"
 8690             "SETlt  $dst\n\t"
 8691             "NEG    $dst" %}
 8692   ins_encode %{
 8693     Register Rp = $p$$Register;
 8694     Register Rq = $q$$Register;
 8695     Register Rd = $dst$$Register;
 8696     Label done;
 8697     __ xorl(Rd, Rd);
 8698     __ cmpl(Rp, Rq);
 8699     __ setb(Assembler::less, Rd);
 8700     __ negl(Rd);
 8701   %}
 8702 
 8703   ins_pipe(pipe_slow);
 8704 %}
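      // CmpLTMask p q produces an all-ones mask when p < q (signed) and zero
      // otherwise, i.e. roughly "-(p < q)" in C: the SETlt byte (0 or 1) is
      // turned into 0 or -1 by the NEG. The cadd_cmpLTMask/and_cmpLTMask rules
      // below match uses of that mask and replace it with a short branch so no
      // mask register needs to be materialized.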
 8705 
 8706 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8707   match(Set dst (CmpLTMask dst zero));
 8708   effect(DEF dst, KILL cr);
 8709   ins_cost(100);
 8710 
 8711   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8712   ins_encode %{
 8713     __ sarl($dst$$Register, 31);
 8714   %}
 8715   ins_pipe(ialu_reg);
 8716 %}
 8717 
 8718 /* better to save a register than avoid a branch */
 8719 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8720   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8721   effect(KILL cr);
 8722   ins_cost(400);
 8723   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8724             "JGE    done\n\t"
 8725             "ADD    $p,$y\n"
 8726             "done:  " %}
 8727   ins_encode %{
 8728     Register Rp = $p$$Register;
 8729     Register Rq = $q$$Register;
 8730     Register Ry = $y$$Register;
 8731     Label done;
 8732     __ subl(Rp, Rq);
 8733     __ jccb(Assembler::greaterEqual, done);
 8734     __ addl(Rp, Ry);
 8735     __ bind(done);
 8736   %}
 8737 
 8738   ins_pipe(pipe_cmplt);
 8739 %}
 8740 
 8741 /* better to save a register than avoid a branch */
 8742 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8743   match(Set y (AndI (CmpLTMask p q) y));
 8744   effect(KILL cr);
 8745 
 8746   ins_cost(300);
 8747 
 8748   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8749             "JLT      done\n\t"
 8750             "XORL     $y, $y\n"
 8751             "done:  " %}
 8752   ins_encode %{
 8753     Register Rp = $p$$Register;
 8754     Register Rq = $q$$Register;
 8755     Register Ry = $y$$Register;
 8756     Label done;
 8757     __ cmpl(Rp, Rq);
 8758     __ jccb(Assembler::less, done);
 8759     __ xorl(Ry, Ry);
 8760     __ bind(done);
 8761   %}
 8762 
 8763   ins_pipe(pipe_cmplt);
 8764 %}
 8765 
 8766 /* If I enable this, I encourage spilling in the inner loop of compress.
 8767 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8768   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8769 */
 8770 //----------Overflow Math Instructions-----------------------------------------
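      // These rules produce only condition codes: the matched ADD/CMP/NEG/IMUL
      // sets OF exactly when the corresponding signed operation overflows, and
      // the flags are then consumed by the branch that tests for overflow. CMP
      // works for OverflowSubI because it performs the subtraction for flag
      // purposes without writing a result.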
 8771 
 8772 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8773 %{
 8774   match(Set cr (OverflowAddI op1 op2));
 8775   effect(DEF cr, USE_KILL op1, USE op2);
 8776 
 8777   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8778 
 8779   ins_encode %{
 8780     __ addl($op1$$Register, $op2$$Register);
 8781   %}
 8782   ins_pipe(ialu_reg_reg);
 8783 %}
 8784 
 8785 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8786 %{
 8787   match(Set cr (OverflowAddI op1 op2));
 8788   effect(DEF cr, USE_KILL op1, USE op2);
 8789 
 8790   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8791 
 8792   ins_encode %{
 8793     __ addl($op1$$Register, $op2$$constant);
 8794   %}
 8795   ins_pipe(ialu_reg_reg);
 8796 %}
 8797 
 8798 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8799 %{
 8800   match(Set cr (OverflowSubI op1 op2));
 8801 
 8802   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8803   ins_encode %{
 8804     __ cmpl($op1$$Register, $op2$$Register);
 8805   %}
 8806   ins_pipe(ialu_reg_reg);
 8807 %}
 8808 
 8809 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8810 %{
 8811   match(Set cr (OverflowSubI op1 op2));
 8812 
 8813   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8814   ins_encode %{
 8815     __ cmpl($op1$$Register, $op2$$constant);
 8816   %}
 8817   ins_pipe(ialu_reg_reg);
 8818 %}
 8819 
 8820 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8821 %{
 8822   match(Set cr (OverflowSubI zero op2));
 8823   effect(DEF cr, USE_KILL op2);
 8824 
 8825   format %{ "NEG    $op2\t# overflow check int" %}
 8826   ins_encode %{
 8827     __ negl($op2$$Register);
 8828   %}
 8829   ins_pipe(ialu_reg_reg);
 8830 %}
 8831 
 8832 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8833 %{
 8834   match(Set cr (OverflowMulI op1 op2));
 8835   effect(DEF cr, USE_KILL op1, USE op2);
 8836 
 8837   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8838   ins_encode %{
 8839     __ imull($op1$$Register, $op2$$Register);
 8840   %}
 8841   ins_pipe(ialu_reg_reg_alu0);
 8842 %}
 8843 
 8844 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8845 %{
 8846   match(Set cr (OverflowMulI op1 op2));
 8847   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8848 
 8849   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8850   ins_encode %{
 8851     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8852   %}
 8853   ins_pipe(ialu_reg_reg_alu0);
 8854 %}
 8855 
 8856 // Integer Absolute Instructions
 8857 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8858 %{
 8859   match(Set dst (AbsI src));
 8860   effect(TEMP dst, TEMP tmp, KILL cr);
 8861   format %{ "movl $tmp, $src\n\t"
 8862             "sarl $tmp, 31\n\t"
 8863             "movl $dst, $src\n\t"
 8864             "xorl $dst, $tmp\n\t"
 8865             "subl $dst, $tmp\n"
 8866           %}
 8867   ins_encode %{
 8868     __ movl($tmp$$Register, $src$$Register);
 8869     __ sarl($tmp$$Register, 31);
 8870     __ movl($dst$$Register, $src$$Register);
 8871     __ xorl($dst$$Register, $tmp$$Register);
 8872     __ subl($dst$$Register, $tmp$$Register);
 8873   %}
 8874 
 8875   ins_pipe(ialu_reg_reg);
 8876 %}
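      // The sequence above is the usual branch-free sign-mask abs. A rough C
      // sketch (illustrative; assumes arithmetic right shift on signed ints, and
      // note that abs(min_jint) wraps back to min_jint, matching Java's Math.abs):
      //
      //   static int iabs_sketch(int x) {
      //     int m = x >> 31;       // 0 for x >= 0, -1 for x < 0
      //     return (x ^ m) - m;    // x unchanged, or (~x) + 1 == -x
      //   }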
 8877 
 8878 //----------Long Instructions------------------------------------------------
 8879 // Add Long Register with Register
 8880 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8881   match(Set dst (AddL dst src));
 8882   effect(KILL cr);
 8883   ins_cost(200);
 8884   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8885             "ADC    $dst.hi,$src.hi" %}
 8886   opcode(0x03, 0x13);
 8887   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8888   ins_pipe( ialu_reg_reg_long );
 8889 %}
 8890 
 8891 // Add Long Register with Immediate
 8892 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8893   match(Set dst (AddL dst src));
 8894   effect(KILL cr);
 8895   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8896             "ADC    $dst.hi,$src.hi" %}
 8897   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8898   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8899   ins_pipe( ialu_reg_long );
 8900 %}
 8901 
 8902 // Add Long Register with Memory
 8903 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8904   match(Set dst (AddL dst (LoadL mem)));
 8905   effect(KILL cr);
 8906   ins_cost(125);
 8907   format %{ "ADD    $dst.lo,$mem\n\t"
 8908             "ADC    $dst.hi,$mem+4" %}
 8909   opcode(0x03, 0x13);
 8910   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8911   ins_pipe( ialu_reg_long_mem );
 8912 %}
 8913 
 8914 // Subtract Long Register with Register.
 8915 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8916   match(Set dst (SubL dst src));
 8917   effect(KILL cr);
 8918   ins_cost(200);
 8919   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8920             "SBB    $dst.hi,$src.hi" %}
 8921   opcode(0x2B, 0x1B);
 8922   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8923   ins_pipe( ialu_reg_reg_long );
 8924 %}
 8925 
 8926 // Subtract Long Register with Immediate
 8927 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8928   match(Set dst (SubL dst src));
 8929   effect(KILL cr);
 8930   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8931             "SBB    $dst.hi,$src.hi" %}
 8932   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8933   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8934   ins_pipe( ialu_reg_long );
 8935 %}
 8936 
 8937 // Subtract Long Register with Memory
 8938 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8939   match(Set dst (SubL dst (LoadL mem)));
 8940   effect(KILL cr);
 8941   ins_cost(125);
 8942   format %{ "SUB    $dst.lo,$mem\n\t"
 8943             "SBB    $dst.hi,$mem+4" %}
 8944   opcode(0x2B, 0x1B);
 8945   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8946   ins_pipe( ialu_reg_long_mem );
 8947 %}
 8948 
 8949 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8950   match(Set dst (SubL zero dst));
 8951   effect(KILL cr);
 8952   ins_cost(300);
 8953   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8954   ins_encode( neg_long(dst) );
 8955   ins_pipe( ialu_reg_reg_long );
 8956 %}
 8957 
 8958 // And Long Register with Register
 8959 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8960   match(Set dst (AndL dst src));
 8961   effect(KILL cr);
 8962   format %{ "AND    $dst.lo,$src.lo\n\t"
 8963             "AND    $dst.hi,$src.hi" %}
 8964   opcode(0x23,0x23);
 8965   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8966   ins_pipe( ialu_reg_reg_long );
 8967 %}
 8968 
 8969 // And Long Register with Immediate
 8970 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8971   match(Set dst (AndL dst src));
 8972   effect(KILL cr);
 8973   format %{ "AND    $dst.lo,$src.lo\n\t"
 8974             "AND    $dst.hi,$src.hi" %}
 8975   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8976   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8977   ins_pipe( ialu_reg_long );
 8978 %}
 8979 
 8980 // And Long Register with Memory
 8981 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8982   match(Set dst (AndL dst (LoadL mem)));
 8983   effect(KILL cr);
 8984   ins_cost(125);
 8985   format %{ "AND    $dst.lo,$mem\n\t"
 8986             "AND    $dst.hi,$mem+4" %}
 8987   opcode(0x23, 0x23);
 8988   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8989   ins_pipe( ialu_reg_long_mem );
 8990 %}
 8991 
 8992 // BMI1 instructions
 8993 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8994   match(Set dst (AndL (XorL src1 minus_1) src2));
 8995   predicate(UseBMI1Instructions);
 8996   effect(KILL cr, TEMP dst);
 8997 
 8998   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8999             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 9000          %}
 9001 
 9002   ins_encode %{
 9003     Register Rdst = $dst$$Register;
 9004     Register Rsrc1 = $src1$$Register;
 9005     Register Rsrc2 = $src2$$Register;
 9006     __ andnl(Rdst, Rsrc1, Rsrc2);
 9007     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 9008   %}
 9009   ins_pipe(ialu_reg_reg_long);
 9010 %}
 9011 
 9012 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 9013   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9014   predicate(UseBMI1Instructions);
 9015   effect(KILL cr, TEMP dst);
 9016 
 9017   ins_cost(125);
 9018   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9019             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9020          %}
 9021 
 9022   ins_encode %{
 9023     Register Rdst = $dst$$Register;
 9024     Register Rsrc1 = $src1$$Register;
 9025     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9026 
 9027     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9028     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9029   %}
 9030   ins_pipe(ialu_reg_mem);
 9031 %}
 9032 
 9033 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9034   match(Set dst (AndL (SubL imm_zero src) src));
 9035   predicate(UseBMI1Instructions);
 9036   effect(KILL cr, TEMP dst);
 9037 
 9038   format %{ "MOVL   $dst.hi, 0\n\t"
 9039             "BLSIL  $dst.lo, $src.lo\n\t"
 9040             "JNZ    done\n\t"
 9041             "BLSIL  $dst.hi, $src.hi\n"
 9042             "done:"
 9043          %}
 9044 
 9045   ins_encode %{
 9046     Label done;
 9047     Register Rdst = $dst$$Register;
 9048     Register Rsrc = $src$$Register;
 9049     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9050     __ blsil(Rdst, Rsrc);
 9051     __ jccb(Assembler::notZero, done);
 9052     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9053     __ bind(done);
 9054   %}
 9055   ins_pipe(ialu_reg);
 9056 %}
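      // Word-splitting note for the 64-bit BLSI/BLSMSK/BLSR rules: the lowest
      // set bit of a long lies in the low word iff the low word is nonzero.
      // BLSI leaves ZF set only for a zero source, so the JNZ above skips the
      // high word exactly when the low word already contained the bit (the high
      // result word was pre-zeroed). The BLSMSK/BLSR forms below use CF the same
      // way, since those instructions set CF only when their source is zero.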
 9057 
 9058 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9059   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9060   predicate(UseBMI1Instructions);
 9061   effect(KILL cr, TEMP dst);
 9062 
 9063   ins_cost(125);
 9064   format %{ "MOVL   $dst.hi, 0\n\t"
 9065             "BLSIL  $dst.lo, $src\n\t"
 9066             "JNZ    done\n\t"
 9067             "BLSIL  $dst.hi, $src+4\n"
 9068             "done:"
 9069          %}
 9070 
 9071   ins_encode %{
 9072     Label done;
 9073     Register Rdst = $dst$$Register;
 9074     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9075 
 9076     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9077     __ blsil(Rdst, $src$$Address);
 9078     __ jccb(Assembler::notZero, done);
 9079     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9080     __ bind(done);
 9081   %}
 9082   ins_pipe(ialu_reg_mem);
 9083 %}
 9084 
 9085 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9086 %{
 9087   match(Set dst (XorL (AddL src minus_1) src));
 9088   predicate(UseBMI1Instructions);
 9089   effect(KILL cr, TEMP dst);
 9090 
 9091   format %{ "MOVL    $dst.hi, 0\n\t"
 9092             "BLSMSKL $dst.lo, $src.lo\n\t"
 9093             "JNC     done\n\t"
 9094             "BLSMSKL $dst.hi, $src.hi\n"
 9095             "done:"
 9096          %}
 9097 
 9098   ins_encode %{
 9099     Label done;
 9100     Register Rdst = $dst$$Register;
 9101     Register Rsrc = $src$$Register;
 9102     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9103     __ blsmskl(Rdst, Rsrc);
 9104     __ jccb(Assembler::carryClear, done);
 9105     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9106     __ bind(done);
 9107   %}
 9108 
 9109   ins_pipe(ialu_reg);
 9110 %}
 9111 
 9112 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9113 %{
 9114   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9115   predicate(UseBMI1Instructions);
 9116   effect(KILL cr, TEMP dst);
 9117 
 9118   ins_cost(125);
 9119   format %{ "MOVL    $dst.hi, 0\n\t"
 9120             "BLSMSKL $dst.lo, $src\n\t"
 9121             "JNC     done\n\t"
 9122             "BLSMSKL $dst.hi, $src+4\n"
 9123             "done:"
 9124          %}
 9125 
 9126   ins_encode %{
 9127     Label done;
 9128     Register Rdst = $dst$$Register;
 9129     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9130 
 9131     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9132     __ blsmskl(Rdst, $src$$Address);
 9133     __ jccb(Assembler::carryClear, done);
 9134     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9135     __ bind(done);
 9136   %}
 9137 
 9138   ins_pipe(ialu_reg_mem);
 9139 %}
 9140 
 9141 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9142 %{
 9143   match(Set dst (AndL (AddL src minus_1) src) );
 9144   predicate(UseBMI1Instructions);
 9145   effect(KILL cr, TEMP dst);
 9146 
 9147   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9148             "BLSRL  $dst.lo, $src.lo\n\t"
 9149             "JNC    done\n\t"
 9150             "BLSRL  $dst.hi, $src.hi\n"
 9151             "done:"
 9152   %}
 9153 
 9154   ins_encode %{
 9155     Label done;
 9156     Register Rdst = $dst$$Register;
 9157     Register Rsrc = $src$$Register;
 9158     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9159     __ blsrl(Rdst, Rsrc);
 9160     __ jccb(Assembler::carryClear, done);
 9161     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9162     __ bind(done);
 9163   %}
 9164 
 9165   ins_pipe(ialu_reg);
 9166 %}
 9167 
 9168 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9169 %{
 9170   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9171   predicate(UseBMI1Instructions);
 9172   effect(KILL cr, TEMP dst);
 9173 
 9174   ins_cost(125);
 9175   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9176             "BLSRL  $dst.lo, $src\n\t"
 9177             "JNC    done\n\t"
 9178             "BLSRL  $dst.hi, $src+4\n"
 9179             "done:"
 9180   %}
 9181 
 9182   ins_encode %{
 9183     Label done;
 9184     Register Rdst = $dst$$Register;
 9185     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9186     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9187     __ blsrl(Rdst, $src$$Address);
 9188     __ jccb(Assembler::carryClear, done);
 9189     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9190     __ bind(done);
 9191   %}
 9192 
 9193   ins_pipe(ialu_reg_mem);
 9194 %}
 9195 
 9196 // Or Long Register with Register
 9197 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9198   match(Set dst (OrL dst src));
 9199   effect(KILL cr);
 9200   format %{ "OR     $dst.lo,$src.lo\n\t"
 9201             "OR     $dst.hi,$src.hi" %}
 9202   opcode(0x0B,0x0B);
 9203   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9204   ins_pipe( ialu_reg_reg_long );
 9205 %}
 9206 
 9207 // Or Long Register with Immediate
 9208 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9209   match(Set dst (OrL dst src));
 9210   effect(KILL cr);
 9211   format %{ "OR     $dst.lo,$src.lo\n\t"
 9212             "OR     $dst.hi,$src.hi" %}
 9213   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9214   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9215   ins_pipe( ialu_reg_long );
 9216 %}
 9217 
 9218 // Or Long Register with Memory
 9219 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9220   match(Set dst (OrL dst (LoadL mem)));
 9221   effect(KILL cr);
 9222   ins_cost(125);
 9223   format %{ "OR     $dst.lo,$mem\n\t"
 9224             "OR     $dst.hi,$mem+4" %}
 9225   opcode(0x0B,0x0B);
 9226   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9227   ins_pipe( ialu_reg_long_mem );
 9228 %}
 9229 
 9230 // Xor Long Register with Register
 9231 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9232   match(Set dst (XorL dst src));
 9233   effect(KILL cr);
 9234   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9235             "XOR    $dst.hi,$src.hi" %}
 9236   opcode(0x33,0x33);
 9237   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9238   ins_pipe( ialu_reg_reg_long );
 9239 %}
 9240 
 9241 // Xor Long Register with Immediate -1
 9242 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9243   match(Set dst (XorL dst imm));
 9244   format %{ "NOT    $dst.lo\n\t"
 9245             "NOT    $dst.hi" %}
 9246   ins_encode %{
 9247      __ notl($dst$$Register);
 9248      __ notl(HIGH_FROM_LOW($dst$$Register));
 9249   %}
 9250   ins_pipe( ialu_reg_long );
 9251 %}
 9252 
 9253 // Xor Long Register with Immediate
 9254 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9255   match(Set dst (XorL dst src));
 9256   effect(KILL cr);
 9257   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9258             "XOR    $dst.hi,$src.hi" %}
 9259   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9260   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9261   ins_pipe( ialu_reg_long );
 9262 %}
 9263 
 9264 // Xor Long Register with Memory
 9265 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9266   match(Set dst (XorL dst (LoadL mem)));
 9267   effect(KILL cr);
 9268   ins_cost(125);
 9269   format %{ "XOR    $dst.lo,$mem\n\t"
 9270             "XOR    $dst.hi,$mem+4" %}
 9271   opcode(0x33,0x33);
 9272   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9273   ins_pipe( ialu_reg_long_mem );
 9274 %}
 9275 
 9276 // Shift Left Long by 1
 9277 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9278   predicate(UseNewLongLShift);
 9279   match(Set dst (LShiftL dst cnt));
 9280   effect(KILL cr);
 9281   ins_cost(100);
 9282   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9283             "ADC    $dst.hi,$dst.hi" %}
 9284   ins_encode %{
 9285     __ addl($dst$$Register,$dst$$Register);
 9286     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9287   %}
 9288   ins_pipe( ialu_reg_long );
 9289 %}
 9290 
 9291 // Shift Left Long by 2
 9292 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9293   predicate(UseNewLongLShift);
 9294   match(Set dst (LShiftL dst cnt));
 9295   effect(KILL cr);
 9296   ins_cost(100);
 9297   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9298             "ADC    $dst.hi,$dst.hi\n\t"
 9299             "ADD    $dst.lo,$dst.lo\n\t"
 9300             "ADC    $dst.hi,$dst.hi" %}
 9301   ins_encode %{
 9302     __ addl($dst$$Register,$dst$$Register);
 9303     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9304     __ addl($dst$$Register,$dst$$Register);
 9305     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9306   %}
 9307   ins_pipe( ialu_reg_long );
 9308 %}
 9309 
 9310 // Shift Left Long by 3
 9311 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9312   predicate(UseNewLongLShift);
 9313   match(Set dst (LShiftL dst cnt));
 9314   effect(KILL cr);
 9315   ins_cost(100);
 9316   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9317             "ADC    $dst.hi,$dst.hi\n\t"
 9318             "ADD    $dst.lo,$dst.lo\n\t"
 9319             "ADC    $dst.hi,$dst.hi\n\t"
 9320             "ADD    $dst.lo,$dst.lo\n\t"
 9321             "ADC    $dst.hi,$dst.hi" %}
 9322   ins_encode %{
 9323     __ addl($dst$$Register,$dst$$Register);
 9324     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9325     __ addl($dst$$Register,$dst$$Register);
 9326     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9327     __ addl($dst$$Register,$dst$$Register);
 9328     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9329   %}
 9330   ins_pipe( ialu_reg_long );
 9331 %}
 9332 
 9333 // Shift Left Long by 1-31
 9334 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9335   match(Set dst (LShiftL dst cnt));
 9336   effect(KILL cr);
 9337   ins_cost(200);
 9338   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9339             "SHL    $dst.lo,$cnt" %}
 9340   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9341   ins_encode( move_long_small_shift(dst,cnt) );
 9342   ins_pipe( ialu_reg_long );
 9343 %}
 9344 
 9345 // Shift Left Long by 32-63
 9346 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9347   match(Set dst (LShiftL dst cnt));
 9348   effect(KILL cr);
 9349   ins_cost(300);
 9350   format %{ "MOV    $dst.hi,$dst.lo\n"
 9351           "\tSHL    $dst.hi,$cnt-32\n"
 9352           "\tXOR    $dst.lo,$dst.lo" %}
 9353   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9354   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9355   ins_pipe( ialu_reg_long );
 9356 %}
 9357 
 9358 // Shift Left Long by variable
 9359 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9360   match(Set dst (LShiftL dst shift));
 9361   effect(KILL cr);
 9362   ins_cost(500+200);
 9363   size(17);
 9364   format %{ "TEST   $shift,32\n\t"
 9365             "JEQ,s  small\n\t"
 9366             "MOV    $dst.hi,$dst.lo\n\t"
 9367             "XOR    $dst.lo,$dst.lo\n"
 9368     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9369             "SHL    $dst.lo,$shift" %}
 9370   ins_encode( shift_left_long( dst, shift ) );
 9371   ins_pipe( pipe_slow );
 9372 %}
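      // A rough C model of the variable 64-bit left shift above (illustrative
      // only; shl64_sketch is a made-up name). SHLD/SHL mask the CL count to
      // 5 bits, so the explicit test of bit 5 handles counts of 32..63:
      //
      //   #include <stdint.h>
      //   static uint64_t shl64_sketch(uint32_t lo, uint32_t hi, unsigned s) {
      //     if (s & 32) { hi = lo; lo = 0; }          // TEST $shift,32 / MOV / XOR
      //     unsigned n = s & 31;
      //     if (n != 0) {
      //       hi = (hi << n) | (lo >> (32 - n));      // SHLD $dst.hi,$dst.lo,$shift
      //       lo <<= n;                               // SHL  $dst.lo,$shift
      //     }
      //     return ((uint64_t)hi << 32) | lo;
      //   }
      //
      // The unsigned and arithmetic right-shift rules below mirror this with
      // SHRD and SHR/SAR, pre-moving the high word down instead.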
 9373 
 9374 // Shift Right Long by 1-31
 9375 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9376   match(Set dst (URShiftL dst cnt));
 9377   effect(KILL cr);
 9378   ins_cost(200);
 9379   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9380             "SHR    $dst.hi,$cnt" %}
 9381   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9382   ins_encode( move_long_small_shift(dst,cnt) );
 9383   ins_pipe( ialu_reg_long );
 9384 %}
 9385 
 9386 // Shift Right Long by 32-63
 9387 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9388   match(Set dst (URShiftL dst cnt));
 9389   effect(KILL cr);
 9390   ins_cost(300);
 9391   format %{ "MOV    $dst.lo,$dst.hi\n"
 9392           "\tSHR    $dst.lo,$cnt-32\n"
 9393           "\tXOR    $dst.hi,$dst.hi" %}
 9394   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9395   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9396   ins_pipe( ialu_reg_long );
 9397 %}
 9398 
 9399 // Shift Right Long by variable
 9400 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9401   match(Set dst (URShiftL dst shift));
 9402   effect(KILL cr);
 9403   ins_cost(600);
 9404   size(17);
 9405   format %{ "TEST   $shift,32\n\t"
 9406             "JEQ,s  small\n\t"
 9407             "MOV    $dst.lo,$dst.hi\n\t"
 9408             "XOR    $dst.hi,$dst.hi\n"
 9409     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9410             "SHR    $dst.hi,$shift" %}
 9411   ins_encode( shift_right_long( dst, shift ) );
 9412   ins_pipe( pipe_slow );
 9413 %}
 9414 
 9415 // Shift Right Long by 1-31
 9416 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9417   match(Set dst (RShiftL dst cnt));
 9418   effect(KILL cr);
 9419   ins_cost(200);
 9420   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9421             "SAR    $dst.hi,$cnt" %}
 9422   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9423   ins_encode( move_long_small_shift(dst,cnt) );
 9424   ins_pipe( ialu_reg_long );
 9425 %}
 9426 
 9427 // Shift Right Long by 32-63
 9428 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9429   match(Set dst (RShiftL dst cnt));
 9430   effect(KILL cr);
 9431   ins_cost(300);
 9432   format %{ "MOV    $dst.lo,$dst.hi\n"
 9433           "\tSAR    $dst.lo,$cnt-32\n"
 9434           "\tSAR    $dst.hi,31" %}
 9435   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9436   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9437   ins_pipe( ialu_reg_long );
 9438 %}
 9439 
 9440 // Shift Right arithmetic Long by variable
 9441 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9442   match(Set dst (RShiftL dst shift));
 9443   effect(KILL cr);
 9444   ins_cost(600);
 9445   size(18);
 9446   format %{ "TEST   $shift,32\n\t"
 9447             "JEQ,s  small\n\t"
 9448             "MOV    $dst.lo,$dst.hi\n\t"
 9449             "SAR    $dst.hi,31\n"
 9450     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9451             "SAR    $dst.hi,$shift" %}
 9452   ins_encode( shift_right_arith_long( dst, shift ) );
 9453   ins_pipe( pipe_slow );
 9454 %}
 9455 
 9456 
 9457 //----------Double Instructions------------------------------------------------
 9458 // Double Math
 9459 
 9460 // Compare & branch
 9461 
 9462 // P6 version of float compare, sets condition codes in EFLAGS
 9463 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9464   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9465   match(Set cr (CmpD src1 src2));
 9466   effect(KILL rax);
 9467   ins_cost(150);
 9468   format %{ "FLD    $src1\n\t"
 9469             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9470             "JNP    exit\n\t"
 9471             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9472             "SAHF\n"
 9473      "exit:\tNOP               // avoid branch to branch" %}
 9474   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9475   ins_encode( Push_Reg_DPR(src1),
 9476               OpcP, RegOpc(src2),
 9477               cmpF_P6_fixup );
 9478   ins_pipe( pipe_slow );
 9479 %}
 9480 
 9481 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9482   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9483   match(Set cr (CmpD src1 src2));
 9484   ins_cost(150);
 9485   format %{ "FLD    $src1\n\t"
 9486             "FUCOMIP ST,$src2  // P6 instruction" %}
 9487   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9488   ins_encode( Push_Reg_DPR(src1),
 9489               OpcP, RegOpc(src2));
 9490   ins_pipe( pipe_slow );
 9491 %}
 9492 
 9493 // Compare & branch
 9494 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9495   predicate(UseSSE<=1);
 9496   match(Set cr (CmpD src1 src2));
 9497   effect(KILL rax);
 9498   ins_cost(200);
 9499   format %{ "FLD    $src1\n\t"
 9500             "FCOMp  $src2\n\t"
 9501             "FNSTSW AX\n\t"
 9502             "TEST   AX,0x400\n\t"
 9503             "JZ,s   flags\n\t"
 9504             "MOV    AH,1\t# unordered treat as LT\n"
 9505     "flags:\tSAHF" %}
 9506   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9507   ins_encode( Push_Reg_DPR(src1),
 9508               OpcP, RegOpc(src2),
 9509               fpu_flags);
 9510   ins_pipe( pipe_slow );
 9511 %}
 9512 
 9513 // Compare vs zero into -1,0,1
 9514 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9515   predicate(UseSSE<=1);
 9516   match(Set dst (CmpD3 src1 zero));
 9517   effect(KILL cr, KILL rax);
 9518   ins_cost(280);
 9519   format %{ "FTSTD  $dst,$src1" %}
 9520   opcode(0xE4, 0xD9);
 9521   ins_encode( Push_Reg_DPR(src1),
 9522               OpcS, OpcP, PopFPU,
 9523               CmpF_Result(dst));
 9524   ins_pipe( pipe_slow );
 9525 %}
 9526 
 9527 // Compare into -1,0,1
 9528 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9529   predicate(UseSSE<=1);
 9530   match(Set dst (CmpD3 src1 src2));
 9531   effect(KILL cr, KILL rax);
 9532   ins_cost(300);
 9533   format %{ "FCMPD  $dst,$src1,$src2" %}
 9534   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9535   ins_encode( Push_Reg_DPR(src1),
 9536               OpcP, RegOpc(src2),
 9537               CmpF_Result(dst));
 9538   ins_pipe( pipe_slow );
 9539 %}
 9540 
 9541 // float compare and set condition codes in EFLAGS by XMM regs
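// UCOMISD sets ZF=PF=CF=1 when either operand is NaN (unordered); the fixup
// below clears ZF and PF in the saved EFLAGS so the unordered case reads as
// 'below' (CF=1 only).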
 9542 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9543   predicate(UseSSE>=2);
 9544   match(Set cr (CmpD src1 src2));
 9545   ins_cost(145);
 9546   format %{ "UCOMISD $src1,$src2\n\t"
 9547             "JNP,s   exit\n\t"
 9548             "PUSHF\t# saw NaN, set CF\n\t"
 9549             "AND     [rsp], #0xffffff2b\n\t"
 9550             "POPF\n"
 9551     "exit:" %}
 9552   ins_encode %{
 9553     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9554     emit_cmpfp_fixup(_masm);
 9555   %}
 9556   ins_pipe( pipe_slow );
 9557 %}
 9558 
 9559 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9560   predicate(UseSSE>=2);
 9561   match(Set cr (CmpD src1 src2));
 9562   ins_cost(100);
 9563   format %{ "UCOMISD $src1,$src2" %}
 9564   ins_encode %{
 9565     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9566   %}
 9567   ins_pipe( pipe_slow );
 9568 %}
 9569 
 9570 // float compare and set condition codes in EFLAGS by XMM regs
 9571 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9572   predicate(UseSSE>=2);
 9573   match(Set cr (CmpD src1 (LoadD src2)));
 9574   ins_cost(145);
 9575   format %{ "UCOMISD $src1,$src2\n\t"
 9576             "JNP,s   exit\n\t"
 9577             "PUSHF\t# saw NaN, set CF\n\t"
 9578             "AND     [rsp], #0xffffff2b\n\t"
 9579             "POPF\n"
 9580     "exit:" %}
 9581   ins_encode %{
 9582     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9583     emit_cmpfp_fixup(_masm);
 9584   %}
 9585   ins_pipe( pipe_slow );
 9586 %}
 9587 
 9588 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9589   predicate(UseSSE>=2);
 9590   match(Set cr (CmpD src1 (LoadD src2)));
 9591   ins_cost(100);
 9592   format %{ "UCOMISD $src1,$src2" %}
 9593   ins_encode %{
 9594     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9595   %}
 9596   ins_pipe( pipe_slow );
 9597 %}
 9598 
 9599 // Compare into -1,0,1 in XMM
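// The preloaded -1 survives the unordered (JP) and less-than (JB) cases;
// otherwise SETNE/MOVZB yield 0 for equal and 1 for greater.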
 9600 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9601   predicate(UseSSE>=2);
 9602   match(Set dst (CmpD3 src1 src2));
 9603   effect(KILL cr);
 9604   ins_cost(255);
 9605   format %{ "UCOMISD $src1, $src2\n\t"
 9606             "MOV     $dst, #-1\n\t"
 9607             "JP,s    done\n\t"
 9608             "JB,s    done\n\t"
 9609             "SETNE   $dst\n\t"
 9610             "MOVZB   $dst, $dst\n"
 9611     "done:" %}
 9612   ins_encode %{
 9613     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9614     emit_cmpfp3(_masm, $dst$$Register);
 9615   %}
 9616   ins_pipe( pipe_slow );
 9617 %}
 9618 
 9619 // Compare into -1,0,1 in XMM and memory
 9620 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9621   predicate(UseSSE>=2);
 9622   match(Set dst (CmpD3 src1 (LoadD src2)));
 9623   effect(KILL cr);
 9624   ins_cost(275);
 9625   format %{ "UCOMISD $src1, $src2\n\t"
 9626             "MOV     $dst, #-1\n\t"
 9627             "JP,s    done\n\t"
 9628             "JB,s    done\n\t"
 9629             "SETNE   $dst\n\t"
 9630             "MOVZB   $dst, $dst\n"
 9631     "done:" %}
 9632   ins_encode %{
 9633     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9634     emit_cmpfp3(_masm, $dst$$Register);
 9635   %}
 9636   ins_pipe( pipe_slow );
 9637 %}
 9638 
 9639 
 9640 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9641   predicate (UseSSE <=1);
 9642   match(Set dst (SubD dst src));
 9643 
 9644   format %{ "FLD    $src\n\t"
 9645             "DSUBp  $dst,ST" %}
 9646   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9647   ins_cost(150);
 9648   ins_encode( Push_Reg_DPR(src),
 9649               OpcP, RegOpc(dst) );
 9650   ins_pipe( fpu_reg_reg );
 9651 %}
 9652 
 9653 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9654   predicate (UseSSE <=1);
 9655   match(Set dst (RoundDouble (SubD src1 src2)));
 9656   ins_cost(250);
 9657 
 9658   format %{ "FLD    $src2\n\t"
 9659             "DSUB   ST,$src1\n\t"
 9660             "FSTP_D $dst\t# D-round" %}
 9661   opcode(0xD8, 0x5);
 9662   ins_encode( Push_Reg_DPR(src2),
 9663               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9664   ins_pipe( fpu_mem_reg_reg );
 9665 %}
 9666 
 9667 
 9668 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9669   predicate (UseSSE <=1);
 9670   match(Set dst (SubD dst (LoadD src)));
 9671   ins_cost(150);
 9672 
 9673   format %{ "FLD    $src\n\t"
 9674             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9676   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9677               OpcP, RegOpc(dst) );
 9678   ins_pipe( fpu_reg_mem );
 9679 %}
 9680 
 9681 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9682   predicate (UseSSE<=1);
 9683   match(Set dst (AbsD src));
 9684   ins_cost(100);
 9685   format %{ "FABS" %}
 9686   opcode(0xE1, 0xD9);
 9687   ins_encode( OpcS, OpcP );
 9688   ins_pipe( fpu_reg_reg );
 9689 %}
 9690 
 9691 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9692   predicate(UseSSE<=1);
 9693   match(Set dst (NegD src));
 9694   ins_cost(100);
 9695   format %{ "FCHS" %}
 9696   opcode(0xE0, 0xD9);
 9697   ins_encode( OpcS, OpcP );
 9698   ins_pipe( fpu_reg_reg );
 9699 %}
 9700 
 9701 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9702   predicate(UseSSE<=1);
 9703   match(Set dst (AddD dst src));
 9704   format %{ "FLD    $src\n\t"
 9705             "DADD   $dst,ST" %}
 9706   size(4);
 9707   ins_cost(150);
 9708   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9709   ins_encode( Push_Reg_DPR(src),
 9710               OpcP, RegOpc(dst) );
 9711   ins_pipe( fpu_reg_reg );
 9712 %}
 9713 
 9714 
 9715 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9716   predicate(UseSSE<=1);
 9717   match(Set dst (RoundDouble (AddD src1 src2)));
 9718   ins_cost(250);
 9719 
 9720   format %{ "FLD    $src2\n\t"
 9721             "DADD   ST,$src1\n\t"
 9722             "FSTP_D $dst\t# D-round" %}
 9723   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9724   ins_encode( Push_Reg_DPR(src2),
 9725               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9726   ins_pipe( fpu_mem_reg_reg );
 9727 %}
 9728 
 9729 
 9730 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9731   predicate(UseSSE<=1);
 9732   match(Set dst (AddD dst (LoadD src)));
 9733   ins_cost(150);
 9734 
 9735   format %{ "FLD    $src\n\t"
 9736             "DADDp  $dst,ST" %}
 9737   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9738   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9739               OpcP, RegOpc(dst) );
 9740   ins_pipe( fpu_reg_mem );
 9741 %}
 9742 
 9743 // add-to-memory
 9744 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9745   predicate(UseSSE<=1);
 9746   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9747   ins_cost(150);
 9748 
 9749   format %{ "FLD_D  $dst\n\t"
 9750             "DADD   ST,$src\n\t"
 9751             "FST_D  $dst" %}
 9752   opcode(0xDD, 0x0);
 9753   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9754               Opcode(0xD8), RegOpc(src),
 9755               set_instruction_start,
 9756               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9757   ins_pipe( fpu_reg_mem );
 9758 %}
 9759 
 9760 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9761   predicate(UseSSE<=1);
 9762   match(Set dst (AddD dst con));
 9763   ins_cost(125);
 9764   format %{ "FLD1\n\t"
 9765             "DADDp  $dst,ST" %}
 9766   ins_encode %{
 9767     __ fld1();
 9768     __ faddp($dst$$reg);
 9769   %}
 9770   ins_pipe(fpu_reg);
 9771 %}
 9772 
 9773 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9774   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9775   match(Set dst (AddD dst con));
 9776   ins_cost(200);
 9777   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9778             "DADDp  $dst,ST" %}
 9779   ins_encode %{
 9780     __ fld_d($constantaddress($con));
 9781     __ faddp($dst$$reg);
 9782   %}
 9783   ins_pipe(fpu_reg_mem);
 9784 %}
 9785 
 9786 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9787   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9788   match(Set dst (RoundDouble (AddD src con)));
 9789   ins_cost(200);
 9790   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9791             "DADD   ST,$src\n\t"
 9792             "FSTP_D $dst\t# D-round" %}
 9793   ins_encode %{
 9794     __ fld_d($constantaddress($con));
 9795     __ fadd($src$$reg);
 9796     __ fstp_d(Address(rsp, $dst$$disp));
 9797   %}
 9798   ins_pipe(fpu_mem_reg_con);
 9799 %}
 9800 
 9801 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9802   predicate(UseSSE<=1);
 9803   match(Set dst (MulD dst src));
 9804   format %{ "FLD    $src\n\t"
 9805             "DMULp  $dst,ST" %}
 9806   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9807   ins_cost(150);
 9808   ins_encode( Push_Reg_DPR(src),
 9809               OpcP, RegOpc(dst) );
 9810   ins_pipe( fpu_reg_reg );
 9811 %}
 9812 
 9813 // Strict FP instruction biases argument before multiply then
 9814 // biases result to avoid double rounding of subnormals.
 9815 //
 9816 // scale arg1 by multiplying arg1 by 2^(-15360)
 9817 // load arg2
 9818 // multiply scaled arg1 by arg2
 9819 // rescale product by 2^(15360)
 9820 //
 9821 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9822   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9823   match(Set dst (MulD dst src));
 9824   ins_cost(1);   // Select this instruction for all FP double multiplies
 9825 
 9826   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9827             "DMULp  $dst,ST\n\t"
 9828             "FLD    $src\n\t"
 9829             "DMULp  $dst,ST\n\t"
 9830             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9831             "DMULp  $dst,ST\n\t" %}
 9832   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9833   ins_encode( strictfp_bias1(dst),
 9834               Push_Reg_DPR(src),
 9835               OpcP, RegOpc(dst),
 9836               strictfp_bias2(dst) );
 9837   ins_pipe( fpu_reg_reg );
 9838 %}
 9839 
 9840 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9841   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9842   match(Set dst (MulD dst con));
 9843   ins_cost(200);
 9844   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9845             "DMULp  $dst,ST" %}
 9846   ins_encode %{
 9847     __ fld_d($constantaddress($con));
 9848     __ fmulp($dst$$reg);
 9849   %}
 9850   ins_pipe(fpu_reg_mem);
 9851 %}
 9852 
 9853 
 9854 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9855   predicate( UseSSE<=1 );
 9856   match(Set dst (MulD dst (LoadD src)));
 9857   ins_cost(200);
 9858   format %{ "FLD_D  $src\n\t"
 9859             "DMULp  $dst,ST" %}
 9860   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9861   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9862               OpcP, RegOpc(dst) );
 9863   ins_pipe( fpu_reg_mem );
 9864 %}
 9865 
 9866 //
 9867 // Cisc-alternate to reg-reg multiply
 9868 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9869   predicate( UseSSE<=1 );
 9870   match(Set dst (MulD src (LoadD mem)));
 9871   ins_cost(250);
 9872   format %{ "FLD_D  $mem\n\t"
 9873             "DMUL   ST,$src\n\t"
 9874             "FSTP_D $dst" %}
 9875   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9876   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9877               OpcReg_FPR(src),
 9878               Pop_Reg_DPR(dst) );
 9879   ins_pipe( fpu_reg_reg_mem );
 9880 %}
 9881 
 9882 
 9883 // MACRO3 -- addDPR a mulDPR
 9884 // This instruction is a '2-address' instruction in that the result goes
 9885 // back to src2.  This eliminates a move from the macro; possibly the
 9886 // register allocator will have to add it back (and maybe not).
 9887 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9888   predicate( UseSSE<=1 );
 9889   match(Set src2 (AddD (MulD src0 src1) src2));
 9890   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9891             "DMUL   ST,$src1\n\t"
 9892             "DADDp  $src2,ST" %}
 9893   ins_cost(250);
 9894   opcode(0xDD); /* LoadD DD /0 */
 9895   ins_encode( Push_Reg_FPR(src0),
 9896               FMul_ST_reg(src1),
 9897               FAddP_reg_ST(src2) );
 9898   ins_pipe( fpu_reg_reg_reg );
 9899 %}
 9900 
 9901 
 9902 // MACRO3 -- subDPR a mulDPR
 9903 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9904   predicate( UseSSE<=1 );
 9905   match(Set src2 (SubD (MulD src0 src1) src2));
 9906   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9907             "DMUL   ST,$src1\n\t"
 9908             "DSUBRp $src2,ST" %}
 9909   ins_cost(250);
 9910   ins_encode( Push_Reg_FPR(src0),
 9911               FMul_ST_reg(src1),
 9912               Opcode(0xDE), Opc_plus(0xE0,src2));
 9913   ins_pipe( fpu_reg_reg_reg );
 9914 %}
 9915 
 9916 
 9917 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9918   predicate( UseSSE<=1 );
 9919   match(Set dst (DivD dst src));
 9920 
 9921   format %{ "FLD    $src\n\t"
 9922             "FDIVp  $dst,ST" %}
 9923   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9924   ins_cost(150);
 9925   ins_encode( Push_Reg_DPR(src),
 9926               OpcP, RegOpc(dst) );
 9927   ins_pipe( fpu_reg_reg );
 9928 %}
 9929 
 9930 // Strict FP instruction biases argument before division then
 9931 // biases result, to avoid double rounding of subnormals.
 9932 //
 9933 // scale dividend by multiplying dividend by 2^(-15360)
 9934 // load divisor
 9935 // divide scaled dividend by divisor
 9936 // rescale quotient by 2^(15360)
 9937 //
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divides
 9943 
 9944   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9945             "DMULp  $dst,ST\n\t"
 9946             "FLD    $src\n\t"
 9947             "FDIVp  $dst,ST\n\t"
 9948             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9949             "DMULp  $dst,ST\n\t" %}
 9950   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9951   ins_encode( strictfp_bias1(dst),
 9952               Push_Reg_DPR(src),
 9953               OpcP, RegOpc(dst),
 9954               strictfp_bias2(dst) );
 9955   ins_pipe( fpu_reg_reg );
 9956 %}
 9957 
 9958 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9959   predicate(UseSSE<=1);
 9960   match(Set dst (ModD dst src));
 9961   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9962 
 9963   format %{ "DMOD   $dst,$src" %}
 9964   ins_cost(250);
 9965   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9966               emitModDPR(),
 9967               Push_Result_Mod_DPR(src),
 9968               Pop_Reg_DPR(dst));
 9969   ins_pipe( pipe_slow );
 9970 %}
 9971 
 9972 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9973   predicate(UseSSE>=2);
 9974   match(Set dst (ModD src0 src1));
 9975   effect(KILL rax, KILL cr);
 9976 
 9977   format %{ "SUB    ESP,8\t # DMOD\n"
 9978           "\tMOVSD  [ESP+0],$src1\n"
 9979           "\tFLD_D  [ESP+0]\n"
 9980           "\tMOVSD  [ESP+0],$src0\n"
 9981           "\tFLD_D  [ESP+0]\n"
 9982      "loop:\tFPREM\n"
 9983           "\tFWAIT\n"
 9984           "\tFNSTSW AX\n"
 9985           "\tSAHF\n"
 9986           "\tJP     loop\n"
 9987           "\tFSTP_D [ESP+0]\n"
 9988           "\tMOVSD  $dst,[ESP+0]\n"
 9989           "\tADD    ESP,8\n"
 9990           "\tFSTP   ST0\t # Restore FPU Stack"
 9991     %}
 9992   ins_cost(250);
 9993   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9994   ins_pipe( pipe_slow );
 9995 %}
 9996 
 9997 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9998   predicate (UseSSE<=1);
 9999   match(Set dst(AtanD dst src));
10000   format %{ "DATA   $dst,$src" %}
10001   opcode(0xD9, 0xF3);
10002   ins_encode( Push_Reg_DPR(src),
10003               OpcP, OpcS, RegOpc(dst) );
10004   ins_pipe( pipe_slow );
10005 %}
10006 
10007 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10008   predicate (UseSSE>=2);
10009   match(Set dst(AtanD dst src));
10010   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10011   format %{ "DATA   $dst,$src" %}
10012   opcode(0xD9, 0xF3);
10013   ins_encode( Push_SrcD(src),
10014               OpcP, OpcS, Push_ResultD(dst) );
10015   ins_pipe( pipe_slow );
10016 %}
10017 
10018 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10019   predicate (UseSSE<=1);
10020   match(Set dst (SqrtD src));
10021   format %{ "DSQRT  $dst,$src" %}
10022   opcode(0xFA, 0xD9);
10023   ins_encode( Push_Reg_DPR(src),
10024               OpcS, OpcP, Pop_Reg_DPR(dst) );
10025   ins_pipe( pipe_slow );
10026 %}
10027 
10028 //-------------Float Instructions-------------------------------
10029 // Float Math
10030 
10031 // Code for float compare:
10032 //     fcompp();
10033 //     fwait(); fnstsw_ax();
10034 //     sahf();
10035 //     movl(dst, unordered_result);
10036 //     jcc(Assembler::parity, exit);
10037 //     movl(dst, less_result);
10038 //     jcc(Assembler::below, exit);
10039 //     movl(dst, equal_result);
10040 //     jcc(Assembler::equal, exit);
10041 //     movl(dst, greater_result);
10042 //   exit:
10043 
10044 // P6 version of float compare, sets condition codes in EFLAGS
10045 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10046   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10047   match(Set cr (CmpF src1 src2));
10048   effect(KILL rax);
10049   ins_cost(150);
10050   format %{ "FLD    $src1\n\t"
10051             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10052             "JNP    exit\n\t"
10053             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10054             "SAHF\n"
10055      "exit:\tNOP               // avoid branch to branch" %}
10056   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10057   ins_encode( Push_Reg_DPR(src1),
10058               OpcP, RegOpc(src2),
10059               cmpF_P6_fixup );
10060   ins_pipe( pipe_slow );
10061 %}
10062 
10063 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10064   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10065   match(Set cr (CmpF src1 src2));
10066   ins_cost(100);
10067   format %{ "FLD    $src1\n\t"
10068             "FUCOMIP ST,$src2  // P6 instruction" %}
10069   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10070   ins_encode( Push_Reg_DPR(src1),
10071               OpcP, RegOpc(src2));
10072   ins_pipe( pipe_slow );
10073 %}
10074 
10075 
10076 // Compare & branch
10077 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10078   predicate(UseSSE == 0);
10079   match(Set cr (CmpF src1 src2));
10080   effect(KILL rax);
10081   ins_cost(200);
10082   format %{ "FLD    $src1\n\t"
10083             "FCOMp  $src2\n\t"
10084             "FNSTSW AX\n\t"
10085             "TEST   AX,0x400\n\t"
10086             "JZ,s   flags\n\t"
10087             "MOV    AH,1\t# unordered treat as LT\n"
10088     "flags:\tSAHF" %}
10089   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10090   ins_encode( Push_Reg_DPR(src1),
10091               OpcP, RegOpc(src2),
10092               fpu_flags);
10093   ins_pipe( pipe_slow );
10094 %}
10095 
10096 // Compare vs zero into -1,0,1
10097 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10098   predicate(UseSSE == 0);
10099   match(Set dst (CmpF3 src1 zero));
10100   effect(KILL cr, KILL rax);
10101   ins_cost(280);
10102   format %{ "FTSTF  $dst,$src1" %}
10103   opcode(0xE4, 0xD9);
10104   ins_encode( Push_Reg_DPR(src1),
10105               OpcS, OpcP, PopFPU,
10106               CmpF_Result(dst));
10107   ins_pipe( pipe_slow );
10108 %}
10109 
10110 // Compare into -1,0,1
10111 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10112   predicate(UseSSE == 0);
10113   match(Set dst (CmpF3 src1 src2));
10114   effect(KILL cr, KILL rax);
10115   ins_cost(300);
10116   format %{ "FCMPF  $dst,$src1,$src2" %}
10117   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10118   ins_encode( Push_Reg_DPR(src1),
10119               OpcP, RegOpc(src2),
10120               CmpF_Result(dst));
10121   ins_pipe( pipe_slow );
10122 %}
10123 
10124 // float compare and set condition codes in EFLAGS by XMM regs
10125 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10126   predicate(UseSSE>=1);
10127   match(Set cr (CmpF src1 src2));
10128   ins_cost(145);
10129   format %{ "UCOMISS $src1,$src2\n\t"
10130             "JNP,s   exit\n\t"
10131             "PUSHF\t# saw NaN, set CF\n\t"
10132             "AND     [rsp], #0xffffff2b\n\t"
10133             "POPF\n"
10134     "exit:" %}
10135   ins_encode %{
10136     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10137     emit_cmpfp_fixup(_masm);
10138   %}
10139   ins_pipe( pipe_slow );
10140 %}
10141 
10142 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10143   predicate(UseSSE>=1);
10144   match(Set cr (CmpF src1 src2));
10145   ins_cost(100);
10146   format %{ "UCOMISS $src1,$src2" %}
10147   ins_encode %{
10148     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10149   %}
10150   ins_pipe( pipe_slow );
10151 %}
10152 
10153 // float compare and set condition codes in EFLAGS by XMM regs
10154 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10155   predicate(UseSSE>=1);
10156   match(Set cr (CmpF src1 (LoadF src2)));
10157   ins_cost(165);
10158   format %{ "UCOMISS $src1,$src2\n\t"
10159             "JNP,s   exit\n\t"
10160             "PUSHF\t# saw NaN, set CF\n\t"
10161             "AND     [rsp], #0xffffff2b\n\t"
10162             "POPF\n"
10163     "exit:" %}
10164   ins_encode %{
10165     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10166     emit_cmpfp_fixup(_masm);
10167   %}
10168   ins_pipe( pipe_slow );
10169 %}
10170 
10171 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10172   predicate(UseSSE>=1);
10173   match(Set cr (CmpF src1 (LoadF src2)));
10174   ins_cost(100);
10175   format %{ "UCOMISS $src1,$src2" %}
10176   ins_encode %{
10177     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10178   %}
10179   ins_pipe( pipe_slow );
10180 %}
10181 
10182 // Compare into -1,0,1 in XMM
10183 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10184   predicate(UseSSE>=1);
10185   match(Set dst (CmpF3 src1 src2));
10186   effect(KILL cr);
10187   ins_cost(255);
10188   format %{ "UCOMISS $src1, $src2\n\t"
10189             "MOV     $dst, #-1\n\t"
10190             "JP,s    done\n\t"
10191             "JB,s    done\n\t"
10192             "SETNE   $dst\n\t"
10193             "MOVZB   $dst, $dst\n"
10194     "done:" %}
10195   ins_encode %{
10196     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10197     emit_cmpfp3(_masm, $dst$$Register);
10198   %}
10199   ins_pipe( pipe_slow );
10200 %}
10201 
10202 // Compare into -1,0,1 in XMM and memory
10203 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10204   predicate(UseSSE>=1);
10205   match(Set dst (CmpF3 src1 (LoadF src2)));
10206   effect(KILL cr);
10207   ins_cost(275);
10208   format %{ "UCOMISS $src1, $src2\n\t"
10209             "MOV     $dst, #-1\n\t"
10210             "JP,s    done\n\t"
10211             "JB,s    done\n\t"
10212             "SETNE   $dst\n\t"
10213             "MOVZB   $dst, $dst\n"
10214     "done:" %}
10215   ins_encode %{
10216     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10217     emit_cmpfp3(_masm, $dst$$Register);
10218   %}
10219   ins_pipe( pipe_slow );
10220 %}
10221 
10222 // Spill to obtain 24-bit precision
10223 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10224   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10225   match(Set dst (SubF src1 src2));
10226 
10227   format %{ "FSUB   $dst,$src1 - $src2" %}
10228   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10229   ins_encode( Push_Reg_FPR(src1),
10230               OpcReg_FPR(src2),
10231               Pop_Mem_FPR(dst) );
10232   ins_pipe( fpu_mem_reg_reg );
10233 %}
10234 //
10235 // This instruction does not round to 24-bits
10236 instruct subFPR_reg(regFPR dst, regFPR src) %{
10237   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10238   match(Set dst (SubF dst src));
10239 
10240   format %{ "FSUB   $dst,$src" %}
10241   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10242   ins_encode( Push_Reg_FPR(src),
10243               OpcP, RegOpc(dst) );
10244   ins_pipe( fpu_reg_reg );
10245 %}
10246 
10247 // Spill to obtain 24-bit precision
10248 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10249   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10250   match(Set dst (AddF src1 src2));
10251 
10252   format %{ "FADD   $dst,$src1,$src2" %}
10253   opcode(0xD8, 0x0); /* D8 C0+i */
10254   ins_encode( Push_Reg_FPR(src2),
10255               OpcReg_FPR(src1),
10256               Pop_Mem_FPR(dst) );
10257   ins_pipe( fpu_mem_reg_reg );
10258 %}
10259 //
10260 // This instruction does not round to 24-bits
10261 instruct addFPR_reg(regFPR dst, regFPR src) %{
10262   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10263   match(Set dst (AddF dst src));
10264 
10265   format %{ "FLD    $src\n\t"
10266             "FADDp  $dst,ST" %}
10267   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10268   ins_encode( Push_Reg_FPR(src),
10269               OpcP, RegOpc(dst) );
10270   ins_pipe( fpu_reg_reg );
10271 %}
10272 
10273 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10274   predicate(UseSSE==0);
10275   match(Set dst (AbsF src));
10276   ins_cost(100);
10277   format %{ "FABS" %}
10278   opcode(0xE1, 0xD9);
10279   ins_encode( OpcS, OpcP );
10280   ins_pipe( fpu_reg_reg );
10281 %}
10282 
10283 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10284   predicate(UseSSE==0);
10285   match(Set dst (NegF src));
10286   ins_cost(100);
10287   format %{ "FCHS" %}
10288   opcode(0xE0, 0xD9);
10289   ins_encode( OpcS, OpcP );
10290   ins_pipe( fpu_reg_reg );
10291 %}
10292 
10293 // Cisc-alternate to addFPR_reg
10294 // Spill to obtain 24-bit precision
10295 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10296   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10297   match(Set dst (AddF src1 (LoadF src2)));
10298 
10299   format %{ "FLD    $src2\n\t"
10300             "FADD   ST,$src1\n\t"
10301             "FSTP_S $dst" %}
10302   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10303   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10304               OpcReg_FPR(src1),
10305               Pop_Mem_FPR(dst) );
10306   ins_pipe( fpu_mem_reg_mem );
10307 %}
10308 //
10309 // Cisc-alternate to addFPR_reg
10310 // This instruction does not round to 24-bits
10311 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10312   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10313   match(Set dst (AddF dst (LoadF src)));
10314 
10315   format %{ "FADD   $dst,$src" %}
10316   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10317   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10318               OpcP, RegOpc(dst) );
10319   ins_pipe( fpu_reg_mem );
10320 %}
10321 
// Following two instructions for _222_mpegaudio
10323 // Spill to obtain 24-bit precision
10324 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10325   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10326   match(Set dst (AddF src1 src2));
10327 
10328   format %{ "FADD   $dst,$src1,$src2" %}
10329   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10330   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10331               OpcReg_FPR(src2),
10332               Pop_Mem_FPR(dst) );
10333   ins_pipe( fpu_mem_reg_mem );
10334 %}
10335 
10336 // Cisc-spill variant
10337 // Spill to obtain 24-bit precision
10338 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10339   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10340   match(Set dst (AddF src1 (LoadF src2)));
10341 
10342   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10343   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10344   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10345               set_instruction_start,
10346               OpcP, RMopc_Mem(secondary,src1),
10347               Pop_Mem_FPR(dst) );
10348   ins_pipe( fpu_mem_mem_mem );
10349 %}
10350 
10351 // Spill to obtain 24-bit precision
10352 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10353   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10354   match(Set dst (AddF src1 src2));
10355 
10356   format %{ "FADD   $dst,$src1,$src2" %}
10357   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10358   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10359               set_instruction_start,
10360               OpcP, RMopc_Mem(secondary,src1),
10361               Pop_Mem_FPR(dst) );
10362   ins_pipe( fpu_mem_mem_mem );
10363 %}
10364 
10365 
10366 // Spill to obtain 24-bit precision
10367 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10368   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10369   match(Set dst (AddF src con));
10370   format %{ "FLD    $src\n\t"
10371             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10372             "FSTP_S $dst"  %}
10373   ins_encode %{
10374     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10375     __ fadd_s($constantaddress($con));
10376     __ fstp_s(Address(rsp, $dst$$disp));
10377   %}
10378   ins_pipe(fpu_mem_reg_con);
10379 %}
10380 //
10381 // This instruction does not round to 24-bits
10382 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10383   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10384   match(Set dst (AddF src con));
10385   format %{ "FLD    $src\n\t"
10386             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10387             "FSTP   $dst"  %}
10388   ins_encode %{
10389     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10390     __ fadd_s($constantaddress($con));
10391     __ fstp_d($dst$$reg);
10392   %}
10393   ins_pipe(fpu_reg_reg_con);
10394 %}
10395 
10396 // Spill to obtain 24-bit precision
10397 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10398   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10399   match(Set dst (MulF src1 src2));
10400 
10401   format %{ "FLD    $src1\n\t"
10402             "FMUL   $src2\n\t"
10403             "FSTP_S $dst"  %}
10404   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10405   ins_encode( Push_Reg_FPR(src1),
10406               OpcReg_FPR(src2),
10407               Pop_Mem_FPR(dst) );
10408   ins_pipe( fpu_mem_reg_reg );
10409 %}
10410 //
10411 // This instruction does not round to 24-bits
10412 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10413   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10414   match(Set dst (MulF src1 src2));
10415 
10416   format %{ "FLD    $src1\n\t"
10417             "FMUL   $src2\n\t"
10418             "FSTP_S $dst"  %}
10419   opcode(0xD8, 0x1); /* D8 C8+i */
10420   ins_encode( Push_Reg_FPR(src2),
10421               OpcReg_FPR(src1),
10422               Pop_Reg_FPR(dst) );
10423   ins_pipe( fpu_reg_reg_reg );
10424 %}
10425 
10426 
10427 // Spill to obtain 24-bit precision
10428 // Cisc-alternate to reg-reg multiply
10429 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10430   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10431   match(Set dst (MulF src1 (LoadF src2)));
10432 
10433   format %{ "FLD_S  $src2\n\t"
10434             "FMUL   $src1\n\t"
10435             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1*/  /* LoadF D9 /0 */
10437   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10438               OpcReg_FPR(src1),
10439               Pop_Mem_FPR(dst) );
10440   ins_pipe( fpu_mem_reg_mem );
10441 %}
10442 //
10443 // This instruction does not round to 24-bits
10444 // Cisc-alternate to reg-reg multiply
10445 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10446   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10447   match(Set dst (MulF src1 (LoadF src2)));
10448 
10449   format %{ "FMUL   $dst,$src1,$src2" %}
10450   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10451   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10452               OpcReg_FPR(src1),
10453               Pop_Reg_FPR(dst) );
10454   ins_pipe( fpu_reg_reg_mem );
10455 %}
10456 
10457 // Spill to obtain 24-bit precision
10458 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10459   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10460   match(Set dst (MulF src1 src2));
10461 
10462   format %{ "FMUL   $dst,$src1,$src2" %}
10463   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10464   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10465               set_instruction_start,
10466               OpcP, RMopc_Mem(secondary,src1),
10467               Pop_Mem_FPR(dst) );
10468   ins_pipe( fpu_mem_mem_mem );
10469 %}
10470 
10471 // Spill to obtain 24-bit precision
10472 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10473   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10474   match(Set dst (MulF src con));
10475 
10476   format %{ "FLD    $src\n\t"
10477             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10478             "FSTP_S $dst"  %}
10479   ins_encode %{
10480     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10481     __ fmul_s($constantaddress($con));
10482     __ fstp_s(Address(rsp, $dst$$disp));
10483   %}
10484   ins_pipe(fpu_mem_reg_con);
10485 %}
10486 //
10487 // This instruction does not round to 24-bits
10488 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10489   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10490   match(Set dst (MulF src con));
10491 
10492   format %{ "FLD    $src\n\t"
10493             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10494             "FSTP   $dst"  %}
10495   ins_encode %{
10496     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10497     __ fmul_s($constantaddress($con));
10498     __ fstp_d($dst$$reg);
10499   %}
10500   ins_pipe(fpu_reg_reg_con);
10501 %}
10502 
10503 
10504 //
10505 // MACRO1 -- subsume unshared load into mulFPR
10506 // This instruction does not round to 24-bits
10507 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10508   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10509   match(Set dst (MulF (LoadF mem1) src));
10510 
10511   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10512             "FMUL   ST,$src\n\t"
10513             "FSTP   $dst" %}
10514   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10515   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10516               OpcReg_FPR(src),
10517               Pop_Reg_FPR(dst) );
10518   ins_pipe( fpu_reg_reg_mem );
10519 %}
10520 //
10521 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10522 // This instruction does not round to 24-bits
10523 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10524   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10525   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10526   ins_cost(95);
10527 
10528   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10529             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10530             "FADD   ST,$src2\n\t"
10531             "FSTP   $dst" %}
10532   opcode(0xD9); /* LoadF D9 /0 */
10533   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10534               FMul_ST_reg(src1),
10535               FAdd_ST_reg(src2),
10536               Pop_Reg_FPR(dst) );
10537   ins_pipe( fpu_reg_mem_reg_reg );
10538 %}
10539 
10540 // MACRO3 -- addFPR a mulFPR
10541 // This instruction does not round to 24-bits.  It is a '2-address'
10542 // instruction in that the result goes back to src2.  This eliminates
10543 // a move from the macro; possibly the register allocator will have
10544 // to add it back (and maybe not).
10545 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10546   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10547   match(Set src2 (AddF (MulF src0 src1) src2));
10548 
10549   format %{ "FLD    $src0     ===MACRO3===\n\t"
10550             "FMUL   ST,$src1\n\t"
10551             "FADDP  $src2,ST" %}
10552   opcode(0xD9); /* LoadF D9 /0 */
10553   ins_encode( Push_Reg_FPR(src0),
10554               FMul_ST_reg(src1),
10555               FAddP_reg_ST(src2) );
10556   ins_pipe( fpu_reg_reg_reg );
10557 %}
10558 
10559 // MACRO4 -- divFPR subFPR
10560 // This instruction does not round to 24-bits
10561 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10562   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10563   match(Set dst (DivF (SubF src2 src1) src3));
10564 
10565   format %{ "FLD    $src2   ===MACRO4===\n\t"
10566             "FSUB   ST,$src1\n\t"
10567             "FDIV   ST,$src3\n\t"
10568             "FSTP  $dst" %}
10569   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10570   ins_encode( Push_Reg_FPR(src2),
10571               subFPR_divFPR_encode(src1,src3),
10572               Pop_Reg_FPR(dst) );
10573   ins_pipe( fpu_reg_reg_reg_reg );
10574 %}
10575 
10576 // Spill to obtain 24-bit precision
10577 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10578   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10579   match(Set dst (DivF src1 src2));
10580 
10581   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6*/
10583   ins_encode( Push_Reg_FPR(src1),
10584               OpcReg_FPR(src2),
10585               Pop_Mem_FPR(dst) );
10586   ins_pipe( fpu_mem_reg_reg );
10587 %}
10588 //
10589 // This instruction does not round to 24-bits
10590 instruct divFPR_reg(regFPR dst, regFPR src) %{
10591   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10592   match(Set dst (DivF dst src));
10593 
10594   format %{ "FDIV   $dst,$src" %}
10595   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10596   ins_encode( Push_Reg_FPR(src),
10597               OpcP, RegOpc(dst) );
10598   ins_pipe( fpu_reg_reg );
10599 %}
10600 
10601 
10602 // Spill to obtain 24-bit precision
10603 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10604   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10605   match(Set dst (ModF src1 src2));
10606   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10607 
10608   format %{ "FMOD   $dst,$src1,$src2" %}
10609   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10610               emitModDPR(),
10611               Push_Result_Mod_DPR(src2),
10612               Pop_Mem_FPR(dst));
10613   ins_pipe( pipe_slow );
10614 %}
10615 //
10616 // This instruction does not round to 24-bits
10617 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10618   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10619   match(Set dst (ModF dst src));
10620   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10621 
10622   format %{ "FMOD   $dst,$src" %}
10623   ins_encode(Push_Reg_Mod_DPR(dst, src),
10624               emitModDPR(),
10625               Push_Result_Mod_DPR(src),
10626               Pop_Reg_FPR(dst));
10627   ins_pipe( pipe_slow );
10628 %}
10629 
10630 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10631   predicate(UseSSE>=1);
10632   match(Set dst (ModF src0 src1));
10633   effect(KILL rax, KILL cr);
10634   format %{ "SUB    ESP,4\t # FMOD\n"
10635           "\tMOVSS  [ESP+0],$src1\n"
10636           "\tFLD_S  [ESP+0]\n"
10637           "\tMOVSS  [ESP+0],$src0\n"
10638           "\tFLD_S  [ESP+0]\n"
10639      "loop:\tFPREM\n"
10640           "\tFWAIT\n"
10641           "\tFNSTSW AX\n"
10642           "\tSAHF\n"
10643           "\tJP     loop\n"
10644           "\tFSTP_S [ESP+0]\n"
10645           "\tMOVSS  $dst,[ESP+0]\n"
10646           "\tADD    ESP,4\n"
10647           "\tFSTP   ST0\t # Restore FPU Stack"
10648     %}
10649   ins_cost(250);
10650   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10651   ins_pipe( pipe_slow );
10652 %}
10653 
10654 
10655 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10657 
10658 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10659   predicate(UseSSE==0);
10660   match(Set dst (RoundFloat src));
10661   ins_cost(125);
10662   format %{ "FST_S  $dst,$src\t# F-round" %}
10663   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10664   ins_pipe( fpu_mem_reg );
10665 %}
10666 
10667 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10668   predicate(UseSSE<=1);
10669   match(Set dst (RoundDouble src));
10670   ins_cost(125);
10671   format %{ "FST_D  $dst,$src\t# D-round" %}
10672   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10673   ins_pipe( fpu_mem_reg );
10674 %}
10675 
// Force rounding to 24-bit precision and 8-bit exponent
10677 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10678   predicate(UseSSE==0);
10679   match(Set dst (ConvD2F src));
10680   format %{ "FST_S  $dst,$src\t# F-round" %}
10681   expand %{
10682     roundFloat_mem_reg(dst,src);
10683   %}
10684 %}
10685 
// Force rounding to 24-bit precision and 8-bit exponent
10687 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10688   predicate(UseSSE==1);
10689   match(Set dst (ConvD2F src));
10690   effect( KILL cr );
10691   format %{ "SUB    ESP,4\n\t"
10692             "FST_S  [ESP],$src\t# F-round\n\t"
10693             "MOVSS  $dst,[ESP]\n\t"
10694             "ADD ESP,4" %}
10695   ins_encode %{
10696     __ subptr(rsp, 4);
10697     if ($src$$reg != FPR1L_enc) {
10698       __ fld_s($src$$reg-1);
10699       __ fstp_s(Address(rsp, 0));
10700     } else {
10701       __ fst_s(Address(rsp, 0));
10702     }
10703     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10704     __ addptr(rsp, 4);
10705   %}
10706   ins_pipe( pipe_slow );
10707 %}
10708 
10709 // Force rounding double precision to single precision
10710 instruct convD2F_reg(regF dst, regD src) %{
10711   predicate(UseSSE>=2);
10712   match(Set dst (ConvD2F src));
10713   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10714   ins_encode %{
10715     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10716   %}
10717   ins_pipe( pipe_slow );
10718 %}
10719 
10720 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10721   predicate(UseSSE==0);
10722   match(Set dst (ConvF2D src));
10723   format %{ "FST_S  $dst,$src\t# D-round" %}
10724   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10725   ins_pipe( fpu_reg_reg );
10726 %}
10727 
10728 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10729   predicate(UseSSE==1);
10730   match(Set dst (ConvF2D src));
10731   format %{ "FST_D  $dst,$src\t# D-round" %}
10732   expand %{
10733     roundDouble_mem_reg(dst,src);
10734   %}
10735 %}
10736 
10737 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10738   predicate(UseSSE==1);
10739   match(Set dst (ConvF2D src));
10740   effect( KILL cr );
10741   format %{ "SUB    ESP,4\n\t"
10742             "MOVSS  [ESP] $src\n\t"
10743             "FLD_S  [ESP]\n\t"
10744             "ADD    ESP,4\n\t"
10745             "FSTP   $dst\t# D-round" %}
10746   ins_encode %{
10747     __ subptr(rsp, 4);
10748     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10749     __ fld_s(Address(rsp, 0));
10750     __ addptr(rsp, 4);
10751     __ fstp_d($dst$$reg);
10752   %}
10753   ins_pipe( pipe_slow );
10754 %}
10755 
10756 instruct convF2D_reg(regD dst, regF src) %{
10757   predicate(UseSSE>=2);
10758   match(Set dst (ConvF2D src));
10759   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10760   ins_encode %{
10761     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10762   %}
10763   ins_pipe( pipe_slow );
10764 %}
10765 
10766 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10767 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10768   predicate(UseSSE<=1);
10769   match(Set dst (ConvD2I src));
10770   effect( KILL tmp, KILL cr );
10771   format %{ "FLD    $src\t# Convert double to int \n\t"
10772             "FLDCW  trunc mode\n\t"
10773             "SUB    ESP,4\n\t"
10774             "FISTp  [ESP + #0]\n\t"
10775             "FLDCW  std/24-bit mode\n\t"
10776             "POP    EAX\n\t"
10777             "CMP    EAX,0x80000000\n\t"
10778             "JNE,s  fast\n\t"
10779             "FLD_D  $src\n\t"
10780             "CALL   d2i_wrapper\n"
10781       "fast:" %}
10782   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10783   ins_pipe( pipe_slow );
10784 %}
10785 
10786 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10787 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10788   predicate(UseSSE>=2);
10789   match(Set dst (ConvD2I src));
10790   effect( KILL tmp, KILL cr );
10791   format %{ "CVTTSD2SI $dst, $src\n\t"
10792             "CMP    $dst,0x80000000\n\t"
10793             "JNE,s  fast\n\t"
10794             "SUB    ESP, 8\n\t"
10795             "MOVSD  [ESP], $src\n\t"
10796             "FLD_D  [ESP]\n\t"
10797             "ADD    ESP, 8\n\t"
10798             "CALL   d2i_wrapper\n"
10799       "fast:" %}
10800   ins_encode %{
10801     Label fast;
10802     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10803     __ cmpl($dst$$Register, 0x80000000);
10804     __ jccb(Assembler::notEqual, fast);
10805     __ subptr(rsp, 8);
10806     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10807     __ fld_d(Address(rsp, 0));
10808     __ addptr(rsp, 8);
10809     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10810     __ post_call_nop();
10811     __ bind(fast);
10812   %}
10813   ins_pipe( pipe_slow );
10814 %}
10815 
10816 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10817   predicate(UseSSE<=1);
10818   match(Set dst (ConvD2L src));
10819   effect( KILL cr );
10820   format %{ "FLD    $src\t# Convert double to long\n\t"
10821             "FLDCW  trunc mode\n\t"
10822             "SUB    ESP,8\n\t"
10823             "FISTp  [ESP + #0]\n\t"
10824             "FLDCW  std/24-bit mode\n\t"
10825             "POP    EAX\n\t"
10826             "POP    EDX\n\t"
10827             "CMP    EDX,0x80000000\n\t"
10828             "JNE,s  fast\n\t"
10829             "TEST   EAX,EAX\n\t"
10830             "JNE,s  fast\n\t"
10831             "FLD    $src\n\t"
10832             "CALL   d2l_wrapper\n"
10833       "fast:" %}
10834   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10835   ins_pipe( pipe_slow );
10836 %}
10837 
10838 // XMM lacks a float/double->long conversion, so use the old FPU stack.
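// The operand is spilled to memory, reloaded onto the x87 stack, and converted
// with FISTP while the FPU rounding mode is temporarily set to truncate.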
10839 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10840   predicate (UseSSE>=2);
10841   match(Set dst (ConvD2L src));
10842   effect( KILL cr );
10843   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10844             "MOVSD  [ESP],$src\n\t"
10845             "FLD_D  [ESP]\n\t"
10846             "FLDCW  trunc mode\n\t"
10847             "FISTp  [ESP + #0]\n\t"
10848             "FLDCW  std/24-bit mode\n\t"
10849             "POP    EAX\n\t"
10850             "POP    EDX\n\t"
10851             "CMP    EDX,0x80000000\n\t"
10852             "JNE,s  fast\n\t"
10853             "TEST   EAX,EAX\n\t"
10854             "JNE,s  fast\n\t"
10855             "SUB    ESP,8\n\t"
10856             "MOVSD  [ESP],$src\n\t"
10857             "FLD_D  [ESP]\n\t"
10858             "ADD    ESP,8\n\t"
10859             "CALL   d2l_wrapper\n"
10860       "fast:" %}
10861   ins_encode %{
10862     Label fast;
10863     __ subptr(rsp, 8);
10864     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10865     __ fld_d(Address(rsp, 0));
10866     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10867     __ fistp_d(Address(rsp, 0));
10868     // Restore the rounding mode, mask the exception
10869     if (Compile::current()->in_24_bit_fp_mode()) {
10870       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10871     } else {
10872       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10873     }
10874     // Load the converted long, adjust CPU stack
10875     __ pop(rax);
10876     __ pop(rdx);
10877     __ cmpl(rdx, 0x80000000);
10878     __ jccb(Assembler::notEqual, fast);
10879     __ testl(rax, rax);
10880     __ jccb(Assembler::notEqual, fast);
10881     __ subptr(rsp, 8);
10882     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10883     __ fld_d(Address(rsp, 0));
10884     __ addptr(rsp, 8);
10885     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10886     __ post_call_nop();
10887     __ bind(fast);
10888   %}
10889   ins_pipe( pipe_slow );
10890 %}
10891 
// Convert a float to an int.  Java semantics require we do complex
// manipulations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned value down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or convert a NaN; we check for this and go
// the slow path if needed.
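// (0x80000000 is the 'integer indefinite' value that FIST and CVTTSS2SI/CVTTSD2SI
// produce on overflow or NaN, which is why the encodings compare against it.)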
10898 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10899   predicate(UseSSE==0);
10900   match(Set dst (ConvF2I src));
10901   effect( KILL tmp, KILL cr );
10902   format %{ "FLD    $src\t# Convert float to int \n\t"
10903             "FLDCW  trunc mode\n\t"
10904             "SUB    ESP,4\n\t"
10905             "FISTp  [ESP + #0]\n\t"
10906             "FLDCW  std/24-bit mode\n\t"
10907             "POP    EAX\n\t"
10908             "CMP    EAX,0x80000000\n\t"
10909             "JNE,s  fast\n\t"
10910             "FLD    $src\n\t"
10911             "CALL   d2i_wrapper\n"
10912       "fast:" %}
10913   // DPR2I_encoding works for FPR2I
10914   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10915   ins_pipe( pipe_slow );
10916 %}
10917 
10918 // Convert a float in xmm to an int reg.
10919 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10920   predicate(UseSSE>=1);
10921   match(Set dst (ConvF2I src));
10922   effect( KILL tmp, KILL cr );
10923   format %{ "CVTTSS2SI $dst, $src\n\t"
10924             "CMP    $dst,0x80000000\n\t"
10925             "JNE,s  fast\n\t"
10926             "SUB    ESP, 4\n\t"
10927             "MOVSS  [ESP], $src\n\t"
10928             "FLD    [ESP]\n\t"
10929             "ADD    ESP, 4\n\t"
10930             "CALL   d2i_wrapper\n"
10931       "fast:" %}
10932   ins_encode %{
10933     Label fast;
10934     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10935     __ cmpl($dst$$Register, 0x80000000);
10936     __ jccb(Assembler::notEqual, fast);
10937     __ subptr(rsp, 4);
10938     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10939     __ fld_s(Address(rsp, 0));
10940     __ addptr(rsp, 4);
10941     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10942     __ post_call_nop();
10943     __ bind(fast);
10944   %}
10945   ins_pipe( pipe_slow );
10946 %}
10947 
10948 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10949   predicate(UseSSE==0);
10950   match(Set dst (ConvF2L src));
10951   effect( KILL cr );
10952   format %{ "FLD    $src\t# Convert float to long\n\t"
10953             "FLDCW  trunc mode\n\t"
10954             "SUB    ESP,8\n\t"
10955             "FISTp  [ESP + #0]\n\t"
10956             "FLDCW  std/24-bit mode\n\t"
10957             "POP    EAX\n\t"
10958             "POP    EDX\n\t"
10959             "CMP    EDX,0x80000000\n\t"
10960             "JNE,s  fast\n\t"
10961             "TEST   EAX,EAX\n\t"
10962             "JNE,s  fast\n\t"
10963             "FLD    $src\n\t"
10964             "CALL   d2l_wrapper\n"
10965       "fast:" %}
10966   // DPR2L_encoding works for FPR2L
10967   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10968   ins_pipe( pipe_slow );
10969 %}
10970 
10971 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10972 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10973   predicate (UseSSE>=1);
10974   match(Set dst (ConvF2L src));
10975   effect( KILL cr );
10976   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10977             "MOVSS  [ESP],$src\n\t"
10978             "FLD_S  [ESP]\n\t"
10979             "FLDCW  trunc mode\n\t"
10980             "FISTp  [ESP + #0]\n\t"
10981             "FLDCW  std/24-bit mode\n\t"
10982             "POP    EAX\n\t"
10983             "POP    EDX\n\t"
10984             "CMP    EDX,0x80000000\n\t"
10985             "JNE,s  fast\n\t"
10986             "TEST   EAX,EAX\n\t"
10987             "JNE,s  fast\n\t"
10988             "SUB    ESP,4\t# Convert float to long\n\t"
10989             "MOVSS  [ESP],$src\n\t"
10990             "FLD_S  [ESP]\n\t"
10991             "ADD    ESP,4\n\t"
10992             "CALL   d2l_wrapper\n"
10993       "fast:" %}
10994   ins_encode %{
10995     Label fast;
10996     __ subptr(rsp, 8);
10997     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10998     __ fld_s(Address(rsp, 0));
10999     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
11000     __ fistp_d(Address(rsp, 0));
11001     // Restore the rounding mode, mask the exception
11002     if (Compile::current()->in_24_bit_fp_mode()) {
11003       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
11004     } else {
11005       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
11006     }
11007     // Load the converted long, adjust CPU stack
11008     __ pop(rax);
11009     __ pop(rdx);
11010     __ cmpl(rdx, 0x80000000);
11011     __ jccb(Assembler::notEqual, fast);
11012     __ testl(rax, rax);
11013     __ jccb(Assembler::notEqual, fast);
11014     __ subptr(rsp, 4);
11015     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11016     __ fld_s(Address(rsp, 0));
11017     __ addptr(rsp, 4);
11018     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11019     __ post_call_nop();
11020     __ bind(fast);
11021   %}
11022   ins_pipe( pipe_slow );
11023 %}
11024 
11025 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11026   predicate( UseSSE<=1 );
11027   match(Set dst (ConvI2D src));
11028   format %{ "FILD   $src\n\t"
11029             "FSTP   $dst" %}
11030   opcode(0xDB, 0x0);  /* DB /0 */
11031   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11032   ins_pipe( fpu_reg_mem );
11033 %}
11034 
11035 instruct convI2D_reg(regD dst, rRegI src) %{
11036   predicate( UseSSE>=2 && !UseXmmI2D );
11037   match(Set dst (ConvI2D src));
11038   format %{ "CVTSI2SD $dst,$src" %}
11039   ins_encode %{
11040     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11041   %}
11042   ins_pipe( pipe_slow );
11043 %}
11044 
11045 instruct convI2D_mem(regD dst, memory mem) %{
11046   predicate( UseSSE>=2 );
11047   match(Set dst (ConvI2D (LoadI mem)));
11048   format %{ "CVTSI2SD $dst,$mem" %}
11049   ins_encode %{
11050     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11051   %}
11052   ins_pipe( pipe_slow );
11053 %}
11054 
11055 instruct convXI2D_reg(regD dst, rRegI src)
11056 %{
11057   predicate( UseSSE>=2 && UseXmmI2D );
11058   match(Set dst (ConvI2D src));
11059 
11060   format %{ "MOVD  $dst,$src\n\t"
11061             "CVTDQ2PD $dst,$dst\t# i2d" %}
11062   ins_encode %{
11063     __ movdl($dst$$XMMRegister, $src$$Register);
11064     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11065   %}
11066   ins_pipe(pipe_slow); // XXX
11067 %}
11068 
11069 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11070   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11071   match(Set dst (ConvI2D (LoadI mem)));
11072   format %{ "FILD   $mem\n\t"
11073             "FSTP   $dst" %}
11074   opcode(0xDB);      /* DB /0 */
11075   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11076               Pop_Reg_DPR(dst));
11077   ins_pipe( fpu_reg_mem );
11078 %}
11079 
11080 // Convert a byte to a float; no rounding step needed.
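// (Any value in the range 0..255 is exactly representable with a 24-bit mantissa, so the result
// needs no further rounding even when the FPU runs in 24-bit precision mode.)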
11081 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11082   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11083   match(Set dst (ConvI2F src));
11084   format %{ "FILD   $src\n\t"
11085             "FSTP   $dst" %}
11086 
11087   opcode(0xDB, 0x0);  /* DB /0 */
11088   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11089   ins_pipe( fpu_reg_mem );
11090 %}
11091 
11092 // In 24-bit mode, force rounding to single precision by storing back out
11093 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11094   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11095   match(Set dst (ConvI2F src));
11096   ins_cost(200);
11097   format %{ "FILD   $src\n\t"
11098             "FSTP_S $dst" %}
11099   opcode(0xDB, 0x0);  /* DB /0 */
11100   ins_encode( Push_Mem_I(src),
11101               Pop_Mem_FPR(dst));
11102   ins_pipe( fpu_mem_mem );
11103 %}
11104 
11105 // In 24-bit mode, force rounding to single precision by storing back out
11106 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11107   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11108   match(Set dst (ConvI2F (LoadI mem)));
11109   ins_cost(200);
11110   format %{ "FILD   $mem\n\t"
11111             "FSTP_S $dst" %}
11112   opcode(0xDB);  /* DB /0 */
11113   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11114               Pop_Mem_FPR(dst));
11115   ins_pipe( fpu_mem_mem );
11116 %}
11117 
11118 // This instruction does not round to 24-bits
11119 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11120   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11121   match(Set dst (ConvI2F src));
11122   format %{ "FILD   $src\n\t"
11123             "FSTP   $dst" %}
11124   opcode(0xDB, 0x0);  /* DB /0 */
11125   ins_encode( Push_Mem_I(src),
11126               Pop_Reg_FPR(dst));
11127   ins_pipe( fpu_reg_mem );
11128 %}
11129 
11130 // This instruction does not round to 24-bits
11131 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11132   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11133   match(Set dst (ConvI2F (LoadI mem)));
11134   format %{ "FILD   $mem\n\t"
11135             "FSTP   $dst" %}
11136   opcode(0xDB);      /* DB /0 */
11137   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11138               Pop_Reg_FPR(dst));
11139   ins_pipe( fpu_reg_mem );
11140 %}
11141 
11142 // Convert an int to a float in xmm; no rounding step needed.
11143 instruct convI2F_reg(regF dst, rRegI src) %{
11144   predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11145   match(Set dst (ConvI2F src));
11146   format %{ "CVTSI2SS $dst, $src" %}
11147   ins_encode %{
11148     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11149   %}
11150   ins_pipe( pipe_slow );
11151 %}
11152 
11153 instruct convXI2F_reg(regF dst, rRegI src)
11154 %{
11155   predicate( UseSSE>=2 && UseXmmI2F );
11156   match(Set dst (ConvI2F src));
11157 
11158   format %{ "MOVD  $dst,$src\n\t"
11159             "CVTDQ2PS $dst,$dst\t# i2f" %}
11160   ins_encode %{
11161     __ movdl($dst$$XMMRegister, $src$$Register);
11162     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11163   %}
11164   ins_pipe(pipe_slow); // XXX
11165 %}
11166 
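// Sign-extension example: src = 0xFFFFFFFE (-2) yields dst.lo = 0xFFFFFFFE and, after the
// arithmetic shift by 31, dst.hi = 0xFFFFFFFF, i.e. the 64-bit value -2.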
11167 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11168   match(Set dst (ConvI2L src));
11169   effect(KILL cr);
11170   ins_cost(375);
11171   format %{ "MOV    $dst.lo,$src\n\t"
11172             "MOV    $dst.hi,$src\n\t"
11173             "SAR    $dst.hi,31" %}
11174   ins_encode(convert_int_long(dst,src));
11175   ins_pipe( ialu_reg_reg_long );
11176 %}
11177 
11178 // Zero-extend convert int to long
11179 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11180   match(Set dst (AndL (ConvI2L src) mask) );
11181   effect( KILL flags );
11182   ins_cost(250);
11183   format %{ "MOV    $dst.lo,$src\n\t"
11184             "XOR    $dst.hi,$dst.hi" %}
11185   opcode(0x33); // XOR
11186   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11187   ins_pipe( ialu_reg_reg_long );
11188 %}
11189 
11190 // Zero-extend long
11191 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11192   match(Set dst (AndL src mask) );
11193   effect( KILL flags );
11194   ins_cost(250);
11195   format %{ "MOV    $dst.lo,$src.lo\n\t"
11196             "XOR    $dst.hi,$dst.hi\n\t" %}
11197   opcode(0x33); // XOR
11198   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11199   ins_pipe( ialu_reg_reg_long );
11200 %}
11201 
11202 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11203   predicate (UseSSE<=1);
11204   match(Set dst (ConvL2D src));
11205   effect( KILL cr );
11206   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11207             "PUSH   $src.lo\n\t"
11208             "FILD   ST,[ESP + #0]\n\t"
11209             "ADD    ESP,8\n\t"
11210             "FSTP_D $dst\t# D-round" %}
11211   opcode(0xDF, 0x5);  /* DF /5 */
11212   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11213   ins_pipe( pipe_slow );
11214 %}
11215 
11216 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11217   predicate (UseSSE>=2);
11218   match(Set dst (ConvL2D src));
11219   effect( KILL cr );
11220   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11221             "PUSH   $src.lo\n\t"
11222             "FILD_D [ESP]\n\t"
11223             "FSTP_D [ESP]\n\t"
11224             "MOVSD  $dst,[ESP]\n\t"
11225             "ADD    ESP,8" %}
11226   opcode(0xDF, 0x5);  /* DF /5 */
11227   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11228   ins_pipe( pipe_slow );
11229 %}
11230 
11231 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11232   predicate (UseSSE>=1);
11233   match(Set dst (ConvL2F src));
11234   effect( KILL cr );
11235   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11236             "PUSH   $src.lo\n\t"
11237             "FILD_D [ESP]\n\t"
11238             "FSTP_S [ESP]\n\t"
11239             "MOVSS  $dst,[ESP]\n\t"
11240             "ADD    ESP,8" %}
11241   opcode(0xDF, 0x5);  /* DF /5 */
11242   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11243   ins_pipe( pipe_slow );
11244 %}
11245 
11246 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11247   match(Set dst (ConvL2F src));
11248   effect( KILL cr );
11249   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11250             "PUSH   $src.lo\n\t"
11251             "FILD   ST,[ESP + #0]\n\t"
11252             "ADD    ESP,8\n\t"
11253             "FSTP_S $dst\t# F-round" %}
11254   opcode(0xDF, 0x5);  /* DF /5 */
11255   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11256   ins_pipe( pipe_slow );
11257 %}
11258 
11259 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11260   match(Set dst (ConvL2I src));
11261   effect( DEF dst, USE src );
11262   format %{ "MOV    $dst,$src.lo" %}
11263   ins_encode(enc_CopyL_Lo(dst,src));
11264   ins_pipe( ialu_reg_reg );
11265 %}
11266 
11267 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11268   match(Set dst (MoveF2I src));
11269   effect( DEF dst, USE src );
11270   ins_cost(100);
11271   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11272   ins_encode %{
11273     __ movl($dst$$Register, Address(rsp, $src$$disp));
11274   %}
11275   ins_pipe( ialu_reg_mem );
11276 %}
11277 
11278 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11279   predicate(UseSSE==0);
11280   match(Set dst (MoveF2I src));
11281   effect( DEF dst, USE src );
11282 
11283   ins_cost(125);
11284   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11285   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11286   ins_pipe( fpu_mem_reg );
11287 %}
11288 
11289 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11290   predicate(UseSSE>=1);
11291   match(Set dst (MoveF2I src));
11292   effect( DEF dst, USE src );
11293 
11294   ins_cost(95);
11295   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11296   ins_encode %{
11297     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11298   %}
11299   ins_pipe( pipe_slow );
11300 %}
11301 
11302 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11303   predicate(UseSSE>=2);
11304   match(Set dst (MoveF2I src));
11305   effect( DEF dst, USE src );
11306   ins_cost(85);
11307   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11308   ins_encode %{
11309     __ movdl($dst$$Register, $src$$XMMRegister);
11310   %}
11311   ins_pipe( pipe_slow );
11312 %}
11313 
11314 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11315   match(Set dst (MoveI2F src));
11316   effect( DEF dst, USE src );
11317 
11318   ins_cost(100);
11319   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11320   ins_encode %{
11321     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11322   %}
11323   ins_pipe( ialu_mem_reg );
11324 %}
11325 
11326 
11327 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11328   predicate(UseSSE==0);
11329   match(Set dst (MoveI2F src));
11330   effect(DEF dst, USE src);
11331 
11332   ins_cost(125);
11333   format %{ "FLD_S  $src\n\t"
11334             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11335   opcode(0xD9);               /* D9 /0, FLD m32real */
11336   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11337               Pop_Reg_FPR(dst) );
11338   ins_pipe( fpu_reg_mem );
11339 %}
11340 
11341 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11342   predicate(UseSSE>=1);
11343   match(Set dst (MoveI2F src));
11344   effect( DEF dst, USE src );
11345 
11346   ins_cost(95);
11347   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11348   ins_encode %{
11349     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11350   %}
11351   ins_pipe( pipe_slow );
11352 %}
11353 
11354 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11355   predicate(UseSSE>=2);
11356   match(Set dst (MoveI2F src));
11357   effect( DEF dst, USE src );
11358 
11359   ins_cost(85);
11360   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11361   ins_encode %{
11362     __ movdl($dst$$XMMRegister, $src$$Register);
11363   %}
11364   ins_pipe( pipe_slow );
11365 %}
11366 
11367 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11368   match(Set dst (MoveD2L src));
11369   effect(DEF dst, USE src);
11370 
11371   ins_cost(250);
11372   format %{ "MOV    $dst.lo,$src\n\t"
11373             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11374   opcode(0x8B, 0x8B);
11375   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11376   ins_pipe( ialu_mem_long_reg );
11377 %}
11378 
11379 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11380   predicate(UseSSE<=1);
11381   match(Set dst (MoveD2L src));
11382   effect(DEF dst, USE src);
11383 
11384   ins_cost(125);
11385   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11386   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11387   ins_pipe( fpu_mem_reg );
11388 %}
11389 
11390 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11391   predicate(UseSSE>=2);
11392   match(Set dst (MoveD2L src));
11393   effect(DEF dst, USE src);
11394   ins_cost(95);
11395   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11396   ins_encode %{
11397     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11398   %}
11399   ins_pipe( pipe_slow );
11400 %}
11401 
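// The PSHUFLW immediate 0x4E (binary 01 00 11 10) swaps the two 32-bit halves of the low
// quadword, moving the upper half of the double into the low dword so the second MOVD can
// extract it.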
11402 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11403   predicate(UseSSE>=2);
11404   match(Set dst (MoveD2L src));
11405   effect(DEF dst, USE src, TEMP tmp);
11406   ins_cost(85);
11407   format %{ "MOVD   $dst.lo,$src\n\t"
11408             "PSHUFLW $tmp,$src,0x4E\n\t"
11409             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11410   ins_encode %{
11411     __ movdl($dst$$Register, $src$$XMMRegister);
11412     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11413     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11414   %}
11415   ins_pipe( pipe_slow );
11416 %}
11417 
11418 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11419   match(Set dst (MoveL2D src));
11420   effect(DEF dst, USE src);
11421 
11422   ins_cost(200);
11423   format %{ "MOV    $dst,$src.lo\n\t"
11424             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11425   opcode(0x89, 0x89);
11426   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11427   ins_pipe( ialu_mem_long_reg );
11428 %}
11429 
11430 
11431 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11432   predicate(UseSSE<=1);
11433   match(Set dst (MoveL2D src));
11434   effect(DEF dst, USE src);
11435   ins_cost(125);
11436 
11437   format %{ "FLD_D  $src\n\t"
11438             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11439   opcode(0xDD);               /* DD /0, FLD m64real */
11440   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11441               Pop_Reg_DPR(dst) );
11442   ins_pipe( fpu_reg_mem );
11443 %}
11444 
11445 
11446 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11447   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11448   match(Set dst (MoveL2D src));
11449   effect(DEF dst, USE src);
11450 
11451   ins_cost(95);
11452   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11453   ins_encode %{
11454     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11455   %}
11456   ins_pipe( pipe_slow );
11457 %}
11458 
11459 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11460   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11461   match(Set dst (MoveL2D src));
11462   effect(DEF dst, USE src);
11463 
11464   ins_cost(95);
11465   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
11466   ins_encode %{
11467     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11468   %}
11469   ins_pipe( pipe_slow );
11470 %}
11471 
11472 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11473   predicate(UseSSE>=2);
11474   match(Set dst (MoveL2D src));
11475   effect(TEMP dst, USE src, TEMP tmp);
11476   ins_cost(85);
11477   format %{ "MOVD   $dst,$src.lo\n\t"
11478             "MOVD   $tmp,$src.hi\n\t"
11479             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11480   ins_encode %{
11481     __ movdl($dst$$XMMRegister, $src$$Register);
11482     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11483     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11484   %}
11485   ins_pipe( pipe_slow );
11486 %}
11487 
11488 //----------------------------- CompressBits/ExpandBits ------------------------
11489 
11490 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11491   predicate(n->bottom_type()->isa_long());
11492   match(Set dst (CompressBits src mask));
11493   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11494   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11495   ins_encode %{
11496     Label exit, partial_result;
11497     // Extract both the upper and lower 32 bits of the source into the destination register
11498     // pair in parallel, then merge the results so that the bits from the upper destination
11499     // register are laid out contiguously after the lower destination result.
11500     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11501     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11502     __ popcntl($rtmp$$Register, $mask$$Register);
11503     // Skip merging if bit count of lower mask register is equal to 32 (register size).
11504     __ cmpl($rtmp$$Register, 32);
11505     __ jccb(Assembler::equal, exit);
11506     // Due to the limited number of GPRs on a 32-bit target, use an XMM register as a spill slot.
11507     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11508     // Shift left the contents of upper destination register by true bit count of lower mask register
11509     // and merge with lower destination register.
11510     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11511     __ orl($dst$$Register, $rtmp$$Register);
11512     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11513     // Zero out upper destination register if true bit count of lower 32 bit mask is zero
11514     // since contents of upper destination have already been copied to lower destination
11515     // register.
11516     __ cmpl($rtmp$$Register, 0);
11517     __ jccb(Assembler::greater, partial_result);
11518     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11519     __ jmp(exit);
11520     __ bind(partial_result);
11521     // Perform right shift over upper destination register to move out bits already copied
11522     // to lower destination register.
11523     __ subl($rtmp$$Register, 32);
11524     __ negl($rtmp$$Register);
11525     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11526     __ bind(exit);
11527   %}
11528   ins_pipe( pipe_slow );
11529 %}
11530 
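// Worked example with illustrative values: suppose mask.lo = 0x000000FF and mask.hi = 0x0000000F.
// dst.lo = pdep(src.lo, mask.lo) consumes the low 8 bits of src.lo.  Since popcount(mask.lo) = 8
// < 32, the remaining bits of src.lo (src.lo >> 8) are deposited into dst.hi under mask.hi; the
// lowest 32 - 8 = 24 set bits of mask.hi are then cleared (here mask.hi has only 4 set bits, so
// it becomes 0) before the bits of src.hi are deposited into any still-unused mask.hi positions
// and merged into dst.hi.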
11531 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11532   predicate(n->bottom_type()->isa_long());
11533   match(Set dst (ExpandBits src mask));
11534   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11535   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11536   ins_encode %{
11537     // The expand operation sequentially reads bits from the source register starting at the LSB
11538     // and lays them out in the destination register at the bit positions corresponding to set
11539     // bits in the mask register. Thus the number of source bits consumed equals the combined
11540     // set-bit count of the mask register pair.
11541     Label exit, mask_clipping;
11542     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11543     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11544     __ popcntl($rtmp$$Register, $mask$$Register);
11545     // If the set-bit count of the lower mask register is 32, then no bits of the lower source
11546     // register feed into the upper destination register.
11547     __ cmpl($rtmp$$Register, 32);
11548     __ jccb(Assembler::equal, exit);
11549     // Due to the limited number of GPRs on a 32-bit target, use an XMM register as a spill slot.
11550     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11551     // Shift right the contents of lower source register to remove already consumed bits.
11552     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11553     // Extract the bits from lower source register starting from LSB under the influence
11554     // of upper mask register.
11555     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11556     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11557     __ subl($rtmp$$Register, 32);
11558     __ negl($rtmp$$Register);
11559     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11560     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
11561     // Clear the set bits in upper mask register which have been used to extract the contents
11562     // from lower source register.
11563     __ bind(mask_clipping);
11564     __ blsrl($mask$$Register, $mask$$Register);
11565     __ decrementl($rtmp$$Register, 1);
11566     __ jccb(Assembler::greater, mask_clipping);
11567     // Starting from LSB extract the bits from upper source register under the influence of
11568     // remaining set bits in upper mask register.
11569     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11570     // Merge the partial results extracted from lower and upper source register bits.
11571     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11572     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11573     __ bind(exit);
11574   %}
11575   ins_pipe( pipe_slow );
11576 %}
11577 
11578 // =======================================================================
11579 // fast clearing of an array
11580 // Small ClearArray non-AVX512.
11581 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11582   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11583   match(Set dummy (ClearArray cnt base));
11584   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11585 
11586   format %{ $$template
11587     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11588     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11589     $$emit$$"JG     LARGE\n\t"
11590     $$emit$$"SHL    ECX, 1\n\t"
11591     $$emit$$"DEC    ECX\n\t"
11592     $$emit$$"JS     DONE\t# Zero length\n\t"
11593     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11594     $$emit$$"DEC    ECX\n\t"
11595     $$emit$$"JGE    LOOP\n\t"
11596     $$emit$$"JMP    DONE\n\t"
11597     $$emit$$"# LARGE:\n\t"
11598     if (UseFastStosb) {
11599        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11600        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11601     } else if (UseXMMForObjInit) {
11602        $$emit$$"MOV     RDI,RAX\n\t"
11603        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11604        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11605        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11606        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11607        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11608        $$emit$$"ADD     0x40,RAX\n\t"
11609        $$emit$$"# L_zero_64_bytes:\n\t"
11610        $$emit$$"SUB     0x8,RCX\n\t"
11611        $$emit$$"JGE     L_loop\n\t"
11612        $$emit$$"ADD     0x4,RCX\n\t"
11613        $$emit$$"JL      L_tail\n\t"
11614        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11615        $$emit$$"ADD     0x20,RAX\n\t"
11616        $$emit$$"SUB     0x4,RCX\n\t"
11617        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11618        $$emit$$"ADD     0x4,RCX\n\t"
11619        $$emit$$"JLE     L_end\n\t"
11620        $$emit$$"DEC     RCX\n\t"
11621        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11622        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11623        $$emit$$"ADD     0x8,RAX\n\t"
11624        $$emit$$"DEC     RCX\n\t"
11625        $$emit$$"JGE     L_sloop\n\t"
11626        $$emit$$"# L_end:\n\t"
11627     } else {
11628        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11629        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11630     }
11631     $$emit$$"# DONE"
11632   %}
11633   ins_encode %{
11634     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11635                  $tmp$$XMMRegister, false, knoreg);
11636   %}
11637   ins_pipe( pipe_slow );
11638 %}
11639 
11640 // Small ClearArray AVX512 non-constant length.
11641 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11642   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11643   match(Set dummy (ClearArray cnt base));
11644   ins_cost(125);
11645   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11646 
11647   format %{ $$template
11648     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11649     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11650     $$emit$$"JG     LARGE\n\t"
11651     $$emit$$"SHL    ECX, 1\n\t"
11652     $$emit$$"DEC    ECX\n\t"
11653     $$emit$$"JS     DONE\t# Zero length\n\t"
11654     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11655     $$emit$$"DEC    ECX\n\t"
11656     $$emit$$"JGE    LOOP\n\t"
11657     $$emit$$"JMP    DONE\n\t"
11658     $$emit$$"# LARGE:\n\t"
11659     if (UseFastStosb) {
11660        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11661        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11662     } else if (UseXMMForObjInit) {
11663        $$emit$$"MOV     RDI,RAX\n\t"
11664        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11665        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11666        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11667        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11668        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11669        $$emit$$"ADD     0x40,RAX\n\t"
11670        $$emit$$"# L_zero_64_bytes:\n\t"
11671        $$emit$$"SUB     0x8,RCX\n\t"
11672        $$emit$$"JGE     L_loop\n\t"
11673        $$emit$$"ADD     0x4,RCX\n\t"
11674        $$emit$$"JL      L_tail\n\t"
11675        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11676        $$emit$$"ADD     0x20,RAX\n\t"
11677        $$emit$$"SUB     0x4,RCX\n\t"
11678        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11679        $$emit$$"ADD     0x4,RCX\n\t"
11680        $$emit$$"JLE     L_end\n\t"
11681        $$emit$$"DEC     RCX\n\t"
11682        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11683        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11684        $$emit$$"ADD     0x8,RAX\n\t"
11685        $$emit$$"DEC     RCX\n\t"
11686        $$emit$$"JGE     L_sloop\n\t"
11687        $$emit$$"# L_end:\n\t"
11688     } else {
11689        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11690        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11691     }
11692     $$emit$$"# DONE"
11693   %}
11694   ins_encode %{
11695     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11696                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11697   %}
11698   ins_pipe( pipe_slow );
11699 %}
11700 
11701 // Large ClearArray non-AVX512.
11702 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11703   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11704   match(Set dummy (ClearArray cnt base));
11705   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11706   format %{ $$template
11707     if (UseFastStosb) {
11708        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11709        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11710        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11711     } else if (UseXMMForObjInit) {
11712        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11713        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11714        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11715        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11716        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11717        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11718        $$emit$$"ADD     0x40,RAX\n\t"
11719        $$emit$$"# L_zero_64_bytes:\n\t"
11720        $$emit$$"SUB     0x8,RCX\n\t"
11721        $$emit$$"JGE     L_loop\n\t"
11722        $$emit$$"ADD     0x4,RCX\n\t"
11723        $$emit$$"JL      L_tail\n\t"
11724        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11725        $$emit$$"ADD     0x20,RAX\n\t"
11726        $$emit$$"SUB     0x4,RCX\n\t"
11727        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11728        $$emit$$"ADD     0x4,RCX\n\t"
11729        $$emit$$"JLE     L_end\n\t"
11730        $$emit$$"DEC     RCX\n\t"
11731        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11732        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11733        $$emit$$"ADD     0x8,RAX\n\t"
11734        $$emit$$"DEC     RCX\n\t"
11735        $$emit$$"JGE     L_sloop\n\t"
11736        $$emit$$"# L_end:\n\t"
11737     } else {
11738        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11739        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11740        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11741     }
11742     $$emit$$"# DONE"
11743   %}
11744   ins_encode %{
11745     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11746                  $tmp$$XMMRegister, true, knoreg);
11747   %}
11748   ins_pipe( pipe_slow );
11749 %}
11750 
11751 // Large ClearArray AVX512.
11752 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11753   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11754   match(Set dummy (ClearArray cnt base));
11755   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11756   format %{ $$template
11757     if (UseFastStosb) {
11758        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11759        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11760        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11761     } else if (UseXMMForObjInit) {
11762        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11763        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11764        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11765        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11766        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11767        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11768        $$emit$$"ADD     0x40,RAX\n\t"
11769        $$emit$$"# L_zero_64_bytes:\n\t"
11770        $$emit$$"SUB     0x8,RCX\n\t"
11771        $$emit$$"JGE     L_loop\n\t"
11772        $$emit$$"ADD     0x4,RCX\n\t"
11773        $$emit$$"JL      L_tail\n\t"
11774        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11775        $$emit$$"ADD     0x20,RAX\n\t"
11776        $$emit$$"SUB     0x4,RCX\n\t"
11777        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11778        $$emit$$"ADD     0x4,RCX\n\t"
11779        $$emit$$"JLE     L_end\n\t"
11780        $$emit$$"DEC     RCX\n\t"
11781        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11782        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11783        $$emit$$"ADD     0x8,RAX\n\t"
11784        $$emit$$"DEC     RCX\n\t"
11785        $$emit$$"JGE     L_sloop\n\t"
11786        $$emit$$"# L_end:\n\t"
11787     } else {
11788        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11789        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11790        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11791     }
11792     $$emit$$"# DONE"
11793   %}
11794   ins_encode %{
11795     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11796                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11797   %}
11798   ins_pipe( pipe_slow );
11799 %}
11800 
11801 // Small ClearArray AVX512 constant length.
11802 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11803 %{
11804   predicate(!((ClearArrayNode*)n)->is_large() &&
11805                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11806   match(Set dummy (ClearArray cnt base));
11807   ins_cost(100);
11808   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11809   format %{ "clear_mem_imm $base, $cnt\n\t" %}
11810   ins_encode %{
11811    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11812   %}
11813   ins_pipe(pipe_slow);
11814 %}
11815 
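// Note on the encoding suffixes used below: they come from StrIntrinsicNode, where L denotes a
// Latin-1 (byte[]) argument and U a UTF-16 (char[]) argument; LL and UU compare two strings of
// the same encoding, while LU and UL compare one of each.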
11816 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11817                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11818   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11819   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11820   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11821 
11822   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11823   ins_encode %{
11824     __ string_compare($str1$$Register, $str2$$Register,
11825                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11826                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11827   %}
11828   ins_pipe( pipe_slow );
11829 %}
11830 
11831 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11832                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11833   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11834   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11835   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11836 
11837   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11838   ins_encode %{
11839     __ string_compare($str1$$Register, $str2$$Register,
11840                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11841                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11842   %}
11843   ins_pipe( pipe_slow );
11844 %}
11845 
11846 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11847                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11848   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11849   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11850   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11851 
11852   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11853   ins_encode %{
11854     __ string_compare($str1$$Register, $str2$$Register,
11855                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11856                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11857   %}
11858   ins_pipe( pipe_slow );
11859 %}
11860 
11861 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11862                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11863   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11864   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11865   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11866 
11867   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11868   ins_encode %{
11869     __ string_compare($str1$$Register, $str2$$Register,
11870                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11871                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11872   %}
11873   ins_pipe( pipe_slow );
11874 %}
11875 
11876 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11877                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11878   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11879   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11880   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11881 
11882   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11883   ins_encode %{
11884     __ string_compare($str1$$Register, $str2$$Register,
11885                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11886                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11887   %}
11888   ins_pipe( pipe_slow );
11889 %}
11890 
11891 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11892                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11893   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11894   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11895   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11896 
11897   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11898   ins_encode %{
11899     __ string_compare($str1$$Register, $str2$$Register,
11900                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11901                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11902   %}
11903   ins_pipe( pipe_slow );
11904 %}
11905 
11906 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11907                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11908   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11909   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11910   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11911 
11912   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11913   ins_encode %{
11914     __ string_compare($str2$$Register, $str1$$Register,
11915                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11916                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11917   %}
11918   ins_pipe( pipe_slow );
11919 %}
11920 
11921 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11922                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11923   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11924   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11925   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11926 
11927   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11928   ins_encode %{
11929     __ string_compare($str2$$Register, $str1$$Register,
11930                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11931                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11932   %}
11933   ins_pipe( pipe_slow );
11934 %}
11935 
11936 // fast string equals
11937 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11938                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11939   predicate(!VM_Version::supports_avx512vlbw());
11940   match(Set result (StrEquals (Binary str1 str2) cnt));
11941   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11942 
11943   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11944   ins_encode %{
11945     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11946                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11947                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11948   %}
11949 
11950   ins_pipe( pipe_slow );
11951 %}
11952 
11953 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11954                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11955   predicate(VM_Version::supports_avx512vlbw());
11956   match(Set result (StrEquals (Binary str1 str2) cnt));
11957   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11958 
11959   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11960   ins_encode %{
11961     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11962                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11963                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11964   %}
11965 
11966   ins_pipe( pipe_slow );
11967 %}
11968 
11969 
11970 // fast search of substring with known size.
11971 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11972                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11973   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11974   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11975   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11976 
11977   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11978   ins_encode %{
11979     int icnt2 = (int)$int_cnt2$$constant;
11980     if (icnt2 >= 16) {
11981       // IndexOf for constant substrings with size >= 16 elements
11982       // which don't need to be loaded through stack.
11983       __ string_indexofC8($str1$$Register, $str2$$Register,
11984                           $cnt1$$Register, $cnt2$$Register,
11985                           icnt2, $result$$Register,
11986                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11987     } else {
11988       // Small strings are loaded through stack if they cross page boundary.
11989       __ string_indexof($str1$$Register, $str2$$Register,
11990                         $cnt1$$Register, $cnt2$$Register,
11991                         icnt2, $result$$Register,
11992                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11993     }
11994   %}
11995   ins_pipe( pipe_slow );
11996 %}
11997 
11998 // fast search of substring with known size.
11999 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12000                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12001   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12002   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12003   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12004 
12005   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12006   ins_encode %{
12007     int icnt2 = (int)$int_cnt2$$constant;
12008     if (icnt2 >= 8) {
12009       // IndexOf for constant substrings with size >= 8 elements
12010       // which don't need to be loaded through stack.
12011       __ string_indexofC8($str1$$Register, $str2$$Register,
12012                           $cnt1$$Register, $cnt2$$Register,
12013                           icnt2, $result$$Register,
12014                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12015     } else {
12016       // Small strings are loaded through stack if they cross page boundary.
12017       __ string_indexof($str1$$Register, $str2$$Register,
12018                         $cnt1$$Register, $cnt2$$Register,
12019                         icnt2, $result$$Register,
12020                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12021     }
12022   %}
12023   ins_pipe( pipe_slow );
12024 %}
12025 
12026 // fast search of substring with known size.
12027 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12028                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12029   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12030   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12031   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12032 
12033   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12034   ins_encode %{
12035     int icnt2 = (int)$int_cnt2$$constant;
12036     if (icnt2 >= 8) {
12037       // IndexOf for constant substrings with size >= 8 elements
12038       // which don't need to be loaded through stack.
12039       __ string_indexofC8($str1$$Register, $str2$$Register,
12040                           $cnt1$$Register, $cnt2$$Register,
12041                           icnt2, $result$$Register,
12042                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12043     } else {
12044       // Small strings are loaded through stack if they cross page boundary.
12045       __ string_indexof($str1$$Register, $str2$$Register,
12046                         $cnt1$$Register, $cnt2$$Register,
12047                         icnt2, $result$$Register,
12048                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12049     }
12050   %}
12051   ins_pipe( pipe_slow );
12052 %}
12053 
12054 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12055                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12056   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12057   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12058   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12059 
12060   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12061   ins_encode %{
12062     __ string_indexof($str1$$Register, $str2$$Register,
12063                       $cnt1$$Register, $cnt2$$Register,
12064                       (-1), $result$$Register,
12065                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12066   %}
12067   ins_pipe( pipe_slow );
12068 %}
12069 
12070 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12071                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12072   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12073   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12074   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12075 
12076   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12077   ins_encode %{
12078     __ string_indexof($str1$$Register, $str2$$Register,
12079                       $cnt1$$Register, $cnt2$$Register,
12080                       (-1), $result$$Register,
12081                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12082   %}
12083   ins_pipe( pipe_slow );
12084 %}
12085 
12086 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12087                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12088   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12089   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12090   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12091 
12092   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12093   ins_encode %{
12094     __ string_indexof($str1$$Register, $str2$$Register,
12095                       $cnt1$$Register, $cnt2$$Register,
12096                       (-1), $result$$Register,
12097                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12098   %}
12099   ins_pipe( pipe_slow );
12100 %}
12101 
12102 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12103                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12104   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12105   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12106   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12107   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12108   ins_encode %{
12109     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12110                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12111   %}
12112   ins_pipe( pipe_slow );
12113 %}
12114 
12115 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12116                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12117   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12118   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12119   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12120   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12121   ins_encode %{
12122     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12123                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12124   %}
12125   ins_pipe( pipe_slow );
12126 %}
12127 
12128 
12129 // fast array equals
12130 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12131                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12132 %{
12133   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12134   match(Set result (AryEq ary1 ary2));
12135   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12136   //ins_cost(300);
12137 
12138   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12139   ins_encode %{
12140     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12141                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12142                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12143   %}
12144   ins_pipe( pipe_slow );
12145 %}
12146 
12147 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12148                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12149 %{
12150   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12151   match(Set result (AryEq ary1 ary2));
12152   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12153   //ins_cost(300);
12154 
12155   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12156   ins_encode %{
12157     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12158                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12159                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12160   %}
12161   ins_pipe( pipe_slow );
12162 %}
12163 
12164 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12165                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12166 %{
12167   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12168   match(Set result (AryEq ary1 ary2));
12169   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12170   //ins_cost(300);
12171 
12172   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12173   ins_encode %{
12174     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12175                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12176                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12177   %}
12178   ins_pipe( pipe_slow );
12179 %}
12180 
12181 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12182                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12183 %{
12184   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12185   match(Set result (AryEq ary1 ary2));
12186   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12187   //ins_cost(300);
12188 
12189   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12190   ins_encode %{
12191     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12192                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12193                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12194   %}
12195   ins_pipe( pipe_slow );
12196 %}
12197 
12198 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12199                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12200 %{
12201   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12202   match(Set result (CountPositives ary1 len));
12203   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12204 
12205   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12206   ins_encode %{
12207     __ count_positives($ary1$$Register, $len$$Register,
12208                        $result$$Register, $tmp3$$Register,
12209                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12210   %}
12211   ins_pipe( pipe_slow );
12212 %}
12213 
12214 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12215                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12216 %{
12217   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12218   match(Set result (CountPositives ary1 len));
12219   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12220 
12221   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12222   ins_encode %{
12223     __ count_positives($ary1$$Register, $len$$Register,
12224                        $result$$Register, $tmp3$$Register,
12225                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12226   %}
12227   ins_pipe( pipe_slow );
12228 %}
12229 
12230 
12231 // fast char[] to byte[] compression
12232 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12233                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12234   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12235   match(Set result (StrCompressedCopy src (Binary dst len)));
12236   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12237 
12238   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12239   ins_encode %{
12240     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12241                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12242                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12243                            knoreg, knoreg);
12244   %}
12245   ins_pipe( pipe_slow );
12246 %}
12247 
12248 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12249                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12250   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12251   match(Set result (StrCompressedCopy src (Binary dst len)));
12252   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12253 
12254   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12255   ins_encode %{
12256     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12257                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12258                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12259                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12260   %}
12261   ins_pipe( pipe_slow );
12262 %}
12263 
12264 // fast byte[] to char[] inflation
12265 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12266                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12267   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12268   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12269   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12270 
12271   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12272   ins_encode %{
12273     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12274                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12275   %}
12276   ins_pipe( pipe_slow );
12277 %}
12278 
12279 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12280                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12281   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12282   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12283   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12284 
12285   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12286   ins_encode %{
12287     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12288                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12289   %}
12290   ins_pipe( pipe_slow );
12291 %}
12292 
12293 // encode char[] to byte[] in ISO_8859_1
12294 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12295                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12296                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12297   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12298   match(Set result (EncodeISOArray src (Binary dst len)));
12299   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12300 
12301   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12302   ins_encode %{
12303     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12304                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12305                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12306   %}
12307   ins_pipe( pipe_slow );
12308 %}
12309 
12310 // encode char[] to byte[] in ASCII
12311 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12312                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12313                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12314   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12315   match(Set result (EncodeISOArray src (Binary dst len)));
12316   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12317 
12318   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12319   ins_encode %{
12320     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12321                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12322                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12323   %}
12324   ins_pipe( pipe_slow );
12325 %}
12326 
12327 //----------Control Flow Instructions------------------------------------------
12328 // Signed compare Instructions
12329 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12330   match(Set cr (CmpI op1 op2));
12331   effect( DEF cr, USE op1, USE op2 );
12332   format %{ "CMP    $op1,$op2" %}
12333   opcode(0x3B);  /* Opcode 3B /r */
12334   ins_encode( OpcP, RegReg( op1, op2) );
12335   ins_pipe( ialu_cr_reg_reg );
12336 %}
12337 
12338 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12339   match(Set cr (CmpI op1 op2));
12340   effect( DEF cr, USE op1 );
12341   format %{ "CMP    $op1,$op2" %}
12342   opcode(0x81,0x07);  /* Opcode 81 /7 */
12343   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12344   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12345   ins_pipe( ialu_cr_reg_imm );
12346 %}
12347 
// Cisc-spilled version of compI_eReg
12349 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12350   match(Set cr (CmpI op1 (LoadI op2)));
12351 
12352   format %{ "CMP    $op1,$op2" %}
12353   ins_cost(500);
12354   opcode(0x3B);  /* Opcode 3B /r */
12355   ins_encode( OpcP, RegMem( op1, op2) );
12356   ins_pipe( ialu_cr_reg_mem );
12357 %}
12358 
12359 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12360   match(Set cr (CmpI src zero));
12361   effect( DEF cr, USE src );
12362 
12363   format %{ "TEST   $src,$src" %}
12364   opcode(0x85);
12365   ins_encode( OpcP, RegReg( src, src ) );
12366   ins_pipe( ialu_cr_reg_imm );
12367 %}
12368 
12369 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12370   match(Set cr (CmpI (AndI src con) zero));
12371 
12372   format %{ "TEST   $src,$con" %}
12373   opcode(0xF7,0x00);
12374   ins_encode( OpcP, RegOpc(src), Con32(con) );
12375   ins_pipe( ialu_cr_reg_imm );
12376 %}
12377 
12378 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12379   match(Set cr (CmpI (AndI src mem) zero));
12380 
12381   format %{ "TEST   $src,$mem" %}
12382   opcode(0x85);
12383   ins_encode( OpcP, RegMem( src, mem ) );
12384   ins_pipe( ialu_cr_reg_mem );
12385 %}
12386 
12387 // Unsigned compare Instructions; really, same as signed except they
12388 // produce an eFlagsRegU instead of eFlagsReg.
12389 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12390   match(Set cr (CmpU op1 op2));
12391 
12392   format %{ "CMPu   $op1,$op2" %}
12393   opcode(0x3B);  /* Opcode 3B /r */
12394   ins_encode( OpcP, RegReg( op1, op2) );
12395   ins_pipe( ialu_cr_reg_reg );
12396 %}
12397 
12398 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12399   match(Set cr (CmpU op1 op2));
12400 
12401   format %{ "CMPu   $op1,$op2" %}
12402   opcode(0x81,0x07);  /* Opcode 81 /7 */
12403   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12404   ins_pipe( ialu_cr_reg_imm );
12405 %}
12406 
// Cisc-spilled version of compU_eReg
12408 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12409   match(Set cr (CmpU op1 (LoadI op2)));
12410 
12411   format %{ "CMPu   $op1,$op2" %}
12412   ins_cost(500);
12413   opcode(0x3B);  /* Opcode 3B /r */
12414   ins_encode( OpcP, RegMem( op1, op2) );
12415   ins_pipe( ialu_cr_reg_mem );
12416 %}
12417 
12418 // // Cisc-spilled version of cmpU_eReg
12419 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12420 //  match(Set cr (CmpU (LoadI op1) op2));
12421 //
12422 //  format %{ "CMPu   $op1,$op2" %}
12423 //  ins_cost(500);
12424 //  opcode(0x39);  /* Opcode 39 /r */
12425 //  ins_encode( OpcP, RegMem( op1, op2) );
12426 //%}
12427 
12428 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12429   match(Set cr (CmpU src zero));
12430 
12431   format %{ "TESTu  $src,$src" %}
12432   opcode(0x85);
12433   ins_encode( OpcP, RegReg( src, src ) );
12434   ins_pipe( ialu_cr_reg_imm );
12435 %}
12436 
12437 // Unsigned pointer compare Instructions
12438 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12439   match(Set cr (CmpP op1 op2));
12440 
12441   format %{ "CMPu   $op1,$op2" %}
12442   opcode(0x3B);  /* Opcode 3B /r */
12443   ins_encode( OpcP, RegReg( op1, op2) );
12444   ins_pipe( ialu_cr_reg_reg );
12445 %}
12446 
12447 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12448   match(Set cr (CmpP op1 op2));
12449 
12450   format %{ "CMPu   $op1,$op2" %}
12451   opcode(0x81,0x07);  /* Opcode 81 /7 */
12452   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12453   ins_pipe( ialu_cr_reg_imm );
12454 %}
12455 
// Cisc-spilled version of compP_eReg
12457 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12458   match(Set cr (CmpP op1 (LoadP op2)));
12459 
12460   format %{ "CMPu   $op1,$op2" %}
12461   ins_cost(500);
12462   opcode(0x3B);  /* Opcode 3B /r */
12463   ins_encode( OpcP, RegMem( op1, op2) );
12464   ins_pipe( ialu_cr_reg_mem );
12465 %}
12466 
12467 // // Cisc-spilled version of cmpP_eReg
12468 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12469 //  match(Set cr (CmpP (LoadP op1) op2));
12470 //
12471 //  format %{ "CMPu   $op1,$op2" %}
12472 //  ins_cost(500);
12473 //  opcode(0x39);  /* Opcode 39 /r */
12474 //  ins_encode( OpcP, RegMem( op1, op2) );
12475 //%}
12476 
12477 // Compare raw pointer (used in out-of-heap check).
12478 // Only works because non-oop pointers must be raw pointers
12479 // and raw pointers have no anti-dependencies.
12480 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12481   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12482   match(Set cr (CmpP op1 (LoadP op2)));
12483 
12484   format %{ "CMPu   $op1,$op2" %}
12485   opcode(0x3B);  /* Opcode 3B /r */
12486   ins_encode( OpcP, RegMem( op1, op2) );
12487   ins_pipe( ialu_cr_reg_mem );
12488 %}
12489 
12490 //
12491 // This will generate a signed flags result. This should be ok
12492 // since any compare to a zero should be eq/neq.
12493 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12494   match(Set cr (CmpP src zero));
12495 
12496   format %{ "TEST   $src,$src" %}
12497   opcode(0x85);
12498   ins_encode( OpcP, RegReg( src, src ) );
12499   ins_pipe( ialu_cr_reg_imm );
12500 %}
12501 
12502 // Cisc-spilled version of testP_reg
12503 // This will generate a signed flags result. This should be ok
12504 // since any compare to a zero should be eq/neq.
12505 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12506   match(Set cr (CmpP (LoadP op) zero));
12507 
12508   format %{ "TEST   $op,0xFFFFFFFF" %}
12509   ins_cost(500);
12510   opcode(0xF7);               /* Opcode F7 /0 */
12511   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12512   ins_pipe( ialu_cr_reg_imm );
12513 %}
12514 
12515 // Yanked all unsigned pointer compare operations.
12516 // Pointer compares are done with CmpP which is already unsigned.
12517 
12518 //----------Max and Min--------------------------------------------------------
12519 // Min Instructions
12520 ////
12521 //   *** Min and Max using the conditional move are slower than the
12522 //   *** branch version on a Pentium III.
12523 // // Conditional move for min
12524 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12525 //  effect( USE_DEF op2, USE op1, USE cr );
12526 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12527 //  opcode(0x4C,0x0F);
12528 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12529 //  ins_pipe( pipe_cmov_reg );
12530 //%}
12531 //
12532 //// Min Register with Register (P6 version)
12533 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12534 //  predicate(VM_Version::supports_cmov() );
12535 //  match(Set op2 (MinI op1 op2));
12536 //  ins_cost(200);
12537 //  expand %{
12538 //    eFlagsReg cr;
12539 //    compI_eReg(cr,op1,op2);
12540 //    cmovI_reg_lt(op2,op1,cr);
12541 //  %}
12542 //%}
12543 
12544 // Min Register with Register (generic version)
12545 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12546   match(Set dst (MinI dst src));
12547   effect(KILL flags);
12548   ins_cost(300);
12549 
12550   format %{ "MIN    $dst,$src" %}
12551   opcode(0xCC);
12552   ins_encode( min_enc(dst,src) );
12553   ins_pipe( pipe_slow );
12554 %}
12555 
12556 // Max Register with Register
12557 //   *** Min and Max using the conditional move are slower than the
12558 //   *** branch version on a Pentium III.
12559 // // Conditional move for max
12560 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12561 //  effect( USE_DEF op2, USE op1, USE cr );
12562 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12563 //  opcode(0x4F,0x0F);
12564 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12565 //  ins_pipe( pipe_cmov_reg );
12566 //%}
12567 //
12568 // // Max Register with Register (P6 version)
12569 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12570 //  predicate(VM_Version::supports_cmov() );
12571 //  match(Set op2 (MaxI op1 op2));
12572 //  ins_cost(200);
12573 //  expand %{
12574 //    eFlagsReg cr;
12575 //    compI_eReg(cr,op1,op2);
12576 //    cmovI_reg_gt(op2,op1,cr);
12577 //  %}
12578 //%}
12579 
12580 // Max Register with Register (generic version)
12581 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12582   match(Set dst (MaxI dst src));
12583   effect(KILL flags);
12584   ins_cost(300);
12585 
12586   format %{ "MAX    $dst,$src" %}
12587   opcode(0xCC);
12588   ins_encode( max_enc(dst,src) );
12589   ins_pipe( pipe_slow );
12590 %}
12591 
12592 // ============================================================================
12593 // Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have an overflow check on the limit.
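//
// Worked example (illustration only): with init = 0, limit = 10 and stride = 3
// the exact final iterator value is
//   init + stride * ((limit - init + stride - 1) / stride)
//     = 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * (12 / 3) = 12,
// matching the iteration sequence 0, 3, 6, 9 whose first out-of-range value is
// 12.  The encoding below performs the subtraction and the rounding term in
// 64-bit precision (the limit_hi:limit pair, i.e. EDX:EAX) so the intermediate
// sum cannot overflow 32 bits before the division.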
12596 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12597   match(Set limit (LoopLimit (Binary init limit) stride));
12598   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12599   ins_cost(300);
12600 
12601   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12602   ins_encode %{
12603     int strd = (int)$stride$$constant;
12604     assert(strd != 1 && strd != -1, "sanity");
12605     int m1 = (strd > 0) ? 1 : -1;
12606     // Convert limit to long (EAX:EDX)
12607     __ cdql();
12608     // Convert init to long (init:tmp)
12609     __ movl($tmp$$Register, $init$$Register);
12610     __ sarl($tmp$$Register, 31);
12611     // $limit - $init
12612     __ subl($limit$$Register, $init$$Register);
12613     __ sbbl($limit_hi$$Register, $tmp$$Register);
12614     // + ($stride - 1)
12615     if (strd > 0) {
12616       __ addl($limit$$Register, (strd - 1));
12617       __ adcl($limit_hi$$Register, 0);
12618       __ movl($tmp$$Register, strd);
12619     } else {
12620       __ addl($limit$$Register, (strd + 1));
12621       __ adcl($limit_hi$$Register, -1);
12622       __ lneg($limit_hi$$Register, $limit$$Register);
12623       __ movl($tmp$$Register, -strd);
12624     }
12625     // signed division: (EAX:EDX) / pos_stride
12626     __ idivl($tmp$$Register);
12627     if (strd < 0) {
12628       // restore sign
12629       __ negl($tmp$$Register);
12630     }
12631     // (EAX) * stride
12632     __ mull($tmp$$Register);
12633     // + init (ignore upper bits)
12634     __ addl($limit$$Register, $init$$Register);
12635   %}
12636   ins_pipe( pipe_slow );
12637 %}
12638 
12639 // ============================================================================
12640 // Branch Instructions
12641 // Jump Table
12642 instruct jumpXtnd(rRegI switch_val) %{
12643   match(Jump switch_val);
12644   ins_cost(350);
12645   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12646   ins_encode %{
12647     // Jump to Address(table_base + switch_reg)
12648     Address index(noreg, $switch_val$$Register, Address::times_1);
12649     __ jump(ArrayAddress($constantaddress, index), noreg);
12650   %}
12651   ins_pipe(pipe_jmp);
12652 %}
12653 
12654 // Jump Direct - Label defines a relative address from JMP+1
12655 instruct jmpDir(label labl) %{
12656   match(Goto);
12657   effect(USE labl);
12658 
12659   ins_cost(300);
12660   format %{ "JMP    $labl" %}
12661   size(5);
12662   ins_encode %{
12663     Label* L = $labl$$label;
12664     __ jmp(*L, false); // Always long jump
12665   %}
12666   ins_pipe( pipe_jmp );
12667 %}
12668 
12669 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12670 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12671   match(If cop cr);
12672   effect(USE labl);
12673 
12674   ins_cost(300);
12675   format %{ "J$cop    $labl" %}
12676   size(6);
12677   ins_encode %{
12678     Label* L = $labl$$label;
12679     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12680   %}
12681   ins_pipe( pipe_jcc );
12682 %}
12683 
12684 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12685 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12686   match(CountedLoopEnd cop cr);
12687   effect(USE labl);
12688 
12689   ins_cost(300);
12690   format %{ "J$cop    $labl\t# Loop end" %}
12691   size(6);
12692   ins_encode %{
12693     Label* L = $labl$$label;
12694     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12695   %}
12696   ins_pipe( pipe_jcc );
12697 %}
12698 
12699 // Jump Direct Conditional - using unsigned comparison
12700 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12701   match(If cop cmp);
12702   effect(USE labl);
12703 
12704   ins_cost(300);
12705   format %{ "J$cop,u  $labl" %}
12706   size(6);
12707   ins_encode %{
12708     Label* L = $labl$$label;
12709     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12710   %}
12711   ins_pipe(pipe_jcc);
12712 %}
12713 
12714 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12715   match(If cop cmp);
12716   effect(USE labl);
12717 
12718   ins_cost(200);
12719   format %{ "J$cop,u  $labl" %}
12720   size(6);
12721   ins_encode %{
12722     Label* L = $labl$$label;
12723     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12724   %}
12725   ins_pipe(pipe_jcc);
12726 %}
12727 
12728 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12729   match(If cop cmp);
12730   effect(USE labl);
12731 
12732   ins_cost(200);
12733   format %{ $$template
12734     if ($cop$$cmpcode == Assembler::notEqual) {
12735       $$emit$$"JP,u   $labl\n\t"
12736       $$emit$$"J$cop,u   $labl"
12737     } else {
12738       $$emit$$"JP,u   done\n\t"
12739       $$emit$$"J$cop,u   $labl\n\t"
12740       $$emit$$"done:"
12741     }
12742   %}
12743   ins_encode %{
12744     Label* l = $labl$$label;
12745     if ($cop$$cmpcode == Assembler::notEqual) {
12746       __ jcc(Assembler::parity, *l, false);
12747       __ jcc(Assembler::notEqual, *l, false);
12748     } else if ($cop$$cmpcode == Assembler::equal) {
12749       Label done;
12750       __ jccb(Assembler::parity, done);
12751       __ jcc(Assembler::equal, *l, false);
12752       __ bind(done);
12753     } else {
12754        ShouldNotReachHere();
12755     }
12756   %}
12757   ins_pipe(pipe_jcc);
12758 %}
12759 
12760 // ============================================================================
12761 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12762 // array for an instance of the superklass.  Set a hidden internal cache on a
12763 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12764 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
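//
// Rough C-style sketch of the scan emitted by enc_PartialSubtypeCheck (the
// actual emission is the REPNE SCASD sequence shown in the format below; the
// field accessors named here are purely illustrative):
//
//   Klass** p = sub->secondary_supers_data();      // EDI
//   int     n = sub->secondary_supers_length();    // ECX
//   while (n-- > 0) {
//     if (*p++ == super) {                         // super is held in EAX
//       sub->set_secondary_super_cache(super);     // hit: update the cache
//       return 0;                                  //      EDI zeroed, flags Z
//     }
//   }
//   return non_zero;                               // miss: EDI != 0, flags NZ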
12765 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12766   match(Set result (PartialSubtypeCheck sub super));
12767   effect( KILL rcx, KILL cr );
12768 
12769   ins_cost(1100);  // slightly larger than the next version
12770   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12771             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12772             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12773             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12774             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12775             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12776             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12777      "miss:\t" %}
12778 
12779   opcode(0x1); // Force a XOR of EDI
12780   ins_encode( enc_PartialSubtypeCheck() );
12781   ins_pipe( pipe_slow );
12782 %}
12783 
12784 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12785   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12786   effect( KILL rcx, KILL result );
12787 
12788   ins_cost(1000);
12789   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12790             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12791             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12792             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12793             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12794             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12795      "miss:\t" %}
12796 
12797   opcode(0x0);  // No need to XOR EDI
12798   ins_encode( enc_PartialSubtypeCheck() );
12799   ins_pipe( pipe_slow );
12800 %}
12801 
12802 // ============================================================================
12803 // Branch Instructions -- short offset versions
12804 //
12805 // These instructions are used to replace jumps of a long offset (the default
12806 // match) with jumps of a shorter offset.  These instructions are all tagged
12807 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12808 // match rules in general matching.  Instead, the ADLC generates a conversion
12809 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler decides whether the
// short form can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
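//
// For reference, the long forms above are 5 bytes (JMP rel32) or 6 bytes
// (0F 8x Jcc rel32), while the short forms below are 2 bytes (one opcode byte
// plus an 8-bit displacement), which is why the long variants declare size(5)
// or size(6) and the short variants declare size(2).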
12813 
12814 // Jump Direct - Label defines a relative address from JMP+1
12815 instruct jmpDir_short(label labl) %{
12816   match(Goto);
12817   effect(USE labl);
12818 
12819   ins_cost(300);
12820   format %{ "JMP,s  $labl" %}
12821   size(2);
12822   ins_encode %{
12823     Label* L = $labl$$label;
12824     __ jmpb(*L);
12825   %}
12826   ins_pipe( pipe_jmp );
12827   ins_short_branch(1);
12828 %}
12829 
12830 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12831 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12832   match(If cop cr);
12833   effect(USE labl);
12834 
12835   ins_cost(300);
12836   format %{ "J$cop,s  $labl" %}
12837   size(2);
12838   ins_encode %{
12839     Label* L = $labl$$label;
12840     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12841   %}
12842   ins_pipe( pipe_jcc );
12843   ins_short_branch(1);
12844 %}
12845 
12846 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12847 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12848   match(CountedLoopEnd cop cr);
12849   effect(USE labl);
12850 
12851   ins_cost(300);
12852   format %{ "J$cop,s  $labl\t# Loop end" %}
12853   size(2);
12854   ins_encode %{
12855     Label* L = $labl$$label;
12856     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12857   %}
12858   ins_pipe( pipe_jcc );
12859   ins_short_branch(1);
12860 %}
12861 
12862 // Jump Direct Conditional - using unsigned comparison
12863 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12864   match(If cop cmp);
12865   effect(USE labl);
12866 
12867   ins_cost(300);
12868   format %{ "J$cop,us $labl" %}
12869   size(2);
12870   ins_encode %{
12871     Label* L = $labl$$label;
12872     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12873   %}
12874   ins_pipe( pipe_jcc );
12875   ins_short_branch(1);
12876 %}
12877 
12878 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12879   match(If cop cmp);
12880   effect(USE labl);
12881 
12882   ins_cost(300);
12883   format %{ "J$cop,us $labl" %}
12884   size(2);
12885   ins_encode %{
12886     Label* L = $labl$$label;
12887     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12888   %}
12889   ins_pipe( pipe_jcc );
12890   ins_short_branch(1);
12891 %}
12892 
12893 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12894   match(If cop cmp);
12895   effect(USE labl);
12896 
12897   ins_cost(300);
12898   format %{ $$template
12899     if ($cop$$cmpcode == Assembler::notEqual) {
12900       $$emit$$"JP,u,s   $labl\n\t"
12901       $$emit$$"J$cop,u,s   $labl"
12902     } else {
12903       $$emit$$"JP,u,s   done\n\t"
12904       $$emit$$"J$cop,u,s  $labl\n\t"
12905       $$emit$$"done:"
12906     }
12907   %}
12908   size(4);
12909   ins_encode %{
12910     Label* l = $labl$$label;
12911     if ($cop$$cmpcode == Assembler::notEqual) {
12912       __ jccb(Assembler::parity, *l);
12913       __ jccb(Assembler::notEqual, *l);
12914     } else if ($cop$$cmpcode == Assembler::equal) {
12915       Label done;
12916       __ jccb(Assembler::parity, done);
12917       __ jccb(Assembler::equal, *l);
12918       __ bind(done);
12919     } else {
12920        ShouldNotReachHere();
12921     }
12922   %}
12923   ins_pipe(pipe_jcc);
12924   ins_short_branch(1);
12925 %}
12926 
12927 // ============================================================================
12928 // Long Compare
12929 //
12930 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12931 // is tricky.  The flavor of compare used depends on whether we are testing
12932 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12933 // The GE test is the negated LT test.  The LE test can be had by commuting
12934 // the operands (yielding a GE test) and then negating; negate again for the
12935 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12936 // NE test is negated from that.
12937 
12938 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12939 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12940 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12941 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12942 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12943 // foo match ends up with the wrong leaf.  One fix is to not match both
12944 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12945 // both forms beat the trinary form of long-compare and both are very useful
12946 // on Intel which has so few registers.
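//
// Worked example (illustration only) of the reg-reg LT/GE flavor below
// (cmpL_reg_flags_LTGE): comparing src1 = -1 (hi:lo = 0xFFFFFFFF:0xFFFFFFFF)
// against src2 = 0 with
//   CMP src1.lo,src2.lo            // 0xFFFFFFFF - 0, no borrow (CF = 0)
//   MOV tmp,src1.hi
//   SBB tmp,src2.hi                // 0xFFFFFFFF - 0 - 0 = 0xFFFFFFFF
// leaves SF = 1 and OF = 0, so the signed "less" condition (SF != OF) holds and
// -1 < 0 is reported, just as a 32-bit signed compare would.  The EQ/NE test
// against zero instead ORs the low and high halves into a temp and reads ZF.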
12947 
12948 // Manifest a CmpL result in an integer register.  Very painful.
12949 // This is the test to avoid.
12950 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12951   match(Set dst (CmpL3 src1 src2));
12952   effect( KILL flags );
12953   ins_cost(1000);
12954   format %{ "XOR    $dst,$dst\n\t"
12955             "CMP    $src1.hi,$src2.hi\n\t"
12956             "JLT,s  m_one\n\t"
12957             "JGT,s  p_one\n\t"
12958             "CMP    $src1.lo,$src2.lo\n\t"
12959             "JB,s   m_one\n\t"
12960             "JEQ,s  done\n"
12961     "p_one:\tINC    $dst\n\t"
12962             "JMP,s  done\n"
12963     "m_one:\tDEC    $dst\n"
12964      "done:" %}
12965   ins_encode %{
12966     Label p_one, m_one, done;
12967     __ xorptr($dst$$Register, $dst$$Register);
12968     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12969     __ jccb(Assembler::less,    m_one);
12970     __ jccb(Assembler::greater, p_one);
12971     __ cmpl($src1$$Register, $src2$$Register);
12972     __ jccb(Assembler::below,   m_one);
12973     __ jccb(Assembler::equal,   done);
12974     __ bind(p_one);
12975     __ incrementl($dst$$Register);
12976     __ jmpb(done);
12977     __ bind(m_one);
12978     __ decrementl($dst$$Register);
12979     __ bind(done);
12980   %}
12981   ins_pipe( pipe_slow );
12982 %}
12983 
12984 //======
12985 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12986 // compares.  Can be used for LE or GT compares by reversing arguments.
12987 // NOT GOOD FOR EQ/NE tests.
12988 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12989   match( Set flags (CmpL src zero ));
12990   ins_cost(100);
12991   format %{ "TEST   $src.hi,$src.hi" %}
12992   opcode(0x85);
12993   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12994   ins_pipe( ialu_cr_reg_reg );
12995 %}
12996 
12997 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12998 // compares.  Can be used for LE or GT compares by reversing arguments.
12999 // NOT GOOD FOR EQ/NE tests.
13000 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13001   match( Set flags (CmpL src1 src2 ));
13002   effect( TEMP tmp );
13003   ins_cost(300);
13004   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13005             "MOV    $tmp,$src1.hi\n\t"
13006             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13007   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13008   ins_pipe( ialu_cr_reg_reg );
13009 %}
13010 
// Long compares reg < zero/reg OR reg >= zero/reg.
13012 // Just a wrapper for a normal branch, plus the predicate test.
13013 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13014   match(If cmp flags);
13015   effect(USE labl);
13016   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13017   expand %{
13018     jmpCon(cmp,flags,labl);    // JLT or JGE...
13019   %}
13020 %}
13021 
13022 //======
13023 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13024 // compares.  Can be used for LE or GT compares by reversing arguments.
13025 // NOT GOOD FOR EQ/NE tests.
13026 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13027   match(Set flags (CmpUL src zero));
13028   ins_cost(100);
13029   format %{ "TEST   $src.hi,$src.hi" %}
13030   opcode(0x85);
13031   ins_encode(OpcP, RegReg_Hi2(src, src));
13032   ins_pipe(ialu_cr_reg_reg);
13033 %}
13034 
13035 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13036 // compares.  Can be used for LE or GT compares by reversing arguments.
13037 // NOT GOOD FOR EQ/NE tests.
13038 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13039   match(Set flags (CmpUL src1 src2));
13040   effect(TEMP tmp);
13041   ins_cost(300);
13042   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13043             "MOV    $tmp,$src1.hi\n\t"
13044             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13045   ins_encode(long_cmp_flags2(src1, src2, tmp));
13046   ins_pipe(ialu_cr_reg_reg);
13047 %}
13048 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13050 // Just a wrapper for a normal branch, plus the predicate test.
13051 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13052   match(If cmp flags);
13053   effect(USE labl);
13054   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13055   expand %{
13056     jmpCon(cmp, flags, labl);    // JLT or JGE...
13057   %}
13058 %}
13059 
13060 // Compare 2 longs and CMOVE longs.
13061 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13062   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13063   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13064   ins_cost(400);
13065   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13066             "CMOV$cmp $dst.hi,$src.hi" %}
13067   opcode(0x0F,0x40);
13068   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13069   ins_pipe( pipe_cmov_reg_long );
13070 %}
13071 
13072 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13073   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13074   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13075   ins_cost(500);
13076   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13077             "CMOV$cmp $dst.hi,$src.hi" %}
13078   opcode(0x0F,0x40);
13079   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13080   ins_pipe( pipe_cmov_reg_long );
13081 %}
13082 
13083 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
13084   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13085   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13086   ins_cost(400);
13087   expand %{
13088     cmovLL_reg_LTGE(cmp, flags, dst, src);
13089   %}
13090 %}
13091 
13092 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
13093   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13094   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13095   ins_cost(500);
13096   expand %{
13097     cmovLL_mem_LTGE(cmp, flags, dst, src);
13098   %}
13099 %}
13100 
13101 // Compare 2 longs and CMOVE ints.
13102 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13103   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13104   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13105   ins_cost(200);
13106   format %{ "CMOV$cmp $dst,$src" %}
13107   opcode(0x0F,0x40);
13108   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13109   ins_pipe( pipe_cmov_reg );
13110 %}
13111 
13112 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13113   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13114   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13115   ins_cost(250);
13116   format %{ "CMOV$cmp $dst,$src" %}
13117   opcode(0x0F,0x40);
13118   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13119   ins_pipe( pipe_cmov_mem );
13120 %}
13121 
13122 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
13123   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13124   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13125   ins_cost(200);
13126   expand %{
13127     cmovII_reg_LTGE(cmp, flags, dst, src);
13128   %}
13129 %}
13130 
13131 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
13132   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13133   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13134   ins_cost(250);
13135   expand %{
13136     cmovII_mem_LTGE(cmp, flags, dst, src);
13137   %}
13138 %}
13139 
13140 // Compare 2 longs and CMOVE ptrs.
13141 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13142   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13143   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13144   ins_cost(200);
13145   format %{ "CMOV$cmp $dst,$src" %}
13146   opcode(0x0F,0x40);
13147   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13148   ins_pipe( pipe_cmov_reg );
13149 %}
13150 
13151 // Compare 2 unsigned longs and CMOVE ptrs.
13152 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13153   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13154   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13155   ins_cost(200);
13156   expand %{
13157     cmovPP_reg_LTGE(cmp,flags,dst,src);
13158   %}
13159 %}
13160 
13161 // Compare 2 longs and CMOVE doubles
13162 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13164   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13165   ins_cost(200);
13166   expand %{
13167     fcmovDPR_regS(cmp,flags,dst,src);
13168   %}
13169 %}
13170 
13171 // Compare 2 longs and CMOVE doubles
13172 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13174   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13175   ins_cost(200);
13176   expand %{
13177     fcmovD_regS(cmp,flags,dst,src);
13178   %}
13179 %}
13180 
13181 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13183   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13184   ins_cost(200);
13185   expand %{
13186     fcmovFPR_regS(cmp,flags,dst,src);
13187   %}
13188 %}
13189 
13190 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13192   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13193   ins_cost(200);
13194   expand %{
13195     fcmovF_regS(cmp,flags,dst,src);
13196   %}
13197 %}
13198 
13199 //======
13200 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13201 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13202   match( Set flags (CmpL src zero ));
13203   effect(TEMP tmp);
13204   ins_cost(200);
13205   format %{ "MOV    $tmp,$src.lo\n\t"
13206             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13207   ins_encode( long_cmp_flags0( src, tmp ) );
13208   ins_pipe( ialu_reg_reg_long );
13209 %}
13210 
13211 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13212 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13213   match( Set flags (CmpL src1 src2 ));
13214   ins_cost(200+300);
13215   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13216             "JNE,s  skip\n\t"
13217             "CMP    $src1.hi,$src2.hi\n\t"
13218      "skip:\t" %}
13219   ins_encode( long_cmp_flags1( src1, src2 ) );
13220   ins_pipe( ialu_cr_reg_reg );
13221 %}
13222 
13223 // Long compare reg == zero/reg OR reg != zero/reg
13224 // Just a wrapper for a normal branch, plus the predicate test.
13225 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13226   match(If cmp flags);
13227   effect(USE labl);
13228   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13229   expand %{
13230     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13231   %}
13232 %}
13233 
13234 //======
13235 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13236 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13237   match(Set flags (CmpUL src zero));
13238   effect(TEMP tmp);
13239   ins_cost(200);
13240   format %{ "MOV    $tmp,$src.lo\n\t"
13241             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13242   ins_encode(long_cmp_flags0(src, tmp));
13243   ins_pipe(ialu_reg_reg_long);
13244 %}
13245 
13246 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13247 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13248   match(Set flags (CmpUL src1 src2));
13249   ins_cost(200+300);
13250   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13251             "JNE,s  skip\n\t"
13252             "CMP    $src1.hi,$src2.hi\n\t"
13253      "skip:\t" %}
13254   ins_encode(long_cmp_flags1(src1, src2));
13255   ins_pipe(ialu_cr_reg_reg);
13256 %}
13257 
13258 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13259 // Just a wrapper for a normal branch, plus the predicate test.
13260 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13261   match(If cmp flags);
13262   effect(USE labl);
13263   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13264   expand %{
13265     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13266   %}
13267 %}
13268 
13269 // Compare 2 longs and CMOVE longs.
13270 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13271   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13272   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13273   ins_cost(400);
13274   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13275             "CMOV$cmp $dst.hi,$src.hi" %}
13276   opcode(0x0F,0x40);
13277   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13278   ins_pipe( pipe_cmov_reg_long );
13279 %}
13280 
13281 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13282   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13283   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13284   ins_cost(500);
13285   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13286             "CMOV$cmp $dst.hi,$src.hi" %}
13287   opcode(0x0F,0x40);
13288   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13289   ins_pipe( pipe_cmov_reg_long );
13290 %}
13291 
13292 // Compare 2 longs and CMOVE ints.
13293 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13294   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13295   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13296   ins_cost(200);
13297   format %{ "CMOV$cmp $dst,$src" %}
13298   opcode(0x0F,0x40);
13299   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13300   ins_pipe( pipe_cmov_reg );
13301 %}
13302 
13303 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13304   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13305   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13306   ins_cost(250);
13307   format %{ "CMOV$cmp $dst,$src" %}
13308   opcode(0x0F,0x40);
13309   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13310   ins_pipe( pipe_cmov_mem );
13311 %}
13312 
13313 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13314   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13315   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13316   ins_cost(200);
13317   expand %{
13318     cmovII_reg_EQNE(cmp, flags, dst, src);
13319   %}
13320 %}
13321 
13322 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13323   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13324   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13325   ins_cost(250);
13326   expand %{
13327     cmovII_mem_EQNE(cmp, flags, dst, src);
13328   %}
13329 %}
13330 
13331 // Compare 2 longs and CMOVE ptrs.
13332 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13333   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13334   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13335   ins_cost(200);
13336   format %{ "CMOV$cmp $dst,$src" %}
13337   opcode(0x0F,0x40);
13338   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13339   ins_pipe( pipe_cmov_reg );
13340 %}
13341 
13342 // Compare 2 unsigned longs and CMOVE ptrs.
13343 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13344   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13345   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13346   ins_cost(200);
13347   expand %{
13348     cmovPP_reg_EQNE(cmp,flags,dst,src);
13349   %}
13350 %}
13351 
13352 // Compare 2 longs and CMOVE doubles
13353 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13355   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13356   ins_cost(200);
13357   expand %{
13358     fcmovDPR_regS(cmp,flags,dst,src);
13359   %}
13360 %}
13361 
13362 // Compare 2 longs and CMOVE doubles
13363 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13365   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13366   ins_cost(200);
13367   expand %{
13368     fcmovD_regS(cmp,flags,dst,src);
13369   %}
13370 %}
13371 
13372 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13374   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13375   ins_cost(200);
13376   expand %{
13377     fcmovFPR_regS(cmp,flags,dst,src);
13378   %}
13379 %}
13380 
13381 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13383   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13384   ins_cost(200);
13385   expand %{
13386     fcmovF_regS(cmp,flags,dst,src);
13387   %}
13388 %}
13389 
13390 //======
13391 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13392 // Same as cmpL_reg_flags_LEGT except must negate src
13393 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13394   match( Set flags (CmpL src zero ));
13395   effect( TEMP tmp );
13396   ins_cost(300);
13397   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13398             "CMP    $tmp,$src.lo\n\t"
13399             "SBB    $tmp,$src.hi\n\t" %}
13400   ins_encode( long_cmp_flags3(src, tmp) );
13401   ins_pipe( ialu_reg_reg_long );
13402 %}
13403 
13404 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13405 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13406 // requires a commuted test to get the same result.
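//
// Illustration: to evaluate src1 > src2 the flags are computed for src2 - src1
// (as in the format below) and the branch (cmpL_LEGT, via cmpOp_commute) then
// tests the swapped condition, e.g. "less" where the source asked for
// "greater".  With src1 = 5 and src2 = 3, src2 - src1 = -2, "less" holds, and
// 5 > 3 is correctly reported.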
13407 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13408   match( Set flags (CmpL src1 src2 ));
13409   effect( TEMP tmp );
13410   ins_cost(300);
13411   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13412             "MOV    $tmp,$src2.hi\n\t"
13413             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13414   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13415   ins_pipe( ialu_cr_reg_reg );
13416 %}
13417 
// Long compares reg < zero/reg OR reg >= zero/reg.
13419 // Just a wrapper for a normal branch, plus the predicate test
13420 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13421   match(If cmp flags);
13422   effect(USE labl);
13423   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13424   ins_cost(300);
13425   expand %{
13426     jmpCon(cmp,flags,labl);    // JGT or JLE...
13427   %}
13428 %}
13429 
13430 //======
13431 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13432 // Same as cmpUL_reg_flags_LEGT except must negate src
13433 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13434   match(Set flags (CmpUL src zero));
13435   effect(TEMP tmp);
13436   ins_cost(300);
13437   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13438             "CMP    $tmp,$src.lo\n\t"
13439             "SBB    $tmp,$src.hi\n\t" %}
13440   ins_encode(long_cmp_flags3(src, tmp));
13441   ins_pipe(ialu_reg_reg_long);
13442 %}
13443 
13444 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13445 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13446 // requires a commuted test to get the same result.
13447 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13448   match(Set flags (CmpUL src1 src2));
13449   effect(TEMP tmp);
13450   ins_cost(300);
13451   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13452             "MOV    $tmp,$src2.hi\n\t"
13453             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13454   ins_encode(long_cmp_flags2( src2, src1, tmp));
13455   ins_pipe(ialu_cr_reg_reg);
13456 %}
13457 
13458 // Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13459 // Just a wrapper for a normal branch, plus the predicate test
13460 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13461   match(If cmp flags);
13462   effect(USE labl);
13463   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13464   ins_cost(300);
13465   expand %{
13466     jmpCon(cmp, flags, labl);    // JGT or JLE...
13467   %}
13468 %}
13469 
13470 // Compare 2 longs and CMOVE longs.
13471 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13472   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13473   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13474   ins_cost(400);
13475   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13476             "CMOV$cmp $dst.hi,$src.hi" %}
13477   opcode(0x0F,0x40);
13478   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13479   ins_pipe( pipe_cmov_reg_long );
13480 %}
13481 
13482 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13483   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13484   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13485   ins_cost(500);
13486   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13487             "CMOV$cmp $dst.hi,$src.hi+4" %}
13488   opcode(0x0F,0x40);
13489   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13490   ins_pipe( pipe_cmov_reg_long );
13491 %}
13492 
13493 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13494   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13495   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13496   ins_cost(400);
13497   expand %{
13498     cmovLL_reg_LEGT(cmp, flags, dst, src);
13499   %}
13500 %}
13501 
13502 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13503   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13504   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13505   ins_cost(500);
13506   expand %{
13507     cmovLL_mem_LEGT(cmp, flags, dst, src);
13508   %}
13509 %}
13510 
13511 // Compare 2 longs and CMOVE ints.
13512 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13513   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13514   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13515   ins_cost(200);
13516   format %{ "CMOV$cmp $dst,$src" %}
13517   opcode(0x0F,0x40);
13518   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13519   ins_pipe( pipe_cmov_reg );
13520 %}
13521 
13522 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13523   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13524   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13525   ins_cost(250);
13526   format %{ "CMOV$cmp $dst,$src" %}
13527   opcode(0x0F,0x40);
13528   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13529   ins_pipe( pipe_cmov_mem );
13530 %}
13531 
13532 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13533   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13534   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13535   ins_cost(200);
13536   expand %{
13537     cmovII_reg_LEGT(cmp, flags, dst, src);
13538   %}
13539 %}
13540 
13541 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13542   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13543   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13544   ins_cost(250);
13545   expand %{
13546     cmovII_mem_LEGT(cmp, flags, dst, src);
13547   %}
13548 %}
13549 
13550 // Compare 2 longs and CMOVE ptrs.
13551 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13552   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13553   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13554   ins_cost(200);
13555   format %{ "CMOV$cmp $dst,$src" %}
13556   opcode(0x0F,0x40);
13557   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13558   ins_pipe( pipe_cmov_reg );
13559 %}
13560 
13561 // Compare 2 unsigned longs and CMOVE ptrs.
13562 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13563   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13564   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13565   ins_cost(200);
13566   expand %{
13567     cmovPP_reg_LEGT(cmp,flags,dst,src);
13568   %}
13569 %}
13570 
13571 // Compare 2 longs and CMOVE doubles
13572 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13573   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13574   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13575   ins_cost(200);
13576   expand %{
13577     fcmovDPR_regS(cmp,flags,dst,src);
13578   %}
13579 %}
13580 
13581 // Compare 2 longs and CMOVE doubles
13582 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13583   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13584   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13585   ins_cost(200);
13586   expand %{
13587     fcmovD_regS(cmp,flags,dst,src);
13588   %}
13589 %}
13590 
13591 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13592   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13593   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13594   ins_cost(200);
13595   expand %{
13596     fcmovFPR_regS(cmp,flags,dst,src);
13597   %}
13598 %}
13599 
13600 
13601 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13602   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13603   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13604   ins_cost(200);
13605   expand %{
13606     fcmovF_regS(cmp,flags,dst,src);
13607   %}
13608 %}
13609 
13610 
13611 // ============================================================================
13612 // Procedure Call/Return Instructions
13613 // Call Java Static Instruction
13614 // Note: If this code changes, the corresponding ret_addr_offset() and
13615 //       compute_padding() functions will have to be adjusted.
13616 instruct CallStaticJavaDirect(method meth) %{
13617   match(CallStaticJava);
13618   effect(USE meth);
13619 
13620   ins_cost(300);
13621   format %{ "CALL,static " %}
13622   opcode(0xE8); /* E8 cd */
13623   ins_encode( pre_call_resets,
13624               Java_Static_Call( meth ),
13625               call_epilog,
13626               post_call_FPU );
13627   ins_pipe( pipe_slow );
13628   ins_alignment(4);
13629 %}
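// For reference: "E8 cd" is CALL rel32, a five-byte instruction whose return address
// is the byte immediately following it.  ret_addr_offset() and compute_padding()
// (see the note above) recompute that offset from the bytes emitted here, including
// whatever pre_call_resets produces, so all three must stay in sync.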
13630 
13631 // Call Java Dynamic Instruction
13632 // Note: If this code changes, the corresponding ret_addr_offset() and
13633 //       compute_padding() functions will have to be adjusted.
13634 instruct CallDynamicJavaDirect(method meth) %{
13635   match(CallDynamicJava);
13636   effect(USE meth);
13637 
13638   ins_cost(300);
13639   format %{ "MOV    EAX,(oop)-1\n\t"
13640             "CALL,dynamic" %}
13641   opcode(0xE8); /* E8 cd */
13642   ins_encode( pre_call_resets,
13643               Java_Dynamic_Call( meth ),
13644               call_epilog,
13645               post_call_FPU );
13646   ins_pipe( pipe_slow );
13647   ins_alignment(4);
13648 %}
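// The MOV seeds the inline-cache register (EAX on this port) with the (-1) placeholder
// shown in the format; the inline-cache machinery patches it at run time.  As with the
// static call above, ret_addr_offset() and compute_padding() assume this exact layout.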
13649 
13650 // Call Runtime Instruction
13651 instruct CallRuntimeDirect(method meth) %{
13652   match(CallRuntime );
13653   effect(USE meth);
13654 
13655   ins_cost(300);
13656   format %{ "CALL,runtime " %}
13657   opcode(0xE8); /* E8 cd */
13658   // Use FFREEs to clear entries in float stack
13659   ins_encode( pre_call_resets,
13660               FFree_Float_Stack_All,
13661               Java_To_Runtime( meth ),
13662               post_call_FPU );
13663   ins_pipe( pipe_slow );
13664 %}
13665 
13666 // Call runtime without safepoint
13667 instruct CallLeafDirect(method meth) %{
13668   match(CallLeaf);
13669   effect(USE meth);
13670 
13671   ins_cost(300);
13672   format %{ "CALL_LEAF,runtime " %}
13673   opcode(0xE8); /* E8 cd */
13674   ins_encode( pre_call_resets,
13675               FFree_Float_Stack_All,
13676               Java_To_Runtime( meth ),
13677               Verify_FPU_For_Leaf, post_call_FPU );
13678   ins_pipe( pipe_slow );
13679 %}
13680 
13681 instruct CallLeafNoFPDirect(method meth) %{
13682   match(CallLeafNoFP);
13683   effect(USE meth);
13684 
13685   ins_cost(300);
13686   format %{ "CALL_LEAF_NOFP,runtime " %}
13687   opcode(0xE8); /* E8 cd */
13688   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13689   ins_pipe( pipe_slow );
13690 %}
13691 
13692 
13693 // Return Instruction
13694 // Remove the return address & jump to it.
13695 instruct Ret() %{
13696   match(Return);
13697   format %{ "RET" %}
13698   opcode(0xC3);
13699   ins_encode(OpcP);
13700   ins_pipe( pipe_jmp );
13701 %}
13702 
13703 // Tail Call; Jump from runtime stub to Java code.
13704 // Also known as an 'interprocedural jump'.
13705 // Target of jump will eventually return to caller.
13706 // TailJump below removes the return address.
13707 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13708   match(TailCall jump_target method_ptr);
13709   ins_cost(300);
13710   format %{ "JMP    $jump_target \t# EBX holds method" %}
13711   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13712   ins_encode( OpcP, RegOpc(jump_target) );
13713   ins_pipe( pipe_jmp );
13714 %}
13715 
13716 
13717 // Tail Jump; remove the return address; jump to target.
13718 // TailCall above leaves the return address around.
13719 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13720   match( TailJump jump_target ex_oop );
13721   ins_cost(300);
13722   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13723             "JMP    $jump_target " %}
13724   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13725   ins_encode( enc_pop_rdx,
13726               OpcP, RegOpc(jump_target) );
13727   ins_pipe( pipe_jmp );
13728 %}
13729 
13730 // Create exception oop: created by stack-crawling runtime code.
13731 // Created exception is now available to this handler, and is setup
13732 // just prior to jumping to this handler.  No code emitted.
13733 instruct CreateException( eAXRegP ex_oop )
13734 %{
13735   match(Set ex_oop (CreateEx));
13736 
13737   size(0);
13738   // use the following format syntax
13739   format %{ "# exception oop is in EAX; no code emitted" %}
13740   ins_encode();
13741   ins_pipe( empty );
13742 %}
13743 
13744 
13745 // Rethrow exception:
13746 // The exception oop will come in the first argument position.
13747 // Then JUMP (not call) to the rethrow stub code.
13748 instruct RethrowException()
13749 %{
13750   match(Rethrow);
13751 
13752   // use the following format syntax
13753   format %{ "JMP    rethrow_stub" %}
13754   ins_encode(enc_rethrow);
13755   ins_pipe( pipe_jmp );
13756 %}
13757 
13758 // inlined locking and unlocking
13759 
13760 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
13761   predicate(Compile::current()->use_rtm());
13762   match(Set cr (FastLock object box));
13763   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
13764   ins_cost(300);
13765   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13766   ins_encode %{
13767     __ get_thread($thread$$Register);
13768     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13769                  $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
13770                  _rtm_counters, _stack_rtm_counters,
13771                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13772                  true, ra_->C->profile_rtm());
13773   %}
13774   ins_pipe(pipe_slow);
13775 %}
13776 
13777 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
13778   predicate(!Compile::current()->use_rtm());
13779   match(Set cr (FastLock object box));
13780   effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
13781   ins_cost(300);
13782   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13783   ins_encode %{
13784     __ get_thread($thread$$Register);
13785     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13786                  $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
13787   %}
13788   ins_pipe(pipe_slow);
13789 %}
13790 
13791 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13792   match(Set cr (FastUnlock object box));
13793   effect(TEMP tmp, USE_KILL box);
13794   ins_cost(300);
13795   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13796   ins_encode %{
13797     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13798   %}
13799   ins_pipe(pipe_slow);
13800 %}
13801 
13802 instruct mask_all_evexL_LE32(kReg dst, eRegL src) %{
13803   predicate(Matcher::vector_length(n) <= 32);
13804   match(Set dst (MaskAll src));
13805   format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
13806   ins_encode %{
13807     int mask_len = Matcher::vector_length(this);
13808     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13809   %}
13810   ins_pipe( pipe_slow );
13811 %}
13812 
13813 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13814   predicate(Matcher::vector_length(n) > 32);
13815   match(Set dst (MaskAll src));
13816   effect(TEMP ktmp);
13817   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13818   ins_encode %{
13819     int mask_len = Matcher::vector_length(this);
13820     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13821   %}
13822   ins_pipe( pipe_slow );
13823 %}
13824 
13825 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13826   predicate(Matcher::vector_length(n) > 32);
13827   match(Set dst (MaskAll src));
13828   effect(TEMP ktmp);
13829   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13830   ins_encode %{
13831     int mask_len = Matcher::vector_length(this);
13832     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13833   %}
13834   ins_pipe( pipe_slow );
13835 %}
13836 
13837 // ============================================================================
13838 // Safepoint Instruction
13839 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13840   match(SafePoint poll);
13841   effect(KILL cr, USE poll);
13842 
13843   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13844   ins_cost(125);
13845   // EBP would need size(3)
13846   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13847   ins_encode %{
13848     __ relocate(relocInfo::poll_type);
13849     address pre_pc = __ pc();
13850     __ testl(rax, Address($poll$$Register, 0));
13851     address post_pc = __ pc();
13852     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13853   %}
13854   ins_pipe(ialu_reg_mem);
13855 %}
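// Encoding note: TEST r32,r/m32 is opcode 0x85 plus a ModRM byte, so with a plain
// register base and no displacement the poll is exactly two bytes -- which is what
// size(2) and the 0x85 guarantee above rely on.  EBP cannot be encoded as a base
// register without a displacement byte, hence the size(3) remark and the
// eRegP_no_EBP operand.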
13856 
13857 
13858 // ============================================================================
13859 // This name is KNOWN by the ADLC and cannot be changed.
13860 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13861 // for this node.
13862 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13863   match(Set dst (ThreadLocal));
13864   effect(DEF dst, KILL cr);
13865 
13866   format %{ "MOV    $dst, Thread::current()" %}
13867   ins_encode %{
13868     Register dstReg = as_Register($dst$$reg);
13869     __ get_thread(dstReg);
13870   %}
13871   ins_pipe( ialu_reg_fat );
13872 %}
13873 
13874 
13875 
13876 //----------PEEPHOLE RULES-----------------------------------------------------
13877 // These must follow all instruction definitions as they use the names
13878 // defined in the instructions definitions.
13879 //
13880 // peepmatch ( root_instr_name [preceding_instruction]* );
13881 //
13882 // peepconstraint %{
13883 // (instruction_number.operand_name relational_op instruction_number.operand_name
13884 //  [, ...] );
13885 // // instruction numbers are zero-based using left to right order in peepmatch
13886 //
13887 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13888 // // provide an instruction_number.operand_name for each operand that appears
13889 // // in the replacement instruction's match rule
13890 //
13891 // ---------VM FLAGS---------------------------------------------------------
13892 //
13893 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13894 //
13895 // Each peephole rule is given an identifying number starting with zero and
13896 // increasing by one in the order seen by the parser.  An individual peephole
13897 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13898 // on the command-line.
13899 //
13900 // ---------CURRENT LIMITATIONS----------------------------------------------
13901 //
13902 // Only match adjacent instructions in same basic block
13903 // Only equality constraints
13904 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13905 // Only one replacement instruction
13906 //
13907 // ---------EXAMPLE----------------------------------------------------------
13908 //
13909 // // pertinent parts of existing instructions in architecture description
13910 // instruct movI(rRegI dst, rRegI src) %{
13911 //   match(Set dst (CopyI src));
13912 // %}
13913 //
13914 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13915 //   match(Set dst (AddI dst src));
13916 //   effect(KILL cr);
13917 // %}
13918 //
13919 // // Change (inc mov) to lea
13920 // peephole %{
13921 //   // increment preceded by register-register move
13922 //   peepmatch ( incI_eReg movI );
13923 //   // require that the destination register of the increment
13924 //   // match the destination register of the move
13925 //   peepconstraint ( 0.dst == 1.dst );
13926 //   // construct a replacement instruction that sets
13927 //   // the destination to ( move's source register + one )
13928 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13929 // %}
13930 //
13931 // Implementation no longer uses movX instructions since
13932 // machine-independent system no longer uses CopyX nodes.
13933 //
13934 // peephole %{
13935 //   peepmatch ( incI_eReg movI );
13936 //   peepconstraint ( 0.dst == 1.dst );
13937 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13938 // %}
13939 //
13940 // peephole %{
13941 //   peepmatch ( decI_eReg movI );
13942 //   peepconstraint ( 0.dst == 1.dst );
13943 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13944 // %}
13945 //
13946 // peephole %{
13947 //   peepmatch ( addI_eReg_imm movI );
13948 //   peepconstraint ( 0.dst == 1.dst );
13949 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13950 // %}
13951 //
13952 // peephole %{
13953 //   peepmatch ( addP_eReg_imm movP );
13954 //   peepconstraint ( 0.dst == 1.dst );
13955 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13956 // %}
13957 
13958 // // Change load of spilled value to only a spill
13959 // instruct storeI(memory mem, rRegI src) %{
13960 //   match(Set mem (StoreI mem src));
13961 // %}
13962 //
13963 // instruct loadI(rRegI dst, memory mem) %{
13964 //   match(Set dst (LoadI mem));
13965 // %}
13966 //
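// The active rule below drops a load that immediately re-reads the value just stored
// to the same address, keeping only the store: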
13967 peephole %{
13968   peepmatch ( loadI storeI );
13969   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13970   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13971 %}
13972 
13973 //----------SMARTSPILL RULES---------------------------------------------------
13974 // These must follow all instruction definitions as they use the names
13975 // defined in the instructions definitions.