1 //
    2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // EBX, ESI, and EDI were previously set as save-on-entry for Java code,
   64 // but SOE was turned off in Java code due to frequent use of uncommon traps.
   65 // Now that the allocator is better, ESI and EDI are SOE registers again.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
   78 // allocator and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   81 // Here is the trick: FPR1 is really st(0), except in the midst
   82 // of emission of assembly for a machnode.  During emission the FPU stack
   83 // is pushed, making FPR1 == st(1) temporarily.  However, at any safepoint
   84 // the stack will not have this extra element, so FPR1 == st(0) from the
   85 // oopMap viewpoint.  This same weirdness with numbering forces the
   86 // instruction encoding to play games with the register
   87 // encoding to correct for this 0/1 issue.  See MachSpillCopyNode::implementation,
   88 // where it does flt->flt moves, for an example.
   89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
  143 // Dynamic register class that selects at runtime between register classes
  144 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
  145 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
  208 // Class of integer register pairs (excluding EBP and EDI).
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Classes of integer register pairs that align with the calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  220 // Not AX or DX (and not EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  222 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
  225 // Floating point registers.  Notice FPR0 is not a choice.
  226 // FPR0 is never allocated; we use clever encodings to fake
  227 // 2-address instructions out of Intel's FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
  247 // Must be visible to the DFA in dfa_x86_32.cpp
  248 extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
  252 #define   RELOC_IMM32    Assembler::imm_operand
  253 #define   RELOC_DISP32   Assembler::disp32_operand
  254 
  255 #define __ _masm.
  256 
  257 // How to find the high register of a Long pair, given the low register
  258 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
  259 #define   HIGH_FROM_LOW_ENC(x) ((x)+2)
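// Illustrative mapping, derived from the long pairs EDX:EAX, EBX:ECX and EDI:EBP
// listed in the allocation comment above:
//   HIGH_FROM_LOW_ENC(EAX_enc /* 0 */) == 2 == EDX_enc
//   HIGH_FROM_LOW_ENC(ECX_enc /* 1 */) == 3 == EBX_enc
//   HIGH_FROM_LOW_ENC(EBP_enc /* 5 */) == 7 == EDI_enc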
  260 
  261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  262 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  263 // fast versions of NegF/NegD and AbsF/AbsD.
  264 
  265 void reg_mask_init() {}
  266 
  267 // Note: 'double' and 'long long' have 32-bit alignment on x86.
  268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  269   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  270   // for the 128-bit operands of SSE instructions.
  271   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  272   // Store the value to a 128-bit operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
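// Worked example of the alignment trick above (addresses are illustrative): if
// &fp_signmask_pool[2] were 0x1038, masking with ~0xF yields 0x1030, a 16-byte
// aligned slot that still lies inside the pool because one extra 128-bit slot is
// reserved below purely for alignment slack.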
  277 
  278 // Buffer for 128-bit masks used by SSE instructions.
  279 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  280 
  281 // Static initialization during VM startup.
  282 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  284 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
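// Usage sketch (assuming the usual SSE sign-manipulation idiom): AbsF/AbsD AND an
// XMM register with a *_signmask value (0x7FFF... clears the sign bit), while
// NegF/NegD XOR with a *_signflip value (0x8000... flips the sign bit).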
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
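// Size sketch for the constants above: FLDCW with an absolute memory operand is
// D9 /5 plus a 4-byte address (6 bytes), and VZEROUPPER is C5 F8 77 (3 bytes).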
  299 
  300 // !!!!! Special hack to get all types of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
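// Illustrative byte accounting (ignoring pre_call_resets_size()): a static call is
// CALL rel32, i.e. 1 opcode byte + 4 displacement bytes = 5; a dynamic call is
// preceded by a 5-byte MOV of the 32-bit inline-cache immediate, giving 10.  This
// matches the "skip MOV instruction" / "skip call opcode byte" padding logic below.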
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned to
  323 // ensure that it does not span a cache line, so that it can be patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned to
  331 // ensure that it does not span a cache line, so that it can be patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
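// Padding example (assuming alignment_required() == 4): if the call displacement
// would otherwise start at offset 13, align_up(13, 4) - 13 == 3 bytes of padding are
// requested so that the 4-byte displacement begins at offset 16 and cannot span a
// cache line.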
  338 
  339 // EMIT_RM()
  340 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   cbuf.insts()->emit_int8(c);
  343 }
  344 
  345 // EMIT_CC()
  346 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   cbuf.insts()->emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(CodeBuffer &cbuf, int code) {
  353   cbuf.insts()->emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  358   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  359   emit_opcode(cbuf, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(CodeBuffer &cbuf, int d8) {
  364   cbuf.insts()->emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(CodeBuffer &cbuf, int d16) {
  369   cbuf.insts()->emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(CodeBuffer &cbuf, int d32) {
  374   cbuf.insts()->emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  381   cbuf.insts()->emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  393   cbuf.insts()->emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  398   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  402     emit_d8 (cbuf, disp);     // Displacement
  403   } else {
  404     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  406     emit_d32(cbuf, disp);     // Displacement
  407   }
  408 }
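// Illustrative encoding: store_to_stackslot(cbuf, 0xDB, 0x0, 8) emits
//   DB 44 24 08       ; FILD DWORD PTR [ESP+8]
// (opcode 0xDB, ModRM 0x44 = mod 01 / reg 000 / rm 100, SIB 0x24 for an ESP base,
// then the 8-bit displacement 0x08).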
  409 
  410 // encode_RegMem: encodes a register + memory operand pair (rRegI ereg, memory mem).
  411 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  412   // If there is no index and no scale, use the form without a SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(cbuf, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(cbuf, 0x1, reg_encoding, base);
  423         emit_d8(cbuf, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (cbuf, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(cbuf, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (cbuf, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  450       emit_rm(cbuf, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  456         emit_rm(cbuf, scale, index, base);
  457         emit_d8(cbuf, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  462           emit_rm(cbuf, scale, index, 0x04);
  463         } else {
  464           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  465           emit_rm(cbuf, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (cbuf, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
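// Illustrative encoding: following an 0x8B opcode byte,
// encode_RegMem(cbuf, EAX_enc, ECX_enc, 0x4, 0, 8, relocInfo::none) yields
//   8B 41 08          ; MOV EAX, [ECX+8]
// since no index and a small displacement select the 8-bit-displacement form
// (mod 01) with no SIB byte.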
  476 
  477 
  478 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( cbuf, 0x8B );
  483     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
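// Example: encode_Copy(cbuf, EDX_enc, ECX_enc) emits 8B D1 (MOV EDX, ECX), where
// ModRM 0xD1 = mod 11 (register-direct), reg = EDX (010), r/m = ECX (001).  A copy
// to the same register emits nothing at all.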
  486 
  487 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss set ZF,PF,CF and zero OF,AF,SF; for NaN
  493   // (unordered) values ZF, PF and CF are all set.
  494   // Fix up the flags by zeroing ZF and PF so that comparing NaN
  495   // values returns a 'less than' result (CF remains set).
  496   // Leave the rest of the flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
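// Worked example of the fixup above: after a ucomiss involving NaN the relevant
// flags are ZF=1, PF=1, CF=1 (unordered); AND-ing the saved flags with 0xffffff2b
// clears ZF and PF but keeps CF, so callers observe the NaN case as 'less than'.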
  506 
  507 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
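// Result sketch for emit_cmpfp3: dst ends up as -1 when the operands are unordered
// (parity set) or compare 'below', 0 when they are equal, and 1 otherwise, i.e. the
// usual three-way compare with NaN folded into 'less than'.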
  516 
  517 
  518 //=============================================================================
  519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612   C2_MacroAssembler _masm(&cbuf);
  613 
  614   int framesize = C->output()->frame_size_in_bytes();
  615   int bangsize = C->output()->bang_size_in_bytes();
  616 
  617   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
  618 
  619   C->output()->set_frame_complete(cbuf.insts_size());
  620 
  621   if (C->has_mach_constant_base_node()) {
  622     // NOTE: We set the table base offset here because users might be
  623     // emitted before MachConstantBaseNode.
  624     ConstantTable& constant_table = C->output()->constant_table();
  625     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  626   }
  627 }
  628 
  629 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  630   return MachNode::size(ra_); // too many variables; just compute it the hard way
  631 }
  632 
  633 int MachPrologNode::reloc() const {
  634   return 0; // a large enough number
  635 }
  636 
  637 //=============================================================================
  638 #ifndef PRODUCT
  639 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  640   Compile *C = ra_->C;
  641   int framesize = C->output()->frame_size_in_bytes();
  642   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  643   // Remove two words for the return addr and rbp.
  644   framesize -= 2*wordSize;
  645 
  646   if (C->max_vector_size() > 16) {
  647     st->print("VZEROUPPER");
  648     st->cr(); st->print("\t");
  649   }
  650   if (C->in_24_bit_fp_mode()) {
  651     st->print("FLDCW  standard control word");
  652     st->cr(); st->print("\t");
  653   }
  654   if (framesize) {
  655     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  656     st->cr(); st->print("\t");
  657   }
  658   st->print_cr("POPL   EBP"); st->print("\t");
  659   if (do_polling() && C->is_method_compilation()) {
  660     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  661               "JA      #safepoint_stub\t"
  662               "# Safepoint: poll for GC");
  663   }
  664 }
  665 #endif
  666 
  667 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  668   Compile *C = ra_->C;
  669   MacroAssembler _masm(&cbuf);
  670 
  671   if (C->max_vector_size() > 16) {
  672     // Clear upper bits of YMM registers when current compiled code uses
  673     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  674     _masm.vzeroupper();
  675   }
  676   // If method set FPU control word, restore to standard control word
  677   if (C->in_24_bit_fp_mode()) {
  678     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  679   }
  680 
  681   int framesize = C->output()->frame_size_in_bytes();
  682   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  683   // Remove two words for the return addr and rbp.
  684   framesize -= 2*wordSize;
  685 
  686   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  687 
  688   if (framesize >= 128) {
  689     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  690     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  691     emit_d32(cbuf, framesize);
  692   } else if (framesize) {
  693     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  695     emit_d8(cbuf, framesize);
  696   }
  697 
  698   emit_opcode(cbuf, 0x58 | EBP_enc);
  699 
  700   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  701     __ reserved_stack_check();
  702   }
  703 
  704   if (do_polling() && C->is_method_compilation()) {
  705     Register thread = as_Register(EBX_enc);
  706     MacroAssembler masm(&cbuf);
  707     __ get_thread(thread);
  708     Label dummy_label;
  709     Label* code_stub = &dummy_label;
  710     if (!C->output()->in_scratch_emit_size()) {
  711       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  712       C->output()->add_stub(stub);
  713       code_stub = &stub->entry();
  714     }
  715     __ relocate(relocInfo::poll_return_type);
  716     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  717   }
  718 }
  719 
  720 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  721   return MachNode::size(ra_); // too many variables; just compute it
  722                               // the hard way
  723 }
  724 
  725 int MachEpilogNode::reloc() const {
  726   return 0; // a large enough number
  727 }
  728 
  729 const Pipeline * MachEpilogNode::pipeline() const {
  730   return MachNode::pipeline_class();
  731 }
  732 
  733 //=============================================================================
  734 
  735 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  736 static enum RC rc_class( OptoReg::Name reg ) {
  737 
  738   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  739   if (OptoReg::is_stack(reg)) return rc_stack;
  740 
  741   VMReg r = OptoReg::as_VMReg(reg);
  742   if (r->is_Register()) return rc_int;
  743   if (r->is_FloatRegister()) {
  744     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  745     return rc_float;
  746   }
  747   if (r->is_KRegister()) return rc_kreg;
  748   assert(r->is_XMMRegister(), "must be");
  749   return rc_xmm;
  750 }
  751 
  752 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  753                         int opcode, const char *op_str, int size, outputStream* st ) {
  754   if( cbuf ) {
  755     emit_opcode  (*cbuf, opcode );
  756     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  757 #ifndef PRODUCT
  758   } else if( !do_size ) {
  759     if( size != 0 ) st->print("\n\t");
  760     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  761       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  762       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  763     } else { // FLD, FST, PUSH, POP
  764       st->print("%s [ESP + #%d]",op_str,offset);
  765     }
  766 #endif
  767   }
  768   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  769   return size+3+offset_size;
  770 }
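// Size sketch: the emitted form is opcode + ModRM + SIB (an ESP base always needs a
// SIB byte) plus 0, 1 or 4 displacement bytes, hence the size+3+offset_size result.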
  771 
  772 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  773 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  774                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  775   int in_size_in_bits = Assembler::EVEX_32bit;
  776   int evex_encoding = 0;
  777   if (reg_lo+1 == reg_hi) {
  778     in_size_in_bits = Assembler::EVEX_64bit;
  779     evex_encoding = Assembler::VEX_W;
  780   }
  781   if (cbuf) {
  782     MacroAssembler _masm(cbuf);
  783     // EVEX spills remain EVEX: compressed displacement is better than AVX for spill memory operations,
  784     //                          since it maps more cases to a single-byte displacement.
  785     _masm.set_managed();
  786     if (reg_lo+1 == reg_hi) { // double move?
  787       if (is_load) {
  788         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  789       } else {
  790         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  791       }
  792     } else {
  793       if (is_load) {
  794         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  795       } else {
  796         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  797       }
  798     }
  799 #ifndef PRODUCT
  800   } else if (!do_size) {
  801     if (size != 0) st->print("\n\t");
  802     if (reg_lo+1 == reg_hi) { // double move?
  803       if (is_load) st->print("%s %s,[ESP + #%d]",
  804                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  805                               Matcher::regName[reg_lo], offset);
  806       else         st->print("MOVSD  [ESP + #%d],%s",
  807                               offset, Matcher::regName[reg_lo]);
  808     } else {
  809       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  810                               Matcher::regName[reg_lo], offset);
  811       else         st->print("MOVSS  [ESP + #%d],%s",
  812                               offset, Matcher::regName[reg_lo]);
  813     }
  814 #endif
  815   }
  816   bool is_single_byte = false;
  817   if ((UseAVX > 2) && (offset != 0)) {
  818     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  819   }
  820   int offset_size = 0;
  821   if (UseAVX > 2 ) {
  822     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  823   } else {
  824     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  825   }
  826   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  827   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  828   return size+5+offset_size;
  829 }
  830 
  831 
  832 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  833                             int src_hi, int dst_hi, int size, outputStream* st ) {
  834   if (cbuf) {
  835     MacroAssembler _masm(cbuf);
  836     // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  837     _masm.set_managed();
  838     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  839       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  840                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  841     } else {
  842       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  843                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  844     }
  845 #ifndef PRODUCT
  846   } else if (!do_size) {
  847     if (size != 0) st->print("\n\t");
  848     if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
  849       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  850         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  851       } else {
  852         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  853       }
  854     } else {
  855       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  856         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  857       } else {
  858         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  859       }
  860     }
  861 #endif
  862   }
  863   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  864   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  865   int sz = (UseAVX > 2) ? 6 : 4;
  866   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  867       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  868   return size + sz;
  869 }
  870 
  871 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  872                             int src_hi, int dst_hi, int size, outputStream* st ) {
  873   // 32-bit
  874   if (cbuf) {
  875     MacroAssembler _masm(cbuf);
  876     // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  877     _masm.set_managed();
  878     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  879              as_Register(Matcher::_regEncode[src_lo]));
  880 #ifndef PRODUCT
  881   } else if (!do_size) {
  882     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  883 #endif
  884   }
  885   return (UseAVX> 2) ? 6 : 4;
  886 }
  887 
  888 
  889 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  890                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  891   // 32-bit
  892   if (cbuf) {
  893     MacroAssembler _masm(cbuf);
  894     // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  895     _masm.set_managed();
  896     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  897              as_XMMRegister(Matcher::_regEncode[src_lo]));
  898 #ifndef PRODUCT
  899   } else if (!do_size) {
  900     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  901 #endif
  902   }
  903   return (UseAVX> 2) ? 6 : 4;
  904 }
  905 
  906 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  907   if( cbuf ) {
  908     emit_opcode(*cbuf, 0x8B );
  909     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  910 #ifndef PRODUCT
  911   } else if( !do_size ) {
  912     if( size != 0 ) st->print("\n\t");
  913     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  914 #endif
  915   }
  916   return size+2;
  917 }
  918 
  919 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  920                                  int offset, int size, outputStream* st ) {
  921   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  922     if( cbuf ) {
  923       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  924       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  925 #ifndef PRODUCT
  926     } else if( !do_size ) {
  927       if( size != 0 ) st->print("\n\t");
  928       st->print("FLD    %s",Matcher::regName[src_lo]);
  929 #endif
  930     }
  931     size += 2;
  932   }
  933 
  934   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  935   const char *op_str;
  936   int op;
  937   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  938     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  939     op = 0xDD;
  940   } else {                   // 32-bit store
  941     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  942     op = 0xD9;
  943     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  944   }
  945 
  946   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  947 }
  948 
  949 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  950 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  951                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  952 
  953 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  954                             int stack_offset, int reg, uint ireg, outputStream* st);
  955 
  956 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  957                                      int dst_offset, uint ireg, outputStream* st) {
  958   if (cbuf) {
  959     MacroAssembler _masm(cbuf);
  960     switch (ireg) {
  961     case Op_VecS:
  962       __ pushl(Address(rsp, src_offset));
  963       __ popl (Address(rsp, dst_offset));
  964       break;
  965     case Op_VecD:
  966       __ pushl(Address(rsp, src_offset));
  967       __ popl (Address(rsp, dst_offset));
  968       __ pushl(Address(rsp, src_offset+4));
  969       __ popl (Address(rsp, dst_offset+4));
  970       break;
  971     case Op_VecX:
  972       __ movdqu(Address(rsp, -16), xmm0);
  973       __ movdqu(xmm0, Address(rsp, src_offset));
  974       __ movdqu(Address(rsp, dst_offset), xmm0);
  975       __ movdqu(xmm0, Address(rsp, -16));
  976       break;
  977     case Op_VecY:
  978       __ vmovdqu(Address(rsp, -32), xmm0);
  979       __ vmovdqu(xmm0, Address(rsp, src_offset));
  980       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  981       __ vmovdqu(xmm0, Address(rsp, -32));
  982       break;
  983     case Op_VecZ:
  984       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  985       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  986       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  987       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  988       break;
  989     default:
  990       ShouldNotReachHere();
  991     }
  992 #ifndef PRODUCT
  993   } else {
  994     switch (ireg) {
  995     case Op_VecS:
  996       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  997                 "popl    [rsp + #%d]",
  998                 src_offset, dst_offset);
  999       break;
 1000     case Op_VecD:
 1001       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 1002                 "popl    [rsp + #%d]\n\t"
 1003                 "pushl   [rsp + #%d]\n\t"
 1004                 "popl    [rsp + #%d]",
 1005                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1006       break;
 1007      case Op_VecX:
 1008       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1009                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1010                 "movdqu  [rsp + #%d], xmm0\n\t"
 1011                 "movdqu  xmm0, [rsp - #16]",
 1012                 src_offset, dst_offset);
 1013       break;
 1014     case Op_VecY:
 1015       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1016                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1017                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1018                 "vmovdqu xmm0, [rsp - #32]",
 1019                 src_offset, dst_offset);
 1020       break;
 1021     case Op_VecZ:
 1022       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1023                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1024                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1025                 "vmovdqu xmm0, [rsp - #64]",
 1026                 src_offset, dst_offset);
 1027       break;
 1028     default:
 1029       ShouldNotReachHere();
 1030     }
 1031 #endif
 1032   }
 1033 }
 1034 
 1035 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1036   // Get registers to move
 1037   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1038   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1039   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1040   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1041 
 1042   enum RC src_second_rc = rc_class(src_second);
 1043   enum RC src_first_rc = rc_class(src_first);
 1044   enum RC dst_second_rc = rc_class(dst_second);
 1045   enum RC dst_first_rc = rc_class(dst_first);
 1046 
 1047   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1048 
 1049   // Generate spill code!
 1050   int size = 0;
 1051 
 1052   if( src_first == dst_first && src_second == dst_second )
 1053     return size;            // Self copy, no move
 1054 
 1055   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1056     uint ireg = ideal_reg();
 1057     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1058     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1059     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1060     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1061       // mem -> mem
 1062       int src_offset = ra_->reg2offset(src_first);
 1063       int dst_offset = ra_->reg2offset(dst_first);
 1064       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1065     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1066       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1067     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1068       int stack_offset = ra_->reg2offset(dst_first);
 1069       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1070     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1071       int stack_offset = ra_->reg2offset(src_first);
 1072       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1073     } else {
 1074       ShouldNotReachHere();
 1075     }
 1076     return 0;
 1077   }
 1078 
 1079   // --------------------------------------
 1080   // Check for mem-mem move.  push/pop to move.
 1081   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1082     if( src_second == dst_first ) { // overlapping stack copy ranges
 1083       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1084       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1085       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1086       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1087     }
 1088     // move low bits
 1089     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1090     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1091     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1092       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1093       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1094     }
 1095     return size;
 1096   }
 1097 
 1098   // --------------------------------------
 1099   // Check for integer reg-reg copy
 1100   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1101     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1102 
 1103   // Check for integer store
 1104   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1105     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1106 
 1107   // Check for integer load
 1108   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1109     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1110 
 1111   // Check for integer reg-xmm reg copy
 1112   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1113     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1114             "no 64 bit integer-float reg moves" );
 1115     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1116   }
 1117   // --------------------------------------
 1118   // Check for float reg-reg copy
 1119   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1120     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1121             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1122     if( cbuf ) {
 1123 
 1124       // Note the mucking with the register encode to compensate for the 0/1
 1125       // indexing issue mentioned in a comment in the reg_def sections
 1126       // for FPR registers many lines above here.
 1127 
 1128       if( src_first != FPR1L_num ) {
 1129         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1130         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1131         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1132         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1133      } else {
 1134         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1135         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1136      }
 1137 #ifndef PRODUCT
 1138     } else if( !do_size ) {
 1139       if( size != 0 ) st->print("\n\t");
 1140       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1141       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1142 #endif
 1143     }
 1144     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1145   }
 1146 
 1147   // Check for float store
 1148   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1149     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1150   }
 1151 
 1152   // Check for float load
 1153   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1154     int offset = ra_->reg2offset(src_first);
 1155     const char *op_str;
 1156     int op;
 1157     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1158       op_str = "FLD_D";
 1159       op = 0xDD;
 1160     } else {                   // 32-bit load
 1161       op_str = "FLD_S";
 1162       op = 0xD9;
 1163       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1164     }
 1165     if( cbuf ) {
 1166       emit_opcode  (*cbuf, op );
 1167       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1168       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1169       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1170 #ifndef PRODUCT
 1171     } else if( !do_size ) {
 1172       if( size != 0 ) st->print("\n\t");
 1173       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1174 #endif
 1175     }
 1176     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1177     return size + 3+offset_size+2;
 1178   }
 1179 
 1180   // Check for xmm reg-reg copy
 1181   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1182     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1183             (src_first+1 == src_second && dst_first+1 == dst_second),
 1184             "no non-adjacent float-moves" );
 1185     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1186   }
 1187 
 1188   // Check for xmm reg-integer reg copy
 1189   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1190     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1191             "no 64 bit float-integer reg moves" );
 1192     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1193   }
 1194 
 1195   // Check for xmm store
 1196   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1197     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1198   }
 1199 
 1200   // Check for float xmm load
 1201   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1202     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1203   }
 1204 
 1205   // Copy from float reg to xmm reg
 1206   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1207     // copy to the top of stack from floating point reg
 1208     // and use LEA to preserve flags
 1209     if( cbuf ) {
 1210       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1211       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1212       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1213       emit_d8(*cbuf,0xF8);
 1214 #ifndef PRODUCT
 1215     } else if( !do_size ) {
 1216       if( size != 0 ) st->print("\n\t");
 1217       st->print("LEA    ESP,[ESP-8]");
 1218 #endif
 1219     }
 1220     size += 4;
 1221 
 1222     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1223 
 1224     // Copy from the temp memory to the xmm reg.
 1225     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1226 
 1227     if( cbuf ) {
 1228       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1229       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1230       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1231       emit_d8(*cbuf,0x08);
 1232 #ifndef PRODUCT
 1233     } else if( !do_size ) {
 1234       if( size != 0 ) st->print("\n\t");
 1235       st->print("LEA    ESP,[ESP+8]");
 1236 #endif
 1237     }
 1238     size += 4;
 1239     return size;
 1240   }
 1241 
 1242   // AVX-512 opmask specific spilling.
 1243   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1244     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1245     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1246     int offset = ra_->reg2offset(src_first);
 1247     if (cbuf != nullptr) {
 1248       MacroAssembler _masm(cbuf);
 1249       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1250 #ifndef PRODUCT
 1251     } else {
 1252       st->print("KMOV    %s, [ESP + %d]", Matcher::regName[dst_first], offset);
 1253 #endif
 1254     }
 1255     return 0;
 1256   }
 1257 
 1258   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1259     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1260     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1261     int offset = ra_->reg2offset(dst_first);
 1262     if (cbuf != nullptr) {
 1263       MacroAssembler _masm(cbuf);
 1264       __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1265 #ifndef PRODUCT
 1266     } else {
 1267       st->print("KMOV    [ESP + %d], %s", offset, Matcher::regName[src_first]);
 1268 #endif
 1269     }
 1270     return 0;
 1271   }
 1272 
 1273   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1274     Unimplemented();
 1275     return 0;
 1276   }
 1277 
 1278   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1279     Unimplemented();
 1280     return 0;
 1281   }
 1282 
 1283   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1284     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1285     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1286     if (cbuf != nullptr) {
 1287       MacroAssembler _masm(cbuf);
 1288       __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1289 #ifndef PRODUCT
 1290     } else {
 1291       st->print("KMOV    %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
 1292 #endif
 1293     }
 1294     return 0;
 1295   }
 1296 
 1297   assert( size > 0, "missed a case" );
 1298 
 1299   // --------------------------------------------------------------------
 1300   // Check for second bits still needing moving.
 1301   if( src_second == dst_second )
 1302     return size;               // Self copy; no move
 1303   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1304 
 1305   // Check for second word int-int move
 1306   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1307     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1308 
 1309   // Check for second word integer store
 1310   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1311     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1312 
 1313   // Check for second word integer load
 1314   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1315     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1316 
 1317   Unimplemented();
 1318   return 0; // Mute compiler
 1319 }
 1320 
 1321 #ifndef PRODUCT
 1322 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1323   implementation( NULL, ra_, false, st );
 1324 }
 1325 #endif
 1326 
 1327 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1328   implementation( &cbuf, ra_, false, NULL );
 1329 }
 1330 
 1331 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1332   return MachNode::size(ra_);
 1333 }
 1334 
 1335 
 1336 //=============================================================================
 1337 #ifndef PRODUCT
 1338 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1339   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1340   int reg = ra_->get_reg_first(this);
 1341   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1342 }
 1343 #endif
 1344 
 1345 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1346   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1347   int reg = ra_->get_encode(this);
 1348   if( offset >= 128 ) {
 1349     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1350     emit_rm(cbuf, 0x2, reg, 0x04);
 1351     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1352     emit_d32(cbuf, offset);
 1353   }
 1354   else {
 1355     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1356     emit_rm(cbuf, 0x1, reg, 0x04);
 1357     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1358     emit_d8(cbuf, offset);
 1359   }
 1360 }
 1361 
 1362 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1363   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1364   if( offset >= 128 ) {
 1365     return 7;
 1366   }
 1367   else {
 1368     return 4;
 1369   }
 1370 }
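      // Size sketch (illustrative): the long form above is LEA reg,[ESP+disp32],
      // i.e. opcode + ModRM + SIB + 4-byte displacement = 7 bytes; the short
      // form uses a disp8 and is opcode + ModRM + SIB + 1-byte displacement = 4
      // bytes, matching the two emit paths in BoxLockNode::emit().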
 1371 
 1372 //=============================================================================
 1373 #ifndef PRODUCT
 1374 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1375   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1376   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1377   st->print_cr("\tNOP");
 1378   st->print_cr("\tNOP");
 1379   if( !OptoBreakpoint )
 1380     st->print_cr("\tNOP");
 1381 }
 1382 #endif
 1383 
 1384 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1385   MacroAssembler masm(&cbuf);
 1386 #ifdef ASSERT
 1387   uint insts_size = cbuf.insts_size();
 1388 #endif
 1389   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1390   masm.jump_cc(Assembler::notEqual,
 1391                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1392   /* WARNING these NOPs are critical so that verified entry point is properly
 1393      aligned for patching by NativeJump::patch_verified_entry() */
 1394   int nops_cnt = 2;
 1395   if( !OptoBreakpoint ) // Leave space for int3
 1396      nops_cnt += 1;
 1397   masm.nop(nops_cnt);
 1398 
 1399   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1400 }
 1401 
 1402 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1403   return OptoBreakpoint ? 11 : 12;
 1404 }
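      // Size sketch (illustrative): CMP EAX,[ECX+4] encodes in 3 bytes and the
      // far JNE to the ic-miss stub in 6 bytes (0F 85 rel32). Adding 3 NOPs
      // (or only 2 when OptoBreakpoint reserves a byte for int3) gives the 12
      // or 11 bytes returned above.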
 1405 
 1406 
 1407 //=============================================================================
 1408 
 1409 // Vector calling convention not supported.
 1410 bool Matcher::supports_vector_calling_convention() {
 1411   return false;
 1412 }
 1413 
 1414 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1415   Unimplemented();
 1416   return OptoRegPair(0, 0);
 1417 }
 1418 
 1419 // Is this branch offset short enough that a short branch can be used?
 1420 //
 1421 // NOTE: If the platform does not provide any short branch variants, then
 1422 //       this method should return false for offset 0.
 1423 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1424   // The passed offset is relative to the address of the branch.
 1425   // On x86 a branch displacement is calculated relative to the address
 1426   // of the next instruction.
 1427   offset -= br_size;
 1428 
 1429   // The short version of jmpConUCF2 contains multiple branches,
 1430   // making the reach slightly shorter.
 1431   if (rule == jmpConUCF2_rule)
 1432     return (-126 <= offset && offset <= 125);
 1433   return (-128 <= offset && offset <= 127);
 1434 }
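      // Worked example (illustrative): for a 2-byte short Jcc (br_size == 2)
      // whose target lies 0x40 bytes past the start of the branch, the rel8
      // displacement after the adjustment above is 0x40 - 2 = 0x3E, which is
      // within [-128, 127], so the short form can be used.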
 1435 
 1436 // Return whether or not this register is ever used as an argument.  This
 1437 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1438 // Registers not mentioned will be killed by the VM call in the trampoline, and
 1439 // arguments in those registers will not be available to the callee.
 1440 bool Matcher::can_be_java_arg( int reg ) {
 1441   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1442   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1443   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1444   return false;
 1445 }
 1446 
 1447 bool Matcher::is_spillable_arg( int reg ) {
 1448   return can_be_java_arg(reg);
 1449 }
 1450 
 1451 uint Matcher::int_pressure_limit()
 1452 {
 1453   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1454 }
 1455 
 1456 uint Matcher::float_pressure_limit()
 1457 {
 1458   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1459 }
 1460 
 1461 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1462   // Use the hardware integer DIV instruction when
 1463   // it is faster than code which uses a multiply.
 1464   // Only do so when the constant divisor fits into 32 bits
 1465   // (min_jint is excluded because its absolute value does not
 1466   // fit in a positive 32-bit int).
 1467   return VM_Version::has_fast_idiv() &&
 1468          (divisor == (int)divisor && divisor != min_jint);
 1469 }
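      // Example (illustrative): a constant divisor of 10 satisfies
      // divisor == (int)divisor, so on CPUs with fast IDIV the hardware
      // instruction is preferred over a multiply-by-reciprocal expansion;
      // min_jint is rejected since its magnitude does not fit in a positive
      // 32-bit int.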
 1470 
 1471 // Register for DIVI projection of divmodI
 1472 RegMask Matcher::divI_proj_mask() {
 1473   return EAX_REG_mask();
 1474 }
 1475 
 1476 // Register for MODI projection of divmodI
 1477 RegMask Matcher::modI_proj_mask() {
 1478   return EDX_REG_mask();
 1479 }
 1480 
 1481 // Register for DIVL projection of divmodL
 1482 RegMask Matcher::divL_proj_mask() {
 1483   ShouldNotReachHere();
 1484   return RegMask();
 1485 }
 1486 
 1487 // Register for MODL projection of divmodL
 1488 RegMask Matcher::modL_proj_mask() {
 1489   ShouldNotReachHere();
 1490   return RegMask();
 1491 }
 1492 
 1493 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1494   return NO_REG_mask();
 1495 }
 1496 
 1497 // Returns true if the high 32 bits of the value is known to be zero.
 1498 bool is_operand_hi32_zero(Node* n) {
 1499   int opc = n->Opcode();
 1500   if (opc == Op_AndL) {
 1501     Node* o2 = n->in(2);
 1502     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1503       return true;
 1504     }
 1505   }
 1506   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1507     return true;
 1508   }
 1509   return false;
 1510 }
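      // Examples (illustrative): (AndL x, ConL 0x00000000FFFFFFFF) and ConL 5
      // both have a known-zero high word, while ConL 0x100000000 does not.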
 1511 
 1512 %}
 1513 
 1514 //----------ENCODING BLOCK-----------------------------------------------------
 1515 // This block specifies the encoding classes used by the compiler to output
 1516 // byte streams.  Encoding classes generate functions which are called by
 1517 // Machine Instruction Nodes in order to generate the bit encoding of the
 1518 // instruction.  Operands specify their base encoding interface with the
 1519 // interface keyword.  Four interfaces are currently supported:
 1520 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1521 // operand to generate a function which returns its register number when
 1522 // queried.   CONST_INTER causes an operand to generate a function which
 1523 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1524 // operand to generate four functions which return the Base Register, the
 1525 // Index Register, the Scale Value, and the Offset Value of the operand when
 1526 // queried.  COND_INTER causes an operand to generate six functions which
 1527 // return the encoding code (ie - encoding bits for the instruction)
 1528 // associated with each basic boolean condition for a conditional instruction.
 1529 // Instructions specify two basic values for encoding.  They use the
 1530 // ins_encode keyword to specify their encoding class (which must be one of
 1531 // the class names specified in the encoding block), and they use the
 1532 // opcode keyword to specify, in order, their primary, secondary, and
 1533 // tertiary opcode.  Only the opcode sections which a particular instruction
 1534 // needs for encoding need to be specified.
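      // For example (illustrative), a reg-reg integer add could be described
      // with   opcode(0x03);  ins_encode( OpcP, RegReg(dst, src) );   so that
      // OpcP emits the 0x03 opcode byte and RegReg emits the ModRM byte that
      // selects the two registers.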
 1535 encode %{
 1536   // Build emit functions for each basic byte or larger field in the intel
 1537   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1538   // code in the enc_class source block.  Emit functions will live in the
 1539   // main source block for now.  In future, we can generalize this by
 1540   // adding a syntax that specifies the sizes of fields in an order,
 1541   // so that the adlc can build the emit functions automagically
 1542 
 1543   // Emit primary opcode
 1544   enc_class OpcP %{
 1545     emit_opcode(cbuf, $primary);
 1546   %}
 1547 
 1548   // Emit secondary opcode
 1549   enc_class OpcS %{
 1550     emit_opcode(cbuf, $secondary);
 1551   %}
 1552 
 1553   // Emit opcode directly
 1554   enc_class Opcode(immI d8) %{
 1555     emit_opcode(cbuf, $d8$$constant);
 1556   %}
 1557 
 1558   enc_class SizePrefix %{
 1559     emit_opcode(cbuf,0x66);
 1560   %}
 1561 
 1562   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1563     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1564   %}
 1565 
 1566   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1567     emit_opcode(cbuf,$opcode$$constant);
 1568     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1569   %}
 1570 
 1571   enc_class mov_r32_imm0( rRegI dst ) %{
 1572     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1573     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1574   %}
 1575 
 1576   enc_class cdq_enc %{
 1577     // Full implementation of Java idiv and irem; checks for
 1578     // special case as described in JVM spec., p.243 & p.271.
 1579     //
 1580     //         normal case                           special case
 1581     //
 1582     // input : rax: dividend                          min_int
 1583     //         reg: divisor                           -1
 1584     //
 1585     // output: rax: quotient  (= rax idiv reg)         min_int
 1586     //         rdx: remainder (= rax irem reg)         0
 1587     //
 1588     //  Code sequence:
 1589     //
 1590     //  81 F8 00 00 00 80    cmp         rax,80000000h
 1591     //  0F 85 0B 00 00 00    jne         normal_case
 1592     //  33 D2                xor         edx,edx
 1593     //  83 F9 FF             cmp         ecx,0FFh
 1594     //  0F 84 03 00 00 00    je          done
 1595     //                  normal_case:
 1596     //  99                   cdq
 1597     //  F7 F9                idiv        ecx
 1598     //                  done:
 1599     //
 1600     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1601     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1602     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1603     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1604     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1605     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1606     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
 1607     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,0FFh
 1608     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1609     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1610     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1611     // normal_case:
 1612     emit_opcode(cbuf,0x99);                                         // cdq
 1613     // idiv (note: must be emitted by the user of this rule)
 1614     // normal:
 1615   %}
 1616 
 1617   // Dense encoding for older common ops
 1618   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1619     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1620   %}
 1621 
 1622 
 1623   // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
 1624   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1625     // Check for 8-bit immediate, and set sign extend bit in opcode
 1626     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1627       emit_opcode(cbuf, $primary | 0x02);
 1628     }
 1629     else {                          // If 32-bit immediate
 1630       emit_opcode(cbuf, $primary);
 1631     }
 1632   %}
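        // Example (illustrative): with $primary == 0x81 (the ALU r/m32,imm32
        // group) and an immediate of 5, setting the sign-extend bit yields
        // opcode 0x83, which takes an 8-bit sign-extended immediate instead
        // of a full 32-bit one.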
 1633 
 1634   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1635     // Emit primary opcode and set sign-extend bit
 1636     // Check for 8-bit immediate, and set sign extend bit in opcode
 1637     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1638       emit_opcode(cbuf, $primary | 0x02);    }
 1639     else {                          // If 32-bit immediate
 1640       emit_opcode(cbuf, $primary);
 1641     }
 1642     // Emit r/m byte with secondary opcode, after primary opcode.
 1643     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1644   %}
 1645 
 1646   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1647     // Check for 8-bit immediate, and set sign extend bit in opcode
 1648     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1649       $$$emit8$imm$$constant;
 1650     }
 1651     else {                          // If 32-bit immediate
 1652       // Output immediate
 1653       $$$emit32$imm$$constant;
 1654     }
 1655   %}
 1656 
 1657   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1658     // Emit primary opcode and set sign-extend bit
 1659     // Check for 8-bit immediate, and set sign extend bit in opcode
 1660     int con = (int)$imm$$constant; // Throw away top bits
 1661     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1662     // Emit r/m byte with secondary opcode, after primary opcode.
 1663     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1664     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1665     else                               emit_d32(cbuf,con);
 1666   %}
 1667 
 1668   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1669     // Emit primary opcode and set sign-extend bit
 1670     // Check for 8-bit immediate, and set sign extend bit in opcode
 1671     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1672     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1673     // Emit r/m byte with tertiary opcode, after primary opcode.
 1674     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1675     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1676     else                               emit_d32(cbuf,con);
 1677   %}
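        // Illustrative use of the two helpers above: a long add-immediate
        // instruct would typically supply primary 0x81 with secondary /0 (ADD)
        // and tertiary /2 (ADC), so the low word gets ADD lo,imm and the high
        // word gets ADC hi,imm, each in the short imm8 form when that half of
        // the constant fits in 8 bits.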
 1678 
 1679   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1680     emit_cc(cbuf, $secondary, $dst$$reg );
 1681   %}
 1682 
 1683   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1684     int destlo = $dst$$reg;
 1685     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1686     // bswap lo
 1687     emit_opcode(cbuf, 0x0F);
 1688     emit_cc(cbuf, 0xC8, destlo);
 1689     // bswap hi
 1690     emit_opcode(cbuf, 0x0F);
 1691     emit_cc(cbuf, 0xC8, desthi);
 1692     // xchg lo and hi
 1693     emit_opcode(cbuf, 0x87);
 1694     emit_rm(cbuf, 0x3, destlo, desthi);
 1695   %}
 1696 
 1697   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1698     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1699   %}
 1700 
 1701   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1702     $$$emit8$primary;
 1703     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1704   %}
 1705 
 1706   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1707     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1708     emit_d8(cbuf, op >> 8 );
 1709     emit_d8(cbuf, op & 255);
 1710   %}
 1711 
 1712   // emulate a CMOV with a conditional branch around a MOV
 1713   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1714     // Invert sense of branch from sense of CMOV
 1715     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1716     emit_d8( cbuf, $brOffs$$constant );
 1717   %}
 1718 
 1719   enc_class enc_PartialSubtypeCheck( ) %{
 1720     Register Redi = as_Register(EDI_enc); // result register
 1721     Register Reax = as_Register(EAX_enc); // super class
 1722     Register Recx = as_Register(ECX_enc); // killed
 1723     Register Resi = as_Register(ESI_enc); // sub class
 1724     Label miss;
 1725 
 1726     MacroAssembler _masm(&cbuf);
 1727     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1728                                      NULL, &miss,
 1729                                      /*set_cond_codes:*/ true);
 1730     if ($primary) {
 1731       __ xorptr(Redi, Redi);
 1732     }
 1733     __ bind(miss);
 1734   %}
 1735 
 1736   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1737     MacroAssembler masm(&cbuf);
 1738     int start = masm.offset();
 1739     if (UseSSE >= 2) {
 1740       if (VerifyFPU) {
 1741         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1742       }
 1743     } else {
 1744       // External c_calling_convention expects the FPU stack to be 'clean'.
 1745       // Compiled code leaves it dirty.  Do cleanup now.
 1746       masm.empty_FPU_stack();
 1747     }
 1748     if (sizeof_FFree_Float_Stack_All == -1) {
 1749       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1750     } else {
 1751       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1752     }
 1753   %}
 1754 
 1755   enc_class Verify_FPU_For_Leaf %{
 1756     if( VerifyFPU ) {
 1757       MacroAssembler masm(&cbuf);
 1758       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1759     }
 1760   %}
 1761 
 1762   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1763     // This is the instruction starting address for relocation info.
 1764     MacroAssembler _masm(&cbuf);
 1765     cbuf.set_insts_mark();
 1766     $$$emit8$primary;
 1767     // CALL directly to the runtime
 1768     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1769                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1770     __ post_call_nop();
 1771 
 1772     if (UseSSE >= 2) {
 1773       MacroAssembler _masm(&cbuf);
 1774       BasicType rt = tf()->return_type();
 1775 
 1776       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1777         // A C runtime call where the return value is unused.  In SSE2+
 1778         // mode the result needs to be removed from the FPU stack.  It's
 1779         // likely that this function call could be removed by the
 1780         // optimizer if the C function is a pure function.
 1781         __ ffree(0);
 1782       } else if (rt == T_FLOAT) {
 1783         __ lea(rsp, Address(rsp, -4));
 1784         __ fstp_s(Address(rsp, 0));
 1785         __ movflt(xmm0, Address(rsp, 0));
 1786         __ lea(rsp, Address(rsp,  4));
 1787       } else if (rt == T_DOUBLE) {
 1788         __ lea(rsp, Address(rsp, -8));
 1789         __ fstp_d(Address(rsp, 0));
 1790         __ movdbl(xmm0, Address(rsp, 0));
 1791         __ lea(rsp, Address(rsp,  8));
 1792       }
 1793     }
 1794   %}
 1795 
 1796   enc_class pre_call_resets %{
 1797     // If the method runs in 24-bit FP mode, restore the standard FPU control word here
 1798     debug_only(int off0 = cbuf.insts_size());
 1799     if (ra_->C->in_24_bit_fp_mode()) {
 1800       MacroAssembler _masm(&cbuf);
 1801       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1802     }
 1803     // Clear upper bits of YMM registers when current compiled code uses
 1804     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1805     MacroAssembler _masm(&cbuf);
 1806     __ vzeroupper();
 1807     debug_only(int off1 = cbuf.insts_size());
 1808     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1809   %}
 1810 
 1811   enc_class post_call_FPU %{
 1812     // If the method runs in 24-bit FP mode, re-install the 24-bit FPU control word here as well
 1813     if (Compile::current()->in_24_bit_fp_mode()) {
 1814       MacroAssembler masm(&cbuf);
 1815       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1816     }
 1817   %}
 1818 
 1819   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1820     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1821     // who we intended to call.
 1822     MacroAssembler _masm(&cbuf);
 1823     cbuf.set_insts_mark();
 1824     $$$emit8$primary;
 1825 
 1826     if (!_method) {
 1827       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1828                      runtime_call_Relocation::spec(),
 1829                      RELOC_IMM32);
 1830       __ post_call_nop();
 1831     } else {
 1832       int method_index = resolved_method_index(cbuf);
 1833       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1834                                                   : static_call_Relocation::spec(method_index);
 1835       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1836                      rspec, RELOC_DISP32);
 1837       __ post_call_nop();
 1838       address mark = cbuf.insts_mark();
 1839       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1840         // Calls of the same statically bound method can share
 1841         // a stub to the interpreter.
 1842         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 1843       } else {
 1844         // Emit stubs for static call.
 1845         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 1846         if (stub == NULL) {
 1847           ciEnv::current()->record_failure("CodeCache is full");
 1848           return;
 1849         }
 1850       }
 1851     }
 1852   %}
 1853 
 1854   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1855     MacroAssembler _masm(&cbuf);
 1856     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1857     __ post_call_nop();
 1858   %}
 1859 
 1860   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1861     int disp = in_bytes(Method::from_compiled_offset());
 1862     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1863 
 1864     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
 1865     MacroAssembler _masm(&cbuf);
 1866     cbuf.set_insts_mark();
 1867     $$$emit8$primary;
 1868     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1869     emit_d8(cbuf, disp);             // Displacement
 1870     __ post_call_nop();
 1871   %}
 1872 
 1873 //   Following encoding is no longer used, but may be restored if calling
 1874 //   convention changes significantly.
 1875 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1876 //
 1877 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1878 //     // int ic_reg     = Matcher::inline_cache_reg();
 1879 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1880 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1881 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1882 //
 1883 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1884 //     // // so we load it immediately before the call
 1885 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1886 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1887 //
 1888 //     // xor rbp,ebp
 1889 //     emit_opcode(cbuf, 0x33);
 1890 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1891 //
 1892 //     // CALL to interpreter.
 1893 //     cbuf.set_insts_mark();
 1894 //     $$$emit8$primary;
 1895 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1896 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1897 //   %}
 1898 
 1899   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1900     $$$emit8$primary;
 1901     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1902     $$$emit8$shift$$constant;
 1903   %}
 1904 
 1905   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1906     // Load immediate does not have a zero or sign extended version
 1907     // for 8-bit immediates
 1908     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1909     $$$emit32$src$$constant;
 1910   %}
 1911 
 1912   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1913     // Load immediate does not have a zero or sign extended version
 1914     // for 8-bit immediates
 1915     emit_opcode(cbuf, $primary + $dst$$reg);
 1916     $$$emit32$src$$constant;
 1917   %}
 1918 
 1919   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1920     // Load immediate does not have a zero or sign extended version
 1921     // for 8-bit immediates
 1922     int dst_enc = $dst$$reg;
 1923     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1924     if (src_con == 0) {
 1925       // xor dst, dst
 1926       emit_opcode(cbuf, 0x33);
 1927       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1928     } else {
 1929       emit_opcode(cbuf, $primary + dst_enc);
 1930       emit_d32(cbuf, src_con);
 1931     }
 1932   %}
 1933 
 1934   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1935     // Load immediate does not have a zero or sign extended version
 1936     // for 8-bit immediates
 1937     int dst_enc = $dst$$reg + 2;
 1938     int src_con = ((julong)($src$$constant)) >> 32;
 1939     if (src_con == 0) {
 1940       // xor dst, dst
 1941       emit_opcode(cbuf, 0x33);
 1942       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1943     } else {
 1944       emit_opcode(cbuf, $primary + dst_enc);
 1945       emit_d32(cbuf, src_con);
 1946     }
 1947   %}
 1948 
 1949 
 1950   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1951   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1952     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1953   %}
 1954 
 1955   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1956     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1957   %}
 1958 
 1959   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1960     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1961   %}
 1962 
 1963   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1964     $$$emit8$primary;
 1965     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1966   %}
 1967 
 1968   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1969     $$$emit8$secondary;
 1970     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1971   %}
 1972 
 1973   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1974     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1975   %}
 1976 
 1977   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1978     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1979   %}
 1980 
 1981   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1982     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1983   %}
 1984 
 1985   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1986     // Output immediate
 1987     $$$emit32$src$$constant;
 1988   %}
 1989 
 1990   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1991     // Output Float immediate bits
 1992     jfloat jf = $src$$constant;
 1993     int    jf_as_bits = jint_cast( jf );
 1994     emit_d32(cbuf, jf_as_bits);
 1995   %}
 1996 
 1997   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1998     // Output Float immediate bits
 1999     jfloat jf = $src$$constant;
 2000     int    jf_as_bits = jint_cast( jf );
 2001     emit_d32(cbuf, jf_as_bits);
 2002   %}
 2003 
 2004   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 2005     // Output immediate
 2006     $$$emit16$src$$constant;
 2007   %}
 2008 
 2009   enc_class Con_d32(immI src) %{
 2010     emit_d32(cbuf,$src$$constant);
 2011   %}
 2012 
 2013   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 2014     // Output immediate memory reference
 2015     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 2016     emit_d32(cbuf, 0x00);
 2017   %}
 2018 
 2019   enc_class lock_prefix( ) %{
 2020     emit_opcode(cbuf,0xF0);         // [Lock]
 2021   %}
 2022 
 2023   // Cmp-xchg long value.
 2024   // Note: we need to swap rbx and rcx before and after the
 2025   //       cmpxchg8 instruction because the instruction uses
 2026   //       rcx as the high-order word of the new value to store, but
 2027   //       our register encoding uses rbx.
 2028   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2029 
 2030     // XCHG  rbx,ecx
 2031     emit_opcode(cbuf,0x87);
 2032     emit_opcode(cbuf,0xD9);
 2033     // [Lock]
 2034     emit_opcode(cbuf,0xF0);
 2035     // CMPXCHG8 [Eptr]
 2036     emit_opcode(cbuf,0x0F);
 2037     emit_opcode(cbuf,0xC7);
 2038     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2039     // XCHG  rbx,ecx
 2040     emit_opcode(cbuf,0x87);
 2041     emit_opcode(cbuf,0xD9);
 2042   %}
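        // Emitted sequence (illustrative): XCHG EBX,ECX; LOCK CMPXCHG8B [mem];
        // XCHG EBX,ECX.  CMPXCHG8B takes the new value in ECX:EBX and the
        // compare value in EDX:EAX, so the swap reconciles the instruction's
        // fixed register usage with our EBX-based long encoding.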
 2043 
 2044   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2045     // [Lock]
 2046     emit_opcode(cbuf,0xF0);
 2047 
 2048     // CMPXCHG [Eptr]
 2049     emit_opcode(cbuf,0x0F);
 2050     emit_opcode(cbuf,0xB1);
 2051     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2052   %}
 2053 
 2054   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2055     // [Lock]
 2056     emit_opcode(cbuf,0xF0);
 2057 
 2058     // CMPXCHGB [Eptr]
 2059     emit_opcode(cbuf,0x0F);
 2060     emit_opcode(cbuf,0xB0);
 2061     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2062   %}
 2063 
 2064   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2065     // [Lock]
 2066     emit_opcode(cbuf,0xF0);
 2067 
 2068     // operand-size prefix (16-bit operands)
 2069     emit_opcode(cbuf, 0x66);
 2070 
 2071     // CMPXCHGW [Eptr]
 2072     emit_opcode(cbuf,0x0F);
 2073     emit_opcode(cbuf,0xB1);
 2074     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2075   %}
 2076 
 2077   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2078     int res_encoding = $res$$reg;
 2079 
 2080     // MOV  res,0
 2081     emit_opcode( cbuf, 0xB8 + res_encoding);
 2082     emit_d32( cbuf, 0 );
 2083     // JNE,s  fail
 2084     emit_opcode(cbuf,0x75);
 2085     emit_d8(cbuf, 5 );
 2086     // MOV  res,1
 2087     emit_opcode( cbuf, 0xB8 + res_encoding);
 2088     emit_d32( cbuf, 1 );
 2089     // fail:
 2090   %}
 2091 
 2092   enc_class set_instruction_start( ) %{
 2093     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2094   %}
 2095 
 2096   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2097     int reg_encoding = $ereg$$reg;
 2098     int base  = $mem$$base;
 2099     int index = $mem$$index;
 2100     int scale = $mem$$scale;
 2101     int displace = $mem$$disp;
 2102     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2103     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2104   %}
 2105 
 2106   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2107     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2108     int base  = $mem$$base;
 2109     int index = $mem$$index;
 2110     int scale = $mem$$scale;
 2111     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2112     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2113     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2114   %}
 2115 
 2116   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2117     int r1, r2;
 2118     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2119     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2120     emit_opcode(cbuf,0x0F);
 2121     emit_opcode(cbuf,$tertiary);
 2122     emit_rm(cbuf, 0x3, r1, r2);
 2123     emit_d8(cbuf,$cnt$$constant);
 2124     emit_d8(cbuf,$primary);
 2125     emit_rm(cbuf, 0x3, $secondary, r1);
 2126     emit_d8(cbuf,$cnt$$constant);
 2127   %}
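        // Illustrative expansion: for a long left shift by 5 the instruct
        // supplies tertiary 0xA4 (SHLD), so the sequence is SHLD hi,lo,5
        // followed by SHL lo,5; for right shifts the tertiary opcode is 0xAC
        // (SHRD) and the roles of the low and high halves are swapped.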
 2128 
 2129   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2130     emit_opcode( cbuf, 0x8B ); // Move
 2131     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2132     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2133       emit_d8(cbuf,$primary);
 2134       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2135       emit_d8(cbuf,$cnt$$constant-32);
 2136     }
 2137     emit_d8(cbuf,$primary);
 2138     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2139     emit_d8(cbuf,31);
 2140   %}
 2141 
 2142   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2143     int r1, r2;
 2144     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2145     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2146 
 2147     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2148     emit_rm(cbuf, 0x3, r1, r2);
 2149     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2150       emit_opcode(cbuf,$primary);
 2151       emit_rm(cbuf, 0x3, $secondary, r1);
 2152       emit_d8(cbuf,$cnt$$constant-32);
 2153     }
 2154     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2155     emit_rm(cbuf, 0x3, r2, r2);
 2156   %}
 2157 
 2158   // Clone of RegMem but accepts an extra parameter to access each
 2159   // half of a double in memory; it never needs relocation info.
 2160   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2161     emit_opcode(cbuf,$opcode$$constant);
 2162     int reg_encoding = $rm_reg$$reg;
 2163     int base     = $mem$$base;
 2164     int index    = $mem$$index;
 2165     int scale    = $mem$$scale;
 2166     int displace = $mem$$disp + $disp_for_half$$constant;
 2167     relocInfo::relocType disp_reloc = relocInfo::none;
 2168     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2169   %}
 2170 
 2171   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2172   //
 2173   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2174   // and it never needs relocation information.
 2175   // Frequently used to move data between FPU's Stack Top and memory.
 2176   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2177     int rm_byte_opcode = $rm_opcode$$constant;
 2178     int base     = $mem$$base;
 2179     int index    = $mem$$index;
 2180     int scale    = $mem$$scale;
 2181     int displace = $mem$$disp;
 2182     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2183     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2184   %}
 2185 
 2186   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2187     int rm_byte_opcode = $rm_opcode$$constant;
 2188     int base     = $mem$$base;
 2189     int index    = $mem$$index;
 2190     int scale    = $mem$$scale;
 2191     int displace = $mem$$disp;
 2192     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2193     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2194   %}
 2195 
 2196   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2197     int reg_encoding = $dst$$reg;
 2198     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2199     int index        = 0x04;            // 0x04 indicates no index
 2200     int scale        = 0x00;            // 0x00 indicates no scale
 2201     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2202     relocInfo::relocType disp_reloc = relocInfo::none;
 2203     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2204   %}
 2205 
 2206   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2207     // Compare dst,src
 2208     emit_opcode(cbuf,0x3B);
 2209     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2210     // jmp dst < src around move
 2211     emit_opcode(cbuf,0x7C);
 2212     emit_d8(cbuf,2);
 2213     // move dst,src
 2214     emit_opcode(cbuf,0x8B);
 2215     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2216   %}
 2217 
 2218   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2219     // Compare dst,src
 2220     emit_opcode(cbuf,0x3B);
 2221     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2222     // jmp dst > src around move
 2223     emit_opcode(cbuf,0x7F);
 2224     emit_d8(cbuf,2);
 2225     // move dst,src
 2226     emit_opcode(cbuf,0x8B);
 2227     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2228   %}
 2229 
 2230   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2231     // If src is FPR1, we can just FST to store it.
 2232     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2233     int reg_encoding = 0x2; // Just store
 2234     int base  = $mem$$base;
 2235     int index = $mem$$index;
 2236     int scale = $mem$$scale;
 2237     int displace = $mem$$disp;
 2238     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2239     if( $src$$reg != FPR1L_enc ) {
 2240       reg_encoding = 0x3;  // Store & pop
 2241       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2242       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2243     }
 2244     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2245     emit_opcode(cbuf,$primary);
 2246     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2247   %}
 2248 
 2249   enc_class neg_reg(rRegI dst) %{
 2250     // NEG $dst
 2251     emit_opcode(cbuf,0xF7);
 2252     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2253   %}
 2254 
 2255   enc_class setLT_reg(eCXRegI dst) %{
 2256     // SETLT $dst
 2257     emit_opcode(cbuf,0x0F);
 2258     emit_opcode(cbuf,0x9C);
 2259     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2260   %}
 2261 
 2262   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2263     int tmpReg = $tmp$$reg;
 2264 
 2265     // SUB $p,$q
 2266     emit_opcode(cbuf,0x2B);
 2267     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2268     // SBB $tmp,$tmp
 2269     emit_opcode(cbuf,0x1B);
 2270     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2271     // AND $tmp,$y
 2272     emit_opcode(cbuf,0x23);
 2273     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2274     // ADD $p,$tmp
 2275     emit_opcode(cbuf,0x03);
 2276     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2277   %}
 2278 
 2279   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2280     // TEST shift,32
 2281     emit_opcode(cbuf,0xF7);
 2282     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2283     emit_d32(cbuf,0x20);
 2284     // JEQ,s small
 2285     emit_opcode(cbuf, 0x74);
 2286     emit_d8(cbuf, 0x04);
 2287     // MOV    $dst.hi,$dst.lo
 2288     emit_opcode( cbuf, 0x8B );
 2289     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2290     // CLR    $dst.lo
 2291     emit_opcode(cbuf, 0x33);
 2292     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2293 // small:
 2294     // SHLD   $dst.hi,$dst.lo,$shift
 2295     emit_opcode(cbuf,0x0F);
 2296     emit_opcode(cbuf,0xA5);
 2297     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2298     // SHL    $dst.lo,$shift
 2299     emit_opcode(cbuf,0xD3);
 2300     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2301   %}
 2302 
 2303   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2304     // TEST shift,32
 2305     emit_opcode(cbuf,0xF7);
 2306     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2307     emit_d32(cbuf,0x20);
 2308     // JEQ,s small
 2309     emit_opcode(cbuf, 0x74);
 2310     emit_d8(cbuf, 0x04);
 2311     // MOV    $dst.lo,$dst.hi
 2312     emit_opcode( cbuf, 0x8B );
 2313     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2314     // CLR    $dst.hi
 2315     emit_opcode(cbuf, 0x33);
 2316     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2317 // small:
 2318     // SHRD   $dst.lo,$dst.hi,$shift
 2319     emit_opcode(cbuf,0x0F);
 2320     emit_opcode(cbuf,0xAD);
 2321     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
 2322     // SHR    $dst.hi,$shift
 2323     emit_opcode(cbuf,0xD3);
 2324     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2325   %}
 2326 
 2327   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2328     // TEST shift,32
 2329     emit_opcode(cbuf,0xF7);
 2330     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2331     emit_d32(cbuf,0x20);
 2332     // JEQ,s small
 2333     emit_opcode(cbuf, 0x74);
 2334     emit_d8(cbuf, 0x05);
 2335     // MOV    $dst.lo,$dst.hi
 2336     emit_opcode( cbuf, 0x8B );
 2337     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2338     // SAR    $dst.hi,31
 2339     emit_opcode(cbuf, 0xC1);
 2340     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2341     emit_d8(cbuf, 0x1F );
 2342 // small:
 2343     // SHRD   $dst.lo,$dst.hi,$shift
 2344     emit_opcode(cbuf,0x0F);
 2345     emit_opcode(cbuf,0xAD);
 2346     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
 2347     // SAR    $dst.hi,$shift
 2348     emit_opcode(cbuf,0xD3);
 2349     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2350   %}
 2351 
 2352 
 2353   // ----------------- Encodings for floating point unit -----------------
 2354   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2355   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2356     $$$emit8$primary;
 2357     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2358   %}
 2359 
 2360   // Pop argument in FPR0 with FSTP ST(0)
 2361   enc_class PopFPU() %{
 2362     emit_opcode( cbuf, 0xDD );
 2363     emit_d8( cbuf, 0xD8 );
 2364   %}
 2365 
 2366   // !!!!! equivalent to Pop_Reg_F
 2367   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2368     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2369     emit_d8( cbuf, 0xD8+$dst$$reg );
 2370   %}
 2371 
 2372   enc_class Push_Reg_DPR( regDPR dst ) %{
 2373     emit_opcode( cbuf, 0xD9 );
 2374     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2375   %}
 2376 
 2377   enc_class strictfp_bias1( regDPR dst ) %{
 2378     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2379     emit_opcode( cbuf, 0x2D );
 2380     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2381     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2382     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2383   %}
 2384 
 2385   enc_class strictfp_bias2( regDPR dst ) %{
 2386     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2387     emit_opcode( cbuf, 0x2D );
 2388     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2389     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2390     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2391   %}
 2392 
 2393   // Special case for moving an integer register to a stack slot.
 2394   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2395     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2396   %}
 2397 
 2398   // Special case for moving a register to a stack slot.
 2399   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2400     // Opcode already emitted
 2401     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2402     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2403     emit_d32(cbuf, $dst$$disp);   // Displacement
 2404   %}
 2405 
 2406   // Push the integer in stackSlot 'src' onto FP-stack
 2407   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2408     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2409   %}
 2410 
 2411   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2412   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2413     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2414   %}
 2415 
 2416   // Same as Pop_Mem_F except for opcode
 2417   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2418   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2419     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2420   %}
 2421 
 2422   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2423     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2424     emit_d8( cbuf, 0xD8+$dst$$reg );
 2425   %}
 2426 
 2427   enc_class Push_Reg_FPR( regFPR dst ) %{
 2428     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2429     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2430   %}
 2431 
 2432   // Push FPU's float to a stack-slot, and pop FPU-stack
 2433   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2434     int pop = 0x02;
 2435     if ($src$$reg != FPR1L_enc) {
 2436       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2437       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2438       pop = 0x03;
 2439     }
 2440     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2441   %}
 2442 
 2443   // Push FPU's double to a stack-slot, and pop FPU-stack
 2444   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2445     int pop = 0x02;
 2446     if ($src$$reg != FPR1L_enc) {
 2447       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2448       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2449       pop = 0x03;
 2450     }
 2451     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2452   %}
 2453 
 2454   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2455   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2456     int pop = 0xD0 - 1; // -1 since we skip FLD
 2457     if ($src$$reg != FPR1L_enc) {
 2458       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2459       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2460       pop = 0xD8;
 2461     }
 2462     emit_opcode( cbuf, 0xDD );
 2463     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2464   %}
 2465 
 2466 
 2467   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2468     // load dst in FPR0
 2469     emit_opcode( cbuf, 0xD9 );
 2470     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2471     if ($src$$reg != FPR1L_enc) {
 2472       // fincstp
 2473       emit_opcode (cbuf, 0xD9);
 2474       emit_opcode (cbuf, 0xF7);
 2475       // swap src with FPR1:
 2476       // FXCH FPR1 with src
 2477       emit_opcode(cbuf, 0xD9);
 2478       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2479       // fdecstp
 2480       emit_opcode (cbuf, 0xD9);
 2481       emit_opcode (cbuf, 0xF6);
 2482     }
 2483   %}
 2484 
 2485   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2486     MacroAssembler _masm(&cbuf);
 2487     __ subptr(rsp, 8);
 2488     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2489     __ fld_d(Address(rsp, 0));
 2490     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2491     __ fld_d(Address(rsp, 0));
 2492   %}
 2493 
 2494   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2495     MacroAssembler _masm(&cbuf);
 2496     __ subptr(rsp, 4);
 2497     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2498     __ fld_s(Address(rsp, 0));
 2499     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2500     __ fld_s(Address(rsp, 0));
 2501   %}
 2502 
 2503   enc_class Push_ResultD(regD dst) %{
 2504     MacroAssembler _masm(&cbuf);
 2505     __ fstp_d(Address(rsp, 0));
 2506     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2507     __ addptr(rsp, 8);
 2508   %}
 2509 
 2510   enc_class Push_ResultF(regF dst, immI d8) %{
 2511     MacroAssembler _masm(&cbuf);
 2512     __ fstp_s(Address(rsp, 0));
 2513     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2514     __ addptr(rsp, $d8$$constant);
 2515   %}
 2516 
 2517   enc_class Push_SrcD(regD src) %{
 2518     MacroAssembler _masm(&cbuf);
 2519     __ subptr(rsp, 8);
 2520     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2521     __ fld_d(Address(rsp, 0));
 2522   %}
 2523 
 2524   enc_class push_stack_temp_qword() %{
 2525     MacroAssembler _masm(&cbuf);
 2526     __ subptr(rsp, 8);
 2527   %}
 2528 
 2529   enc_class pop_stack_temp_qword() %{
 2530     MacroAssembler _masm(&cbuf);
 2531     __ addptr(rsp, 8);
 2532   %}
 2533 
 2534   enc_class push_xmm_to_fpr1(regD src) %{
 2535     MacroAssembler _masm(&cbuf);
 2536     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2537     __ fld_d(Address(rsp, 0));
 2538   %}
 2539 
 2540   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2541     if ($src$$reg != FPR1L_enc) {
 2542       // fincstp
 2543       emit_opcode (cbuf, 0xD9);
 2544       emit_opcode (cbuf, 0xF7);
 2545       // FXCH FPR1 with src
 2546       emit_opcode(cbuf, 0xD9);
 2547       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2548       // fdecstp
 2549       emit_opcode (cbuf, 0xD9);
 2550       emit_opcode (cbuf, 0xF6);
 2551     }
 2552     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2553     // // FSTP   FPR$dst$$reg
 2554     // emit_opcode( cbuf, 0xDD );
 2555     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2556   %}
 2557 
 2558   enc_class fnstsw_sahf_skip_parity() %{
 2559     // fnstsw ax
 2560     emit_opcode( cbuf, 0xDF );
 2561     emit_opcode( cbuf, 0xE0 );
 2562     // sahf
 2563     emit_opcode( cbuf, 0x9E );
 2564     // jnp  ::skip
 2565     emit_opcode( cbuf, 0x7B );
 2566     emit_opcode( cbuf, 0x05 );
 2567   %}
 2568 
 2569   enc_class emitModDPR() %{
 2570     // fprem must be iterative
 2571     // :: loop
 2572     // fprem
 2573     emit_opcode( cbuf, 0xD9 );
 2574     emit_opcode( cbuf, 0xF8 );
 2575     // wait
 2576     emit_opcode( cbuf, 0x9b );
 2577     // fnstsw ax
 2578     emit_opcode( cbuf, 0xDF );
 2579     emit_opcode( cbuf, 0xE0 );
 2580     // sahf
 2581     emit_opcode( cbuf, 0x9E );
 2582     // jp  ::loop
 2583     emit_opcode( cbuf, 0x0F );
 2584     emit_opcode( cbuf, 0x8A );
 2585     emit_opcode( cbuf, 0xF4 );
 2586     emit_opcode( cbuf, 0xFF );
 2587     emit_opcode( cbuf, 0xFF );
 2588     emit_opcode( cbuf, 0xFF );
 2589   %}
 2590 
 2591   enc_class fpu_flags() %{
 2592     // fnstsw_ax
 2593     emit_opcode( cbuf, 0xDF);
 2594     emit_opcode( cbuf, 0xE0);
 2595     // test ax,0x0400
 2596     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2597     emit_opcode( cbuf, 0xA9 );
 2598     emit_d16   ( cbuf, 0x0400 );
 2599     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2600     // // test rax,0x0400
 2601     // emit_opcode( cbuf, 0xA9 );
 2602     // emit_d32   ( cbuf, 0x00000400 );
 2603     //
 2604     // jz exit (no unordered comparison)
 2605     emit_opcode( cbuf, 0x74 );
 2606     emit_d8    ( cbuf, 0x02 );
 2607     // mov ah,1 - treat as LT case (set carry flag)
 2608     emit_opcode( cbuf, 0xB4 );
 2609     emit_d8    ( cbuf, 0x01 );
 2610     // sahf
 2611     emit_opcode( cbuf, 0x9E);
 2612   %}
 2613 
 2614   enc_class cmpF_P6_fixup() %{
 2615     // Fixup the integer flags in case comparison involved a NaN
 2616     //
 2617     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2618     emit_opcode( cbuf, 0x7B );
 2619     emit_d8    ( cbuf, 0x03 );
 2620     // MOV AH,1 - treat as LT case (set carry flag)
 2621     emit_opcode( cbuf, 0xB4 );
 2622     emit_d8    ( cbuf, 0x01 );
 2623     // SAHF
 2624     emit_opcode( cbuf, 0x9E);
 2625     // NOP     // target for branch to avoid branch to branch
 2626     emit_opcode( cbuf, 0x90);
 2627   %}
 2628 
 2629 //     fnstsw_ax();
 2630 //     sahf();
 2631 //     movl(dst, nan_result);
 2632 //     jcc(Assembler::parity, exit);
 2633 //     movl(dst, less_result);
 2634 //     jcc(Assembler::below, exit);
 2635 //     movl(dst, equal_result);
 2636 //     jcc(Assembler::equal, exit);
 2637 //     movl(dst, greater_result);
 2638 
 2639 // less_result     =  1;
 2640 // greater_result  = -1;
 2641 // equal_result    = 0;
 2642 // nan_result      = -1;
 2643 
 2644   enc_class CmpF_Result(rRegI dst) %{
 2645     // fnstsw_ax();
 2646     emit_opcode( cbuf, 0xDF);
 2647     emit_opcode( cbuf, 0xE0);
 2648     // sahf
 2649     emit_opcode( cbuf, 0x9E);
 2650     // movl(dst, nan_result);
 2651     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2652     emit_d32( cbuf, -1 );
 2653     // jcc(Assembler::parity, exit);
 2654     emit_opcode( cbuf, 0x7A );
 2655     emit_d8    ( cbuf, 0x13 );
 2656     // movl(dst, less_result);
 2657     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2658     emit_d32( cbuf, -1 );
 2659     // jcc(Assembler::below, exit);
 2660     emit_opcode( cbuf, 0x72 );
 2661     emit_d8    ( cbuf, 0x0C );
 2662     // movl(dst, equal_result);
 2663     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2664     emit_d32( cbuf, 0 );
 2665     // jcc(Assembler::equal, exit);
 2666     emit_opcode( cbuf, 0x74 );
 2667     emit_d8    ( cbuf, 0x05 );
 2668     // movl(dst, greater_result);
 2669     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2670     emit_d32( cbuf, 1 );
 2671   %}
 2672 
 2673 
 2674   // Compare the longs and set flags
 2675   // BROKEN!  Do Not use as-is
 2676   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2677     // CMP    $src1.hi,$src2.hi
 2678     emit_opcode( cbuf, 0x3B );
 2679     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2680     // JNE,s  done
 2681     emit_opcode(cbuf,0x75);
 2682     emit_d8(cbuf, 2 );
 2683     // CMP    $src1.lo,$src2.lo
 2684     emit_opcode( cbuf, 0x3B );
 2685     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2686 // done:
 2687   %}
 2688 
 2689   enc_class convert_int_long( regL dst, rRegI src ) %{
 2690     // mov $dst.lo,$src
 2691     int dst_encoding = $dst$$reg;
 2692     int src_encoding = $src$$reg;
 2693     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2694     // mov $dst.hi,$src
 2695     encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2696     // sar $dst.hi,31
 2697     emit_opcode( cbuf, 0xC1 );
 2698     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2699     emit_d8(cbuf, 0x1F );
 2700   %}
 2701 
 2702   enc_class convert_long_double( eRegL src ) %{
 2703     // push $src.hi
 2704     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2705     // push $src.lo
 2706     emit_opcode(cbuf, 0x50+$src$$reg  );
 2707     // fild 64-bits at [SP]
 2708     emit_opcode(cbuf,0xdf);
 2709     emit_d8(cbuf, 0x6C);
 2710     emit_d8(cbuf, 0x24);
 2711     emit_d8(cbuf, 0x00);
 2712     // pop stack
 2713     emit_opcode(cbuf, 0x83); // add  SP, #8
 2714     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2715     emit_d8(cbuf, 0x8);
 2716   %}
 2717 
 2718   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2719     // IMUL   EDX:EAX,$src1
 2720     emit_opcode( cbuf, 0xF7 );
 2721     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2722     // SAR    EDX,$cnt-32
 2723     int shift_count = ((int)$cnt$$constant) - 32;
 2724     if (shift_count > 0) {
 2725       emit_opcode(cbuf, 0xC1);
 2726       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2727       emit_d8(cbuf, shift_count);
 2728     }
 2729   %}
 2730 
  // Same as convert_long_double, but without the trailing ADD ESP, 8
 2732   enc_class convert_long_double2( eRegL src ) %{
 2733     // push $src.hi
 2734     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2735     // push $src.lo
 2736     emit_opcode(cbuf, 0x50+$src$$reg  );
 2737     // fild 64-bits at [SP]
 2738     emit_opcode(cbuf,0xdf);
 2739     emit_d8(cbuf, 0x6C);
 2740     emit_d8(cbuf, 0x24);
 2741     emit_d8(cbuf, 0x00);
 2742   %}
 2743 
 2744   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2745     // Basic idea: long = (long)int * (long)int
 2746     // IMUL EDX:EAX, src
 2747     emit_opcode( cbuf, 0xF7 );
 2748     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2749   %}
 2750 
 2751   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2752     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2753     // MUL EDX:EAX, src
 2754     emit_opcode( cbuf, 0xF7 );
 2755     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2756   %}
 2757 
 2758   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2759     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2760     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2761     // MOV    $tmp,$src.lo
 2762     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2763     // IMUL   $tmp,EDX
 2764     emit_opcode( cbuf, 0x0F );
 2765     emit_opcode( cbuf, 0xAF );
 2766     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2767     // MOV    EDX,$src.hi
 2768     encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2769     // IMUL   EDX,EAX
 2770     emit_opcode( cbuf, 0x0F );
 2771     emit_opcode( cbuf, 0xAF );
 2772     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2773     // ADD    $tmp,EDX
 2774     emit_opcode( cbuf, 0x03 );
 2775     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2776     // MUL   EDX:EAX,$src.lo
 2777     emit_opcode( cbuf, 0xF7 );
 2778     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
 2780     emit_opcode( cbuf, 0x03 );
 2781     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2782   %}
 2783 
 2784   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2785     // Basic idea: lo(result) = lo(src * y_lo)
 2786     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2787     // IMUL   $tmp,EDX,$src
 2788     emit_opcode( cbuf, 0x6B );
 2789     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2790     emit_d8( cbuf, (int)$src$$constant );
 2791     // MOV    EDX,$src
 2792     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2793     emit_d32( cbuf, (int)$src$$constant );
 2794     // MUL   EDX:EAX,EDX
 2795     emit_opcode( cbuf, 0xF7 );
 2796     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
 2798     emit_opcode( cbuf, 0x03 );
 2799     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2800   %}
 2801 
 2802   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2803     // PUSH src1.hi
 2804     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2805     // PUSH src1.lo
 2806     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2807     // PUSH src2.hi
 2808     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2809     // PUSH src2.lo
 2810     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2811     // CALL directly to the runtime
 2812     MacroAssembler _masm(&cbuf);
 2813     cbuf.set_insts_mark();
 2814     emit_opcode(cbuf,0xE8);       // Call into runtime
 2815     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2816     __ post_call_nop();
 2817     // Restore stack
    emit_opcode(cbuf, 0x83); // add  ESP, #16 (pop the four pushed argument words)
 2819     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2820     emit_d8(cbuf, 4*4);
 2821   %}
 2822 
 2823   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2824     // PUSH src1.hi
 2825     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2826     // PUSH src1.lo
 2827     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2828     // PUSH src2.hi
 2829     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2830     // PUSH src2.lo
 2831     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2832     // CALL directly to the runtime
 2833     MacroAssembler _masm(&cbuf);
 2834     cbuf.set_insts_mark();
 2835     emit_opcode(cbuf,0xE8);       // Call into runtime
 2836     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2837     __ post_call_nop();
 2838     // Restore stack
    emit_opcode(cbuf, 0x83); // add  ESP, #16 (pop the four pushed argument words)
 2840     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2841     emit_d8(cbuf, 4*4);
 2842   %}
 2843 
 2844   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2845     // MOV   $tmp,$src.lo
 2846     emit_opcode(cbuf, 0x8B);
 2847     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2848     // OR    $tmp,$src.hi
 2849     emit_opcode(cbuf, 0x0B);
 2850     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2851   %}
 2852 
 2853   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2854     // CMP    $src1.lo,$src2.lo
 2855     emit_opcode( cbuf, 0x3B );
 2856     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2857     // JNE,s  skip
 2858     emit_cc(cbuf, 0x70, 0x5);
 2859     emit_d8(cbuf,2);
 2860     // CMP    $src1.hi,$src2.hi
 2861     emit_opcode( cbuf, 0x3B );
 2862     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2863   %}
 2864 
 2865   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo   (long compare; set flags for low bits)
 2867     emit_opcode( cbuf, 0x3B );
 2868     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2869     // MOV    $tmp,$src1.hi
 2870     emit_opcode( cbuf, 0x8B );
 2871     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
    // SBB    $tmp,$src2.hi   (compute flags for long compare)
 2873     emit_opcode( cbuf, 0x1B );
 2874     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2875   %}
 2876 
 2877   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2878     // XOR    $tmp,$tmp
 2879     emit_opcode(cbuf,0x33);  // XOR
 2880     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2881     // CMP    $tmp,$src.lo
 2882     emit_opcode( cbuf, 0x3B );
 2883     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2884     // SBB    $tmp,$src.hi
 2885     emit_opcode( cbuf, 0x1B );
 2886     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2887   %}
 2888 
 2889  // Sniff, sniff... smells like Gnu Superoptimizer
 2890   enc_class neg_long( eRegL dst ) %{
 2891     emit_opcode(cbuf,0xF7);    // NEG hi
 2892     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2893     emit_opcode(cbuf,0xF7);    // NEG lo
 2894     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2895     emit_opcode(cbuf,0x83);    // SBB hi,0
 2896     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2897     emit_d8    (cbuf,0 );
 2898   %}
 2899 
 2900   enc_class enc_pop_rdx() %{
 2901     emit_opcode(cbuf,0x5A);
 2902   %}
 2903 
 2904   enc_class enc_rethrow() %{
 2905     MacroAssembler _masm(&cbuf);
 2906     cbuf.set_insts_mark();
 2907     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2908     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2909                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2910     __ post_call_nop();
 2911   %}
 2912 
 2913 
  // Convert a double to an int.  Java semantics require we do complex
  // manipulations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  For NaN and out-of-range values the
  // hardware stores the sentinel 0x80000000, which we test for below and
  // patch up with a call into the runtime.
 2919   enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value would
    // throw an exception (while normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
 2927     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2928     emit_opcode(cbuf,0x2D);
 2929     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2930     // Allocate a word
 2931     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2932     emit_opcode(cbuf,0xEC);
 2933     emit_d8(cbuf,0x04);
 2934     // Encoding assumes a double has been pushed into FPR0.
 2935     // Store down the double as an int, popping the FPU stack
 2936     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2937     emit_opcode(cbuf,0x1C);
 2938     emit_d8(cbuf,0x24);
 2939     // Restore the rounding mode; mask the exception
 2940     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2941     emit_opcode(cbuf,0x2D);
 2942     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2943         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2944         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2945 
 2946     // Load the converted int; adjust CPU stack
 2947     emit_opcode(cbuf,0x58);       // POP EAX
 2948     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2949     emit_d32   (cbuf,0x80000000); //         0x80000000
 2950     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2951     emit_d8    (cbuf,0x07);       // Size of slow_call
 2952     // Push src onto stack slow-path
 2953     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2954     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2955     // CALL directly to the runtime
 2956     MacroAssembler _masm(&cbuf);
 2957     cbuf.set_insts_mark();
 2958     emit_opcode(cbuf,0xE8);       // Call into runtime
 2959     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2960     __ post_call_nop();
 2961     // Carry on here...
 2962   %}
 2963 
 2964   enc_class DPR2L_encoding( regDPR src ) %{
 2965     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2966     emit_opcode(cbuf,0x2D);
 2967     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (room for the 64-bit result)
 2969     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2970     emit_opcode(cbuf,0xEC);
 2971     emit_d8(cbuf,0x08);
 2972     // Encoding assumes a double has been pushed into FPR0.
 2973     // Store down the double as a long, popping the FPU stack
 2974     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2975     emit_opcode(cbuf,0x3C);
 2976     emit_d8(cbuf,0x24);
 2977     // Restore the rounding mode; mask the exception
 2978     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2979     emit_opcode(cbuf,0x2D);
 2980     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2981         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2982         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2983 
    // Load the converted long; adjust CPU stack
 2985     emit_opcode(cbuf,0x58);       // POP EAX
 2986     emit_opcode(cbuf,0x5A);       // POP EDX
 2987     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2988     emit_d8    (cbuf,0xFA);       // rdx
 2989     emit_d32   (cbuf,0x80000000); //         0x80000000
 2990     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2991     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2992     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // ModRM: EAX,EAX
 2994     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2995     emit_d8    (cbuf,0x07);       // Size of slow_call
 2996     // Push src onto stack slow-path
 2997     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2998     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2999     // CALL directly to the runtime
 3000     MacroAssembler _masm(&cbuf);
 3001     cbuf.set_insts_mark();
 3002     emit_opcode(cbuf,0xE8);       // Call into runtime
 3003     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 3004     __ post_call_nop();
 3005     // Carry on here...
 3006   %}
 3007 
 3008   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 3009     // Operand was loaded from memory into fp ST (stack top)
 3010     // FMUL   ST,$src  /* D8 C8+i */
 3011     emit_opcode(cbuf, 0xD8);
 3012     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 3013   %}
 3014 
 3015   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // could use FADDP  src2,ST  /* DE C0+i */
 3020   %}
 3021 
 3022   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3023     // FADDP  src2,ST  /* DE C0+i */
 3024     emit_opcode(cbuf, 0xDE);
 3025     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3026   %}
 3027 
 3028   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 3029     // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
 3037   %}
 3038 
 3039   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3040     // Operand was loaded from memory into fp ST (stack top)
 3041     // FADD   ST,$src  /* D8 C0+i */
 3042     emit_opcode(cbuf, 0xD8);
 3043     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3044 
    // FMUL   ST,src2  /* D8 C8+i */
 3046     emit_opcode(cbuf, 0xD8);
 3047     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3048   %}
 3049 
 3050 
 3051   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3052     // Operand was loaded from memory into fp ST (stack top)
 3053     // FADD   ST,$src  /* D8 C0+i */
 3054     emit_opcode(cbuf, 0xD8);
 3055     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3056 
 3057     // FMULP  src2,ST  /* DE C8+i */
 3058     emit_opcode(cbuf, 0xDE);
 3059     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3060   %}
 3061 
 3062   // Atomically load the volatile long
 3063   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3064     emit_opcode(cbuf,0xDF);
 3065     int rm_byte_opcode = 0x05;
 3066     int base     = $mem$$base;
 3067     int index    = $mem$$index;
 3068     int scale    = $mem$$scale;
 3069     int displace = $mem$$disp;
 3070     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3071     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3072     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3073   %}
 3074 
 3075   // Volatile Store Long.  Must be atomic, so move it into
 3076   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3077   // target address before the store (for null-ptr checks)
 3078   // so the memory operand is used twice in the encoding.
 3079   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3080     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3081     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3082     emit_opcode(cbuf,0xDF);
 3083     int rm_byte_opcode = 0x07;
 3084     int base     = $mem$$base;
 3085     int index    = $mem$$index;
 3086     int scale    = $mem$$scale;
 3087     int displace = $mem$$disp;
 3088     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3089     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3090   %}
 3091 
 3092 %}
 3093 
 3094 
 3095 //----------FRAME--------------------------------------------------------------
 3096 // Definition of frame structure and management information.
 3097 //
 3098 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3099 //                             |   (to get allocators register number
 3100 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3101 //  r   CALLER     |        |
 3102 //  o     |        +--------+      pad to even-align allocators stack-slot
 3103 //  w     V        |  pad0  |        numbers; owned by CALLER
 3104 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3105 //  h     ^        |   in   |  5
 3106 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3107 //  |     |        |        |  3
 3108 //  |     |        +--------+
 3109 //  V     |        | old out|      Empty on Intel, window on Sparc
 3110 //        |    old |preserve|      Must be even aligned.
 3111 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3112 //        |        |   in   |  3   area for Intel ret address
 3113 //     Owned by    |preserve|      Empty on Sparc.
 3114 //       SELF      +--------+
 3115 //        |        |  pad2  |  2   pad to align old SP
 3116 //        |        +--------+  1
 3117 //        |        | locks  |  0
 3118 //        |        +--------+----> OptoReg::stack0(), even aligned
 3119 //        |        |  pad1  | 11   pad to align new SP
 3120 //        |        +--------+
 3121 //        |        |        | 10
 3122 //        |        | spills |  9   spills
 3123 //        V        |        |  8   (pad0 slot for callee)
 3124 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3125 //        ^        |  out   |  7
 3126 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3127 //     Owned by    +--------+
 3128 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3129 //        |    new |preserve|      Must be even-aligned.
 3130 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3131 //        |        |        |
 3132 //
 3133 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3134 //         known from SELF's arguments and the Java calling convention.
 3135 //         Region 6-7 is determined per call site.
 3136 // Note 2: If the calling convention leaves holes in the incoming argument
 3137 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3138 //         are owned by the CALLEE.  Holes should not be necessary in the
 3139 //         incoming area, as the Java calling convention is completely under
 3140 //         the control of the AD file.  Doubles can be sorted and packed to
 3141 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3142 //         varargs C calling conventions.
 3143 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3144 //         even aligned with pad0 as needed.
 3145 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3146 //         region 6-11 is even aligned; it may be padded out more so that
 3147 //         the region from SP to FP meets the minimum stack alignment.
 3148 
 3149 frame %{
  // This register defines part of the calling convention
  // between compiled code and the interpreter.
 3152   inline_cache_reg(EAX);                // Inline Cache Register
 3153 
 3154   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3155   cisc_spilling_operand_name(indOffset32);
 3156 
 3157   // Number of stack slots consumed by locking an object
 3158   sync_stack_slots(1);
 3159 
 3160   // Compiled code's Frame Pointer
 3161   frame_pointer(ESP);
  // The interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted Java to compiled Java.
 3165   interpreter_frame_pointer(EBP);
 3166 
 3167   // Stack alignment requirement
 3168   // Alignment size in bytes (128-bit -> 16 bytes)
 3169   stack_alignment(StackAlignmentInBytes);
 3170 
 3171   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3172   // for calls to C.  Supports the var-args backing area for register parms.
 3173   varargs_C_out_slots_killed(0);
 3174 
 3175   // The after-PROLOG location of the return address.  Location of
 3176   // return address specifies a type (REG or STACK) and a number
 3177   // representing the register number (i.e. - use a register name) or
 3178   // stack slot.
 3179   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word.
 3181   return_addr(STACK - 1 +
 3182               align_up((Compile::current()->in_preserve_stack_slots() +
 3183                         Compile::current()->fixed_slots()),
 3184                        stack_alignment_in_slots()));
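
  // Worked example with hypothetical values: two in-preserve slots, one fixed
  // slot and a two-slot stack alignment give align_up(2 + 1, 2) = 4, so the
  // return address would sit at stack slot (STACK - 1 + 4), i.e. three slots
  // above OptoReg::stack0().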
 3185 
 3186   // Location of C & interpreter return values
 3187   c_return_value %{
 3188     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3189     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3190     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3191 
 3192     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3193     // that C functions return float and double results in XMM0.
 3194     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3195       return OptoRegPair(XMM0b_num,XMM0_num);
 3196     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3197       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3198 
 3199     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3200   %}
 3201 
 3202   // Location of return values
 3203   return_value %{
 3204     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3205     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3206     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3207     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3208       return OptoRegPair(XMM0b_num,XMM0_num);
 3209     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3210       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3211     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3212   %}
 3213 
 3214 %}
 3215 
 3216 //----------ATTRIBUTES---------------------------------------------------------
 3217 //----------Operand Attributes-------------------------------------------------
 3218 op_attrib op_cost(0);        // Required cost attribute
 3219 
 3220 //----------Instruction Attributes---------------------------------------------
 3221 ins_attrib ins_cost(100);       // Required cost attribute
 3222 ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
 3226 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3227                                 // specifies the alignment that some part of the instruction (not
 3228                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3229                                 // function must be provided for the instruction
 3230 
 3231 //----------OPERANDS-----------------------------------------------------------
 3232 // Operand definitions must precede instruction definitions for correct parsing
 3233 // in the ADLC because operands constitute user defined types which are used in
 3234 // instruction definitions.
 3235 
 3236 //----------Simple Operands----------------------------------------------------
 3237 // Immediate Operands
 3238 // Integer Immediate
 3239 operand immI() %{
 3240   match(ConI);
 3241 
 3242   op_cost(10);
 3243   format %{ %}
 3244   interface(CONST_INTER);
 3245 %}
 3246 
 3247 // Constant for test vs zero
 3248 operand immI_0() %{
 3249   predicate(n->get_int() == 0);
 3250   match(ConI);
 3251 
 3252   op_cost(0);
 3253   format %{ %}
 3254   interface(CONST_INTER);
 3255 %}
 3256 
 3257 // Constant for increment
 3258 operand immI_1() %{
 3259   predicate(n->get_int() == 1);
 3260   match(ConI);
 3261 
 3262   op_cost(0);
 3263   format %{ %}
 3264   interface(CONST_INTER);
 3265 %}
 3266 
 3267 // Constant for decrement
 3268 operand immI_M1() %{
 3269   predicate(n->get_int() == -1);
 3270   match(ConI);
 3271 
 3272   op_cost(0);
 3273   format %{ %}
 3274   interface(CONST_INTER);
 3275 %}
 3276 
 3277 // Valid scale values for addressing modes
 3278 operand immI2() %{
 3279   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3280   match(ConI);
 3281 
 3282   format %{ %}
 3283   interface(CONST_INTER);
 3284 %}
 3285 
 3286 operand immI8() %{
 3287   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3288   match(ConI);
 3289 
 3290   op_cost(5);
 3291   format %{ %}
 3292   interface(CONST_INTER);
 3293 %}
 3294 
 3295 operand immU8() %{
 3296   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3297   match(ConI);
 3298 
 3299   op_cost(5);
 3300   format %{ %}
 3301   interface(CONST_INTER);
 3302 %}
 3303 
 3304 operand immI16() %{
 3305   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3306   match(ConI);
 3307 
 3308   op_cost(10);
 3309   format %{ %}
 3310   interface(CONST_INTER);
 3311 %}
 3312 
 3313 // Int Immediate non-negative
 3314 operand immU31()
 3315 %{
 3316   predicate(n->get_int() >= 0);
 3317   match(ConI);
 3318 
 3319   op_cost(0);
 3320   format %{ %}
 3321   interface(CONST_INTER);
 3322 %}
 3323 
 3324 // Constant for long shifts
 3325 operand immI_32() %{
 3326   predicate( n->get_int() == 32 );
 3327   match(ConI);
 3328 
 3329   op_cost(0);
 3330   format %{ %}
 3331   interface(CONST_INTER);
 3332 %}
 3333 
 3334 operand immI_1_31() %{
 3335   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3336   match(ConI);
 3337 
 3338   op_cost(0);
 3339   format %{ %}
 3340   interface(CONST_INTER);
 3341 %}
 3342 
 3343 operand immI_32_63() %{
 3344   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3345   match(ConI);
 3346   op_cost(0);
 3347 
 3348   format %{ %}
 3349   interface(CONST_INTER);
 3350 %}
 3351 
 3352 operand immI_2() %{
 3353   predicate( n->get_int() == 2 );
 3354   match(ConI);
 3355 
 3356   op_cost(0);
 3357   format %{ %}
 3358   interface(CONST_INTER);
 3359 %}
 3360 
 3361 operand immI_3() %{
 3362   predicate( n->get_int() == 3 );
 3363   match(ConI);
 3364 
 3365   op_cost(0);
 3366   format %{ %}
 3367   interface(CONST_INTER);
 3368 %}
 3369 
 3370 operand immI_4()
 3371 %{
 3372   predicate(n->get_int() == 4);
 3373   match(ConI);
 3374 
 3375   op_cost(0);
 3376   format %{ %}
 3377   interface(CONST_INTER);
 3378 %}
 3379 
 3380 operand immI_8()
 3381 %{
 3382   predicate(n->get_int() == 8);
 3383   match(ConI);
 3384 
 3385   op_cost(0);
 3386   format %{ %}
 3387   interface(CONST_INTER);
 3388 %}
 3389 
 3390 // Pointer Immediate
 3391 operand immP() %{
 3392   match(ConP);
 3393 
 3394   op_cost(10);
 3395   format %{ %}
 3396   interface(CONST_INTER);
 3397 %}
 3398 
 3399 // NULL Pointer Immediate
 3400 operand immP0() %{
 3401   predicate( n->get_ptr() == 0 );
 3402   match(ConP);
 3403   op_cost(0);
 3404 
 3405   format %{ %}
 3406   interface(CONST_INTER);
 3407 %}
 3408 
 3409 // Long Immediate
 3410 operand immL() %{
 3411   match(ConL);
 3412 
 3413   op_cost(20);
 3414   format %{ %}
 3415   interface(CONST_INTER);
 3416 %}
 3417 
 3418 // Long Immediate zero
 3419 operand immL0() %{
 3420   predicate( n->get_long() == 0L );
 3421   match(ConL);
 3422   op_cost(0);
 3423 
 3424   format %{ %}
 3425   interface(CONST_INTER);
 3426 %}
 3427 
// Long Immediate minus-one
 3429 operand immL_M1() %{
 3430   predicate( n->get_long() == -1L );
 3431   match(ConL);
 3432   op_cost(0);
 3433 
 3434   format %{ %}
 3435   interface(CONST_INTER);
 3436 %}
 3437 
 3438 // Long immediate from 0 to 127.
 3439 // Used for a shorter form of long mul by 10.
 3440 operand immL_127() %{
 3441   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3442   match(ConL);
 3443   op_cost(0);
 3444 
 3445   format %{ %}
 3446   interface(CONST_INTER);
 3447 %}
 3448 
 3449 // Long Immediate: low 32-bit mask
 3450 operand immL_32bits() %{
 3451   predicate(n->get_long() == 0xFFFFFFFFL);
 3452   match(ConL);
 3453   op_cost(0);
 3454 
 3455   format %{ %}
 3456   interface(CONST_INTER);
 3457 %}
 3458 
// Long Immediate: value fits in a sign-extended 32-bit immediate
 3460 operand immL32() %{
 3461   predicate(n->get_long() == (int)(n->get_long()));
 3462   match(ConL);
 3463   op_cost(20);
 3464 
 3465   format %{ %}
 3466   interface(CONST_INTER);
 3467 %}
 3468 
// Double Immediate zero
 3470 operand immDPR0() %{
 3471   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3472   // bug that generates code such that NaNs compare equal to 0.0
 3473   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3474   match(ConD);
 3475 
 3476   op_cost(5);
 3477   format %{ %}
 3478   interface(CONST_INTER);
 3479 %}
 3480 
 3481 // Double Immediate one
 3482 operand immDPR1() %{
 3483   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3484   match(ConD);
 3485 
 3486   op_cost(5);
 3487   format %{ %}
 3488   interface(CONST_INTER);
 3489 %}
 3490 
 3491 // Double Immediate
 3492 operand immDPR() %{
 3493   predicate(UseSSE<=1);
 3494   match(ConD);
 3495 
 3496   op_cost(5);
 3497   format %{ %}
 3498   interface(CONST_INTER);
 3499 %}
 3500 
 3501 operand immD() %{
 3502   predicate(UseSSE>=2);
 3503   match(ConD);
 3504 
 3505   op_cost(5);
 3506   format %{ %}
 3507   interface(CONST_INTER);
 3508 %}
 3509 
 3510 // Double Immediate zero
 3511 operand immD0() %{
  // Compare the bit pattern instead of the value: only +0.0 matches.  This
  // also sidesteps a VC++ bug that generates code such that NaNs compare equal
  // to 0.0 AND do not compare equal to -0.0.
 3515   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3516   match(ConD);
 3517 
 3518   format %{ %}
 3519   interface(CONST_INTER);
 3520 %}
 3521 
 3522 // Float Immediate zero
 3523 operand immFPR0() %{
 3524   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3525   match(ConF);
 3526 
 3527   op_cost(5);
 3528   format %{ %}
 3529   interface(CONST_INTER);
 3530 %}
 3531 
 3532 // Float Immediate one
 3533 operand immFPR1() %{
 3534   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3535   match(ConF);
 3536 
 3537   op_cost(5);
 3538   format %{ %}
 3539   interface(CONST_INTER);
 3540 %}
 3541 
 3542 // Float Immediate
 3543 operand immFPR() %{
 3544   predicate( UseSSE == 0 );
 3545   match(ConF);
 3546 
 3547   op_cost(5);
 3548   format %{ %}
 3549   interface(CONST_INTER);
 3550 %}
 3551 
 3552 // Float Immediate
 3553 operand immF() %{
 3554   predicate(UseSSE >= 1);
 3555   match(ConF);
 3556 
 3557   op_cost(5);
 3558   format %{ %}
 3559   interface(CONST_INTER);
 3560 %}
 3561 
 3562 // Float Immediate zero.  Zero and not -0.0
 3563 operand immF0() %{
 3564   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3565   match(ConF);
 3566 
 3567   op_cost(5);
 3568   format %{ %}
 3569   interface(CONST_INTER);
 3570 %}
 3571 
 3572 // Immediates for special shifts (sign extend)
 3573 
// Shift counts for sign extension
 3575 operand immI_16() %{
 3576   predicate( n->get_int() == 16 );
 3577   match(ConI);
 3578 
 3579   format %{ %}
 3580   interface(CONST_INTER);
 3581 %}
 3582 
 3583 operand immI_24() %{
 3584   predicate( n->get_int() == 24 );
 3585   match(ConI);
 3586 
 3587   format %{ %}
 3588   interface(CONST_INTER);
 3589 %}
 3590 
 3591 // Constant for byte-wide masking
 3592 operand immI_255() %{
 3593   predicate( n->get_int() == 255 );
 3594   match(ConI);
 3595 
 3596   format %{ %}
 3597   interface(CONST_INTER);
 3598 %}
 3599 
 3600 // Constant for short-wide masking
 3601 operand immI_65535() %{
 3602   predicate(n->get_int() == 65535);
 3603   match(ConI);
 3604 
 3605   format %{ %}
 3606   interface(CONST_INTER);
 3607 %}
 3608 
 3609 operand kReg()
 3610 %{
 3611   constraint(ALLOC_IN_RC(vectmask_reg));
 3612   match(RegVectMask);
 3613   format %{%}
 3614   interface(REG_INTER);
 3615 %}
 3616 
 3617 operand kReg_K1()
 3618 %{
 3619   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3620   match(RegVectMask);
 3621   format %{%}
 3622   interface(REG_INTER);
 3623 %}
 3624 
 3625 operand kReg_K2()
 3626 %{
 3627   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3628   match(RegVectMask);
 3629   format %{%}
 3630   interface(REG_INTER);
 3631 %}
 3632 
 3633 // Special Registers
 3634 operand kReg_K3()
 3635 %{
 3636   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3637   match(RegVectMask);
 3638   format %{%}
 3639   interface(REG_INTER);
 3640 %}
 3641 
 3642 operand kReg_K4()
 3643 %{
 3644   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3645   match(RegVectMask);
 3646   format %{%}
 3647   interface(REG_INTER);
 3648 %}
 3649 
 3650 operand kReg_K5()
 3651 %{
 3652   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3653   match(RegVectMask);
 3654   format %{%}
 3655   interface(REG_INTER);
 3656 %}
 3657 
 3658 operand kReg_K6()
 3659 %{
 3660   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3661   match(RegVectMask);
 3662   format %{%}
 3663   interface(REG_INTER);
 3664 %}
 3665 
 3666 // Special Registers
 3667 operand kReg_K7()
 3668 %{
 3669   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3670   match(RegVectMask);
 3671   format %{%}
 3672   interface(REG_INTER);
 3673 %}
 3674 
 3675 // Register Operands
 3676 // Integer Register
 3677 operand rRegI() %{
 3678   constraint(ALLOC_IN_RC(int_reg));
 3679   match(RegI);
 3680   match(xRegI);
 3681   match(eAXRegI);
 3682   match(eBXRegI);
 3683   match(eCXRegI);
 3684   match(eDXRegI);
 3685   match(eDIRegI);
 3686   match(eSIRegI);
 3687 
 3688   format %{ %}
 3689   interface(REG_INTER);
 3690 %}
 3691 
 3692 // Subset of Integer Register
 3693 operand xRegI(rRegI reg) %{
 3694   constraint(ALLOC_IN_RC(int_x_reg));
 3695   match(reg);
 3696   match(eAXRegI);
 3697   match(eBXRegI);
 3698   match(eCXRegI);
 3699   match(eDXRegI);
 3700 
 3701   format %{ %}
 3702   interface(REG_INTER);
 3703 %}
 3704 
 3705 // Special Registers
 3706 operand eAXRegI(xRegI reg) %{
 3707   constraint(ALLOC_IN_RC(eax_reg));
 3708   match(reg);
 3709   match(rRegI);
 3710 
 3711   format %{ "EAX" %}
 3712   interface(REG_INTER);
 3713 %}
 3714 
 3715 // Special Registers
 3716 operand eBXRegI(xRegI reg) %{
 3717   constraint(ALLOC_IN_RC(ebx_reg));
 3718   match(reg);
 3719   match(rRegI);
 3720 
 3721   format %{ "EBX" %}
 3722   interface(REG_INTER);
 3723 %}
 3724 
 3725 operand eCXRegI(xRegI reg) %{
 3726   constraint(ALLOC_IN_RC(ecx_reg));
 3727   match(reg);
 3728   match(rRegI);
 3729 
 3730   format %{ "ECX" %}
 3731   interface(REG_INTER);
 3732 %}
 3733 
 3734 operand eDXRegI(xRegI reg) %{
 3735   constraint(ALLOC_IN_RC(edx_reg));
 3736   match(reg);
 3737   match(rRegI);
 3738 
 3739   format %{ "EDX" %}
 3740   interface(REG_INTER);
 3741 %}
 3742 
 3743 operand eDIRegI(xRegI reg) %{
 3744   constraint(ALLOC_IN_RC(edi_reg));
 3745   match(reg);
 3746   match(rRegI);
 3747 
 3748   format %{ "EDI" %}
 3749   interface(REG_INTER);
 3750 %}
 3751 
 3752 operand naxRegI() %{
 3753   constraint(ALLOC_IN_RC(nax_reg));
 3754   match(RegI);
 3755   match(eCXRegI);
 3756   match(eDXRegI);
 3757   match(eSIRegI);
 3758   match(eDIRegI);
 3759 
 3760   format %{ %}
 3761   interface(REG_INTER);
 3762 %}
 3763 
 3764 operand nadxRegI() %{
 3765   constraint(ALLOC_IN_RC(nadx_reg));
 3766   match(RegI);
 3767   match(eBXRegI);
 3768   match(eCXRegI);
 3769   match(eSIRegI);
 3770   match(eDIRegI);
 3771 
 3772   format %{ %}
 3773   interface(REG_INTER);
 3774 %}
 3775 
 3776 operand ncxRegI() %{
 3777   constraint(ALLOC_IN_RC(ncx_reg));
 3778   match(RegI);
 3779   match(eAXRegI);
 3780   match(eDXRegI);
 3781   match(eSIRegI);
 3782   match(eDIRegI);
 3783 
 3784   format %{ %}
 3785   interface(REG_INTER);
 3786 %}
 3787 
// This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3790 operand eSIRegI(xRegI reg) %{
 3791    constraint(ALLOC_IN_RC(esi_reg));
 3792    match(reg);
 3793    match(rRegI);
 3794 
 3795    format %{ "ESI" %}
 3796    interface(REG_INTER);
 3797 %}
 3798 
 3799 // Pointer Register
 3800 operand anyRegP() %{
 3801   constraint(ALLOC_IN_RC(any_reg));
 3802   match(RegP);
 3803   match(eAXRegP);
 3804   match(eBXRegP);
 3805   match(eCXRegP);
 3806   match(eDIRegP);
 3807   match(eRegP);
 3808 
 3809   format %{ %}
 3810   interface(REG_INTER);
 3811 %}
 3812 
 3813 operand eRegP() %{
 3814   constraint(ALLOC_IN_RC(int_reg));
 3815   match(RegP);
 3816   match(eAXRegP);
 3817   match(eBXRegP);
 3818   match(eCXRegP);
 3819   match(eDIRegP);
 3820 
 3821   format %{ %}
 3822   interface(REG_INTER);
 3823 %}
 3824 
 3825 operand rRegP() %{
 3826   constraint(ALLOC_IN_RC(int_reg));
 3827   match(RegP);
 3828   match(eAXRegP);
 3829   match(eBXRegP);
 3830   match(eCXRegP);
 3831   match(eDIRegP);
 3832 
 3833   format %{ %}
 3834   interface(REG_INTER);
 3835 %}
 3836 
// On Windows 95, EBP is not safe to use for implicit null tests.
 3838 operand eRegP_no_EBP() %{
 3839   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3840   match(RegP);
 3841   match(eAXRegP);
 3842   match(eBXRegP);
 3843   match(eCXRegP);
 3844   match(eDIRegP);
 3845 
 3846   op_cost(100);
 3847   format %{ %}
 3848   interface(REG_INTER);
 3849 %}
 3850 
 3851 operand naxRegP() %{
 3852   constraint(ALLOC_IN_RC(nax_reg));
 3853   match(RegP);
 3854   match(eBXRegP);
 3855   match(eDXRegP);
 3856   match(eCXRegP);
 3857   match(eSIRegP);
 3858   match(eDIRegP);
 3859 
 3860   format %{ %}
 3861   interface(REG_INTER);
 3862 %}
 3863 
 3864 operand nabxRegP() %{
 3865   constraint(ALLOC_IN_RC(nabx_reg));
 3866   match(RegP);
 3867   match(eCXRegP);
 3868   match(eDXRegP);
 3869   match(eSIRegP);
 3870   match(eDIRegP);
 3871 
 3872   format %{ %}
 3873   interface(REG_INTER);
 3874 %}
 3875 
 3876 operand pRegP() %{
 3877   constraint(ALLOC_IN_RC(p_reg));
 3878   match(RegP);
 3879   match(eBXRegP);
 3880   match(eDXRegP);
 3881   match(eSIRegP);
 3882   match(eDIRegP);
 3883 
 3884   format %{ %}
 3885   interface(REG_INTER);
 3886 %}
 3887 
 3888 // Special Registers
 3889 // Return a pointer value
 3890 operand eAXRegP(eRegP reg) %{
 3891   constraint(ALLOC_IN_RC(eax_reg));
 3892   match(reg);
 3893   format %{ "EAX" %}
 3894   interface(REG_INTER);
 3895 %}
 3896 
 3897 // Used in AtomicAdd
 3898 operand eBXRegP(eRegP reg) %{
 3899   constraint(ALLOC_IN_RC(ebx_reg));
 3900   match(reg);
 3901   format %{ "EBX" %}
 3902   interface(REG_INTER);
 3903 %}
 3904 
 3905 // Tail-call (interprocedural jump) to interpreter
 3906 operand eCXRegP(eRegP reg) %{
 3907   constraint(ALLOC_IN_RC(ecx_reg));
 3908   match(reg);
 3909   format %{ "ECX" %}
 3910   interface(REG_INTER);
 3911 %}
 3912 
 3913 operand eDXRegP(eRegP reg) %{
 3914   constraint(ALLOC_IN_RC(edx_reg));
 3915   match(reg);
 3916   format %{ "EDX" %}
 3917   interface(REG_INTER);
 3918 %}
 3919 
 3920 operand eSIRegP(eRegP reg) %{
 3921   constraint(ALLOC_IN_RC(esi_reg));
 3922   match(reg);
 3923   format %{ "ESI" %}
 3924   interface(REG_INTER);
 3925 %}
 3926 
 3927 // Used in rep stosw
 3928 operand eDIRegP(eRegP reg) %{
 3929   constraint(ALLOC_IN_RC(edi_reg));
 3930   match(reg);
 3931   format %{ "EDI" %}
 3932   interface(REG_INTER);
 3933 %}
 3934 
 3935 operand eRegL() %{
 3936   constraint(ALLOC_IN_RC(long_reg));
 3937   match(RegL);
 3938   match(eADXRegL);
 3939 
 3940   format %{ %}
 3941   interface(REG_INTER);
 3942 %}
 3943 
 3944 operand eADXRegL( eRegL reg ) %{
 3945   constraint(ALLOC_IN_RC(eadx_reg));
 3946   match(reg);
 3947 
 3948   format %{ "EDX:EAX" %}
 3949   interface(REG_INTER);
 3950 %}
 3951 
 3952 operand eBCXRegL( eRegL reg ) %{
 3953   constraint(ALLOC_IN_RC(ebcx_reg));
 3954   match(reg);
 3955 
 3956   format %{ "EBX:ECX" %}
 3957   interface(REG_INTER);
 3958 %}
 3959 
 3960 operand eBDPRegL( eRegL reg ) %{
 3961   constraint(ALLOC_IN_RC(ebpd_reg));
 3962   match(reg);
 3963 
 3964   format %{ "EBP:EDI" %}
 3965   interface(REG_INTER);
 3966 %}
 3967 // Special case for integer high multiply
 3968 operand eADXRegL_low_only() %{
 3969   constraint(ALLOC_IN_RC(eadx_reg));
 3970   match(RegL);
 3971 
 3972   format %{ "EAX" %}
 3973   interface(REG_INTER);
 3974 %}
 3975 
 3976 // Flags register, used as output of compare instructions
 3977 operand rFlagsReg() %{
 3978   constraint(ALLOC_IN_RC(int_flags));
 3979   match(RegFlags);
 3980 
 3981   format %{ "EFLAGS" %}
 3982   interface(REG_INTER);
 3983 %}
 3984 
 3985 // Flags register, used as output of compare instructions
 3986 operand eFlagsReg() %{
 3987   constraint(ALLOC_IN_RC(int_flags));
 3988   match(RegFlags);
 3989 
 3990   format %{ "EFLAGS" %}
 3991   interface(REG_INTER);
 3992 %}
 3993 
 3994 // Flags register, used as output of FLOATING POINT compare instructions
 3995 operand eFlagsRegU() %{
 3996   constraint(ALLOC_IN_RC(int_flags));
 3997   match(RegFlags);
 3998 
 3999   format %{ "EFLAGS_U" %}
 4000   interface(REG_INTER);
 4001 %}
 4002 
 4003 operand eFlagsRegUCF() %{
 4004   constraint(ALLOC_IN_RC(int_flags));
 4005   match(RegFlags);
 4006   predicate(false);
 4007 
 4008   format %{ "EFLAGS_U_CF" %}
 4009   interface(REG_INTER);
 4010 %}
 4011 
 4012 // Condition Code Register used by long compare
 4013 operand flagsReg_long_LTGE() %{
 4014   constraint(ALLOC_IN_RC(int_flags));
 4015   match(RegFlags);
 4016   format %{ "FLAGS_LTGE" %}
 4017   interface(REG_INTER);
 4018 %}
 4019 operand flagsReg_long_EQNE() %{
 4020   constraint(ALLOC_IN_RC(int_flags));
 4021   match(RegFlags);
 4022   format %{ "FLAGS_EQNE" %}
 4023   interface(REG_INTER);
 4024 %}
 4025 operand flagsReg_long_LEGT() %{
 4026   constraint(ALLOC_IN_RC(int_flags));
 4027   match(RegFlags);
 4028   format %{ "FLAGS_LEGT" %}
 4029   interface(REG_INTER);
 4030 %}
 4031 
 4032 // Condition Code Register used by unsigned long compare
 4033 operand flagsReg_ulong_LTGE() %{
 4034   constraint(ALLOC_IN_RC(int_flags));
 4035   match(RegFlags);
 4036   format %{ "FLAGS_U_LTGE" %}
 4037   interface(REG_INTER);
 4038 %}
 4039 operand flagsReg_ulong_EQNE() %{
 4040   constraint(ALLOC_IN_RC(int_flags));
 4041   match(RegFlags);
 4042   format %{ "FLAGS_U_EQNE" %}
 4043   interface(REG_INTER);
 4044 %}
 4045 operand flagsReg_ulong_LEGT() %{
 4046   constraint(ALLOC_IN_RC(int_flags));
 4047   match(RegFlags);
 4048   format %{ "FLAGS_U_LEGT" %}
 4049   interface(REG_INTER);
 4050 %}
 4051 
 4052 // Float register operands
 4053 operand regDPR() %{
 4054   predicate( UseSSE < 2 );
 4055   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4056   match(RegD);
 4057   match(regDPR1);
 4058   match(regDPR2);
 4059   format %{ %}
 4060   interface(REG_INTER);
 4061 %}
 4062 
 4063 operand regDPR1(regDPR reg) %{
 4064   predicate( UseSSE < 2 );
 4065   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4066   match(reg);
 4067   format %{ "FPR1" %}
 4068   interface(REG_INTER);
 4069 %}
 4070 
 4071 operand regDPR2(regDPR reg) %{
 4072   predicate( UseSSE < 2 );
 4073   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4074   match(reg);
 4075   format %{ "FPR2" %}
 4076   interface(REG_INTER);
 4077 %}
 4078 
 4079 operand regnotDPR1(regDPR reg) %{
 4080   predicate( UseSSE < 2 );
 4081   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4082   match(reg);
 4083   format %{ %}
 4084   interface(REG_INTER);
 4085 %}
 4086 
 4087 // Float register operands
 4088 operand regFPR() %{
 4089   predicate( UseSSE < 2 );
 4090   constraint(ALLOC_IN_RC(fp_flt_reg));
 4091   match(RegF);
 4092   match(regFPR1);
 4093   format %{ %}
 4094   interface(REG_INTER);
 4095 %}
 4096 
 4097 // Float register operands
 4098 operand regFPR1(regFPR reg) %{
 4099   predicate( UseSSE < 2 );
 4100   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4101   match(reg);
 4102   format %{ "FPR1" %}
 4103   interface(REG_INTER);
 4104 %}
 4105 
 4106 // XMM Float register operands
 4107 operand regF() %{
 4108   predicate( UseSSE>=1 );
 4109   constraint(ALLOC_IN_RC(float_reg_legacy));
 4110   match(RegF);
 4111   format %{ %}
 4112   interface(REG_INTER);
 4113 %}
 4114 
 4115 operand legRegF() %{
 4116   predicate( UseSSE>=1 );
 4117   constraint(ALLOC_IN_RC(float_reg_legacy));
 4118   match(RegF);
 4119   format %{ %}
 4120   interface(REG_INTER);
 4121 %}
 4122 
 4123 // Float register operands
 4124 operand vlRegF() %{
 4125    constraint(ALLOC_IN_RC(float_reg_vl));
 4126    match(RegF);
 4127 
 4128    format %{ %}
 4129    interface(REG_INTER);
 4130 %}
 4131 
 4132 // XMM Double register operands
 4133 operand regD() %{
 4134   predicate( UseSSE>=2 );
 4135   constraint(ALLOC_IN_RC(double_reg_legacy));
 4136   match(RegD);
 4137   format %{ %}
 4138   interface(REG_INTER);
 4139 %}
 4140 
 4141 // Double register operands
 4142 operand legRegD() %{
 4143   predicate( UseSSE>=2 );
 4144   constraint(ALLOC_IN_RC(double_reg_legacy));
 4145   match(RegD);
 4146   format %{ %}
 4147   interface(REG_INTER);
 4148 %}
 4149 
 4150 operand vlRegD() %{
 4151    constraint(ALLOC_IN_RC(double_reg_vl));
 4152    match(RegD);
 4153 
 4154    format %{ %}
 4155    interface(REG_INTER);
 4156 %}
 4157 
 4158 //----------Memory Operands----------------------------------------------------
 4159 // Direct Memory Operand
 4160 operand direct(immP addr) %{
 4161   match(addr);
 4162 
 4163   format %{ "[$addr]" %}
 4164   interface(MEMORY_INTER) %{
 4165     base(0xFFFFFFFF);
 4166     index(0x4);
 4167     scale(0x0);
 4168     disp($addr);
 4169   %}
 4170 %}
 4171 
 4172 // Indirect Memory Operand
 4173 operand indirect(eRegP reg) %{
 4174   constraint(ALLOC_IN_RC(int_reg));
 4175   match(reg);
 4176 
 4177   format %{ "[$reg]" %}
 4178   interface(MEMORY_INTER) %{
 4179     base($reg);
 4180     index(0x4);
 4181     scale(0x0);
 4182     disp(0x0);
 4183   %}
 4184 %}
 4185 
 4186 // Indirect Memory Plus Short Offset Operand
 4187 operand indOffset8(eRegP reg, immI8 off) %{
 4188   match(AddP reg off);
 4189 
 4190   format %{ "[$reg + $off]" %}
 4191   interface(MEMORY_INTER) %{
 4192     base($reg);
 4193     index(0x4);
 4194     scale(0x0);
 4195     disp($off);
 4196   %}
 4197 %}
 4198 
 4199 // Indirect Memory Plus Long Offset Operand
 4200 operand indOffset32(eRegP reg, immI off) %{
 4201   match(AddP reg off);
 4202 
 4203   format %{ "[$reg + $off]" %}
 4204   interface(MEMORY_INTER) %{
 4205     base($reg);
 4206     index(0x4);
 4207     scale(0x0);
 4208     disp($off);
 4209   %}
 4210 %}
 4211 
 4212 // Indirect Memory Plus Long Offset Operand
 4213 operand indOffset32X(rRegI reg, immP off) %{
 4214   match(AddP off reg);
 4215 
 4216   format %{ "[$reg + $off]" %}
 4217   interface(MEMORY_INTER) %{
 4218     base($reg);
 4219     index(0x4);
 4220     scale(0x0);
 4221     disp($off);
 4222   %}
 4223 %}
 4224 
 4225 // Indirect Memory Plus Index Register Plus Offset Operand
 4226 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4227   match(AddP (AddP reg ireg) off);
 4228 
 4229   op_cost(10);
 4230   format %{"[$reg + $off + $ireg]" %}
 4231   interface(MEMORY_INTER) %{
 4232     base($reg);
 4233     index($ireg);
 4234     scale(0x0);
 4235     disp($off);
 4236   %}
 4237 %}
 4238 
 4239 // Indirect Memory Plus Index Register Plus Offset Operand
 4240 operand indIndex(eRegP reg, rRegI ireg) %{
 4241   match(AddP reg ireg);
 4242 
 4243   op_cost(10);
 4244   format %{"[$reg + $ireg]" %}
 4245   interface(MEMORY_INTER) %{
 4246     base($reg);
 4247     index($ireg);
 4248     scale(0x0);
 4249     disp(0x0);
 4250   %}
 4251 %}
 4252 
 4253 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
 4255 // // -------------------------------------------------------------------------
 4256 // // Scaled Memory Operands
 4257 // // Indirect Memory Times Scale Plus Offset Operand
 4258 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4259 //   match(AddP off (LShiftI ireg scale));
 4260 //
 4261 //   op_cost(10);
 4262 //   format %{"[$off + $ireg << $scale]" %}
 4263 //   interface(MEMORY_INTER) %{
 4264 //     base(0x4);
 4265 //     index($ireg);
 4266 //     scale($scale);
 4267 //     disp($off);
 4268 //   %}
 4269 // %}
 4270 
 4271 // Indirect Memory Times Scale Plus Index Register
 4272 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4273   match(AddP reg (LShiftI ireg scale));
 4274 
 4275   op_cost(10);
 4276   format %{"[$reg + $ireg << $scale]" %}
 4277   interface(MEMORY_INTER) %{
 4278     base($reg);
 4279     index($ireg);
 4280     scale($scale);
 4281     disp(0x0);
 4282   %}
 4283 %}
 4284 
 4285 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4286 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4287   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4288 
 4289   op_cost(10);
 4290   format %{"[$reg + $off + $ireg << $scale]" %}
 4291   interface(MEMORY_INTER) %{
 4292     base($reg);
 4293     index($ireg);
 4294     scale($scale);
 4295     disp($off);
 4296   %}
 4297 %}
 4298 
 4299 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
 4301 // the first word of the long.  If the load-long destination overlaps with
 4302 // registers used in the addressing expression, the 2nd half will be loaded
 4303 // from a clobbered address.  Fix this by requiring that load-long use
 4304 // address registers that do not overlap with the load-long target.
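// For example (illustrative register choice), if the base register were also
// the low half of the destination:
//   mov  eax, [eax]        // first half overwrites the base register
//   mov  edx, [eax+4]      // second half now reads from a clobbered address
// hence the dedicated, non-overlapping base register class below.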
 4305 
 4306 // load-long support
 4307 operand load_long_RegP() %{
 4308   constraint(ALLOC_IN_RC(esi_reg));
 4309   match(RegP);
 4310   match(eSIRegP);
 4311   op_cost(100);
 4312   format %{  %}
 4313   interface(REG_INTER);
 4314 %}
 4315 
 4316 // Indirect Memory Operand Long
 4317 operand load_long_indirect(load_long_RegP reg) %{
 4318   constraint(ALLOC_IN_RC(esi_reg));
 4319   match(reg);
 4320 
 4321   format %{ "[$reg]" %}
 4322   interface(MEMORY_INTER) %{
 4323     base($reg);
 4324     index(0x4);
 4325     scale(0x0);
 4326     disp(0x0);
 4327   %}
 4328 %}
 4329 
 4330 // Indirect Memory Plus Long Offset Operand
 4331 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4332   match(AddP reg off);
 4333 
 4334   format %{ "[$reg + $off]" %}
 4335   interface(MEMORY_INTER) %{
 4336     base($reg);
 4337     index(0x4);
 4338     scale(0x0);
 4339     disp($off);
 4340   %}
 4341 %}
 4342 
 4343 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4344 
 4345 
 4346 //----------Special Memory Operands--------------------------------------------
 4347 // Stack Slot Operand - This operand is used for loading and storing temporary
 4348 //                      values on the stack where a match requires a value to
 4349 //                      flow through memory.
 4350 operand stackSlotP(sRegP reg) %{
 4351   constraint(ALLOC_IN_RC(stack_slots));
 4352   // No match rule because this operand is only generated in matching
 4353   format %{ "[$reg]" %}
 4354   interface(MEMORY_INTER) %{
 4355     base(0x4);   // ESP
 4356     index(0x4);  // No Index
 4357     scale(0x0);  // No Scale
 4358     disp($reg);  // Stack Offset
 4359   %}
 4360 %}
 4361 
 4362 operand stackSlotI(sRegI reg) %{
 4363   constraint(ALLOC_IN_RC(stack_slots));
 4364   // No match rule because this operand is only generated in matching
 4365   format %{ "[$reg]" %}
 4366   interface(MEMORY_INTER) %{
 4367     base(0x4);   // ESP
 4368     index(0x4);  // No Index
 4369     scale(0x0);  // No Scale
 4370     disp($reg);  // Stack Offset
 4371   %}
 4372 %}
 4373 
 4374 operand stackSlotF(sRegF reg) %{
 4375   constraint(ALLOC_IN_RC(stack_slots));
 4376   // No match rule because this operand is only generated in matching
 4377   format %{ "[$reg]" %}
 4378   interface(MEMORY_INTER) %{
 4379     base(0x4);   // ESP
 4380     index(0x4);  // No Index
 4381     scale(0x0);  // No Scale
 4382     disp($reg);  // Stack Offset
 4383   %}
 4384 %}
 4385 
 4386 operand stackSlotD(sRegD reg) %{
 4387   constraint(ALLOC_IN_RC(stack_slots));
 4388   // No match rule because this operand is only generated in matching
 4389   format %{ "[$reg]" %}
 4390   interface(MEMORY_INTER) %{
 4391     base(0x4);   // ESP
 4392     index(0x4);  // No Index
 4393     scale(0x0);  // No Scale
 4394     disp($reg);  // Stack Offset
 4395   %}
 4396 %}
 4397 
 4398 operand stackSlotL(sRegL reg) %{
 4399   constraint(ALLOC_IN_RC(stack_slots));
 4400   // No match rule because this operand is only generated in matching
 4401   format %{ "[$reg]" %}
 4402   interface(MEMORY_INTER) %{
 4403     base(0x4);   // ESP
 4404     index(0x4);  // No Index
 4405     scale(0x0);  // No Scale
 4406     disp($reg);  // Stack Offset
 4407   %}
 4408 %}
 4409 
 4410 //----------Conditional Branch Operands----------------------------------------
 4411 // Comparison Op  - This is the operation of the comparison, and is limited to
 4412 //                  the following set of codes:
 4413 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4414 //
 4415 // Other attributes of the comparison, such as unsignedness, are specified
 4416 // by the comparison instruction that sets a condition code flags register.
 4417 // That result is represented by a flags operand whose subtype is appropriate
 4418 // to the unsignedness (etc.) of the comparison.
 4419 //
 4420 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4421 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4422 // by matching a specific subtype of Bool operand below, such as cmpOpU.
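// For example, an instruction matching (If cmpOp (CmpI a b)) takes the
// condition code from the cmpOp operand below and adds it to the Jcc opcode
// base (0x70 short form, 0x0F 0x80 long form), so "less" (0xC) encodes as
// JL (0x7C / 0x0F 0x8C).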
 4423 
 4424 // Comparison Code
 4425 operand cmpOp() %{
 4426   match(Bool);
 4427 
 4428   format %{ "" %}
 4429   interface(COND_INTER) %{
 4430     equal(0x4, "e");
 4431     not_equal(0x5, "ne");
 4432     less(0xC, "l");
 4433     greater_equal(0xD, "ge");
 4434     less_equal(0xE, "le");
 4435     greater(0xF, "g");
 4436     overflow(0x0, "o");
 4437     no_overflow(0x1, "no");
 4438   %}
 4439 %}
 4440 
 4441 // Comparison Code, unsigned compare.  Used by FP also, with
 4442 // C2 (unordered) turned into GT or LT already.  The other bits
 4443 // C0 and C3 are turned into Carry & Zero flags.
 4444 operand cmpOpU() %{
 4445   match(Bool);
 4446 
 4447   format %{ "" %}
 4448   interface(COND_INTER) %{
 4449     equal(0x4, "e");
 4450     not_equal(0x5, "ne");
 4451     less(0x2, "b");
 4452     greater_equal(0x3, "nb");
 4453     less_equal(0x6, "be");
 4454     greater(0x7, "nbe");
 4455     overflow(0x0, "o");
 4456     no_overflow(0x1, "no");
 4457   %}
 4458 %}
 4459 
 4460 // Floating comparisons that don't require any fixup for the unordered case
 4461 operand cmpOpUCF() %{
 4462   match(Bool);
 4463   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4464             n->as_Bool()->_test._test == BoolTest::ge ||
 4465             n->as_Bool()->_test._test == BoolTest::le ||
 4466             n->as_Bool()->_test._test == BoolTest::gt);
 4467   format %{ "" %}
 4468   interface(COND_INTER) %{
 4469     equal(0x4, "e");
 4470     not_equal(0x5, "ne");
 4471     less(0x2, "b");
 4472     greater_equal(0x3, "nb");
 4473     less_equal(0x6, "be");
 4474     greater(0x7, "nbe");
 4475     overflow(0x0, "o");
 4476     no_overflow(0x1, "no");
 4477   %}
 4478 %}
 4479 
 4480 
 4481 // Floating comparisons that can be fixed up with extra conditional jumps
 4482 operand cmpOpUCF2() %{
 4483   match(Bool);
 4484   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4485             n->as_Bool()->_test._test == BoolTest::eq);
 4486   format %{ "" %}
 4487   interface(COND_INTER) %{
 4488     equal(0x4, "e");
 4489     not_equal(0x5, "ne");
 4490     less(0x2, "b");
 4491     greater_equal(0x3, "nb");
 4492     less_equal(0x6, "be");
 4493     greater(0x7, "nbe");
 4494     overflow(0x0, "o");
 4495     no_overflow(0x1, "no");
 4496   %}
 4497 %}
 4498 
 4499 // Comparison Code for FP conditional move
 4500 operand cmpOp_fcmov() %{
 4501   match(Bool);
 4502 
 4503   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4504             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4505   format %{ "" %}
 4506   interface(COND_INTER) %{
 4507     equal        (0x0C8);
 4508     not_equal    (0x1C8);
 4509     less         (0x0C0);
 4510     greater_equal(0x1C0);
 4511     less_equal   (0x0D0);
 4512     greater      (0x1D0);
 4513     overflow(0x0, "o"); // not really supported by the instruction
 4514     no_overflow(0x1, "no"); // not really supported by the instruction
 4515   %}
 4516 %}
 4517 
 4518 // Comparison Code used in long compares
 4519 operand cmpOp_commute() %{
 4520   match(Bool);
 4521 
 4522   format %{ "" %}
 4523   interface(COND_INTER) %{
 4524     equal(0x4, "e");
 4525     not_equal(0x5, "ne");
 4526     less(0xF, "g");
 4527     greater_equal(0xE, "le");
 4528     less_equal(0xD, "ge");
 4529     greater(0xC, "l");
 4530     overflow(0x0, "o");
 4531     no_overflow(0x1, "no");
 4532   %}
 4533 %}
 4534 
 4535 // Comparison Code used in unsigned long compares
 4536 operand cmpOpU_commute() %{
 4537   match(Bool);
 4538 
 4539   format %{ "" %}
 4540   interface(COND_INTER) %{
 4541     equal(0x4, "e");
 4542     not_equal(0x5, "ne");
 4543     less(0x7, "nbe");
 4544     greater_equal(0x6, "be");
 4545     less_equal(0x3, "nb");
 4546     greater(0x2, "b");
 4547     overflow(0x0, "o");
 4548     no_overflow(0x1, "no");
 4549   %}
 4550 %}
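
      // The *_commute operands above mirror each signed/unsigned condition
      // (l<->g, le<->ge, b<->nbe, be<->nb): they are meant for instructions
      // that test the flags of a compare performed with its two inputs
      // swapped, so the mirrored condition preserves the original meaning.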
 4551 
 4552 //----------OPERAND CLASSES----------------------------------------------------
 4553 // Operand Classes are groups of operands that are used to simplify
 4554 // instruction definitions by not requiring the AD writer to specify separate
 4555 // instructions for every form of operand when the instruction accepts
 4556 // multiple operand types with the same basic encoding and format.  The classic
 4557 // case of this is memory operands.
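      //
      // For example, an instruct whose operand is declared with the "memory"
      // opclass below (such as loadI further down) matches any of the listed
      // addressing forms, so no separate instruct is needed per addressing
      // mode.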
 4558 
 4559 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4560                indIndex, indIndexScale, indIndexScaleOffset);
 4561 
 4562 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4563 // This means some kind of offset is always required and you cannot use
 4564 // an oop as the offset (done when working on static globals).
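      // For example, loadL further below is formatted as "MOV $dst.lo,$mem" /
      // "MOV $dst.hi,$mem+4", so the operand must admit the extra +4
      // displacement.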
 4565 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4566                     indIndex, indIndexScale, indIndexScaleOffset);
 4567 
 4568 
 4569 //----------PIPELINE-----------------------------------------------------------
 4570 // Rules which define the behavior of the target architecture's pipeline.
 4571 pipeline %{
 4572 
 4573 //----------ATTRIBUTES---------------------------------------------------------
 4574 attributes %{
 4575   variable_size_instructions;        // Variable-sized instructions
 4576   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4577   instruction_unit_size = 1;         // An instruction is 1 byte long
 4578   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4579   instruction_fetch_units = 1;       // of 16 bytes
 4580 
 4581   // List of nop instructions
 4582   nops( MachNop );
 4583 %}
 4584 
 4585 //----------RESOURCES----------------------------------------------------------
 4586 // Resources are the functional units available to the machine
 4587 
 4588 // Generic P2/P3 pipeline
 4589 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4590 // 3 instructions decoded per cycle.
 4591 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4592 // 2 ALU ops, only ALU0 handles mul/div instructions.
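      // A pipe_class request for ALU can be filled by either ALU0 or ALU1,
      // while a class that names ALU0 explicitly (e.g. ialu_reg_reg_alu0
      // below) needs that specific unit; likewise DECODE (any of D0-D2)
      // versus the big decoder D0.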
 4593 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4594            MS0, MS1, MEM = MS0 | MS1,
 4595            BR, FPU,
 4596            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4597 
 4598 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4599 // Pipeline Description specifies the stages in the machine's pipeline
 4600 
 4601 // Generic P2/P3 pipeline
 4602 pipe_desc(S0, S1, S2, S3, S4, S5);
 4603 
 4604 //----------PIPELINE CLASSES---------------------------------------------------
 4605 // Pipeline Classes describe the stages in which input and output are
 4606 // referenced by the hardware pipeline.
 4607 
 4608 // Naming convention: ialu or fpu
 4609 // Then: _reg
 4610 // Then: _reg if there is a 2nd register
 4611 // Then: _long if it's a pair of instructions implementing a long
 4612 // Then: _fat if it requires the big decoder
 4613 //   Or: _mem if it requires the big decoder and a memory unit.
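      //
      // For example, ialu_reg_long_fat below is an integer ALU operation with
      // a register destination, expanded as a pair of instructions for a long,
      // and requiring the big decoder; fpu_reg_mem is an FPU operation on a
      // register and a memory input.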
 4614 
 4615 // Integer ALU reg operation
 4616 pipe_class ialu_reg(rRegI dst) %{
 4617     single_instruction;
 4618     dst    : S4(write);
 4619     dst    : S3(read);
 4620     DECODE : S0;        // any decoder
 4621     ALU    : S3;        // any alu
 4622 %}
 4623 
 4624 // Long ALU reg operation
 4625 pipe_class ialu_reg_long(eRegL dst) %{
 4626     instruction_count(2);
 4627     dst    : S4(write);
 4628     dst    : S3(read);
 4629     DECODE : S0(2);     // any 2 decoders
 4630     ALU    : S3(2);     // both alus
 4631 %}
 4632 
 4633 // Integer ALU reg operation using big decoder
 4634 pipe_class ialu_reg_fat(rRegI dst) %{
 4635     single_instruction;
 4636     dst    : S4(write);
 4637     dst    : S3(read);
 4638     D0     : S0;        // big decoder only
 4639     ALU    : S3;        // any alu
 4640 %}
 4641 
 4642 // Long ALU reg operation using big decoder
 4643 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4644     instruction_count(2);
 4645     dst    : S4(write);
 4646     dst    : S3(read);
 4647     D0     : S0(2);     // big decoder only; twice
 4648     ALU    : S3(2);     // any 2 alus
 4649 %}
 4650 
 4651 // Integer ALU reg-reg operation
 4652 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4653     single_instruction;
 4654     dst    : S4(write);
 4655     src    : S3(read);
 4656     DECODE : S0;        // any decoder
 4657     ALU    : S3;        // any alu
 4658 %}
 4659 
 4660 // Long ALU reg-reg operation
 4661 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4662     instruction_count(2);
 4663     dst    : S4(write);
 4664     src    : S3(read);
 4665     DECODE : S0(2);     // any 2 decoders
 4666     ALU    : S3(2);     // both alus
 4667 %}
 4668 
 4669 // Integer ALU reg-reg operation
 4670 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4671     single_instruction;
 4672     dst    : S4(write);
 4673     src    : S3(read);
 4674     D0     : S0;        // big decoder only
 4675     ALU    : S3;        // any alu
 4676 %}
 4677 
 4678 // Long ALU reg-reg operation
 4679 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4680     instruction_count(2);
 4681     dst    : S4(write);
 4682     src    : S3(read);
 4683     D0     : S0(2);     // big decoder only; twice
 4684     ALU    : S3(2);     // both alus
 4685 %}
 4686 
 4687 // Integer ALU reg-mem operation
 4688 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4689     single_instruction;
 4690     dst    : S5(write);
 4691     mem    : S3(read);
 4692     D0     : S0;        // big decoder only
 4693     ALU    : S4;        // any alu
 4694     MEM    : S3;        // any mem
 4695 %}
 4696 
 4697 // Long ALU reg-mem operation
 4698 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4699     instruction_count(2);
 4700     dst    : S5(write);
 4701     mem    : S3(read);
 4702     D0     : S0(2);     // big decoder only; twice
 4703     ALU    : S4(2);     // any 2 alus
 4704     MEM    : S3(2);     // both mems
 4705 %}
 4706 
 4707 // Integer mem operation (prefetch)
 4708 pipe_class ialu_mem(memory mem)
 4709 %{
 4710     single_instruction;
 4711     mem    : S3(read);
 4712     D0     : S0;        // big decoder only
 4713     MEM    : S3;        // any mem
 4714 %}
 4715 
 4716 // Integer Store to Memory
 4717 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4718     single_instruction;
 4719     mem    : S3(read);
 4720     src    : S5(read);
 4721     D0     : S0;        // big decoder only
 4722     ALU    : S4;        // any alu
 4723     MEM    : S3;
 4724 %}
 4725 
 4726 // Long Store to Memory
 4727 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4728     instruction_count(2);
 4729     mem    : S3(read);
 4730     src    : S5(read);
 4731     D0     : S0(2);     // big decoder only; twice
 4732     ALU    : S4(2);     // any 2 alus
 4733     MEM    : S3(2);     // Both mems
 4734 %}
 4735 
 4736 // Integer Store to Memory
 4737 pipe_class ialu_mem_imm(memory mem) %{
 4738     single_instruction;
 4739     mem    : S3(read);
 4740     D0     : S0;        // big decoder only
 4741     ALU    : S4;        // any alu
 4742     MEM    : S3;
 4743 %}
 4744 
 4745 // Integer ALU0 reg-reg operation
 4746 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4747     single_instruction;
 4748     dst    : S4(write);
 4749     src    : S3(read);
 4750     D0     : S0;        // Big decoder only
 4751     ALU0   : S3;        // only alu0
 4752 %}
 4753 
 4754 // Integer ALU0 reg-mem operation
 4755 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4756     single_instruction;
 4757     dst    : S5(write);
 4758     mem    : S3(read);
 4759     D0     : S0;        // big decoder only
 4760     ALU0   : S4;        // ALU0 only
 4761     MEM    : S3;        // any mem
 4762 %}
 4763 
 4764 // Integer ALU reg-reg operation
 4765 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4766     single_instruction;
 4767     cr     : S4(write);
 4768     src1   : S3(read);
 4769     src2   : S3(read);
 4770     DECODE : S0;        // any decoder
 4771     ALU    : S3;        // any alu
 4772 %}
 4773 
 4774 // Integer ALU reg-imm operation
 4775 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4776     single_instruction;
 4777     cr     : S4(write);
 4778     src1   : S3(read);
 4779     DECODE : S0;        // any decoder
 4780     ALU    : S3;        // any alu
 4781 %}
 4782 
 4783 // Integer ALU reg-mem operation
 4784 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4785     single_instruction;
 4786     cr     : S4(write);
 4787     src1   : S3(read);
 4788     src2   : S3(read);
 4789     D0     : S0;        // big decoder only
 4790     ALU    : S4;        // any alu
 4791     MEM    : S3;
 4792 %}
 4793 
 4794 // Conditional move reg-reg
 4795 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4796     instruction_count(4);
 4797     y      : S4(read);
 4798     q      : S3(read);
 4799     p      : S3(read);
 4800     DECODE : S0(4);     // any decoder
 4801 %}
 4802 
 4803 // Conditional move reg-reg
 4804 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4805     single_instruction;
 4806     dst    : S4(write);
 4807     src    : S3(read);
 4808     cr     : S3(read);
 4809     DECODE : S0;        // any decoder
 4810 %}
 4811 
 4812 // Conditional move reg-mem
 4813 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4814     single_instruction;
 4815     dst    : S4(write);
 4816     src    : S3(read);
 4817     cr     : S3(read);
 4818     DECODE : S0;        // any decoder
 4819     MEM    : S3;
 4820 %}
 4821 
 4822 // Conditional move reg-reg long
 4823 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4824     single_instruction;
 4825     dst    : S4(write);
 4826     src    : S3(read);
 4827     cr     : S3(read);
 4828     DECODE : S0(2);     // any 2 decoders
 4829 %}
 4830 
 4831 // Conditional move double reg-reg
 4832 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4833     single_instruction;
 4834     dst    : S4(write);
 4835     src    : S3(read);
 4836     cr     : S3(read);
 4837     DECODE : S0;        // any decoder
 4838 %}
 4839 
 4840 // Float reg-reg operation
 4841 pipe_class fpu_reg(regDPR dst) %{
 4842     instruction_count(2);
 4843     dst    : S3(read);
 4844     DECODE : S0(2);     // any 2 decoders
 4845     FPU    : S3;
 4846 %}
 4847 
 4848 // Float reg-reg operation
 4849 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4850     instruction_count(2);
 4851     dst    : S4(write);
 4852     src    : S3(read);
 4853     DECODE : S0(2);     // any 2 decoders
 4854     FPU    : S3;
 4855 %}
 4856 
 4857 // Float reg-reg operation
 4858 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4859     instruction_count(3);
 4860     dst    : S4(write);
 4861     src1   : S3(read);
 4862     src2   : S3(read);
 4863     DECODE : S0(3);     // any 3 decoders
 4864     FPU    : S3(2);
 4865 %}
 4866 
 4867 // Float reg-reg operation
 4868 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4869     instruction_count(4);
 4870     dst    : S4(write);
 4871     src1   : S3(read);
 4872     src2   : S3(read);
 4873     src3   : S3(read);
 4874     DECODE : S0(4);     // any 4 decoder slots
 4875     FPU    : S3(2);
 4876 %}
 4877 
 4878 // Float reg-reg operation
 4879 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4880     instruction_count(4);
 4881     dst    : S4(write);
 4882     src1   : S3(read);
 4883     src2   : S3(read);
 4884     src3   : S3(read);
 4885     DECODE : S1(3);     // any 3 decoders
 4886     D0     : S0;        // Big decoder only
 4887     FPU    : S3(2);
 4888     MEM    : S3;
 4889 %}
 4890 
 4891 // Float reg-mem operation
 4892 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4893     instruction_count(2);
 4894     dst    : S5(write);
 4895     mem    : S3(read);
 4896     D0     : S0;        // big decoder only
 4897     DECODE : S1;        // any decoder for FPU POP
 4898     FPU    : S4;
 4899     MEM    : S3;        // any mem
 4900 %}
 4901 
 4902 // Float reg-mem operation
 4903 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4904     instruction_count(3);
 4905     dst    : S5(write);
 4906     src1   : S3(read);
 4907     mem    : S3(read);
 4908     D0     : S0;        // big decoder only
 4909     DECODE : S1(2);     // any decoder for FPU POP
 4910     FPU    : S4;
 4911     MEM    : S3;        // any mem
 4912 %}
 4913 
 4914 // Float mem-reg operation
 4915 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4916     instruction_count(2);
 4917     src    : S5(read);
 4918     mem    : S3(read);
 4919     DECODE : S0;        // any decoder for FPU PUSH
 4920     D0     : S1;        // big decoder only
 4921     FPU    : S4;
 4922     MEM    : S3;        // any mem
 4923 %}
 4924 
 4925 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4926     instruction_count(3);
 4927     src1   : S3(read);
 4928     src2   : S3(read);
 4929     mem    : S3(read);
 4930     DECODE : S0(2);     // any decoder for FPU PUSH
 4931     D0     : S1;        // big decoder only
 4932     FPU    : S4;
 4933     MEM    : S3;        // any mem
 4934 %}
 4935 
 4936 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4937     instruction_count(3);
 4938     src1   : S3(read);
 4939     src2   : S3(read);
 4940     mem    : S4(read);
 4941     DECODE : S0;        // any decoder for FPU PUSH
 4942     D0     : S0(2);     // big decoder only
 4943     FPU    : S4;
 4944     MEM    : S3(2);     // any mem
 4945 %}
 4946 
 4947 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4948     instruction_count(2);
 4949     src1   : S3(read);
 4950     dst    : S4(read);
 4951     D0     : S0(2);     // big decoder only
 4952     MEM    : S3(2);     // any mem
 4953 %}
 4954 
 4955 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4956     instruction_count(3);
 4957     src1   : S3(read);
 4958     src2   : S3(read);
 4959     dst    : S4(read);
 4960     D0     : S0(3);     // big decoder only
 4961     FPU    : S4;
 4962     MEM    : S3(3);     // any mem
 4963 %}
 4964 
 4965 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4966     instruction_count(3);
 4967     src1   : S4(read);
 4968     mem    : S4(read);
 4969     DECODE : S0;        // any decoder for FPU PUSH
 4970     D0     : S0(2);     // big decoder only
 4971     FPU    : S4;
 4972     MEM    : S3(2);     // any mem
 4973 %}
 4974 
 4975 // Float load constant
 4976 pipe_class fpu_reg_con(regDPR dst) %{
 4977     instruction_count(2);
 4978     dst    : S5(write);
 4979     D0     : S0;        // big decoder only for the load
 4980     DECODE : S1;        // any decoder for FPU POP
 4981     FPU    : S4;
 4982     MEM    : S3;        // any mem
 4983 %}
 4984 
 4985 // Float load constant
 4986 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4987     instruction_count(3);
 4988     dst    : S5(write);
 4989     src    : S3(read);
 4990     D0     : S0;        // big decoder only for the load
 4991     DECODE : S1(2);     // any decoder for FPU POP
 4992     FPU    : S4;
 4993     MEM    : S3;        // any mem
 4994 %}
 4995 
 4996 // UnConditional branch
 4997 pipe_class pipe_jmp( label labl ) %{
 4998     single_instruction;
 4999     BR   : S3;
 5000 %}
 5001 
 5002 // Conditional branch
 5003 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 5004     single_instruction;
 5005     cr    : S1(read);
 5006     BR    : S3;
 5007 %}
 5008 
 5009 // Allocation idiom
 5010 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 5011     instruction_count(1); force_serialization;
 5012     fixed_latency(6);
 5013     heap_ptr : S3(read);
 5014     DECODE   : S0(3);
 5015     D0       : S2;
 5016     MEM      : S3;
 5017     ALU      : S3(2);
 5018     dst      : S5(write);
 5019     BR       : S5;
 5020 %}
 5021 
 5022 // Generic big/slow expanded idiom
 5023 pipe_class pipe_slow(  ) %{
 5024     instruction_count(10); multiple_bundles; force_serialization;
 5025     fixed_latency(100);
 5026     D0  : S0(2);
 5027     MEM : S3(2);
 5028 %}
 5029 
 5030 // The real do-nothing guy
 5031 pipe_class empty( ) %{
 5032     instruction_count(0);
 5033 %}
 5034 
 5035 // Define the class for the Nop node
 5036 define %{
 5037    MachNop = empty;
 5038 %}
 5039 
 5040 %}
 5041 
 5042 //----------INSTRUCTIONS-------------------------------------------------------
 5043 //
 5044 // match      -- States which machine-independent subtree may be replaced
 5045 //               by this instruction.
 5046 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5047 //               selection to identify a minimum cost tree of machine
 5048 //               instructions that matches a tree of machine-independent
 5049 //               instructions.
 5050 // format     -- A string providing the disassembly for this instruction.
 5051 //               The value of an instruction's operand may be inserted
 5052 //               by referring to it with a '$' prefix.
 5053 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5054 //               to within an encode class as $primary, $secondary, and $tertiary
 5055 //               respectively.  The primary opcode is commonly used to
 5056 //               indicate the type of machine instruction, while secondary
 5057 //               and tertiary are often used for prefix options or addressing
 5058 //               modes.
 5059 // ins_encode -- A list of encode classes with parameters. The encode class
 5060 //               name must have been defined in an 'enc_class' specification
 5061 //               in the encode section of the architecture description.
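      //
      // As a concrete example, loadP further below combines these pieces:
      // match(Set dst (LoadP mem)) selects it for pointer loads, ins_cost(125)
      // steers selection, format supplies "MOV $dst,$mem" for disassembly,
      // opcode(0x8B) becomes $primary, and ins_encode(OpcP, RegMem(dst,mem))
      // emits the opcode byte followed by the reg/mem form of the operands.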
 5062 
 5063 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5064 // Load Float
 5065 instruct MoveF2LEG(legRegF dst, regF src) %{
 5066   match(Set dst src);
 5067   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5068   ins_encode %{
 5069     ShouldNotReachHere();
 5070   %}
 5071   ins_pipe( fpu_reg_reg );
 5072 %}
 5073 
 5074 // Load Float
 5075 instruct MoveLEG2F(regF dst, legRegF src) %{
 5076   match(Set dst src);
 5077   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5078   ins_encode %{
 5079     ShouldNotReachHere();
 5080   %}
 5081   ins_pipe( fpu_reg_reg );
 5082 %}
 5083 
 5084 // Load Float
 5085 instruct MoveF2VL(vlRegF dst, regF src) %{
 5086   match(Set dst src);
 5087   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5088   ins_encode %{
 5089     ShouldNotReachHere();
 5090   %}
 5091   ins_pipe( fpu_reg_reg );
 5092 %}
 5093 
 5094 // Load Float
 5095 instruct MoveVL2F(regF dst, vlRegF src) %{
 5096   match(Set dst src);
 5097   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5098   ins_encode %{
 5099     ShouldNotReachHere();
 5100   %}
 5101   ins_pipe( fpu_reg_reg );
 5102 %}
 5103 
 5104 
 5105 
 5106 // Load Double
 5107 instruct MoveD2LEG(legRegD dst, regD src) %{
 5108   match(Set dst src);
 5109   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5110   ins_encode %{
 5111     ShouldNotReachHere();
 5112   %}
 5113   ins_pipe( fpu_reg_reg );
 5114 %}
 5115 
 5116 // Load Double
 5117 instruct MoveLEG2D(regD dst, legRegD src) %{
 5118   match(Set dst src);
 5119   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5120   ins_encode %{
 5121     ShouldNotReachHere();
 5122   %}
 5123   ins_pipe( fpu_reg_reg );
 5124 %}
 5125 
 5126 // Load Double
 5127 instruct MoveD2VL(vlRegD dst, regD src) %{
 5128   match(Set dst src);
 5129   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5130   ins_encode %{
 5131     ShouldNotReachHere();
 5132   %}
 5133   ins_pipe( fpu_reg_reg );
 5134 %}
 5135 
 5136 // Load Double
 5137 instruct MoveVL2D(regD dst, vlRegD src) %{
 5138   match(Set dst src);
 5139   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5140   ins_encode %{
 5141     ShouldNotReachHere();
 5142   %}
 5143   ins_pipe( fpu_reg_reg );
 5144 %}
 5145 
 5146 //----------BSWAP-Instruction--------------------------------------------------
 5147 instruct bytes_reverse_int(rRegI dst) %{
 5148   match(Set dst (ReverseBytesI dst));
 5149 
 5150   format %{ "BSWAP  $dst" %}
 5151   opcode(0x0F, 0xC8);
 5152   ins_encode( OpcP, OpcSReg(dst) );
 5153   ins_pipe( ialu_reg );
 5154 %}
 5155 
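      // Reversing a long held as a register pair: byte-swap each 32-bit half,
      // then exchange the halves.  E.g. hi:lo = 0x11223344:0x55667788 becomes
      // 0x44332211:0x88776655 after the two BSWAPs and 0x88776655:0x44332211
      // after the XCHG, i.e. the byte-reversed 64-bit value.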
 5156 instruct bytes_reverse_long(eRegL dst) %{
 5157   match(Set dst (ReverseBytesL dst));
 5158 
 5159   format %{ "BSWAP  $dst.lo\n\t"
 5160             "BSWAP  $dst.hi\n\t"
 5161             "XCHG   $dst.lo $dst.hi" %}
 5162 
 5163   ins_cost(125);
 5164   ins_encode( bswap_long_bytes(dst) );
 5165   ins_pipe( ialu_reg_reg);
 5166 %}
 5167 
 5168 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5169   match(Set dst (ReverseBytesUS dst));
 5170   effect(KILL cr);
 5171 
 5172   format %{ "BSWAP  $dst\n\t"
 5173             "SHR    $dst,16\n\t" %}
 5174   ins_encode %{
 5175     __ bswapl($dst$$Register);
 5176     __ shrl($dst$$Register, 16);
 5177   %}
 5178   ins_pipe( ialu_reg );
 5179 %}
 5180 
 5181 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5182   match(Set dst (ReverseBytesS dst));
 5183   effect(KILL cr);
 5184 
 5185   format %{ "BSWAP  $dst\n\t"
 5186             "SAR    $dst,16\n\t" %}
 5187   ins_encode %{
 5188     __ bswapl($dst$$Register);
 5189     __ sarl($dst$$Register, 16);
 5190   %}
 5191   ins_pipe( ialu_reg );
 5192 %}
 5193 
 5194 
 5195 //---------- Zeros Count Instructions ------------------------------------------
 5196 
 5197 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5198   predicate(UseCountLeadingZerosInstruction);
 5199   match(Set dst (CountLeadingZerosI src));
 5200   effect(KILL cr);
 5201 
 5202   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5203   ins_encode %{
 5204     __ lzcntl($dst$$Register, $src$$Register);
 5205   %}
 5206   ins_pipe(ialu_reg);
 5207 %}
 5208 
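      // BSR fallback when LZCNT is unavailable: BSR yields the bit index of
      // the highest set bit, so the leading-zero count is 31 - index, computed
      // here as NEG followed by ADD 31.  A zero input sets ZF and $dst is
      // loaded with -1, giving -(-1) + 31 = 32, the defined result for zero.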
 5209 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5210   predicate(!UseCountLeadingZerosInstruction);
 5211   match(Set dst (CountLeadingZerosI src));
 5212   effect(KILL cr);
 5213 
 5214   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5215             "JNZ    skip\n\t"
 5216             "MOV    $dst, -1\n"
 5217       "skip:\n\t"
 5218             "NEG    $dst\n\t"
 5219             "ADD    $dst, 31" %}
 5220   ins_encode %{
 5221     Register Rdst = $dst$$Register;
 5222     Register Rsrc = $src$$Register;
 5223     Label skip;
 5224     __ bsrl(Rdst, Rsrc);
 5225     __ jccb(Assembler::notZero, skip);
 5226     __ movl(Rdst, -1);
 5227     __ bind(skip);
 5228     __ negl(Rdst);
 5229     __ addl(Rdst, BitsPerInt - 1);
 5230   %}
 5231   ins_pipe(ialu_reg);
 5232 %}
 5233 
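      // LZCNT (and TZCNT further below) sets the carry flag when its source is
      // zero, so JNC falls through to the second count only when the first
      // word examined was all zeroes; the ADD of 32 then accounts for the word
      // already scanned.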
 5234 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5235   predicate(UseCountLeadingZerosInstruction);
 5236   match(Set dst (CountLeadingZerosL src));
 5237   effect(TEMP dst, KILL cr);
 5238 
 5239   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5240             "JNC    done\n\t"
 5241             "LZCNT  $dst, $src.lo\n\t"
 5242             "ADD    $dst, 32\n"
 5243       "done:" %}
 5244   ins_encode %{
 5245     Register Rdst = $dst$$Register;
 5246     Register Rsrc = $src$$Register;
 5247     Label done;
 5248     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5249     __ jccb(Assembler::carryClear, done);
 5250     __ lzcntl(Rdst, Rsrc);
 5251     __ addl(Rdst, BitsPerInt);
 5252     __ bind(done);
 5253   %}
 5254   ins_pipe(ialu_reg);
 5255 %}
 5256 
 5257 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5258   predicate(!UseCountLeadingZerosInstruction);
 5259   match(Set dst (CountLeadingZerosL src));
 5260   effect(TEMP dst, KILL cr);
 5261 
 5262   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5263             "JZ     msw_is_zero\n\t"
 5264             "ADD    $dst, 32\n\t"
 5265             "JMP    not_zero\n"
 5266       "msw_is_zero:\n\t"
 5267             "BSR    $dst, $src.lo\n\t"
 5268             "JNZ    not_zero\n\t"
 5269             "MOV    $dst, -1\n"
 5270       "not_zero:\n\t"
 5271             "NEG    $dst\n\t"
 5272             "ADD    $dst, 63\n" %}
 5273   ins_encode %{
 5274     Register Rdst = $dst$$Register;
 5275     Register Rsrc = $src$$Register;
 5276     Label msw_is_zero;
 5277     Label not_zero;
 5278     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5279     __ jccb(Assembler::zero, msw_is_zero);
 5280     __ addl(Rdst, BitsPerInt);
 5281     __ jmpb(not_zero);
 5282     __ bind(msw_is_zero);
 5283     __ bsrl(Rdst, Rsrc);
 5284     __ jccb(Assembler::notZero, not_zero);
 5285     __ movl(Rdst, -1);
 5286     __ bind(not_zero);
 5287     __ negl(Rdst);
 5288     __ addl(Rdst, BitsPerLong - 1);
 5289   %}
 5290   ins_pipe(ialu_reg);
 5291 %}
 5292 
 5293 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5294   predicate(UseCountTrailingZerosInstruction);
 5295   match(Set dst (CountTrailingZerosI src));
 5296   effect(KILL cr);
 5297 
 5298   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5299   ins_encode %{
 5300     __ tzcntl($dst$$Register, $src$$Register);
 5301   %}
 5302   ins_pipe(ialu_reg);
 5303 %}
 5304 
 5305 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5306   predicate(!UseCountTrailingZerosInstruction);
 5307   match(Set dst (CountTrailingZerosI src));
 5308   effect(KILL cr);
 5309 
 5310   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5311             "JNZ    done\n\t"
 5312             "MOV    $dst, 32\n"
 5313       "done:" %}
 5314   ins_encode %{
 5315     Register Rdst = $dst$$Register;
 5316     Label done;
 5317     __ bsfl(Rdst, $src$$Register);
 5318     __ jccb(Assembler::notZero, done);
 5319     __ movl(Rdst, BitsPerInt);
 5320     __ bind(done);
 5321   %}
 5322   ins_pipe(ialu_reg);
 5323 %}
 5324 
 5325 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5326   predicate(UseCountTrailingZerosInstruction);
 5327   match(Set dst (CountTrailingZerosL src));
 5328   effect(TEMP dst, KILL cr);
 5329 
 5330   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5331             "JNC    done\n\t"
 5332             "TZCNT  $dst, $src.hi\n\t"
 5333             "ADD    $dst, 32\n"
 5334       "done:" %}
 5335   ins_encode %{
 5336     Register Rdst = $dst$$Register;
 5337     Register Rsrc = $src$$Register;
 5338     Label done;
 5339     __ tzcntl(Rdst, Rsrc);
 5340     __ jccb(Assembler::carryClear, done);
 5341     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5342     __ addl(Rdst, BitsPerInt);
 5343     __ bind(done);
 5344   %}
 5345   ins_pipe(ialu_reg);
 5346 %}
 5347 
 5348 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5349   predicate(!UseCountTrailingZerosInstruction);
 5350   match(Set dst (CountTrailingZerosL src));
 5351   effect(TEMP dst, KILL cr);
 5352 
 5353   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5354             "JNZ    done\n\t"
 5355             "BSF    $dst, $src.hi\n\t"
 5356             "JNZ    msw_not_zero\n\t"
 5357             "MOV    $dst, 32\n"
 5358       "msw_not_zero:\n\t"
 5359             "ADD    $dst, 32\n"
 5360       "done:" %}
 5361   ins_encode %{
 5362     Register Rdst = $dst$$Register;
 5363     Register Rsrc = $src$$Register;
 5364     Label msw_not_zero;
 5365     Label done;
 5366     __ bsfl(Rdst, Rsrc);
 5367     __ jccb(Assembler::notZero, done);
 5368     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5369     __ jccb(Assembler::notZero, msw_not_zero);
 5370     __ movl(Rdst, BitsPerInt);
 5371     __ bind(msw_not_zero);
 5372     __ addl(Rdst, BitsPerInt);
 5373     __ bind(done);
 5374   %}
 5375   ins_pipe(ialu_reg);
 5376 %}
 5377 
 5378 
 5379 //---------- Population Count Instructions -------------------------------------
 5380 
 5381 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5382   predicate(UsePopCountInstruction);
 5383   match(Set dst (PopCountI src));
 5384   effect(KILL cr);
 5385 
 5386   format %{ "POPCNT $dst, $src" %}
 5387   ins_encode %{
 5388     __ popcntl($dst$$Register, $src$$Register);
 5389   %}
 5390   ins_pipe(ialu_reg);
 5391 %}
 5392 
 5393 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5394   predicate(UsePopCountInstruction);
 5395   match(Set dst (PopCountI (LoadI mem)));
 5396   effect(KILL cr);
 5397 
 5398   format %{ "POPCNT $dst, $mem" %}
 5399   ins_encode %{
 5400     __ popcntl($dst$$Register, $mem$$Address);
 5401   %}
 5402   ins_pipe(ialu_reg);
 5403 %}
 5404 
 5405 // Note: Long.bitCount(long) returns an int.
 5406 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5407   predicate(UsePopCountInstruction);
 5408   match(Set dst (PopCountL src));
 5409   effect(KILL cr, TEMP tmp, TEMP dst);
 5410 
 5411   format %{ "POPCNT $dst, $src.lo\n\t"
 5412             "POPCNT $tmp, $src.hi\n\t"
 5413             "ADD    $dst, $tmp" %}
 5414   ins_encode %{
 5415     __ popcntl($dst$$Register, $src$$Register);
 5416     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5417     __ addl($dst$$Register, $tmp$$Register);
 5418   %}
 5419   ins_pipe(ialu_reg);
 5420 %}
 5421 
 5422 // Note: Long.bitCount(long) returns an int.
 5423 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5424   predicate(UsePopCountInstruction);
 5425   match(Set dst (PopCountL (LoadL mem)));
 5426   effect(KILL cr, TEMP tmp, TEMP dst);
 5427 
 5428   format %{ "POPCNT $dst, $mem\n\t"
 5429             "POPCNT $tmp, $mem+4\n\t"
 5430             "ADD    $dst, $tmp" %}
 5431   ins_encode %{
 5432     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5433     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5434     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5435     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5436     __ addl($dst$$Register, $tmp$$Register);
 5437   %}
 5438   ins_pipe(ialu_reg);
 5439 %}
 5440 
 5441 
 5442 //----------Load/Store/Move Instructions---------------------------------------
 5443 //----------Load Instructions--------------------------------------------------
 5444 // Load Byte (8bit signed)
 5445 instruct loadB(xRegI dst, memory mem) %{
 5446   match(Set dst (LoadB mem));
 5447 
 5448   ins_cost(125);
 5449   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5450 
 5451   ins_encode %{
 5452     __ movsbl($dst$$Register, $mem$$Address);
 5453   %}
 5454 
 5455   ins_pipe(ialu_reg_mem);
 5456 %}
 5457 
 5458 // Load Byte (8bit signed) into Long Register
 5459 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5460   match(Set dst (ConvI2L (LoadB mem)));
 5461   effect(KILL cr);
 5462 
 5463   ins_cost(375);
 5464   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5465             "MOV    $dst.hi,$dst.lo\n\t"
 5466             "SAR    $dst.hi,7" %}
 5467 
 5468   ins_encode %{
 5469     __ movsbl($dst$$Register, $mem$$Address);
 5470     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5471     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
 5472   %}
 5473 
 5474   ins_pipe(ialu_reg_mem);
 5475 %}
 5476 
 5477 // Load Unsigned Byte (8bit UNsigned)
 5478 instruct loadUB(xRegI dst, memory mem) %{
 5479   match(Set dst (LoadUB mem));
 5480 
 5481   ins_cost(125);
 5482   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5483 
 5484   ins_encode %{
 5485     __ movzbl($dst$$Register, $mem$$Address);
 5486   %}
 5487 
 5488   ins_pipe(ialu_reg_mem);
 5489 %}
 5490 
 5491 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5492 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5493   match(Set dst (ConvI2L (LoadUB mem)));
 5494   effect(KILL cr);
 5495 
 5496   ins_cost(250);
 5497   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5498             "XOR    $dst.hi,$dst.hi" %}
 5499 
 5500   ins_encode %{
 5501     Register Rdst = $dst$$Register;
 5502     __ movzbl(Rdst, $mem$$Address);
 5503     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5504   %}
 5505 
 5506   ins_pipe(ialu_reg_mem);
 5507 %}
 5508 
 5509 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5510 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5511   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5512   effect(KILL cr);
 5513 
 5514   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5515             "XOR    $dst.hi,$dst.hi\n\t"
 5516             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5517   ins_encode %{
 5518     Register Rdst = $dst$$Register;
 5519     __ movzbl(Rdst, $mem$$Address);
 5520     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5521     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5522   %}
 5523   ins_pipe(ialu_reg_mem);
 5524 %}
 5525 
 5526 // Load Short (16bit signed)
 5527 instruct loadS(rRegI dst, memory mem) %{
 5528   match(Set dst (LoadS mem));
 5529 
 5530   ins_cost(125);
 5531   format %{ "MOVSX  $dst,$mem\t# short" %}
 5532 
 5533   ins_encode %{
 5534     __ movswl($dst$$Register, $mem$$Address);
 5535   %}
 5536 
 5537   ins_pipe(ialu_reg_mem);
 5538 %}
 5539 
 5540 // Load Short (16 bit signed) to Byte (8 bit signed)
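      // The (LoadS << 24) >> 24 pattern is how the ideal graph expresses a
      // narrowing conversion to byte, so it can be matched as a single
      // sign-extending byte load; the loadUS2B/loadI2B/loadI2S forms below
      // fold the analogous patterns the same way.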
 5541 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5542   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5543 
 5544   ins_cost(125);
 5545   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5546   ins_encode %{
 5547     __ movsbl($dst$$Register, $mem$$Address);
 5548   %}
 5549   ins_pipe(ialu_reg_mem);
 5550 %}
 5551 
 5552 // Load Short (16bit signed) into Long Register
 5553 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5554   match(Set dst (ConvI2L (LoadS mem)));
 5555   effect(KILL cr);
 5556 
 5557   ins_cost(375);
 5558   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5559             "MOV    $dst.hi,$dst.lo\n\t"
 5560             "SAR    $dst.hi,15" %}
 5561 
 5562   ins_encode %{
 5563     __ movswl($dst$$Register, $mem$$Address);
 5564     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5565     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
 5566   %}
 5567 
 5568   ins_pipe(ialu_reg_mem);
 5569 %}
 5570 
 5571 // Load Unsigned Short/Char (16bit unsigned)
 5572 instruct loadUS(rRegI dst, memory mem) %{
 5573   match(Set dst (LoadUS mem));
 5574 
 5575   ins_cost(125);
 5576   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5577 
 5578   ins_encode %{
 5579     __ movzwl($dst$$Register, $mem$$Address);
 5580   %}
 5581 
 5582   ins_pipe(ialu_reg_mem);
 5583 %}
 5584 
 5585 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5586 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5587   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5588 
 5589   ins_cost(125);
 5590   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5591   ins_encode %{
 5592     __ movsbl($dst$$Register, $mem$$Address);
 5593   %}
 5594   ins_pipe(ialu_reg_mem);
 5595 %}
 5596 
 5597 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5598 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5599   match(Set dst (ConvI2L (LoadUS mem)));
 5600   effect(KILL cr);
 5601 
 5602   ins_cost(250);
 5603   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5604             "XOR    $dst.hi,$dst.hi" %}
 5605 
 5606   ins_encode %{
 5607     __ movzwl($dst$$Register, $mem$$Address);
 5608     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5609   %}
 5610 
 5611   ins_pipe(ialu_reg_mem);
 5612 %}
 5613 
 5614 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5615 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5616   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5617   effect(KILL cr);
 5618 
 5619   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5620             "XOR    $dst.hi,$dst.hi" %}
 5621   ins_encode %{
 5622     Register Rdst = $dst$$Register;
 5623     __ movzbl(Rdst, $mem$$Address);
 5624     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5625   %}
 5626   ins_pipe(ialu_reg_mem);
 5627 %}
 5628 
 5629 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5630 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5631   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5632   effect(KILL cr);
 5633 
 5634   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5635             "XOR    $dst.hi,$dst.hi\n\t"
 5636             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5637   ins_encode %{
 5638     Register Rdst = $dst$$Register;
 5639     __ movzwl(Rdst, $mem$$Address);
 5640     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5641     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5642   %}
 5643   ins_pipe(ialu_reg_mem);
 5644 %}
 5645 
 5646 // Load Integer
 5647 instruct loadI(rRegI dst, memory mem) %{
 5648   match(Set dst (LoadI mem));
 5649 
 5650   ins_cost(125);
 5651   format %{ "MOV    $dst,$mem\t# int" %}
 5652 
 5653   ins_encode %{
 5654     __ movl($dst$$Register, $mem$$Address);
 5655   %}
 5656 
 5657   ins_pipe(ialu_reg_mem);
 5658 %}
 5659 
 5660 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5661 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5662   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5663 
 5664   ins_cost(125);
 5665   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5666   ins_encode %{
 5667     __ movsbl($dst$$Register, $mem$$Address);
 5668   %}
 5669   ins_pipe(ialu_reg_mem);
 5670 %}
 5671 
 5672 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5673 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5674   match(Set dst (AndI (LoadI mem) mask));
 5675 
 5676   ins_cost(125);
 5677   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5678   ins_encode %{
 5679     __ movzbl($dst$$Register, $mem$$Address);
 5680   %}
 5681   ins_pipe(ialu_reg_mem);
 5682 %}
 5683 
 5684 // Load Integer (32 bit signed) to Short (16 bit signed)
 5685 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5686   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5687 
 5688   ins_cost(125);
 5689   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5690   ins_encode %{
 5691     __ movswl($dst$$Register, $mem$$Address);
 5692   %}
 5693   ins_pipe(ialu_reg_mem);
 5694 %}
 5695 
 5696 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5697 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5698   match(Set dst (AndI (LoadI mem) mask));
 5699 
 5700   ins_cost(125);
 5701   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5702   ins_encode %{
 5703     __ movzwl($dst$$Register, $mem$$Address);
 5704   %}
 5705   ins_pipe(ialu_reg_mem);
 5706 %}
 5707 
 5708 // Load Integer into Long Register
 5709 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5710   match(Set dst (ConvI2L (LoadI mem)));
 5711   effect(KILL cr);
 5712 
 5713   ins_cost(375);
 5714   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5715             "MOV    $dst.hi,$dst.lo\n\t"
 5716             "SAR    $dst.hi,31" %}
 5717 
 5718   ins_encode %{
 5719     __ movl($dst$$Register, $mem$$Address);
 5720     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5721     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5722   %}
 5723 
 5724   ins_pipe(ialu_reg_mem);
 5725 %}
 5726 
 5727 // Load Integer with mask 0xFF into Long Register
 5728 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5729   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5730   effect(KILL cr);
 5731 
 5732   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5733             "XOR    $dst.hi,$dst.hi" %}
 5734   ins_encode %{
 5735     Register Rdst = $dst$$Register;
 5736     __ movzbl(Rdst, $mem$$Address);
 5737     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5738   %}
 5739   ins_pipe(ialu_reg_mem);
 5740 %}
 5741 
 5742 // Load Integer with mask 0xFFFF into Long Register
 5743 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5744   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5745   effect(KILL cr);
 5746 
 5747   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5748             "XOR    $dst.hi,$dst.hi" %}
 5749   ins_encode %{
 5750     Register Rdst = $dst$$Register;
 5751     __ movzwl(Rdst, $mem$$Address);
 5752     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5753   %}
 5754   ins_pipe(ialu_reg_mem);
 5755 %}
 5756 
 5757 // Load Integer with 31-bit mask into Long Register
 5758 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5759   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5760   effect(KILL cr);
 5761 
 5762   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5763             "XOR    $dst.hi,$dst.hi\n\t"
 5764             "AND    $dst.lo,$mask" %}
 5765   ins_encode %{
 5766     Register Rdst = $dst$$Register;
 5767     __ movl(Rdst, $mem$$Address);
 5768     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5769     __ andl(Rdst, $mask$$constant);
 5770   %}
 5771   ins_pipe(ialu_reg_mem);
 5772 %}
 5773 
 5774 // Load Unsigned Integer into Long Register
 5775 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5776   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5777   effect(KILL cr);
 5778 
 5779   ins_cost(250);
 5780   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5781             "XOR    $dst.hi,$dst.hi" %}
 5782 
 5783   ins_encode %{
 5784     __ movl($dst$$Register, $mem$$Address);
 5785     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5786   %}
 5787 
 5788   ins_pipe(ialu_reg_mem);
 5789 %}
 5790 
 5791 // Load Long.  Cannot clobber address while loading, so restrict address
 5792 // register to ESI
 5793 instruct loadL(eRegL dst, load_long_memory mem) %{
 5794   predicate(!((LoadLNode*)n)->require_atomic_access());
 5795   match(Set dst (LoadL mem));
 5796 
 5797   ins_cost(250);
 5798   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5799             "MOV    $dst.hi,$mem+4" %}
 5800 
 5801   ins_encode %{
 5802     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5803     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5804     __ movl($dst$$Register, Amemlo);
 5805     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5806   %}
 5807 
 5808   ins_pipe(ialu_reg_long_mem);
 5809 %}
 5810 
 5811 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5812 // then store it down to the stack and reload on the int
 5813 // side.
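      // The 64-bit FILD/FISTP pair moves the whole value in a single memory
      // access in each direction, which is what provides the required
      // atomicity; the SSE variants below rely on MOVSD the same way.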
 5814 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5815   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5816   match(Set dst (LoadL mem));
 5817 
 5818   ins_cost(200);
 5819   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5820             "FISTp  $dst" %}
 5821   ins_encode(enc_loadL_volatile(mem,dst));
 5822   ins_pipe( fpu_reg_mem );
 5823 %}
 5824 
 5825 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5826   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5827   match(Set dst (LoadL mem));
 5828   effect(TEMP tmp);
 5829   ins_cost(180);
 5830   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5831             "MOVSD  $dst,$tmp" %}
 5832   ins_encode %{
 5833     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5834     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5835   %}
 5836   ins_pipe( pipe_slow );
 5837 %}
 5838 
 5839 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5840   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5841   match(Set dst (LoadL mem));
 5842   effect(TEMP tmp);
 5843   ins_cost(160);
 5844   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5845             "MOVD   $dst.lo,$tmp\n\t"
 5846             "PSRLQ  $tmp,32\n\t"
 5847             "MOVD   $dst.hi,$tmp" %}
 5848   ins_encode %{
 5849     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5850     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5851     __ psrlq($tmp$$XMMRegister, 32);
 5852     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5853   %}
 5854   ins_pipe( pipe_slow );
 5855 %}
 5856 
 5857 // Load Range
 5858 instruct loadRange(rRegI dst, memory mem) %{
 5859   match(Set dst (LoadRange mem));
 5860 
 5861   ins_cost(125);
 5862   format %{ "MOV    $dst,$mem" %}
 5863   opcode(0x8B);
 5864   ins_encode( OpcP, RegMem(dst,mem));
 5865   ins_pipe( ialu_reg_mem );
 5866 %}
 5867 
 5868 
 5869 // Load Pointer
 5870 instruct loadP(eRegP dst, memory mem) %{
 5871   match(Set dst (LoadP mem));
 5872 
 5873   ins_cost(125);
 5874   format %{ "MOV    $dst,$mem" %}
 5875   opcode(0x8B);
 5876   ins_encode( OpcP, RegMem(dst,mem));
 5877   ins_pipe( ialu_reg_mem );
 5878 %}
 5879 
 5880 // Load Klass Pointer
 5881 instruct loadKlass(eRegP dst, memory mem) %{
 5882   match(Set dst (LoadKlass mem));
 5883 
 5884   ins_cost(125);
 5885   format %{ "MOV    $dst,$mem" %}
 5886   opcode(0x8B);
 5887   ins_encode( OpcP, RegMem(dst,mem));
 5888   ins_pipe( ialu_reg_mem );
 5889 %}
 5890 
 5891 // Load Double
 5892 instruct loadDPR(regDPR dst, memory mem) %{
 5893   predicate(UseSSE<=1);
 5894   match(Set dst (LoadD mem));
 5895 
 5896   ins_cost(150);
 5897   format %{ "FLD_D  ST,$mem\n\t"
 5898             "FSTP   $dst" %}
 5899   opcode(0xDD);               /* DD /0 */
 5900   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5901               Pop_Reg_DPR(dst) );
 5902   ins_pipe( fpu_reg_mem );
 5903 %}
 5904 
 5905 // Load Double to XMM
 5906 instruct loadD(regD dst, memory mem) %{
 5907   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5908   match(Set dst (LoadD mem));
 5909   ins_cost(145);
 5910   format %{ "MOVSD  $dst,$mem" %}
 5911   ins_encode %{
 5912     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5913   %}
 5914   ins_pipe( pipe_slow );
 5915 %}
 5916 
 5917 instruct loadD_partial(regD dst, memory mem) %{
 5918   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5919   match(Set dst (LoadD mem));
 5920   ins_cost(145);
 5921   format %{ "MOVLPD $dst,$mem" %}
 5922   ins_encode %{
 5923     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5924   %}
 5925   ins_pipe( pipe_slow );
 5926 %}
 5927 
 5928 // Load to XMM register (single-precision floating point)
 5929 // MOVSS instruction
 5930 instruct loadF(regF dst, memory mem) %{
 5931   predicate(UseSSE>=1);
 5932   match(Set dst (LoadF mem));
 5933   ins_cost(145);
 5934   format %{ "MOVSS  $dst,$mem" %}
 5935   ins_encode %{
 5936     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5937   %}
 5938   ins_pipe( pipe_slow );
 5939 %}
 5940 
 5941 // Load Float
 5942 instruct loadFPR(regFPR dst, memory mem) %{
 5943   predicate(UseSSE==0);
 5944   match(Set dst (LoadF mem));
 5945 
 5946   ins_cost(150);
 5947   format %{ "FLD_S  ST,$mem\n\t"
 5948             "FSTP   $dst" %}
 5949   opcode(0xD9);               /* D9 /0 */
 5950   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5951               Pop_Reg_FPR(dst) );
 5952   ins_pipe( fpu_reg_mem );
 5953 %}
 5954 
 5955 // Load Effective Address
 5956 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5957   match(Set dst mem);
 5958 
 5959   ins_cost(110);
 5960   format %{ "LEA    $dst,$mem" %}
 5961   opcode(0x8D);
 5962   ins_encode( OpcP, RegMem(dst,mem));
 5963   ins_pipe( ialu_reg_reg_fat );
 5964 %}
 5965 
 5966 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5967   match(Set dst mem);
 5968 
 5969   ins_cost(110);
 5970   format %{ "LEA    $dst,$mem" %}
 5971   opcode(0x8D);
 5972   ins_encode( OpcP, RegMem(dst,mem));
 5973   ins_pipe( ialu_reg_reg_fat );
 5974 %}
 5975 
 5976 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5977   match(Set dst mem);
 5978 
 5979   ins_cost(110);
 5980   format %{ "LEA    $dst,$mem" %}
 5981   opcode(0x8D);
 5982   ins_encode( OpcP, RegMem(dst,mem));
 5983   ins_pipe( ialu_reg_reg_fat );
 5984 %}
 5985 
 5986 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5987   match(Set dst mem);
 5988 
 5989   ins_cost(110);
 5990   format %{ "LEA    $dst,$mem" %}
 5991   opcode(0x8D);
 5992   ins_encode( OpcP, RegMem(dst,mem));
 5993   ins_pipe( ialu_reg_reg_fat );
 5994 %}
 5995 
 5996 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5997   match(Set dst mem);
 5998 
 5999   ins_cost(110);
 6000   format %{ "LEA    $dst,$mem" %}
 6001   opcode(0x8D);
 6002   ins_encode( OpcP, RegMem(dst,mem));
 6003   ins_pipe( ialu_reg_reg_fat );
 6004 %}
 6005 
 6006 // Load Constant
 6007 instruct loadConI(rRegI dst, immI src) %{
 6008   match(Set dst src);
 6009 
 6010   format %{ "MOV    $dst,$src" %}
 6011   ins_encode( LdImmI(dst, src) );
 6012   ins_pipe( ialu_reg_fat );
 6013 %}
 6014 
 6015 // Load Constant zero
 6016 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 6017   match(Set dst src);
 6018   effect(KILL cr);
 6019 
 6020   ins_cost(50);
 6021   format %{ "XOR    $dst,$dst" %}
 6022   opcode(0x33);  /* + rd */
 6023   ins_encode( OpcP, RegReg( dst, dst ) );
 6024   ins_pipe( ialu_reg );
 6025 %}
 6026 
 6027 instruct loadConP(eRegP dst, immP src) %{
 6028   match(Set dst src);
 6029 
 6030   format %{ "MOV    $dst,$src" %}
 6031   opcode(0xB8);  /* + rd */
 6032   ins_encode( LdImmP(dst, src) );
 6033   ins_pipe( ialu_reg_fat );
 6034 %}
 6035 
 6036 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 6037   match(Set dst src);
 6038   effect(KILL cr);
 6039   ins_cost(200);
 6040   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6041             "MOV    $dst.hi,$src.hi" %}
 6042   opcode(0xB8);
 6043   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6044   ins_pipe( ialu_reg_long_fat );
 6045 %}
 6046 
 6047 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6048   match(Set dst src);
 6049   effect(KILL cr);
 6050   ins_cost(150);
 6051   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6052             "XOR    $dst.hi,$dst.hi" %}
 6053   opcode(0x33,0x33);
 6054   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6055   ins_pipe( ialu_reg_long );
 6056 %}
 6057 
 6058 // The instruction usage is guarded by predicate in operand immFPR().
 6059 instruct loadConFPR(regFPR dst, immFPR con) %{
 6060   match(Set dst con);
 6061   ins_cost(125);
 6062   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6063             "FSTP   $dst" %}
 6064   ins_encode %{
 6065     __ fld_s($constantaddress($con));
 6066     __ fstp_d($dst$$reg);
 6067   %}
 6068   ins_pipe(fpu_reg_con);
 6069 %}
 6070 
 6071 // The instruction usage is guarded by predicate in operand immFPR0().
 6072 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6073   match(Set dst con);
 6074   ins_cost(125);
 6075   format %{ "FLDZ   ST\n\t"
 6076             "FSTP   $dst" %}
 6077   ins_encode %{
 6078     __ fldz();
 6079     __ fstp_d($dst$$reg);
 6080   %}
 6081   ins_pipe(fpu_reg_con);
 6082 %}
 6083 
 6084 // The instruction usage is guarded by predicate in operand immFPR1().
 6085 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6086   match(Set dst con);
 6087   ins_cost(125);
 6088   format %{ "FLD1   ST\n\t"
 6089             "FSTP   $dst" %}
 6090   ins_encode %{
 6091     __ fld1();
 6092     __ fstp_d($dst$$reg);
 6093   %}
 6094   ins_pipe(fpu_reg_con);
 6095 %}
 6096 
 6097 // The instruction usage is guarded by predicate in operand immF().
 6098 instruct loadConF(regF dst, immF con) %{
 6099   match(Set dst con);
 6100   ins_cost(125);
 6101   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6102   ins_encode %{
 6103     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6104   %}
 6105   ins_pipe(pipe_slow);
 6106 %}
 6107 
 6108 // The instruction usage is guarded by predicate in operand immF0().
 6109 instruct loadConF0(regF dst, immF0 src) %{
 6110   match(Set dst src);
 6111   ins_cost(100);
 6112   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6113   ins_encode %{
 6114     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6115   %}
 6116   ins_pipe(pipe_slow);
 6117 %}
 6118 
 6119 // The instruction usage is guarded by predicate in operand immDPR().
 6120 instruct loadConDPR(regDPR dst, immDPR con) %{
 6121   match(Set dst con);
 6122   ins_cost(125);
 6123 
 6124   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6125             "FSTP   $dst" %}
 6126   ins_encode %{
 6127     __ fld_d($constantaddress($con));
 6128     __ fstp_d($dst$$reg);
 6129   %}
 6130   ins_pipe(fpu_reg_con);
 6131 %}
 6132 
 6133 // The instruction usage is guarded by predicate in operand immDPR0().
 6134 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6135   match(Set dst con);
 6136   ins_cost(125);
 6137 
 6138   format %{ "FLDZ   ST\n\t"
 6139             "FSTP   $dst" %}
 6140   ins_encode %{
 6141     __ fldz();
 6142     __ fstp_d($dst$$reg);
 6143   %}
 6144   ins_pipe(fpu_reg_con);
 6145 %}
 6146 
 6147 // The instruction usage is guarded by predicate in operand immDPR1().
 6148 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6149   match(Set dst con);
 6150   ins_cost(125);
 6151 
 6152   format %{ "FLD1   ST\n\t"
 6153             "FSTP   $dst" %}
 6154   ins_encode %{
 6155     __ fld1();
 6156     __ fstp_d($dst$$reg);
 6157   %}
 6158   ins_pipe(fpu_reg_con);
 6159 %}
 6160 
 6161 // The instruction usage is guarded by predicate in operand immD().
 6162 instruct loadConD(regD dst, immD con) %{
 6163   match(Set dst con);
 6164   ins_cost(125);
 6165   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6166   ins_encode %{
 6167     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6168   %}
 6169   ins_pipe(pipe_slow);
 6170 %}
 6171 
 6172 // The instruction usage is guarded by predicate in operand immD0().
 6173 instruct loadConD0(regD dst, immD0 src) %{
 6174   match(Set dst src);
 6175   ins_cost(100);
 6176   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6177   ins_encode %{
 6178     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6179   %}
 6180   ins_pipe( pipe_slow );
 6181 %}
 6182 
 6183 // Load Stack Slot
 6184 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6185   match(Set dst src);
 6186   ins_cost(125);
 6187 
 6188   format %{ "MOV    $dst,$src" %}
 6189   opcode(0x8B);
 6190   ins_encode( OpcP, RegMem(dst,src));
 6191   ins_pipe( ialu_reg_mem );
 6192 %}
 6193 
 6194 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6195   match(Set dst src);
 6196 
 6197   ins_cost(200);
 6198   format %{ "MOV    $dst.lo,$src\n\t"
 6199             "MOV    $dst.hi,$src+4" %}
 6200   opcode(0x8B, 0x8B);
 6201   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6202   ins_pipe( ialu_mem_long_reg );
 6203 %}
 6204 
 6205 // Load Stack Slot
 6206 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6207   match(Set dst src);
 6208   ins_cost(125);
 6209 
 6210   format %{ "MOV    $dst,$src" %}
 6211   opcode(0x8B);
 6212   ins_encode( OpcP, RegMem(dst,src));
 6213   ins_pipe( ialu_reg_mem );
 6214 %}
 6215 
 6216 // Load Stack Slot
 6217 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6218   match(Set dst src);
 6219   ins_cost(125);
 6220 
 6221   format %{ "FLD_S  $src\n\t"
 6222             "FSTP   $dst" %}
 6223   opcode(0xD9);               /* D9 /0, FLD m32real */
 6224   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6225               Pop_Reg_FPR(dst) );
 6226   ins_pipe( fpu_reg_mem );
 6227 %}
 6228 
 6229 // Load Stack Slot
 6230 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6231   match(Set dst src);
 6232   ins_cost(125);
 6233 
 6234   format %{ "FLD_D  $src\n\t"
 6235             "FSTP   $dst" %}
 6236   opcode(0xDD);               /* DD /0, FLD m64real */
 6237   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6238               Pop_Reg_DPR(dst) );
 6239   ins_pipe( fpu_reg_mem );
 6240 %}
 6241 
 6242 // Prefetch instructions for allocation.
 6243 // Must be safe to execute with invalid address (cannot fault).
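      // AllocatePrefetchInstr selects the variant: 0 emits PREFETCHNTA,
      // 1 emits PREFETCHT0, 2 emits PREFETCHT2, 3 emits PREFETCHW; with
      // UseSSE==0 and AllocatePrefetchInstr!=3 no prefetch is emitted at all.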
 6244 
 6245 instruct prefetchAlloc0( memory mem ) %{
 6246   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6247   match(PrefetchAllocation mem);
 6248   ins_cost(0);
 6249   size(0);
 6250   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6251   ins_encode();
 6252   ins_pipe(empty);
 6253 %}
 6254 
 6255 instruct prefetchAlloc( memory mem ) %{
 6256   predicate(AllocatePrefetchInstr==3);
 6257   match( PrefetchAllocation mem );
 6258   ins_cost(100);
 6259 
 6260   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6261   ins_encode %{
 6262     __ prefetchw($mem$$Address);
 6263   %}
 6264   ins_pipe(ialu_mem);
 6265 %}
 6266 
 6267 instruct prefetchAllocNTA( memory mem ) %{
 6268   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6269   match(PrefetchAllocation mem);
 6270   ins_cost(100);
 6271 
 6272   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6273   ins_encode %{
 6274     __ prefetchnta($mem$$Address);
 6275   %}
 6276   ins_pipe(ialu_mem);
 6277 %}
 6278 
 6279 instruct prefetchAllocT0( memory mem ) %{
 6280   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6281   match(PrefetchAllocation mem);
 6282   ins_cost(100);
 6283 
 6284   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6285   ins_encode %{
 6286     __ prefetcht0($mem$$Address);
 6287   %}
 6288   ins_pipe(ialu_mem);
 6289 %}
 6290 
 6291 instruct prefetchAllocT2( memory mem ) %{
 6292   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6293   match(PrefetchAllocation mem);
 6294   ins_cost(100);
 6295 
 6296   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6297   ins_encode %{
 6298     __ prefetcht2($mem$$Address);
 6299   %}
 6300   ins_pipe(ialu_mem);
 6301 %}
 6302 
 6303 //----------Store Instructions-------------------------------------------------
 6304 
 6305 // Store Byte
 6306 instruct storeB(memory mem, xRegI src) %{
 6307   match(Set mem (StoreB mem src));
 6308 
 6309   ins_cost(125);
 6310   format %{ "MOV8   $mem,$src" %}
 6311   opcode(0x88);
 6312   ins_encode( OpcP, RegMem( src, mem ) );
 6313   ins_pipe( ialu_mem_reg );
 6314 %}
 6315 
 6316 // Store Char/Short
 6317 instruct storeC(memory mem, rRegI src) %{
 6318   match(Set mem (StoreC mem src));
 6319 
 6320   ins_cost(125);
 6321   format %{ "MOV16  $mem,$src" %}
 6322   opcode(0x89, 0x66);
 6323   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6324   ins_pipe( ialu_mem_reg );
 6325 %}
 6326 
 6327 // Store Integer
 6328 instruct storeI(memory mem, rRegI src) %{
 6329   match(Set mem (StoreI mem src));
 6330 
 6331   ins_cost(125);
 6332   format %{ "MOV    $mem,$src" %}
 6333   opcode(0x89);
 6334   ins_encode( OpcP, RegMem( src, mem ) );
 6335   ins_pipe( ialu_mem_reg );
 6336 %}
 6337 
 6338 // Store Long
 6339 instruct storeL(long_memory mem, eRegL src) %{
 6340   predicate(!((StoreLNode*)n)->require_atomic_access());
 6341   match(Set mem (StoreL mem src));
 6342 
 6343   ins_cost(200);
 6344   format %{ "MOV    $mem,$src.lo\n\t"
 6345             "MOV    $mem+4,$src.hi" %}
 6346   opcode(0x89, 0x89);
 6347   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6348   ins_pipe( ialu_mem_long_reg );
 6349 %}
 6350 
 6351 // Store Long to Integer
 6352 instruct storeL2I(memory mem, eRegL src) %{
 6353   match(Set mem (StoreI mem (ConvL2I src)));
 6354 
 6355   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6356   ins_encode %{
 6357     __ movl($mem$$Address, $src$$Register);
 6358   %}
 6359   ins_pipe(ialu_mem_reg);
 6360 %}
 6361 
 6362 // Volatile Store Long.  Must be atomic, so move it into
 6363 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6364 // target address before the store (for null-ptr checks)
 6365 // so the memory operand is used twice in the encoding.
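      // Note: FILD m64 and FISTP m64 move all 64 bits in a single memory
      // access, which is what makes this store atomic where a pair of
      // 32-bit MOVs would not be.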
 6366 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6367   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6368   match(Set mem (StoreL mem src));
 6369   effect( KILL cr );
 6370   ins_cost(400);
 6371   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6372             "FILD   $src\n\t"
 6373             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6374   opcode(0x3B);
 6375   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6376   ins_pipe( fpu_reg_mem );
 6377 %}
 6378 
 6379 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6380   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6381   match(Set mem (StoreL mem src));
 6382   effect( TEMP tmp, KILL cr );
 6383   ins_cost(380);
 6384   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6385             "MOVSD  $tmp,$src\n\t"
 6386             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6387   ins_encode %{
 6388     __ cmpl(rax, $mem$$Address);
 6389     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6390     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6391   %}
 6392   ins_pipe( pipe_slow );
 6393 %}
 6394 
 6395 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6396   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6397   match(Set mem (StoreL mem src));
 6398   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6399   ins_cost(360);
 6400   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6401             "MOVD   $tmp,$src.lo\n\t"
 6402             "MOVD   $tmp2,$src.hi\n\t"
 6403             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6404             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6405   ins_encode %{
 6406     __ cmpl(rax, $mem$$Address);
 6407     __ movdl($tmp$$XMMRegister, $src$$Register);
 6408     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6409     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6410     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6411   %}
 6412   ins_pipe( pipe_slow );
 6413 %}
 6414 
 6415 // Store Pointer; for storing unknown oops and raw pointers
 6416 instruct storeP(memory mem, anyRegP src) %{
 6417   match(Set mem (StoreP mem src));
 6418 
 6419   ins_cost(125);
 6420   format %{ "MOV    $mem,$src" %}
 6421   opcode(0x89);
 6422   ins_encode( OpcP, RegMem( src, mem ) );
 6423   ins_pipe( ialu_mem_reg );
 6424 %}
 6425 
 6426 // Store Integer Immediate
 6427 instruct storeImmI(memory mem, immI src) %{
 6428   match(Set mem (StoreI mem src));
 6429 
 6430   ins_cost(150);
 6431   format %{ "MOV    $mem,$src" %}
 6432   opcode(0xC7);               /* C7 /0 */
 6433   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6434   ins_pipe( ialu_mem_imm );
 6435 %}
 6436 
 6437 // Store Short/Char Immediate
 6438 instruct storeImmI16(memory mem, immI16 src) %{
 6439   predicate(UseStoreImmI16);
 6440   match(Set mem (StoreC mem src));
 6441 
 6442   ins_cost(150);
 6443   format %{ "MOV16  $mem,$src" %}
 6444   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6445   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6446   ins_pipe( ialu_mem_imm );
 6447 %}
 6448 
 6449 // Store Pointer Immediate; null pointers or constant oops that do not
 6450 // need card-mark barriers.
 6451 instruct storeImmP(memory mem, immP src) %{
 6452   match(Set mem (StoreP mem src));
 6453 
 6454   ins_cost(150);
 6455   format %{ "MOV    $mem,$src" %}
 6456   opcode(0xC7);               /* C7 /0 */
 6457   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6458   ins_pipe( ialu_mem_imm );
 6459 %}
 6460 
 6461 // Store Byte Immediate
 6462 instruct storeImmB(memory mem, immI8 src) %{
 6463   match(Set mem (StoreB mem src));
 6464 
 6465   ins_cost(150);
 6466   format %{ "MOV8   $mem,$src" %}
 6467   opcode(0xC6);               /* C6 /0 */
 6468   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6469   ins_pipe( ialu_mem_imm );
 6470 %}
 6471 
 6472 // Store CMS card-mark Immediate
 6473 instruct storeImmCM(memory mem, immI8 src) %{
 6474   match(Set mem (StoreCM mem src));
 6475 
 6476   ins_cost(150);
 6477   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6478   opcode(0xC6);               /* C6 /0 */
 6479   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6480   ins_pipe( ialu_mem_imm );
 6481 %}
 6482 
 6483 // Store Double
 6484 instruct storeDPR( memory mem, regDPR1 src) %{
 6485   predicate(UseSSE<=1);
 6486   match(Set mem (StoreD mem src));
 6487 
 6488   ins_cost(100);
 6489   format %{ "FST_D  $mem,$src" %}
 6490   opcode(0xDD);       /* DD /2 */
 6491   ins_encode( enc_FPR_store(mem,src) );
 6492   ins_pipe( fpu_mem_reg );
 6493 %}
 6494 
 6495 // Store double does rounding on x86
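      // The x87 stack holds values in 80-bit extended precision; FST_D
      // rounds to 64-bit double as part of the store, which is what
      // implements the RoundDouble node matched below.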
 6496 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6497   predicate(UseSSE<=1);
 6498   match(Set mem (StoreD mem (RoundDouble src)));
 6499 
 6500   ins_cost(100);
 6501   format %{ "FST_D  $mem,$src\t# round" %}
 6502   opcode(0xDD);       /* DD /2 */
 6503   ins_encode( enc_FPR_store(mem,src) );
 6504   ins_pipe( fpu_mem_reg );
 6505 %}
 6506 
 6507 // Store XMM register to memory (double-precision floating point)
 6508 // MOVSD instruction
 6509 instruct storeD(memory mem, regD src) %{
 6510   predicate(UseSSE>=2);
 6511   match(Set mem (StoreD mem src));
 6512   ins_cost(95);
 6513   format %{ "MOVSD  $mem,$src" %}
 6514   ins_encode %{
 6515     __ movdbl($mem$$Address, $src$$XMMRegister);
 6516   %}
 6517   ins_pipe( pipe_slow );
 6518 %}
 6519 
 6520 // Store XMM register to memory (single-precision floating point)
 6521 // MOVSS instruction
 6522 instruct storeF(memory mem, regF src) %{
 6523   predicate(UseSSE>=1);
 6524   match(Set mem (StoreF mem src));
 6525   ins_cost(95);
 6526   format %{ "MOVSS  $mem,$src" %}
 6527   ins_encode %{
 6528     __ movflt($mem$$Address, $src$$XMMRegister);
 6529   %}
 6530   ins_pipe( pipe_slow );
 6531 %}
 6532 
 6533 
 6534 // Store Float
 6535 instruct storeFPR( memory mem, regFPR1 src) %{
 6536   predicate(UseSSE==0);
 6537   match(Set mem (StoreF mem src));
 6538 
 6539   ins_cost(100);
 6540   format %{ "FST_S  $mem,$src" %}
 6541   opcode(0xD9);       /* D9 /2 */
 6542   ins_encode( enc_FPR_store(mem,src) );
 6543   ins_pipe( fpu_mem_reg );
 6544 %}
 6545 
 6546 // Store Float does rounding on x86
 6547 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6548   predicate(UseSSE==0);
 6549   match(Set mem (StoreF mem (RoundFloat src)));
 6550 
 6551   ins_cost(100);
 6552   format %{ "FST_S  $mem,$src\t# round" %}
 6553   opcode(0xD9);       /* D9 /2 */
 6554   ins_encode( enc_FPR_store(mem,src) );
 6555   ins_pipe( fpu_mem_reg );
 6556 %}
 6557 
 6558 // Store Float does rounding on x86; here the 32-bit store also performs the double-to-float conversion
 6559 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6560   predicate(UseSSE<=1);
 6561   match(Set mem (StoreF mem (ConvD2F src)));
 6562 
 6563   ins_cost(100);
 6564   format %{ "FST_S  $mem,$src\t# D-round" %}
 6565   opcode(0xD9);       /* D9 /2 */
 6566   ins_encode( enc_FPR_store(mem,src) );
 6567   ins_pipe( fpu_mem_reg );
 6568 %}
 6569 
 6570 // Store immediate Float value (faster than storing from an FPU register)
 6571 // The instruction usage is guarded by predicate in operand immFPR().
 6572 instruct storeFPR_imm( memory mem, immFPR src) %{
 6573   match(Set mem (StoreF mem src));
 6574 
 6575   ins_cost(50);
 6576   format %{ "MOV    $mem,$src\t# store float" %}
 6577   opcode(0xC7);               /* C7 /0 */
 6578   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6579   ins_pipe( ialu_mem_imm );
 6580 %}
 6581 
 6582 // Store immediate Float value (faster than storing from an XMM register)
 6583 // The instruction usage is guarded by predicate in operand immF().
 6584 instruct storeF_imm( memory mem, immF src) %{
 6585   match(Set mem (StoreF mem src));
 6586 
 6587   ins_cost(50);
 6588   format %{ "MOV    $mem,$src\t# store float" %}
 6589   opcode(0xC7);               /* C7 /0 */
 6590   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6591   ins_pipe( ialu_mem_imm );
 6592 %}
 6593 
 6594 // Store Integer to stack slot
 6595 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6596   match(Set dst src);
 6597 
 6598   ins_cost(100);
 6599   format %{ "MOV    $dst,$src" %}
 6600   opcode(0x89);
 6601   ins_encode( OpcPRegSS( dst, src ) );
 6602   ins_pipe( ialu_mem_reg );
 6603 %}
 6604 
 6605 // Store Pointer to stack slot
 6606 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6607   match(Set dst src);
 6608 
 6609   ins_cost(100);
 6610   format %{ "MOV    $dst,$src" %}
 6611   opcode(0x89);
 6612   ins_encode( OpcPRegSS( dst, src ) );
 6613   ins_pipe( ialu_mem_reg );
 6614 %}
 6615 
 6616 // Store Long to stack slot
 6617 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6618   match(Set dst src);
 6619 
 6620   ins_cost(200);
 6621   format %{ "MOV    $dst,$src.lo\n\t"
 6622             "MOV    $dst+4,$src.hi" %}
 6623   opcode(0x89, 0x89);
 6624   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6625   ins_pipe( ialu_mem_long_reg );
 6626 %}
 6627 
 6628 //----------MemBar Instructions-----------------------------------------------
 6629 // Memory barrier flavors
 6630 
 6631 instruct membar_acquire() %{
 6632   match(MemBarAcquire);
 6633   match(LoadFence);
 6634   ins_cost(400);
 6635 
 6636   size(0);
 6637   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6638   ins_encode();
 6639   ins_pipe(empty);
 6640 %}
 6641 
 6642 instruct membar_acquire_lock() %{
 6643   match(MemBarAcquireLock);
 6644   ins_cost(0);
 6645 
 6646   size(0);
 6647   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6648   ins_encode( );
 6649   ins_pipe(empty);
 6650 %}
 6651 
 6652 instruct membar_release() %{
 6653   match(MemBarRelease);
 6654   match(StoreFence);
 6655   ins_cost(400);
 6656 
 6657   size(0);
 6658   format %{ "MEMBAR-release ! (empty encoding)" %}
 6659   ins_encode( );
 6660   ins_pipe(empty);
 6661 %}
 6662 
 6663 instruct membar_release_lock() %{
 6664   match(MemBarReleaseLock);
 6665   ins_cost(0);
 6666 
 6667   size(0);
 6668   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6669   ins_encode( );
 6670   ins_pipe(empty);
 6671 %}
 6672 
 6673 instruct membar_volatile(eFlagsReg cr) %{
 6674   match(MemBarVolatile);
 6675   effect(KILL cr);
 6676   ins_cost(400);
 6677 
 6678   format %{
 6679     $$template
 6680     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6681   %}
 6682   ins_encode %{
 6683     __ membar(Assembler::StoreLoad);
 6684   %}
 6685   ins_pipe(pipe_slow);
 6686 %}
 6687 
 6688 instruct unnecessary_membar_volatile() %{
 6689   match(MemBarVolatile);
 6690   predicate(Matcher::post_store_load_barrier(n));
 6691   ins_cost(0);
 6692 
 6693   size(0);
 6694   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6695   ins_encode( );
 6696   ins_pipe(empty);
 6697 %}
 6698 
 6699 instruct membar_storestore() %{
 6700   match(MemBarStoreStore);
 6701   match(StoreStoreFence);
 6702   ins_cost(0);
 6703 
 6704   size(0);
 6705   format %{ "MEMBAR-storestore (empty encoding)" %}
 6706   ins_encode( );
 6707   ins_pipe(empty);
 6708 %}
 6709 
 6710 //----------Move Instructions--------------------------------------------------
 6711 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6712   match(Set dst (CastX2P src));
 6713   format %{ "# X2P  $dst, $src" %}
 6714   ins_encode( /*empty encoding*/ );
 6715   ins_cost(0);
 6716   ins_pipe(empty);
 6717 %}
 6718 
 6719 instruct castP2X(rRegI dst, eRegP src ) %{
 6720   match(Set dst (CastP2X src));
 6721   ins_cost(50);
 6722   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6723   ins_encode( enc_Copy( dst, src) );
 6724   ins_pipe( ialu_reg_reg );
 6725 %}
 6726 
 6727 //----------Conditional Move---------------------------------------------------
 6728 // Conditional move
 6729 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6730   predicate(!VM_Version::supports_cmov() );
 6731   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6732   ins_cost(200);
 6733   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6734             "MOV    $dst,$src\n"
 6735       "skip:" %}
 6736   ins_encode %{
 6737     Label Lskip;
 6738     // Invert sense of branch from sense of CMOV
 6739     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6740     __ movl($dst$$Register, $src$$Register);
 6741     __ bind(Lskip);
 6742   %}
 6743   ins_pipe( pipe_cmov_reg );
 6744 %}
 6745 
 6746 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6747   predicate(!VM_Version::supports_cmov() );
 6748   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6749   ins_cost(200);
 6750   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6751             "MOV    $dst,$src\n"
 6752       "skip:" %}
 6753   ins_encode %{
 6754     Label Lskip;
 6755     // Invert sense of branch from sense of CMOV
 6756     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6757     __ movl($dst$$Register, $src$$Register);
 6758     __ bind(Lskip);
 6759   %}
 6760   ins_pipe( pipe_cmov_reg );
 6761 %}
 6762 
 6763 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6764   predicate(VM_Version::supports_cmov() );
 6765   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6766   ins_cost(200);
 6767   format %{ "CMOV$cop $dst,$src" %}
 6768   opcode(0x0F,0x40);
 6769   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6770   ins_pipe( pipe_cmov_reg );
 6771 %}
 6772 
 6773 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6774   predicate(VM_Version::supports_cmov() );
 6775   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6776   ins_cost(200);
 6777   format %{ "CMOV$cop $dst,$src" %}
 6778   opcode(0x0F,0x40);
 6779   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6780   ins_pipe( pipe_cmov_reg );
 6781 %}
 6782 
 6783 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6784   predicate(VM_Version::supports_cmov() );
 6785   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6786   ins_cost(200);
 6787   expand %{
 6788     cmovI_regU(cop, cr, dst, src);
 6789   %}
 6790 %}
 6791 
 6792 // Conditional move
 6793 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6794   predicate(VM_Version::supports_cmov() );
 6795   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6796   ins_cost(250);
 6797   format %{ "CMOV$cop $dst,$src" %}
 6798   opcode(0x0F,0x40);
 6799   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6800   ins_pipe( pipe_cmov_mem );
 6801 %}
 6802 
 6803 // Conditional move
 6804 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6805   predicate(VM_Version::supports_cmov() );
 6806   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6807   ins_cost(250);
 6808   format %{ "CMOV$cop $dst,$src" %}
 6809   opcode(0x0F,0x40);
 6810   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6811   ins_pipe( pipe_cmov_mem );
 6812 %}
 6813 
 6814 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6815   predicate(VM_Version::supports_cmov() );
 6816   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6817   ins_cost(250);
 6818   expand %{
 6819     cmovI_memU(cop, cr, dst, src);
 6820   %}
 6821 %}
 6822 
 6823 // Conditional move
 6824 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6825   predicate(VM_Version::supports_cmov() );
 6826   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6827   ins_cost(200);
 6828   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6829   opcode(0x0F,0x40);
 6830   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6831   ins_pipe( pipe_cmov_reg );
 6832 %}
 6833 
 6834 // Conditional move (non-P6 version)
 6835 // Note:  a CMoveP is generated for  stubs and native wrappers
 6836 //        regardless of whether we are on a P6, so we
 6837 //        emulate a cmov here
 6838 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6839   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6840   ins_cost(300);
 6841   format %{ "Jn$cop   skip\n\t"
 6842           "MOV    $dst,$src\t# pointer\n"
 6843       "skip:" %}
 6844   opcode(0x8b);
 6845   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6846   ins_pipe( pipe_cmov_reg );
 6847 %}
 6848 
 6849 // Conditional move
 6850 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6851   predicate(VM_Version::supports_cmov() );
 6852   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6853   ins_cost(200);
 6854   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6855   opcode(0x0F,0x40);
 6856   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6857   ins_pipe( pipe_cmov_reg );
 6858 %}
 6859 
 6860 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6861   predicate(VM_Version::supports_cmov() );
 6862   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6863   ins_cost(200);
 6864   expand %{
 6865     cmovP_regU(cop, cr, dst, src);
 6866   %}
 6867 %}
 6868 
 6869 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6870 // correctly meets the two pointer arguments; one is an incoming
 6871 // register but the other is a memory operand.  ALSO appears to
 6872 // be buggy with implicit null checks.
 6873 //
 6874 //// Conditional move
 6875 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6876 //  predicate(VM_Version::supports_cmov() );
 6877 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6878 //  ins_cost(250);
 6879 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6880 //  opcode(0x0F,0x40);
 6881 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6882 //  ins_pipe( pipe_cmov_mem );
 6883 //%}
 6884 //
 6885 //// Conditional move
 6886 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6887 //  predicate(VM_Version::supports_cmov() );
 6888 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6889 //  ins_cost(250);
 6890 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6891 //  opcode(0x0F,0x40);
 6892 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6893 //  ins_pipe( pipe_cmov_mem );
 6894 //%}
 6895 
 6896 // Conditional move
 6897 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6898   predicate(UseSSE<=1);
 6899   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6900   ins_cost(200);
 6901   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6902   opcode(0xDA);
 6903   ins_encode( enc_cmov_dpr(cop,src) );
 6904   ins_pipe( pipe_cmovDPR_reg );
 6905 %}
 6906 
 6907 // Conditional move
 6908 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6909   predicate(UseSSE==0);
 6910   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6911   ins_cost(200);
 6912   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6913   opcode(0xDA);
 6914   ins_encode( enc_cmov_dpr(cop,src) );
 6915   ins_pipe( pipe_cmovDPR_reg );
 6916 %}
 6917 
 6918 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
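      // FCMOVcc only tests CF/ZF/PF (the B/E/BE/U conditions and their
      // negations), so a signed integer compare has to be handled with a
      // short branch around the register move instead.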
 6919 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6920   predicate(UseSSE<=1);
 6921   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6922   ins_cost(200);
 6923   format %{ "Jn$cop   skip\n\t"
 6924             "MOV    $dst,$src\t# double\n"
 6925       "skip:" %}
 6926   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6927   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6928   ins_pipe( pipe_cmovDPR_reg );
 6929 %}
 6930 
 6931 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6932 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6933   predicate(UseSSE==0);
 6934   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6935   ins_cost(200);
 6936   format %{ "Jn$cop    skip\n\t"
 6937             "MOV    $dst,$src\t# float\n"
 6938       "skip:" %}
 6939   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6940   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6941   ins_pipe( pipe_cmovDPR_reg );
 6942 %}
 6943 
 6944 // No CMOVE with SSE/SSE2
 6945 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6946   predicate (UseSSE>=1);
 6947   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6948   ins_cost(200);
 6949   format %{ "Jn$cop   skip\n\t"
 6950             "MOVSS  $dst,$src\t# float\n"
 6951       "skip:" %}
 6952   ins_encode %{
 6953     Label skip;
 6954     // Invert sense of branch from sense of CMOV
 6955     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6956     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6957     __ bind(skip);
 6958   %}
 6959   ins_pipe( pipe_slow );
 6960 %}
 6961 
 6962 // No CMOVE with SSE/SSE2
 6963 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6964   predicate (UseSSE>=2);
 6965   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6966   ins_cost(200);
 6967   format %{ "Jn$cop   skip\n\t"
 6968             "MOVSD  $dst,$src\t# double\n"
 6969       "skip:" %}
 6970   ins_encode %{
 6971     Label skip;
 6972     // Invert sense of branch from sense of CMOV
 6973     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6974     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6975     __ bind(skip);
 6976   %}
 6977   ins_pipe( pipe_slow );
 6978 %}
 6979 
 6980 // unsigned version
 6981 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6982   predicate (UseSSE>=1);
 6983   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6984   ins_cost(200);
 6985   format %{ "Jn$cop   skip\n\t"
 6986             "MOVSS  $dst,$src\t# float\n"
 6987       "skip:" %}
 6988   ins_encode %{
 6989     Label skip;
 6990     // Invert sense of branch from sense of CMOV
 6991     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6992     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6993     __ bind(skip);
 6994   %}
 6995   ins_pipe( pipe_slow );
 6996 %}
 6997 
 6998 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6999   predicate (UseSSE>=1);
 7000   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 7001   ins_cost(200);
 7002   expand %{
 7003     fcmovF_regU(cop, cr, dst, src);
 7004   %}
 7005 %}
 7006 
 7007 // unsigned version
 7008 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 7009   predicate (UseSSE>=2);
 7010   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7011   ins_cost(200);
 7012   format %{ "Jn$cop   skip\n\t"
 7013             "MOVSD  $dst,$src\t# double\n"
 7014       "skip:" %}
 7015   ins_encode %{
 7016     Label skip;
 7017     // Invert sense of branch from sense of CMOV
 7018     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 7019     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 7020     __ bind(skip);
 7021   %}
 7022   ins_pipe( pipe_slow );
 7023 %}
 7024 
 7025 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 7026   predicate (UseSSE>=2);
 7027   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7028   ins_cost(200);
 7029   expand %{
 7030     fcmovD_regU(cop, cr, dst, src);
 7031   %}
 7032 %}
 7033 
 7034 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 7035   predicate(VM_Version::supports_cmov() );
 7036   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7037   ins_cost(200);
 7038   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7039             "CMOV$cop $dst.hi,$src.hi" %}
 7040   opcode(0x0F,0x40);
 7041   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7042   ins_pipe( pipe_cmov_reg_long );
 7043 %}
 7044 
 7045 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7046   predicate(VM_Version::supports_cmov() );
 7047   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7048   ins_cost(200);
 7049   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7050             "CMOV$cop $dst.hi,$src.hi" %}
 7051   opcode(0x0F,0x40);
 7052   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7053   ins_pipe( pipe_cmov_reg_long );
 7054 %}
 7055 
 7056 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7057   predicate(VM_Version::supports_cmov() );
 7058   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7059   ins_cost(200);
 7060   expand %{
 7061     cmovL_regU(cop, cr, dst, src);
 7062   %}
 7063 %}
 7064 
 7065 //----------Arithmetic Instructions--------------------------------------------
 7066 //----------Addition Instructions----------------------------------------------
 7067 
 7068 // Integer Addition Instructions
 7069 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7070   match(Set dst (AddI dst src));
 7071   effect(KILL cr);
 7072 
 7073   size(2);
 7074   format %{ "ADD    $dst,$src" %}
 7075   opcode(0x03);
 7076   ins_encode( OpcP, RegReg( dst, src) );
 7077   ins_pipe( ialu_reg_reg );
 7078 %}
 7079 
 7080 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7081   match(Set dst (AddI dst src));
 7082   effect(KILL cr);
 7083 
 7084   format %{ "ADD    $dst,$src" %}
 7085   opcode(0x81, 0x00); /* /0 id */
 7086   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7087   ins_pipe( ialu_reg );
 7088 %}
 7089 
 7090 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7091   predicate(UseIncDec);
 7092   match(Set dst (AddI dst src));
 7093   effect(KILL cr);
 7094 
 7095   size(1);
 7096   format %{ "INC    $dst" %}
 7097   opcode(0x40); /* INC r32 is 0x40 + register encoding */
 7098   ins_encode( Opc_plus( primary, dst ) );
 7099   ins_pipe( ialu_reg );
 7100 %}
 7101 
 7102 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7103   match(Set dst (AddI src0 src1));
 7104   ins_cost(110);
 7105 
 7106   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7107   opcode(0x8D); /* 0x8D /r */
 7108   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7109   ins_pipe( ialu_reg_reg );
 7110 %}
 7111 
 7112 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7113   match(Set dst (AddP src0 src1));
 7114   ins_cost(110);
 7115 
 7116   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7117   opcode(0x8D); /* 0x8D /r */
 7118   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7119   ins_pipe( ialu_reg_reg );
 7120 %}
 7121 
 7122 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7123   predicate(UseIncDec);
 7124   match(Set dst (AddI dst src));
 7125   effect(KILL cr);
 7126 
 7127   size(1);
 7128   format %{ "DEC    $dst" %}
 7129   opcode(0x48); /* DEC r32 is 0x48 + register encoding */
 7130   ins_encode( Opc_plus( primary, dst ) );
 7131   ins_pipe( ialu_reg );
 7132 %}
 7133 
 7134 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7135   match(Set dst (AddP dst src));
 7136   effect(KILL cr);
 7137 
 7138   size(2);
 7139   format %{ "ADD    $dst,$src" %}
 7140   opcode(0x03);
 7141   ins_encode( OpcP, RegReg( dst, src) );
 7142   ins_pipe( ialu_reg_reg );
 7143 %}
 7144 
 7145 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7146   match(Set dst (AddP dst src));
 7147   effect(KILL cr);
 7148 
 7149   format %{ "ADD    $dst,$src" %}
 7150   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7151   // ins_encode( RegImm( dst, src) );
 7152   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7153   ins_pipe( ialu_reg );
 7154 %}
 7155 
 7156 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7157   match(Set dst (AddI dst (LoadI src)));
 7158   effect(KILL cr);
 7159 
 7160   ins_cost(150);
 7161   format %{ "ADD    $dst,$src" %}
 7162   opcode(0x03);
 7163   ins_encode( OpcP, RegMem( dst, src) );
 7164   ins_pipe( ialu_reg_mem );
 7165 %}
 7166 
 7167 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7168   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7169   effect(KILL cr);
 7170 
 7171   ins_cost(150);
 7172   format %{ "ADD    $dst,$src" %}
 7173   opcode(0x01);  /* Opcode 01 /r */
 7174   ins_encode( OpcP, RegMem( src, dst ) );
 7175   ins_pipe( ialu_mem_reg );
 7176 %}
 7177 
 7178 // Add Memory with Immediate
 7179 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7180   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7181   effect(KILL cr);
 7182 
 7183   ins_cost(125);
 7184   format %{ "ADD    $dst,$src" %}
 7185   opcode(0x81);               /* Opcode 81 /0 id */
 7186   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7187   ins_pipe( ialu_mem_imm );
 7188 %}
 7189 
 7190 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7191   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7192   effect(KILL cr);
 7193 
 7194   ins_cost(125);
 7195   format %{ "INC    $dst" %}
 7196   opcode(0xFF);               /* Opcode FF /0 */
 7197   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7198   ins_pipe( ialu_mem_imm );
 7199 %}
 7200 
 7201 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7202   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7203   effect(KILL cr);
 7204 
 7205   ins_cost(125);
 7206   format %{ "DEC    $dst" %}
 7207   opcode(0xFF);               /* Opcode FF /1 */
 7208   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7209   ins_pipe( ialu_mem_imm );
 7210 %}
 7211 
 7212 
 7213 instruct checkCastPP( eRegP dst ) %{
 7214   match(Set dst (CheckCastPP dst));
 7215 
 7216   size(0);
 7217   format %{ "#checkcastPP of $dst" %}
 7218   ins_encode( /*empty encoding*/ );
 7219   ins_pipe( empty );
 7220 %}
 7221 
 7222 instruct castPP( eRegP dst ) %{
 7223   match(Set dst (CastPP dst));
 7224   format %{ "#castPP of $dst" %}
 7225   ins_encode( /*empty encoding*/ );
 7226   ins_pipe( empty );
 7227 %}
 7228 
 7229 instruct castII( rRegI dst ) %{
 7230   match(Set dst (CastII dst));
 7231   format %{ "#castII of $dst" %}
 7232   ins_encode( /*empty encoding*/ );
 7233   ins_cost(0);
 7234   ins_pipe( empty );
 7235 %}
 7236 
 7237 instruct castLL( eRegL dst ) %{
 7238   match(Set dst (CastLL dst));
 7239   format %{ "#castLL of $dst" %}
 7240   ins_encode( /*empty encoding*/ );
 7241   ins_cost(0);
 7242   ins_pipe( empty );
 7243 %}
 7244 
 7245 instruct castFF( regF dst ) %{
 7246   predicate(UseSSE >= 1);
 7247   match(Set dst (CastFF dst));
 7248   format %{ "#castFF of $dst" %}
 7249   ins_encode( /*empty encoding*/ );
 7250   ins_cost(0);
 7251   ins_pipe( empty );
 7252 %}
 7253 
 7254 instruct castDD( regD dst ) %{
 7255   predicate(UseSSE >= 2);
 7256   match(Set dst (CastDD dst));
 7257   format %{ "#castDD of $dst" %}
 7258   ins_encode( /*empty encoding*/ );
 7259   ins_cost(0);
 7260   ins_pipe( empty );
 7261 %}
 7262 
 7263 instruct castFF_PR( regFPR dst ) %{
 7264   predicate(UseSSE < 1);
 7265   match(Set dst (CastFF dst));
 7266   format %{ "#castFF of $dst" %}
 7267   ins_encode( /*empty encoding*/ );
 7268   ins_cost(0);
 7269   ins_pipe( empty );
 7270 %}
 7271 
 7272 instruct castDD_PR( regDPR dst ) %{
 7273   predicate(UseSSE < 2);
 7274   match(Set dst (CastDD dst));
 7275   format %{ "#castDD of $dst" %}
 7276   ins_encode( /*empty encoding*/ );
 7277   ins_cost(0);
 7278   ins_pipe( empty );
 7279 %}
 7280 
 7281 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7282 
 7283 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7284   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7285   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7286   effect(KILL cr, KILL oldval);
 7287   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7288             "MOV    $res,0\n\t"
 7289             "JNE,s  fail\n\t"
 7290             "MOV    $res,1\n"
 7291           "fail:" %}
 7292   ins_encode( enc_cmpxchg8(mem_ptr),
 7293               enc_flags_ne_to_boolean(res) );
 7294   ins_pipe( pipe_cmpxchg );
 7295 %}
 7296 
 7297 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7298   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7299   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7300   effect(KILL cr, KILL oldval);
 7301   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7302             "MOV    $res,0\n\t"
 7303             "JNE,s  fail\n\t"
 7304             "MOV    $res,1\n"
 7305           "fail:" %}
 7306   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7307   ins_pipe( pipe_cmpxchg );
 7308 %}
 7309 
 7310 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7311   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7312   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7313   effect(KILL cr, KILL oldval);
 7314   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7315             "MOV    $res,0\n\t"
 7316             "JNE,s  fail\n\t"
 7317             "MOV    $res,1\n"
 7318           "fail:" %}
 7319   ins_encode( enc_cmpxchgb(mem_ptr),
 7320               enc_flags_ne_to_boolean(res) );
 7321   ins_pipe( pipe_cmpxchg );
 7322 %}
 7323 
 7324 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7325   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7326   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7327   effect(KILL cr, KILL oldval);
 7328   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7329             "MOV    $res,0\n\t"
 7330             "JNE,s  fail\n\t"
 7331             "MOV    $res,1\n"
 7332           "fail:" %}
 7333   ins_encode( enc_cmpxchgw(mem_ptr),
 7334               enc_flags_ne_to_boolean(res) );
 7335   ins_pipe( pipe_cmpxchg );
 7336 %}
 7337 
 7338 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7339   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7340   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7341   effect(KILL cr, KILL oldval);
 7342   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7343             "MOV    $res,0\n\t"
 7344             "JNE,s  fail\n\t"
 7345             "MOV    $res,1\n"
 7346           "fail:" %}
 7347   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7348   ins_pipe( pipe_cmpxchg );
 7349 %}
 7350 
 7351 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7352   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7353   effect(KILL cr);
 7354   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7355   ins_encode( enc_cmpxchg8(mem_ptr) );
 7356   ins_pipe( pipe_cmpxchg );
 7357 %}
 7358 
 7359 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7360   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7361   effect(KILL cr);
 7362   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7363   ins_encode( enc_cmpxchg(mem_ptr) );
 7364   ins_pipe( pipe_cmpxchg );
 7365 %}
 7366 
 7367 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7368   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7369   effect(KILL cr);
 7370   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7371   ins_encode( enc_cmpxchgb(mem_ptr) );
 7372   ins_pipe( pipe_cmpxchg );
 7373 %}
 7374 
 7375 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7376   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7377   effect(KILL cr);
 7378   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7379   ins_encode( enc_cmpxchgw(mem_ptr) );
 7380   ins_pipe( pipe_cmpxchg );
 7381 %}
 7382 
 7383 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7384   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7385   effect(KILL cr);
 7386   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7387   ins_encode( enc_cmpxchg(mem_ptr) );
 7388   ins_pipe( pipe_cmpxchg );
 7389 %}
 7390 
 7391 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7392   predicate(n->as_LoadStore()->result_not_used());
 7393   match(Set dummy (GetAndAddB mem add));
 7394   effect(KILL cr);
 7395   format %{ "ADDB  [$mem],$add" %}
 7396   ins_encode %{
 7397     __ lock();
 7398     __ addb($mem$$Address, $add$$constant);
 7399   %}
 7400   ins_pipe( pipe_cmpxchg );
 7401 %}
 7402 
 7403 // Important to match to xRegI: only 8-bit regs.
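      // In 32-bit mode only EAX, EBX, ECX and EDX have byte-addressable
      // low halves (AL/BL/CL/DL), so the operand class must exclude ESI/EDI.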
 7404 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7405   match(Set newval (GetAndAddB mem newval));
 7406   effect(KILL cr);
 7407   format %{ "XADDB  [$mem],$newval" %}
 7408   ins_encode %{
 7409     __ lock();
 7410     __ xaddb($mem$$Address, $newval$$Register);
 7411   %}
 7412   ins_pipe( pipe_cmpxchg );
 7413 %}
 7414 
 7415 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7416   predicate(n->as_LoadStore()->result_not_used());
 7417   match(Set dummy (GetAndAddS mem add));
 7418   effect(KILL cr);
 7419   format %{ "ADDS  [$mem],$add" %}
 7420   ins_encode %{
 7421     __ lock();
 7422     __ addw($mem$$Address, $add$$constant);
 7423   %}
 7424   ins_pipe( pipe_cmpxchg );
 7425 %}
 7426 
 7427 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7428   match(Set newval (GetAndAddS mem newval));
 7429   effect(KILL cr);
 7430   format %{ "XADDS  [$mem],$newval" %}
 7431   ins_encode %{
 7432     __ lock();
 7433     __ xaddw($mem$$Address, $newval$$Register);
 7434   %}
 7435   ins_pipe( pipe_cmpxchg );
 7436 %}
 7437 
 7438 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7439   predicate(n->as_LoadStore()->result_not_used());
 7440   match(Set dummy (GetAndAddI mem add));
 7441   effect(KILL cr);
 7442   format %{ "ADDL  [$mem],$add" %}
 7443   ins_encode %{
 7444     __ lock();
 7445     __ addl($mem$$Address, $add$$constant);
 7446   %}
 7447   ins_pipe( pipe_cmpxchg );
 7448 %}
 7449 
 7450 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7451   match(Set newval (GetAndAddI mem newval));
 7452   effect(KILL cr);
 7453   format %{ "XADDL  [$mem],$newval" %}
 7454   ins_encode %{
 7455     __ lock();
 7456     __ xaddl($mem$$Address, $newval$$Register);
 7457   %}
 7458   ins_pipe( pipe_cmpxchg );
 7459 %}
 7460 
 7461 // Important to match to xRegI: only 8-bit regs.
 7462 instruct xchgB( memory mem, xRegI newval) %{
 7463   match(Set newval (GetAndSetB mem newval));
 7464   format %{ "XCHGB  $newval,[$mem]" %}
 7465   ins_encode %{
 7466     __ xchgb($newval$$Register, $mem$$Address);
 7467   %}
 7468   ins_pipe( pipe_cmpxchg );
 7469 %}
 7470 
 7471 instruct xchgS( memory mem, rRegI newval) %{
 7472   match(Set newval (GetAndSetS mem newval));
 7473   format %{ "XCHGW  $newval,[$mem]" %}
 7474   ins_encode %{
 7475     __ xchgw($newval$$Register, $mem$$Address);
 7476   %}
 7477   ins_pipe( pipe_cmpxchg );
 7478 %}
 7479 
 7480 instruct xchgI( memory mem, rRegI newval) %{
 7481   match(Set newval (GetAndSetI mem newval));
 7482   format %{ "XCHGL  $newval,[$mem]" %}
 7483   ins_encode %{
 7484     __ xchgl($newval$$Register, $mem$$Address);
 7485   %}
 7486   ins_pipe( pipe_cmpxchg );
 7487 %}
 7488 
 7489 instruct xchgP( memory mem, pRegP newval) %{
 7490   match(Set newval (GetAndSetP mem newval));
 7491   format %{ "XCHGL  $newval,[$mem]" %}
 7492   ins_encode %{
 7493     __ xchgl($newval$$Register, $mem$$Address);
 7494   %}
 7495   ins_pipe( pipe_cmpxchg );
 7496 %}
 7497 
 7498 //----------Subtraction Instructions-------------------------------------------
 7499 
 7500 // Integer Subtraction Instructions
 7501 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7502   match(Set dst (SubI dst src));
 7503   effect(KILL cr);
 7504 
 7505   size(2);
 7506   format %{ "SUB    $dst,$src" %}
 7507   opcode(0x2B);
 7508   ins_encode( OpcP, RegReg( dst, src) );
 7509   ins_pipe( ialu_reg_reg );
 7510 %}
 7511 
 7512 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7513   match(Set dst (SubI dst src));
 7514   effect(KILL cr);
 7515 
 7516   format %{ "SUB    $dst,$src" %}
 7517   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7518   // ins_encode( RegImm( dst, src) );
 7519   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7520   ins_pipe( ialu_reg );
 7521 %}
 7522 
 7523 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7524   match(Set dst (SubI dst (LoadI src)));
 7525   effect(KILL cr);
 7526 
 7527   ins_cost(150);
 7528   format %{ "SUB    $dst,$src" %}
 7529   opcode(0x2B);
 7530   ins_encode( OpcP, RegMem( dst, src) );
 7531   ins_pipe( ialu_reg_mem );
 7532 %}
 7533 
 7534 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7535   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7536   effect(KILL cr);
 7537 
 7538   ins_cost(150);
 7539   format %{ "SUB    $dst,$src" %}
 7540   opcode(0x29);  /* Opcode 29 /r */
 7541   ins_encode( OpcP, RegMem( src, dst ) );
 7542   ins_pipe( ialu_mem_reg );
 7543 %}
 7544 
 7545 // Subtract from a pointer
 7546 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7547   match(Set dst (AddP dst (SubI zero src)));
 7548   effect(KILL cr);
 7549 
 7550   size(2);
 7551   format %{ "SUB    $dst,$src" %}
 7552   opcode(0x2B);
 7553   ins_encode( OpcP, RegReg( dst, src) );
 7554   ins_pipe( ialu_reg_reg );
 7555 %}
 7556 
 7557 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7558   match(Set dst (SubI zero dst));
 7559   effect(KILL cr);
 7560 
 7561   size(2);
 7562   format %{ "NEG    $dst" %}
 7563   opcode(0xF7,0x03);  // Opcode F7 /3
 7564   ins_encode( OpcP, RegOpc( dst ) );
 7565   ins_pipe( ialu_reg );
 7566 %}
 7567 
 7568 //----------Multiplication/Division Instructions-------------------------------
 7569 // Integer Multiplication Instructions
 7570 // Multiply Register
 7571 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7572   match(Set dst (MulI dst src));
 7573   effect(KILL cr);
 7574 
 7575   size(3);
 7576   ins_cost(300);
 7577   format %{ "IMUL   $dst,$src" %}
 7578   opcode(0xAF, 0x0F);
 7579   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7580   ins_pipe( ialu_reg_reg_alu0 );
 7581 %}
 7582 
 7583 // Multiply 32-bit Immediate
 7584 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7585   match(Set dst (MulI src imm));
 7586   effect(KILL cr);
 7587 
 7588   ins_cost(300);
 7589   format %{ "IMUL   $dst,$src,$imm" %}
 7590   opcode(0x69);  /* 69 /r id */
 7591   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7592   ins_pipe( ialu_reg_reg_alu0 );
 7593 %}
 7594 
 7595 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7596   match(Set dst src);
 7597   effect(KILL cr);
 7598 
 7599   // Note that this is artificially increased to make it more expensive than loadConL
 7600   ins_cost(250);
 7601   format %{ "MOV    EAX,$src\t// low word only" %}
 7602   opcode(0xB8);
 7603   ins_encode( LdImmL_Lo(dst, src) );
 7604   ins_pipe( ialu_reg_fat );
 7605 %}
 7606 
 7607 // Multiply by 32-bit Immediate, taking the shifted high order results
 7608 //  (special case for shift by 32)
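      // The one-operand IMUL leaves the 64-bit product in EDX:EAX; a shift
      // right by exactly 32 means the desired bits are already in EDX, so
      // no SAR is needed (compare mulI_imm_RShift_high below).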
 7609 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7610   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7611   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7612              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7613              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7614   effect(USE src1, KILL cr);
 7615 
 7616   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7617   ins_cost(0*100 + 1*400 - 150);
 7618   format %{ "IMUL   EDX:EAX,$src1" %}
 7619   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7620   ins_pipe( pipe_slow );
 7621 %}
 7622 
 7623 // Multiply by 32-bit Immediate, taking the shifted high order results
 7624 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7625   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7626   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7627              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7628              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7629   effect(USE src1, KILL cr);
 7630 
 7631   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7632   ins_cost(1*100 + 1*400 - 150);
 7633   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7634             "SAR    EDX,$cnt-32" %}
 7635   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7636   ins_pipe( pipe_slow );
 7637 %}
 7638 
 7639 // Multiply Memory 32-bit Immediate
 7640 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7641   match(Set dst (MulI (LoadI src) imm));
 7642   effect(KILL cr);
 7643 
 7644   ins_cost(300);
 7645   format %{ "IMUL   $dst,$src,$imm" %}
 7646   opcode(0x69);  /* 69 /r id */
 7647   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7648   ins_pipe( ialu_reg_mem_alu0 );
 7649 %}
 7650 
 7651 // Multiply Memory
 7652 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7653   match(Set dst (MulI dst (LoadI src)));
 7654   effect(KILL cr);
 7655 
 7656   ins_cost(350);
 7657   format %{ "IMUL   $dst,$src" %}
 7658   opcode(0xAF, 0x0F);
 7659   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7660   ins_pipe( ialu_reg_mem_alu0 );
 7661 %}
 7662 
 7663 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7664 %{
 7665   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7666   effect(KILL cr, KILL src2);
 7667 
 7668   expand %{ mulI_eReg(dst, src1, cr);
 7669            mulI_eReg(src2, src3, cr);
 7670            addI_eReg(dst, src2, cr); %}
 7671 %}
 7672 
 7673 // Multiply Register Int to Long
 7674 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7675   // Basic Idea: long = (long)int * (long)int
 7676   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7677   effect(DEF dst, USE src, USE src1, KILL flags);
 7678 
 7679   ins_cost(300);
 7680   format %{ "IMUL   $dst,$src1" %}
 7681 
 7682   ins_encode( long_int_multiply( dst, src1 ) );
 7683   ins_pipe( ialu_reg_reg_alu0 );
 7684 %}
 7685 
 7686 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7687   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7688   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7689   effect(KILL flags);
 7690 
 7691   ins_cost(300);
 7692   format %{ "MUL    $dst,$src1" %}
 7693 
 7694   ins_encode( long_uint_multiply(dst, src1) );
 7695   ins_pipe( ialu_reg_reg_alu0 );
 7696 %}
 7697 
 7698 // Multiply Register Long
 7699 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7700   match(Set dst (MulL dst src));
 7701   effect(KILL cr, TEMP tmp);
 7702   ins_cost(4*100+3*400);
 7703 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7704 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
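      // Derivation: (x_hi*2^32 + x_lo) * (y_hi*2^32 + y_lo)
      //           = x_hi*y_hi*2^64 + (x_hi*y_lo + x_lo*y_hi)*2^32 + x_lo*y_lo
      // The 2^64 term is beyond the 64-bit result, so only the last two terms
      // are computed: MUL produces x_lo*y_lo in EDX:EAX and the two IMULs
      // produce the cross terms that are added into EDX.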
 7705   format %{ "MOV    $tmp,$src.lo\n\t"
 7706             "IMUL   $tmp,EDX\n\t"
 7707             "MOV    EDX,$src.hi\n\t"
 7708             "IMUL   EDX,EAX\n\t"
 7709             "ADD    $tmp,EDX\n\t"
 7710             "MUL    EDX:EAX,$src.lo\n\t"
 7711             "ADD    EDX,$tmp" %}
 7712   ins_encode( long_multiply( dst, src, tmp ) );
 7713   ins_pipe( pipe_slow );
 7714 %}
 7715 
 7716 // Multiply Register Long where the left operand's high 32 bits are zero
 7717 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7718   predicate(is_operand_hi32_zero(n->in(1)));
 7719   match(Set dst (MulL dst src));
 7720   effect(KILL cr, TEMP tmp);
 7721   ins_cost(2*100+2*400);
 7722 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7723 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7724   format %{ "MOV    $tmp,$src.hi\n\t"
 7725             "IMUL   $tmp,EAX\n\t"
 7726             "MUL    EDX:EAX,$src.lo\n\t"
 7727             "ADD    EDX,$tmp" %}
 7728   ins_encode %{
 7729     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7730     __ imull($tmp$$Register, rax);
 7731     __ mull($src$$Register);
 7732     __ addl(rdx, $tmp$$Register);
 7733   %}
 7734   ins_pipe( pipe_slow );
 7735 %}
 7736 
 7737 // Multiply Register Long where the right operand's high 32 bits are zero
 7738 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7739   predicate(is_operand_hi32_zero(n->in(2)));
 7740   match(Set dst (MulL dst src));
 7741   effect(KILL cr, TEMP tmp);
 7742   ins_cost(2*100+2*400);
 7743 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7744 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7745   format %{ "MOV    $tmp,$src.lo\n\t"
 7746             "IMUL   $tmp,EDX\n\t"
 7747             "MUL    EDX:EAX,$src.lo\n\t"
 7748             "ADD    EDX,$tmp" %}
 7749   ins_encode %{
 7750     __ movl($tmp$$Register, $src$$Register);
 7751     __ imull($tmp$$Register, rdx);
 7752     __ mull($src$$Register);
 7753     __ addl(rdx, $tmp$$Register);
 7754   %}
 7755   ins_pipe( pipe_slow );
 7756 %}
 7757 
 7758 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7759 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7760   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7761   match(Set dst (MulL dst src));
 7762   effect(KILL cr);
 7763   ins_cost(1*400);
 7764 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7765 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7766   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7767   ins_encode %{
 7768     __ mull($src$$Register);
 7769   %}
 7770   ins_pipe( pipe_slow );
 7771 %}
 7772 
 7773 // Multiply Register Long by small constant
 7774 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7775   match(Set dst (MulL dst src));
 7776   effect(KILL cr, TEMP tmp);
 7777   ins_cost(2*100+2*400);
 7778   size(12);
 7779 // Basic idea: lo(result) = lo(src * EAX)
 7780 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7781   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7782             "MOV    EDX,$src\n\t"
 7783             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7784             "ADD    EDX,$tmp" %}
 7785   ins_encode( long_multiply_con( dst, src, tmp ) );
 7786   ins_pipe( pipe_slow );
 7787 %}
 7788 
 7789 // Integer DIV with Register
 7790 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7791   match(Set rax (DivI rax div));
 7792   effect(KILL rdx, KILL cr);
 7793   size(26);
 7794   ins_cost(30*100+10*100);
 7795   format %{ "CMP    EAX,0x80000000\n\t"
 7796             "JNE,s  normal\n\t"
 7797             "XOR    EDX,EDX\n\t"
 7798             "CMP    ECX,-1\n\t"
 7799             "JE,s   done\n"
 7800     "normal: CDQ\n\t"
 7801             "IDIV   $div\n\t"
 7802     "done:"        %}
 7803   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7804   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7805   ins_pipe( ialu_reg_reg_alu0 );
 7806 %}
 7807 
 7808 // Divide Register Long
 7809 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7810   match(Set dst (DivL src1 src2));
 7811   effect(CALL);
 7812   ins_cost(10000);
 7813   format %{ "PUSH   $src1.hi\n\t"
 7814             "PUSH   $src1.lo\n\t"
 7815             "PUSH   $src2.hi\n\t"
 7816             "PUSH   $src2.lo\n\t"
 7817             "CALL   SharedRuntime::ldiv\n\t"
 7818             "ADD    ESP,16" %}
 7819   ins_encode( long_div(src1,src2) );
 7820   ins_pipe( pipe_slow );
 7821 %}
 7822 
 7823 // Integer DIVMOD with Register, both quotient and mod results
 7824 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7825   match(DivModI rax div);
 7826   effect(KILL cr);
 7827   size(26);
 7828   ins_cost(30*100+10*100);
 7829   format %{ "CMP    EAX,0x80000000\n\t"
 7830             "JNE,s  normal\n\t"
 7831             "XOR    EDX,EDX\n\t"
 7832             "CMP    ECX,-1\n\t"
 7833             "JE,s   done\n"
 7834     "normal: CDQ\n\t"
 7835             "IDIV   $div\n\t"
 7836     "done:"        %}
 7837   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7838   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7839   ins_pipe( pipe_slow );
 7840 %}
 7841 
 7842 // Integer MOD with Register
 7843 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7844   match(Set rdx (ModI rax div));
 7845   effect(KILL rax, KILL cr);
 7846 
 7847   size(26);
 7848   ins_cost(300);
 7849   format %{ "CDQ\n\t"
 7850             "IDIV   $div" %}
 7851   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7852   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7853   ins_pipe( ialu_reg_reg_alu0 );
 7854 %}
 7855 
 7856 // Remainder Register Long
 7857 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7858   match(Set dst (ModL src1 src2));
 7859   effect(CALL);
 7860   ins_cost(10000);
 7861   format %{ "PUSH   $src1.hi\n\t"
 7862             "PUSH   $src1.lo\n\t"
 7863             "PUSH   $src2.hi\n\t"
 7864             "PUSH   $src2.lo\n\t"
 7865             "CALL   SharedRuntime::lrem\n\t"
 7866             "ADD    ESP,16" %}
 7867   ins_encode( long_mod(src1,src2) );
 7868   ins_pipe( pipe_slow );
 7869 %}
 7870 
 7871 // Divide Register Long (no special case since divisor != -1)
 7872 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7873   match(Set dst (DivL dst imm));
 7874   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7875   ins_cost(1000);
 7876   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7877             "XOR    $tmp2,$tmp2\n\t"
 7878             "CMP    $tmp,EDX\n\t"
 7879             "JA,s   fast\n\t"
 7880             "MOV    $tmp2,EAX\n\t"
 7881             "MOV    EAX,EDX\n\t"
 7882             "MOV    EDX,0\n\t"
 7883             "JLE,s  pos\n\t"
 7884             "LNEG   EAX : $tmp2\n\t"
 7885             "DIV    $tmp # unsigned division\n\t"
 7886             "XCHG   EAX,$tmp2\n\t"
 7887             "DIV    $tmp\n\t"
 7888             "LNEG   $tmp2 : EAX\n\t"
 7889             "JMP,s  done\n"
 7890     "pos:\n\t"
 7891             "DIV    $tmp\n\t"
 7892             "XCHG   EAX,$tmp2\n"
 7893     "fast:\n\t"
 7894             "DIV    $tmp\n"
 7895     "done:\n\t"
 7896             "MOV    EDX,$tmp2\n\t"
 7897             "NEG    EDX:EAX # if $imm < 0" %}
 7898   ins_encode %{
 7899     int con = (int)$imm$$constant;
 7900     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7901     int pcon = (con > 0) ? con : -con;
 7902     Label Lfast, Lpos, Ldone;
 7903 
 7904     __ movl($tmp$$Register, pcon);
 7905     __ xorl($tmp2$$Register,$tmp2$$Register);
 7906     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7907     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7908 
 7909     __ movl($tmp2$$Register, $dst$$Register); // save
 7910     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7911     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7912     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7913 
 7914     // Negative dividend.
 7915     // convert value to positive to use unsigned division
 7916     __ lneg($dst$$Register, $tmp2$$Register);
 7917     __ divl($tmp$$Register);
 7918     __ xchgl($dst$$Register, $tmp2$$Register);
 7919     __ divl($tmp$$Register);
 7920     // revert result back to negative
 7921     __ lneg($tmp2$$Register, $dst$$Register);
 7922     __ jmpb(Ldone);
 7923 
 7924     __ bind(Lpos);
 7925     __ divl($tmp$$Register); // Use unsigned division
 7926     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit hi result
 7928 
 7929     __ bind(Lfast);
 7930     // fast path: src is positive
 7931     __ divl($tmp$$Register); // Use unsigned division
 7932 
 7933     __ bind(Ldone);
 7934     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7935     if (con < 0) {
 7936       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7937     }
 7938   %}
 7939   ins_pipe( pipe_slow );
 7940 %}
 7941 
// Remainder Register Long (remainder fits into 32 bits)
 7943 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7944   match(Set dst (ModL dst imm));
 7945   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7946   ins_cost(1000);
 7947   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7948             "CMP    $tmp,EDX\n\t"
 7949             "JA,s   fast\n\t"
 7950             "MOV    $tmp2,EAX\n\t"
 7951             "MOV    EAX,EDX\n\t"
 7952             "MOV    EDX,0\n\t"
 7953             "JLE,s  pos\n\t"
 7954             "LNEG   EAX : $tmp2\n\t"
 7955             "DIV    $tmp # unsigned division\n\t"
 7956             "MOV    EAX,$tmp2\n\t"
 7957             "DIV    $tmp\n\t"
 7958             "NEG    EDX\n\t"
 7959             "JMP,s  done\n"
 7960     "pos:\n\t"
 7961             "DIV    $tmp\n\t"
 7962             "MOV    EAX,$tmp2\n"
 7963     "fast:\n\t"
 7964             "DIV    $tmp\n"
 7965     "done:\n\t"
 7966             "MOV    EAX,EDX\n\t"
 7967             "SAR    EDX,31\n\t" %}
 7968   ins_encode %{
 7969     int con = (int)$imm$$constant;
 7970     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7971     int pcon = (con > 0) ? con : -con;
 7972     Label  Lfast, Lpos, Ldone;
 7973 
 7974     __ movl($tmp$$Register, pcon);
 7975     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7976     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7977 
 7978     __ movl($tmp2$$Register, $dst$$Register); // save
 7979     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7980     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7981     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7982 
 7983     // Negative dividend.
 7984     // convert value to positive to use unsigned division
 7985     __ lneg($dst$$Register, $tmp2$$Register);
 7986     __ divl($tmp$$Register);
 7987     __ movl($dst$$Register, $tmp2$$Register);
 7988     __ divl($tmp$$Register);
 7989     // revert remainder back to negative
 7990     __ negl(HIGH_FROM_LOW($dst$$Register));
 7991     __ jmpb(Ldone);
 7992 
 7993     __ bind(Lpos);
 7994     __ divl($tmp$$Register);
 7995     __ movl($dst$$Register, $tmp2$$Register);
 7996 
 7997     __ bind(Lfast);
 7998     // fast path: src is positive
 7999     __ divl($tmp$$Register);
 8000 
 8001     __ bind(Ldone);
 8002     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 8003     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 8004 
 8005   %}
 8006   ins_pipe( pipe_slow );
 8007 %}
 8008 
 8009 // Integer Shift Instructions
 8010 // Shift Left by one
 8011 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8012   match(Set dst (LShiftI dst shift));
 8013   effect(KILL cr);
 8014 
 8015   size(2);
 8016   format %{ "SHL    $dst,$shift" %}
 8017   opcode(0xD1, 0x4);  /* D1 /4 */
 8018   ins_encode( OpcP, RegOpc( dst ) );
 8019   ins_pipe( ialu_reg );
 8020 %}
 8021 
 8022 // Shift Left by 8-bit immediate
 8023 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8024   match(Set dst (LShiftI dst shift));
 8025   effect(KILL cr);
 8026 
 8027   size(3);
 8028   format %{ "SHL    $dst,$shift" %}
 8029   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8030   ins_encode( RegOpcImm( dst, shift) );
 8031   ins_pipe( ialu_reg );
 8032 %}
 8033 
 8034 // Shift Left by variable
 8035 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8036   match(Set dst (LShiftI dst shift));
 8037   effect(KILL cr);
 8038 
 8039   size(2);
 8040   format %{ "SHL    $dst,$shift" %}
 8041   opcode(0xD3, 0x4);  /* D3 /4 */
 8042   ins_encode( OpcP, RegOpc( dst ) );
 8043   ins_pipe( ialu_reg_reg );
 8044 %}
 8045 
 8046 // Arithmetic shift right by one
 8047 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8048   match(Set dst (RShiftI dst shift));
 8049   effect(KILL cr);
 8050 
 8051   size(2);
 8052   format %{ "SAR    $dst,$shift" %}
 8053   opcode(0xD1, 0x7);  /* D1 /7 */
 8054   ins_encode( OpcP, RegOpc( dst ) );
 8055   ins_pipe( ialu_reg );
 8056 %}
 8057 
// Arithmetic shift right memory operand by one
 8059 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8060   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8061   effect(KILL cr);
 8062   format %{ "SAR    $dst,$shift" %}
 8063   opcode(0xD1, 0x7);  /* D1 /7 */
 8064   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8065   ins_pipe( ialu_mem_imm );
 8066 %}
 8067 
 8068 // Arithmetic Shift Right by 8-bit immediate
 8069 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8070   match(Set dst (RShiftI dst shift));
 8071   effect(KILL cr);
 8072 
 8073   size(3);
 8074   format %{ "SAR    $dst,$shift" %}
 8075   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8076   ins_encode( RegOpcImm( dst, shift ) );
 8077   ins_pipe( ialu_mem_imm );
 8078 %}
 8079 
// Arithmetic Shift Right memory operand by 8-bit immediate
 8081 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8082   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8083   effect(KILL cr);
 8084 
 8085   format %{ "SAR    $dst,$shift" %}
 8086   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8087   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8088   ins_pipe( ialu_mem_imm );
 8089 %}
 8090 
 8091 // Arithmetic Shift Right by variable
 8092 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8093   match(Set dst (RShiftI dst shift));
 8094   effect(KILL cr);
 8095 
 8096   size(2);
 8097   format %{ "SAR    $dst,$shift" %}
 8098   opcode(0xD3, 0x7);  /* D3 /7 */
 8099   ins_encode( OpcP, RegOpc( dst ) );
 8100   ins_pipe( ialu_reg_reg );
 8101 %}
 8102 
 8103 // Logical shift right by one
 8104 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8105   match(Set dst (URShiftI dst shift));
 8106   effect(KILL cr);
 8107 
 8108   size(2);
 8109   format %{ "SHR    $dst,$shift" %}
 8110   opcode(0xD1, 0x5);  /* D1 /5 */
 8111   ins_encode( OpcP, RegOpc( dst ) );
 8112   ins_pipe( ialu_reg );
 8113 %}
 8114 
 8115 // Logical Shift Right by 8-bit immediate
 8116 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8117   match(Set dst (URShiftI dst shift));
 8118   effect(KILL cr);
 8119 
 8120   size(3);
 8121   format %{ "SHR    $dst,$shift" %}
 8122   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8123   ins_encode( RegOpcImm( dst, shift) );
 8124   ins_pipe( ialu_reg );
 8125 %}
 8126 
 8127 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8129 // This idiom is used by the compiler for the i2b bytecode.
 8130 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8131   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8132 
 8133   size(3);
 8134   format %{ "MOVSX  $dst,$src :8" %}
 8135   ins_encode %{
 8136     __ movsbl($dst$$Register, $src$$Register);
 8137   %}
 8138   ins_pipe(ialu_reg_reg);
 8139 %}
 8140 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
 8143 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8144   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8145 
 8146   size(3);
 8147   format %{ "MOVSX  $dst,$src :16" %}
 8148   ins_encode %{
 8149     __ movswl($dst$$Register, $src$$Register);
 8150   %}
 8151   ins_pipe(ialu_reg_reg);
 8152 %}
 8153 
 8154 
 8155 // Logical Shift Right by variable
 8156 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8157   match(Set dst (URShiftI dst shift));
 8158   effect(KILL cr);
 8159 
 8160   size(2);
 8161   format %{ "SHR    $dst,$shift" %}
 8162   opcode(0xD3, 0x5);  /* D3 /5 */
 8163   ins_encode( OpcP, RegOpc( dst ) );
 8164   ins_pipe( ialu_reg_reg );
 8165 %}
 8166 
 8167 
 8168 //----------Logical Instructions-----------------------------------------------
 8169 //----------Integer Logical Instructions---------------------------------------
 8170 // And Instructions
 8171 // And Register with Register
 8172 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8173   match(Set dst (AndI dst src));
 8174   effect(KILL cr);
 8175 
 8176   size(2);
 8177   format %{ "AND    $dst,$src" %}
 8178   opcode(0x23);
 8179   ins_encode( OpcP, RegReg( dst, src) );
 8180   ins_pipe( ialu_reg_reg );
 8181 %}
 8182 
 8183 // And Register with Immediate
 8184 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8185   match(Set dst (AndI dst src));
 8186   effect(KILL cr);
 8187 
 8188   format %{ "AND    $dst,$src" %}
 8189   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8190   // ins_encode( RegImm( dst, src) );
 8191   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8192   ins_pipe( ialu_reg );
 8193 %}
 8194 
 8195 // And Register with Memory
 8196 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8197   match(Set dst (AndI dst (LoadI src)));
 8198   effect(KILL cr);
 8199 
 8200   ins_cost(150);
 8201   format %{ "AND    $dst,$src" %}
 8202   opcode(0x23);
 8203   ins_encode( OpcP, RegMem( dst, src) );
 8204   ins_pipe( ialu_reg_mem );
 8205 %}
 8206 
 8207 // And Memory with Register
 8208 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8209   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8210   effect(KILL cr);
 8211 
 8212   ins_cost(150);
 8213   format %{ "AND    $dst,$src" %}
 8214   opcode(0x21);  /* Opcode 21 /r */
 8215   ins_encode( OpcP, RegMem( src, dst ) );
 8216   ins_pipe( ialu_mem_reg );
 8217 %}
 8218 
 8219 // And Memory with Immediate
 8220 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8221   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8222   effect(KILL cr);
 8223 
 8224   ins_cost(125);
 8225   format %{ "AND    $dst,$src" %}
 8226   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8227   // ins_encode( MemImm( dst, src) );
 8228   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8229   ins_pipe( ialu_mem_imm );
 8230 %}
 8231 
 8232 // BMI1 instructions
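// The integer BMI1 forms below match the usual bit-trick identities and
// replace them with a single instruction.  Rough C-style meaning of each
// pattern (illustration only):
//
//   andn(x, y)  == ~x & y         // matched as (x ^ -1) & y
//   blsi(x)     ==  x & -x        // isolate lowest set bit, matched as (0 - x) & x
//   blsmsk(x)   ==  x ^ (x - 1)   // mask up to and including the lowest set bit
//   blsr(x)     ==  x & (x - 1)   // clear the lowest set bit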
 8233 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8234   match(Set dst (AndI (XorI src1 minus_1) src2));
 8235   predicate(UseBMI1Instructions);
 8236   effect(KILL cr);
 8237 
 8238   format %{ "ANDNL  $dst, $src1, $src2" %}
 8239 
 8240   ins_encode %{
 8241     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8242   %}
 8243   ins_pipe(ialu_reg);
 8244 %}
 8245 
 8246 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8247   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8248   predicate(UseBMI1Instructions);
 8249   effect(KILL cr);
 8250 
 8251   ins_cost(125);
 8252   format %{ "ANDNL  $dst, $src1, $src2" %}
 8253 
 8254   ins_encode %{
 8255     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8256   %}
 8257   ins_pipe(ialu_reg_mem);
 8258 %}
 8259 
 8260 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8261   match(Set dst (AndI (SubI imm_zero src) src));
 8262   predicate(UseBMI1Instructions);
 8263   effect(KILL cr);
 8264 
 8265   format %{ "BLSIL  $dst, $src" %}
 8266 
 8267   ins_encode %{
 8268     __ blsil($dst$$Register, $src$$Register);
 8269   %}
 8270   ins_pipe(ialu_reg);
 8271 %}
 8272 
 8273 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8274   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8275   predicate(UseBMI1Instructions);
 8276   effect(KILL cr);
 8277 
 8278   ins_cost(125);
 8279   format %{ "BLSIL  $dst, $src" %}
 8280 
 8281   ins_encode %{
 8282     __ blsil($dst$$Register, $src$$Address);
 8283   %}
 8284   ins_pipe(ialu_reg_mem);
 8285 %}
 8286 
 8287 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8288 %{
 8289   match(Set dst (XorI (AddI src minus_1) src));
 8290   predicate(UseBMI1Instructions);
 8291   effect(KILL cr);
 8292 
 8293   format %{ "BLSMSKL $dst, $src" %}
 8294 
 8295   ins_encode %{
 8296     __ blsmskl($dst$$Register, $src$$Register);
 8297   %}
 8298 
 8299   ins_pipe(ialu_reg);
 8300 %}
 8301 
 8302 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8303 %{
 8304   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8305   predicate(UseBMI1Instructions);
 8306   effect(KILL cr);
 8307 
 8308   ins_cost(125);
 8309   format %{ "BLSMSKL $dst, $src" %}
 8310 
 8311   ins_encode %{
 8312     __ blsmskl($dst$$Register, $src$$Address);
 8313   %}
 8314 
 8315   ins_pipe(ialu_reg_mem);
 8316 %}
 8317 
 8318 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8319 %{
 8320   match(Set dst (AndI (AddI src minus_1) src) );
 8321   predicate(UseBMI1Instructions);
 8322   effect(KILL cr);
 8323 
 8324   format %{ "BLSRL  $dst, $src" %}
 8325 
 8326   ins_encode %{
 8327     __ blsrl($dst$$Register, $src$$Register);
 8328   %}
 8329 
 8330   ins_pipe(ialu_reg);
 8331 %}
 8332 
 8333 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8334 %{
 8335   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8336   predicate(UseBMI1Instructions);
 8337   effect(KILL cr);
 8338 
 8339   ins_cost(125);
 8340   format %{ "BLSRL  $dst, $src" %}
 8341 
 8342   ins_encode %{
 8343     __ blsrl($dst$$Register, $src$$Address);
 8344   %}
 8345 
 8346   ins_pipe(ialu_reg_mem);
 8347 %}
 8348 
 8349 // Or Instructions
 8350 // Or Register with Register
 8351 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8352   match(Set dst (OrI dst src));
 8353   effect(KILL cr);
 8354 
 8355   size(2);
 8356   format %{ "OR     $dst,$src" %}
 8357   opcode(0x0B);
 8358   ins_encode( OpcP, RegReg( dst, src) );
 8359   ins_pipe( ialu_reg_reg );
 8360 %}
 8361 
 8362 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8363   match(Set dst (OrI dst (CastP2X src)));
 8364   effect(KILL cr);
 8365 
 8366   size(2);
 8367   format %{ "OR     $dst,$src" %}
 8368   opcode(0x0B);
 8369   ins_encode( OpcP, RegReg( dst, src) );
 8370   ins_pipe( ialu_reg_reg );
 8371 %}
 8372 
 8373 
 8374 // Or Register with Immediate
 8375 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8376   match(Set dst (OrI dst src));
 8377   effect(KILL cr);
 8378 
 8379   format %{ "OR     $dst,$src" %}
 8380   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8381   // ins_encode( RegImm( dst, src) );
 8382   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8383   ins_pipe( ialu_reg );
 8384 %}
 8385 
 8386 // Or Register with Memory
 8387 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8388   match(Set dst (OrI dst (LoadI src)));
 8389   effect(KILL cr);
 8390 
 8391   ins_cost(150);
 8392   format %{ "OR     $dst,$src" %}
 8393   opcode(0x0B);
 8394   ins_encode( OpcP, RegMem( dst, src) );
 8395   ins_pipe( ialu_reg_mem );
 8396 %}
 8397 
 8398 // Or Memory with Register
 8399 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8400   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8401   effect(KILL cr);
 8402 
 8403   ins_cost(150);
 8404   format %{ "OR     $dst,$src" %}
 8405   opcode(0x09);  /* Opcode 09 /r */
 8406   ins_encode( OpcP, RegMem( src, dst ) );
 8407   ins_pipe( ialu_mem_reg );
 8408 %}
 8409 
 8410 // Or Memory with Immediate
 8411 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8412   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8413   effect(KILL cr);
 8414 
 8415   ins_cost(125);
 8416   format %{ "OR     $dst,$src" %}
 8417   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8418   // ins_encode( MemImm( dst, src) );
 8419   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8420   ins_pipe( ialu_mem_imm );
 8421 %}
 8422 
 8423 // ROL/ROR
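// In this file rotates are not matched as primitives; they are recognized
// from the shift-or idiom and then expanded into ROL/ROR.  Rough C-style form
// of the patterns below (illustration only; the imm8 forms carry a predicate
// requiring the two shift counts to sum to 0 mod 32):
//
//   rotl32(x, s) == (x << s) | (x >> (32 - s))   // ">>" here is the unsigned shift
//   rotr32(x, s) == (x >> s) | (x << (32 - s))
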
 8424 // ROL expand
 8425 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8426   effect(USE_DEF dst, USE shift, KILL cr);
 8427 
 8428   format %{ "ROL    $dst, $shift" %}
 8429   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8430   ins_encode( OpcP, RegOpc( dst ));
 8431   ins_pipe( ialu_reg );
 8432 %}
 8433 
 8434 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8435   effect(USE_DEF dst, USE shift, KILL cr);
 8436 
 8437   format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0);  /* Opcode C1 /0 ib */
 8439   ins_encode( RegOpcImm(dst, shift) );
 8440   ins_pipe(ialu_reg);
 8441 %}
 8442 
 8443 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8444   effect(USE_DEF dst, USE shift, KILL cr);
 8445 
 8446   format %{ "ROL    $dst, $shift" %}
 8447   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8448   ins_encode(OpcP, RegOpc(dst));
 8449   ins_pipe( ialu_reg_reg );
 8450 %}
 8451 // end of ROL expand
 8452 
// ROL 32bit by one
 8454 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8455   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8456 
 8457   expand %{
 8458     rolI_eReg_imm1(dst, lshift, cr);
 8459   %}
 8460 %}
 8461 
 8462 // ROL 32bit var by imm8 once
 8463 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8464   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8465   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8466 
 8467   expand %{
 8468     rolI_eReg_imm8(dst, lshift, cr);
 8469   %}
 8470 %}
 8471 
 8472 // ROL 32bit var by var once
 8473 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8474   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8475 
 8476   expand %{
 8477     rolI_eReg_CL(dst, shift, cr);
 8478   %}
 8479 %}
 8480 
 8481 // ROL 32bit var by var once
 8482 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8483   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8484 
 8485   expand %{
 8486     rolI_eReg_CL(dst, shift, cr);
 8487   %}
 8488 %}
 8489 
 8490 // ROR expand
 8491 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8492   effect(USE_DEF dst, USE shift, KILL cr);
 8493 
 8494   format %{ "ROR    $dst, $shift" %}
 8495   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8496   ins_encode( OpcP, RegOpc( dst ) );
 8497   ins_pipe( ialu_reg );
 8498 %}
 8499 
 8500 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);
 8502 
 8503   format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode C1 /1 ib */
 8505   ins_encode( RegOpcImm(dst, shift) );
 8506   ins_pipe( ialu_reg );
 8507 %}
 8508 
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8510   effect(USE_DEF dst, USE shift, KILL cr);
 8511 
 8512   format %{ "ROR    $dst, $shift" %}
 8513   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8514   ins_encode(OpcP, RegOpc(dst));
 8515   ins_pipe( ialu_reg_reg );
 8516 %}
 8517 // end of ROR expand
 8518 
// ROR 32bit by one
 8520 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8521   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8522 
 8523   expand %{
 8524     rorI_eReg_imm1(dst, rshift, cr);
 8525   %}
 8526 %}
 8527 
 8528 // ROR 32bit by immI8 once
 8529 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8530   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8531   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8532 
 8533   expand %{
 8534     rorI_eReg_imm8(dst, rshift, cr);
 8535   %}
 8536 %}
 8537 
 8538 // ROR 32bit var by var once
 8539 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8540   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8541 
 8542   expand %{
 8543     rorI_eReg_CL(dst, shift, cr);
 8544   %}
 8545 %}
 8546 
 8547 // ROR 32bit var by var once
 8548 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8549   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8550 
 8551   expand %{
 8552     rorI_eReg_CL(dst, shift, cr);
 8553   %}
 8554 %}
 8555 
 8556 // Xor Instructions
 8557 // Xor Register with Register
 8558 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8559   match(Set dst (XorI dst src));
 8560   effect(KILL cr);
 8561 
 8562   size(2);
 8563   format %{ "XOR    $dst,$src" %}
 8564   opcode(0x33);
 8565   ins_encode( OpcP, RegReg( dst, src) );
 8566   ins_pipe( ialu_reg_reg );
 8567 %}
 8568 
 8569 // Xor Register with Immediate -1
 8570 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8571   match(Set dst (XorI dst imm));
 8572 
 8573   size(2);
 8574   format %{ "NOT    $dst" %}
 8575   ins_encode %{
 8576      __ notl($dst$$Register);
 8577   %}
 8578   ins_pipe( ialu_reg );
 8579 %}
 8580 
 8581 // Xor Register with Immediate
 8582 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8583   match(Set dst (XorI dst src));
 8584   effect(KILL cr);
 8585 
 8586   format %{ "XOR    $dst,$src" %}
 8587   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8588   // ins_encode( RegImm( dst, src) );
 8589   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8590   ins_pipe( ialu_reg );
 8591 %}
 8592 
 8593 // Xor Register with Memory
 8594 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8595   match(Set dst (XorI dst (LoadI src)));
 8596   effect(KILL cr);
 8597 
 8598   ins_cost(150);
 8599   format %{ "XOR    $dst,$src" %}
 8600   opcode(0x33);
 8601   ins_encode( OpcP, RegMem(dst, src) );
 8602   ins_pipe( ialu_reg_mem );
 8603 %}
 8604 
 8605 // Xor Memory with Register
 8606 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8607   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8608   effect(KILL cr);
 8609 
 8610   ins_cost(150);
 8611   format %{ "XOR    $dst,$src" %}
 8612   opcode(0x31);  /* Opcode 31 /r */
 8613   ins_encode( OpcP, RegMem( src, dst ) );
 8614   ins_pipe( ialu_mem_reg );
 8615 %}
 8616 
 8617 // Xor Memory with Immediate
 8618 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8619   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8620   effect(KILL cr);
 8621 
 8622   ins_cost(125);
 8623   format %{ "XOR    $dst,$src" %}
 8624   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8625   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8626   ins_pipe( ialu_mem_imm );
 8627 %}
 8628 
 8629 //----------Convert Int to Boolean---------------------------------------------
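// Conv2B turns an int or pointer into 0 or 1 without a branch: after the
// register copy, NEG sets CF exactly when the value is non-zero, and ADC then
// adds the original value plus that carry back in, leaving just the carry.
// Rough C-style sketch (illustration only; the helper name is made up):
//
//   uint32_t conv2b(uint32_t src) {
//     uint32_t dst   = src;                 // movI_nocopy / movP_nocopy
//     uint32_t carry = (dst != 0);          // CF as produced by NEG
//     dst = (uint32_t)(0 - dst);            // NEG dst
//     return dst + src + carry;             // ADC dst,src  ->  0 or 1
//   }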
 8630 
 8631 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8632   effect( DEF dst, USE src );
 8633   format %{ "MOV    $dst,$src" %}
 8634   ins_encode( enc_Copy( dst, src) );
 8635   ins_pipe( ialu_reg_reg );
 8636 %}
 8637 
 8638 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8639   effect( USE_DEF dst, USE src, KILL cr );
 8640 
 8641   size(4);
 8642   format %{ "NEG    $dst\n\t"
 8643             "ADC    $dst,$src" %}
 8644   ins_encode( neg_reg(dst),
 8645               OpcRegReg(0x13,dst,src) );
 8646   ins_pipe( ialu_reg_reg_long );
 8647 %}
 8648 
 8649 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8650   match(Set dst (Conv2B src));
 8651 
 8652   expand %{
 8653     movI_nocopy(dst,src);
 8654     ci2b(dst,src,cr);
 8655   %}
 8656 %}
 8657 
 8658 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8659   effect( DEF dst, USE src );
 8660   format %{ "MOV    $dst,$src" %}
 8661   ins_encode( enc_Copy( dst, src) );
 8662   ins_pipe( ialu_reg_reg );
 8663 %}
 8664 
 8665 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8666   effect( USE_DEF dst, USE src, KILL cr );
 8667   format %{ "NEG    $dst\n\t"
 8668             "ADC    $dst,$src" %}
 8669   ins_encode( neg_reg(dst),
 8670               OpcRegReg(0x13,dst,src) );
 8671   ins_pipe( ialu_reg_reg_long );
 8672 %}
 8673 
 8674 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8675   match(Set dst (Conv2B src));
 8676 
 8677   expand %{
 8678     movP_nocopy(dst,src);
 8679     cp2b(dst,src,cr);
 8680   %}
 8681 %}
 8682 
 8683 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8684   match(Set dst (CmpLTMask p q));
 8685   effect(KILL cr);
 8686   ins_cost(400);
 8687 
  // SETlt can only use low byte of EAX, EBX, ECX, or EDX as destination
 8689   format %{ "XOR    $dst,$dst\n\t"
 8690             "CMP    $p,$q\n\t"
 8691             "SETlt  $dst\n\t"
 8692             "NEG    $dst" %}
 8693   ins_encode %{
 8694     Register Rp = $p$$Register;
 8695     Register Rq = $q$$Register;
 8696     Register Rd = $dst$$Register;
 8698     __ xorl(Rd, Rd);
 8699     __ cmpl(Rp, Rq);
 8700     __ setb(Assembler::less, Rd);
 8701     __ negl(Rd);
 8702   %}
 8703 
 8704   ins_pipe(pipe_slow);
 8705 %}
 8706 
 8707 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8708   match(Set dst (CmpLTMask dst zero));
 8709   effect(DEF dst, KILL cr);
 8710   ins_cost(100);
 8711 
 8712   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8713   ins_encode %{
    __ sarl($dst$$Register, 31);
 8715   %}
 8716   ins_pipe(ialu_reg);
 8717 %}
 8718 
 8719 /* better to save a register than avoid a branch */
 8720 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8721   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8722   effect(KILL cr);
 8723   ins_cost(400);
 8724   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8725             "JGE    done\n\t"
 8726             "ADD    $p,$y\n"
 8727             "done:  " %}
 8728   ins_encode %{
 8729     Register Rp = $p$$Register;
 8730     Register Rq = $q$$Register;
 8731     Register Ry = $y$$Register;
 8732     Label done;
 8733     __ subl(Rp, Rq);
 8734     __ jccb(Assembler::greaterEqual, done);
 8735     __ addl(Rp, Ry);
 8736     __ bind(done);
 8737   %}
 8738 
 8739   ins_pipe(pipe_cmplt);
 8740 %}
 8741 
 8742 /* better to save a register than avoid a branch */
 8743 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8744   match(Set y (AndI (CmpLTMask p q) y));
 8745   effect(KILL cr);
 8746 
 8747   ins_cost(300);
 8748 
 8749   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8750             "JLT      done\n\t"
 8751             "XORL     $y, $y\n"
 8752             "done:  " %}
 8753   ins_encode %{
 8754     Register Rp = $p$$Register;
 8755     Register Rq = $q$$Register;
 8756     Register Ry = $y$$Register;
 8757     Label done;
 8758     __ cmpl(Rp, Rq);
 8759     __ jccb(Assembler::less, done);
 8760     __ xorl(Ry, Ry);
 8761     __ bind(done);
 8762   %}
 8763 
 8764   ins_pipe(pipe_cmplt);
 8765 %}
 8766 
 8767 /* If I enable this, I encourage spilling in the inner loop of compress.
 8768 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8769   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8770 */
 8771 //----------Overflow Math Instructions-----------------------------------------
 8772 
 8773 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8774 %{
 8775   match(Set cr (OverflowAddI op1 op2));
 8776   effect(DEF cr, USE_KILL op1, USE op2);
 8777 
 8778   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8779 
 8780   ins_encode %{
 8781     __ addl($op1$$Register, $op2$$Register);
 8782   %}
 8783   ins_pipe(ialu_reg_reg);
 8784 %}
 8785 
 8786 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8787 %{
 8788   match(Set cr (OverflowAddI op1 op2));
 8789   effect(DEF cr, USE_KILL op1, USE op2);
 8790 
 8791   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8792 
 8793   ins_encode %{
 8794     __ addl($op1$$Register, $op2$$constant);
 8795   %}
 8796   ins_pipe(ialu_reg_reg);
 8797 %}
 8798 
 8799 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8800 %{
 8801   match(Set cr (OverflowSubI op1 op2));
 8802 
 8803   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8804   ins_encode %{
 8805     __ cmpl($op1$$Register, $op2$$Register);
 8806   %}
 8807   ins_pipe(ialu_reg_reg);
 8808 %}
 8809 
 8810 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8811 %{
 8812   match(Set cr (OverflowSubI op1 op2));
 8813 
 8814   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8815   ins_encode %{
 8816     __ cmpl($op1$$Register, $op2$$constant);
 8817   %}
 8818   ins_pipe(ialu_reg_reg);
 8819 %}
 8820 
 8821 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8822 %{
 8823   match(Set cr (OverflowSubI zero op2));
 8824   effect(DEF cr, USE_KILL op2);
 8825 
 8826   format %{ "NEG    $op2\t# overflow check int" %}
 8827   ins_encode %{
 8828     __ negl($op2$$Register);
 8829   %}
 8830   ins_pipe(ialu_reg_reg);
 8831 %}
 8832 
 8833 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8834 %{
 8835   match(Set cr (OverflowMulI op1 op2));
 8836   effect(DEF cr, USE_KILL op1, USE op2);
 8837 
 8838   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8839   ins_encode %{
 8840     __ imull($op1$$Register, $op2$$Register);
 8841   %}
 8842   ins_pipe(ialu_reg_reg_alu0);
 8843 %}
 8844 
 8845 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8846 %{
 8847   match(Set cr (OverflowMulI op1 op2));
 8848   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8849 
 8850   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8851   ins_encode %{
 8852     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8853   %}
 8854   ins_pipe(ialu_reg_reg_alu0);
 8855 %}
 8856 
 8857 // Integer Absolute Instructions
 8858 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8859 %{
 8860   match(Set dst (AbsI src));
 8861   effect(TEMP dst, TEMP tmp, KILL cr);
 8862   format %{ "movl $tmp, $src\n\t"
 8863             "sarl $tmp, 31\n\t"
 8864             "movl $dst, $src\n\t"
 8865             "xorl $dst, $tmp\n\t"
 8866             "subl $dst, $tmp\n"
 8867           %}
 8868   ins_encode %{
 8869     __ movl($tmp$$Register, $src$$Register);
 8870     __ sarl($tmp$$Register, 31);
 8871     __ movl($dst$$Register, $src$$Register);
 8872     __ xorl($dst$$Register, $tmp$$Register);
 8873     __ subl($dst$$Register, $tmp$$Register);
 8874   %}
 8875 
 8876   ins_pipe(ialu_reg_reg);
 8877 %}
 8878 
 8879 //----------Long Instructions------------------------------------------------
 8880 // Add Long Register with Register
 8881 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8882   match(Set dst (AddL dst src));
 8883   effect(KILL cr);
 8884   ins_cost(200);
 8885   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8886             "ADC    $dst.hi,$src.hi" %}
 8887   opcode(0x03, 0x13);
 8888   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8889   ins_pipe( ialu_reg_reg_long );
 8890 %}
 8891 
 8892 // Add Long Register with Immediate
 8893 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8894   match(Set dst (AddL dst src));
 8895   effect(KILL cr);
 8896   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8897             "ADC    $dst.hi,$src.hi" %}
 8898   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8899   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8900   ins_pipe( ialu_reg_long );
 8901 %}
 8902 
 8903 // Add Long Register with Memory
 8904 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8905   match(Set dst (AddL dst (LoadL mem)));
 8906   effect(KILL cr);
 8907   ins_cost(125);
 8908   format %{ "ADD    $dst.lo,$mem\n\t"
 8909             "ADC    $dst.hi,$mem+4" %}
 8910   opcode(0x03, 0x13);
 8911   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8912   ins_pipe( ialu_reg_long_mem );
 8913 %}
 8914 
 8915 // Subtract Long Register with Register.
 8916 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8917   match(Set dst (SubL dst src));
 8918   effect(KILL cr);
 8919   ins_cost(200);
 8920   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8921             "SBB    $dst.hi,$src.hi" %}
 8922   opcode(0x2B, 0x1B);
 8923   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8924   ins_pipe( ialu_reg_reg_long );
 8925 %}
 8926 
 8927 // Subtract Long Register with Immediate
 8928 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8929   match(Set dst (SubL dst src));
 8930   effect(KILL cr);
 8931   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8932             "SBB    $dst.hi,$src.hi" %}
 8933   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8934   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8935   ins_pipe( ialu_reg_long );
 8936 %}
 8937 
 8938 // Subtract Long Register with Memory
 8939 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8940   match(Set dst (SubL dst (LoadL mem)));
 8941   effect(KILL cr);
 8942   ins_cost(125);
 8943   format %{ "SUB    $dst.lo,$mem\n\t"
 8944             "SBB    $dst.hi,$mem+4" %}
 8945   opcode(0x2B, 0x1B);
 8946   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8947   ins_pipe( ialu_reg_long_mem );
 8948 %}
 8949 
 8950 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8951   match(Set dst (SubL zero dst));
 8952   effect(KILL cr);
 8953   ins_cost(300);
 8954   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8955   ins_encode( neg_long(dst) );
 8956   ins_pipe( ialu_reg_reg_long );
 8957 %}
 8958 
 8959 // And Long Register with Register
 8960 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8961   match(Set dst (AndL dst src));
 8962   effect(KILL cr);
 8963   format %{ "AND    $dst.lo,$src.lo\n\t"
 8964             "AND    $dst.hi,$src.hi" %}
 8965   opcode(0x23,0x23);
 8966   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8967   ins_pipe( ialu_reg_reg_long );
 8968 %}
 8969 
 8970 // And Long Register with Immediate
 8971 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8972   match(Set dst (AndL dst src));
 8973   effect(KILL cr);
 8974   format %{ "AND    $dst.lo,$src.lo\n\t"
 8975             "AND    $dst.hi,$src.hi" %}
 8976   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8977   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8978   ins_pipe( ialu_reg_long );
 8979 %}
 8980 
 8981 // And Long Register with Memory
 8982 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8983   match(Set dst (AndL dst (LoadL mem)));
 8984   effect(KILL cr);
 8985   ins_cost(125);
 8986   format %{ "AND    $dst.lo,$mem\n\t"
 8987             "AND    $dst.hi,$mem+4" %}
 8988   opcode(0x23, 0x23);
 8989   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8990   ins_pipe( ialu_reg_long_mem );
 8991 %}
 8992 
 8993 // BMI1 instructions
 8994 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8995   match(Set dst (AndL (XorL src1 minus_1) src2));
 8996   predicate(UseBMI1Instructions);
 8997   effect(KILL cr, TEMP dst);
 8998 
 8999   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 9000             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 9001          %}
 9002 
 9003   ins_encode %{
 9004     Register Rdst = $dst$$Register;
 9005     Register Rsrc1 = $src1$$Register;
 9006     Register Rsrc2 = $src2$$Register;
 9007     __ andnl(Rdst, Rsrc1, Rsrc2);
 9008     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 9009   %}
 9010   ins_pipe(ialu_reg_reg_long);
 9011 %}
 9012 
 9013 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 9014   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9015   predicate(UseBMI1Instructions);
 9016   effect(KILL cr, TEMP dst);
 9017 
 9018   ins_cost(125);
 9019   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9020             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9021          %}
 9022 
 9023   ins_encode %{
 9024     Register Rdst = $dst$$Register;
 9025     Register Rsrc1 = $src1$$Register;
 9026     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9027 
 9028     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9029     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9030   %}
 9031   ins_pipe(ialu_reg_mem);
 9032 %}
 9033 
 9034 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9035   match(Set dst (AndL (SubL imm_zero src) src));
 9036   predicate(UseBMI1Instructions);
 9037   effect(KILL cr, TEMP dst);
 9038 
 9039   format %{ "MOVL   $dst.hi, 0\n\t"
 9040             "BLSIL  $dst.lo, $src.lo\n\t"
 9041             "JNZ    done\n\t"
 9042             "BLSIL  $dst.hi, $src.hi\n"
 9043             "done:"
 9044          %}
 9045 
 9046   ins_encode %{
 9047     Label done;
 9048     Register Rdst = $dst$$Register;
 9049     Register Rsrc = $src$$Register;
 9050     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9051     __ blsil(Rdst, Rsrc);
 9052     __ jccb(Assembler::notZero, done);
 9053     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9054     __ bind(done);
 9055   %}
 9056   ins_pipe(ialu_reg);
 9057 %}
 9058 
 9059 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9060   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9061   predicate(UseBMI1Instructions);
 9062   effect(KILL cr, TEMP dst);
 9063 
 9064   ins_cost(125);
 9065   format %{ "MOVL   $dst.hi, 0\n\t"
 9066             "BLSIL  $dst.lo, $src\n\t"
 9067             "JNZ    done\n\t"
 9068             "BLSIL  $dst.hi, $src+4\n"
 9069             "done:"
 9070          %}
 9071 
 9072   ins_encode %{
 9073     Label done;
 9074     Register Rdst = $dst$$Register;
 9075     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9076 
 9077     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9078     __ blsil(Rdst, $src$$Address);
 9079     __ jccb(Assembler::notZero, done);
 9080     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9081     __ bind(done);
 9082   %}
 9083   ins_pipe(ialu_reg_mem);
 9084 %}
 9085 
 9086 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9087 %{
 9088   match(Set dst (XorL (AddL src minus_1) src));
 9089   predicate(UseBMI1Instructions);
 9090   effect(KILL cr, TEMP dst);
 9091 
 9092   format %{ "MOVL    $dst.hi, 0\n\t"
 9093             "BLSMSKL $dst.lo, $src.lo\n\t"
 9094             "JNC     done\n\t"
 9095             "BLSMSKL $dst.hi, $src.hi\n"
 9096             "done:"
 9097          %}
 9098 
 9099   ins_encode %{
 9100     Label done;
 9101     Register Rdst = $dst$$Register;
 9102     Register Rsrc = $src$$Register;
 9103     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9104     __ blsmskl(Rdst, Rsrc);
 9105     __ jccb(Assembler::carryClear, done);
 9106     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9107     __ bind(done);
 9108   %}
 9109 
 9110   ins_pipe(ialu_reg);
 9111 %}
 9112 
 9113 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9114 %{
 9115   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9116   predicate(UseBMI1Instructions);
 9117   effect(KILL cr, TEMP dst);
 9118 
 9119   ins_cost(125);
 9120   format %{ "MOVL    $dst.hi, 0\n\t"
 9121             "BLSMSKL $dst.lo, $src\n\t"
 9122             "JNC     done\n\t"
 9123             "BLSMSKL $dst.hi, $src+4\n"
 9124             "done:"
 9125          %}
 9126 
 9127   ins_encode %{
 9128     Label done;
 9129     Register Rdst = $dst$$Register;
 9130     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9131 
 9132     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9133     __ blsmskl(Rdst, $src$$Address);
 9134     __ jccb(Assembler::carryClear, done);
 9135     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9136     __ bind(done);
 9137   %}
 9138 
 9139   ins_pipe(ialu_reg_mem);
 9140 %}
 9141 
 9142 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9143 %{
 9144   match(Set dst (AndL (AddL src minus_1) src) );
 9145   predicate(UseBMI1Instructions);
 9146   effect(KILL cr, TEMP dst);
 9147 
 9148   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9149             "BLSRL  $dst.lo, $src.lo\n\t"
 9150             "JNC    done\n\t"
 9151             "BLSRL  $dst.hi, $src.hi\n"
 9152             "done:"
 9153   %}
 9154 
 9155   ins_encode %{
 9156     Label done;
 9157     Register Rdst = $dst$$Register;
 9158     Register Rsrc = $src$$Register;
 9159     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9160     __ blsrl(Rdst, Rsrc);
 9161     __ jccb(Assembler::carryClear, done);
 9162     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9163     __ bind(done);
 9164   %}
 9165 
 9166   ins_pipe(ialu_reg);
 9167 %}
 9168 
 9169 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9170 %{
 9171   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9172   predicate(UseBMI1Instructions);
 9173   effect(KILL cr, TEMP dst);
 9174 
 9175   ins_cost(125);
 9176   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9177             "BLSRL  $dst.lo, $src\n\t"
 9178             "JNC    done\n\t"
 9179             "BLSRL  $dst.hi, $src+4\n"
 9180             "done:"
 9181   %}
 9182 
 9183   ins_encode %{
 9184     Label done;
 9185     Register Rdst = $dst$$Register;
 9186     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9187     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9188     __ blsrl(Rdst, $src$$Address);
 9189     __ jccb(Assembler::carryClear, done);
 9190     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9191     __ bind(done);
 9192   %}
 9193 
 9194   ins_pipe(ialu_reg_mem);
 9195 %}
 9196 
 9197 // Or Long Register with Register
 9198 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9199   match(Set dst (OrL dst src));
 9200   effect(KILL cr);
 9201   format %{ "OR     $dst.lo,$src.lo\n\t"
 9202             "OR     $dst.hi,$src.hi" %}
 9203   opcode(0x0B,0x0B);
 9204   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9205   ins_pipe( ialu_reg_reg_long );
 9206 %}
 9207 
 9208 // Or Long Register with Immediate
 9209 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9210   match(Set dst (OrL dst src));
 9211   effect(KILL cr);
 9212   format %{ "OR     $dst.lo,$src.lo\n\t"
 9213             "OR     $dst.hi,$src.hi" %}
 9214   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9215   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9216   ins_pipe( ialu_reg_long );
 9217 %}
 9218 
 9219 // Or Long Register with Memory
 9220 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9221   match(Set dst (OrL dst (LoadL mem)));
 9222   effect(KILL cr);
 9223   ins_cost(125);
 9224   format %{ "OR     $dst.lo,$mem\n\t"
 9225             "OR     $dst.hi,$mem+4" %}
 9226   opcode(0x0B,0x0B);
 9227   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9228   ins_pipe( ialu_reg_long_mem );
 9229 %}
 9230 
 9231 // Xor Long Register with Register
 9232 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9233   match(Set dst (XorL dst src));
 9234   effect(KILL cr);
 9235   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9236             "XOR    $dst.hi,$src.hi" %}
 9237   opcode(0x33,0x33);
 9238   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9239   ins_pipe( ialu_reg_reg_long );
 9240 %}
 9241 
 9242 // Xor Long Register with Immediate -1
 9243 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9244   match(Set dst (XorL dst imm));
 9245   format %{ "NOT    $dst.lo\n\t"
 9246             "NOT    $dst.hi" %}
 9247   ins_encode %{
 9248      __ notl($dst$$Register);
 9249      __ notl(HIGH_FROM_LOW($dst$$Register));
 9250   %}
 9251   ins_pipe( ialu_reg_long );
 9252 %}
 9253 
 9254 // Xor Long Register with Immediate
 9255 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9256   match(Set dst (XorL dst src));
 9257   effect(KILL cr);
 9258   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9259             "XOR    $dst.hi,$src.hi" %}
 9260   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9261   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9262   ins_pipe( ialu_reg_long );
 9263 %}
 9264 
 9265 // Xor Long Register with Memory
 9266 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9267   match(Set dst (XorL dst (LoadL mem)));
 9268   effect(KILL cr);
 9269   ins_cost(125);
 9270   format %{ "XOR    $dst.lo,$mem\n\t"
 9271             "XOR    $dst.hi,$mem+4" %}
 9272   opcode(0x33,0x33);
 9273   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9274   ins_pipe( ialu_reg_long_mem );
 9275 %}
 9276 
 9277 // Shift Left Long by 1
 9278 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9279   predicate(UseNewLongLShift);
 9280   match(Set dst (LShiftL dst cnt));
 9281   effect(KILL cr);
 9282   ins_cost(100);
 9283   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9284             "ADC    $dst.hi,$dst.hi" %}
 9285   ins_encode %{
 9286     __ addl($dst$$Register,$dst$$Register);
 9287     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9288   %}
 9289   ins_pipe( ialu_reg_long );
 9290 %}
 9291 
 9292 // Shift Left Long by 2
 9293 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9294   predicate(UseNewLongLShift);
 9295   match(Set dst (LShiftL dst cnt));
 9296   effect(KILL cr);
 9297   ins_cost(100);
 9298   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9299             "ADC    $dst.hi,$dst.hi\n\t"
 9300             "ADD    $dst.lo,$dst.lo\n\t"
 9301             "ADC    $dst.hi,$dst.hi" %}
 9302   ins_encode %{
 9303     __ addl($dst$$Register,$dst$$Register);
 9304     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9305     __ addl($dst$$Register,$dst$$Register);
 9306     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9307   %}
 9308   ins_pipe( ialu_reg_long );
 9309 %}
 9310 
 9311 // Shift Left Long by 3
 9312 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9313   predicate(UseNewLongLShift);
 9314   match(Set dst (LShiftL dst cnt));
 9315   effect(KILL cr);
 9316   ins_cost(100);
 9317   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9318             "ADC    $dst.hi,$dst.hi\n\t"
 9319             "ADD    $dst.lo,$dst.lo\n\t"
 9320             "ADC    $dst.hi,$dst.hi\n\t"
 9321             "ADD    $dst.lo,$dst.lo\n\t"
 9322             "ADC    $dst.hi,$dst.hi" %}
 9323   ins_encode %{
 9324     __ addl($dst$$Register,$dst$$Register);
 9325     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9326     __ addl($dst$$Register,$dst$$Register);
 9327     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9328     __ addl($dst$$Register,$dst$$Register);
 9329     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9330   %}
 9331   ins_pipe( ialu_reg_long );
 9332 %}
 9333 
 9334 // Shift Left Long by 1-31
 9335 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9336   match(Set dst (LShiftL dst cnt));
 9337   effect(KILL cr);
 9338   ins_cost(200);
 9339   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9340             "SHL    $dst.lo,$cnt" %}
 9341   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9342   ins_encode( move_long_small_shift(dst,cnt) );
 9343   ins_pipe( ialu_reg_long );
 9344 %}
 9345 
 9346 // Shift Left Long by 32-63
 9347 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9348   match(Set dst (LShiftL dst cnt));
 9349   effect(KILL cr);
 9350   ins_cost(300);
 9351   format %{ "MOV    $dst.hi,$dst.lo\n"
 9352           "\tSHL    $dst.hi,$cnt-32\n"
 9353           "\tXOR    $dst.lo,$dst.lo" %}
 9354   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9355   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9356   ins_pipe( ialu_reg_long );
 9357 %}
 9358 
 9359 // Shift Left Long by variable
 9360 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9361   match(Set dst (LShiftL dst shift));
 9362   effect(KILL cr);
 9363   ins_cost(500+200);
 9364   size(17);
 9365   format %{ "TEST   $shift,32\n\t"
 9366             "JEQ,s  small\n\t"
 9367             "MOV    $dst.hi,$dst.lo\n\t"
 9368             "XOR    $dst.lo,$dst.lo\n"
 9369     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9370             "SHL    $dst.lo,$shift" %}
 9371   ins_encode( shift_left_long( dst, shift ) );
 9372   ins_pipe( pipe_slow );
 9373 %}
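// With a CL count the hardware masks the shift amount to 5 bits for 32-bit
// operands, so SHLD/SHL alone cannot handle counts of 32-63.  The TEST of
// bit 5 (value 32) detects that case and first moves the low word into the
// high word and zeroes the low word; the masked SHLD/SHL pair then finishes
// the job.  The variable right-shift forms below use the same trick with
// SHRD/SHR (and SAR to replicate the sign for the arithmetic variant).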
 9374 
 9375 // Shift Right Long by 1-31
 9376 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9377   match(Set dst (URShiftL dst cnt));
 9378   effect(KILL cr);
 9379   ins_cost(200);
 9380   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9381             "SHR    $dst.hi,$cnt" %}
 9382   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9383   ins_encode( move_long_small_shift(dst,cnt) );
 9384   ins_pipe( ialu_reg_long );
 9385 %}
 9386 
 9387 // Shift Right Long by 32-63
 9388 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9389   match(Set dst (URShiftL dst cnt));
 9390   effect(KILL cr);
 9391   ins_cost(300);
 9392   format %{ "MOV    $dst.lo,$dst.hi\n"
 9393           "\tSHR    $dst.lo,$cnt-32\n"
 9394           "\tXOR    $dst.hi,$dst.hi" %}
 9395   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9396   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9397   ins_pipe( ialu_reg_long );
 9398 %}
 9399 
 9400 // Shift Right Long by variable
 9401 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9402   match(Set dst (URShiftL dst shift));
 9403   effect(KILL cr);
 9404   ins_cost(600);
 9405   size(17);
 9406   format %{ "TEST   $shift,32\n\t"
 9407             "JEQ,s  small\n\t"
 9408             "MOV    $dst.lo,$dst.hi\n\t"
 9409             "XOR    $dst.hi,$dst.hi\n"
 9410     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9411             "SHR    $dst.hi,$shift" %}
 9412   ins_encode( shift_right_long( dst, shift ) );
 9413   ins_pipe( pipe_slow );
 9414 %}
 9415 
// Shift Right arithmetic Long by 1-31
 9417 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9418   match(Set dst (RShiftL dst cnt));
 9419   effect(KILL cr);
 9420   ins_cost(200);
 9421   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9422             "SAR    $dst.hi,$cnt" %}
 9423   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9424   ins_encode( move_long_small_shift(dst,cnt) );
 9425   ins_pipe( ialu_reg_long );
 9426 %}
 9427 
// Shift Right arithmetic Long by 32-63
 9429 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9430   match(Set dst (RShiftL dst cnt));
 9431   effect(KILL cr);
 9432   ins_cost(300);
 9433   format %{ "MOV    $dst.lo,$dst.hi\n"
 9434           "\tSAR    $dst.lo,$cnt-32\n"
 9435           "\tSAR    $dst.hi,31" %}
 9436   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9437   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9438   ins_pipe( ialu_reg_long );
 9439 %}
 9440 
 9441 // Shift Right arithmetic Long by variable
 9442 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9443   match(Set dst (RShiftL dst shift));
 9444   effect(KILL cr);
 9445   ins_cost(600);
 9446   size(18);
 9447   format %{ "TEST   $shift,32\n\t"
 9448             "JEQ,s  small\n\t"
 9449             "MOV    $dst.lo,$dst.hi\n\t"
 9450             "SAR    $dst.hi,31\n"
 9451     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9452             "SAR    $dst.hi,$shift" %}
 9453   ins_encode( shift_right_arith_long( dst, shift ) );
 9454   ins_pipe( pipe_slow );
 9455 %}
 9456 
 9457 
 9458 //----------Double Instructions------------------------------------------------
 9459 // Double Math
 9460 
 9461 // Compare & branch
 9462 
// P6 version of double compare, sets condition codes in EFLAGS
 9464 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9465   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9466   match(Set cr (CmpD src1 src2));
 9467   effect(KILL rax);
 9468   ins_cost(150);
 9469   format %{ "FLD    $src1\n\t"
 9470             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9471             "JNP    exit\n\t"
 9472             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9473             "SAHF\n"
 9474      "exit:\tNOP               // avoid branch to branch" %}
 9475   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9476   ins_encode( Push_Reg_DPR(src1),
 9477               OpcP, RegOpc(src2),
 9478               cmpF_P6_fixup );
 9479   ins_pipe( pipe_slow );
 9480 %}
 9481 
 9482 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9483   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9484   match(Set cr (CmpD src1 src2));
 9485   ins_cost(150);
 9486   format %{ "FLD    $src1\n\t"
 9487             "FUCOMIP ST,$src2  // P6 instruction" %}
 9488   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9489   ins_encode( Push_Reg_DPR(src1),
 9490               OpcP, RegOpc(src2));
 9491   ins_pipe( pipe_slow );
 9492 %}
 9493 
 9494 // Compare & branch
 9495 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9496   predicate(UseSSE<=1);
 9497   match(Set cr (CmpD src1 src2));
 9498   effect(KILL rax);
 9499   ins_cost(200);
 9500   format %{ "FLD    $src1\n\t"
 9501             "FCOMp  $src2\n\t"
 9502             "FNSTSW AX\n\t"
 9503             "TEST   AX,0x400\n\t"
 9504             "JZ,s   flags\n\t"
 9505             "MOV    AH,1\t# unordered treat as LT\n"
 9506     "flags:\tSAHF" %}
 9507   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9508   ins_encode( Push_Reg_DPR(src1),
 9509               OpcP, RegOpc(src2),
 9510               fpu_flags);
 9511   ins_pipe( pipe_slow );
 9512 %}
 9513 
 9514 // Compare vs zero into -1,0,1
 9515 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9516   predicate(UseSSE<=1);
 9517   match(Set dst (CmpD3 src1 zero));
 9518   effect(KILL cr, KILL rax);
 9519   ins_cost(280);
 9520   format %{ "FTSTD  $dst,$src1" %}
 9521   opcode(0xE4, 0xD9);
 9522   ins_encode( Push_Reg_DPR(src1),
 9523               OpcS, OpcP, PopFPU,
 9524               CmpF_Result(dst));
 9525   ins_pipe( pipe_slow );
 9526 %}
 9527 
 9528 // Compare into -1,0,1
 9529 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9530   predicate(UseSSE<=1);
 9531   match(Set dst (CmpD3 src1 src2));
 9532   effect(KILL cr, KILL rax);
 9533   ins_cost(300);
 9534   format %{ "FCMPD  $dst,$src1,$src2" %}
 9535   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9536   ins_encode( Push_Reg_DPR(src1),
 9537               OpcP, RegOpc(src2),
 9538               CmpF_Result(dst));
 9539   ins_pipe( pipe_slow );
 9540 %}
 9541 
// double compare and set condition codes in EFLAGS by XMM regs
 9543 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9544   predicate(UseSSE>=2);
 9545   match(Set cr (CmpD src1 src2));
 9546   ins_cost(145);
 9547   format %{ "UCOMISD $src1,$src2\n\t"
 9548             "JNP,s   exit\n\t"
 9549             "PUSHF\t# saw NaN, set CF\n\t"
 9550             "AND     [rsp], #0xffffff2b\n\t"
 9551             "POPF\n"
 9552     "exit:" %}
 9553   ins_encode %{
 9554     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9555     emit_cmpfp_fixup(_masm);
 9556   %}
 9557   ins_pipe( pipe_slow );
 9558 %}
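// An unordered UCOMISD leaves ZF,PF,CF = 1,1,1.  The fixup emitted by
// emit_cmpfp_fixup() skips ordered results via JNP and otherwise ANDs EFLAGS
// with 0xffffff2b, clearing ZF and PF (and SF/AF) while keeping CF, so a NaN
// operand reads as "below".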
 9559 
 9560 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9561   predicate(UseSSE>=2);
 9562   match(Set cr (CmpD src1 src2));
 9563   ins_cost(100);
 9564   format %{ "UCOMISD $src1,$src2" %}
 9565   ins_encode %{
 9566     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9567   %}
 9568   ins_pipe( pipe_slow );
 9569 %}
 9570 
// double compare and set condition codes in EFLAGS by XMM regs
 9572 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9573   predicate(UseSSE>=2);
 9574   match(Set cr (CmpD src1 (LoadD src2)));
 9575   ins_cost(145);
 9576   format %{ "UCOMISD $src1,$src2\n\t"
 9577             "JNP,s   exit\n\t"
 9578             "PUSHF\t# saw NaN, set CF\n\t"
 9579             "AND     [rsp], #0xffffff2b\n\t"
 9580             "POPF\n"
 9581     "exit:" %}
 9582   ins_encode %{
 9583     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9584     emit_cmpfp_fixup(_masm);
 9585   %}
 9586   ins_pipe( pipe_slow );
 9587 %}
 9588 
 9589 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9590   predicate(UseSSE>=2);
 9591   match(Set cr (CmpD src1 (LoadD src2)));
 9592   ins_cost(100);
 9593   format %{ "UCOMISD $src1,$src2" %}
 9594   ins_encode %{
 9595     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9596   %}
 9597   ins_pipe( pipe_slow );
 9598 %}
 9599 
 9600 // Compare into -1,0,1 in XMM
 9601 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9602   predicate(UseSSE>=2);
 9603   match(Set dst (CmpD3 src1 src2));
 9604   effect(KILL cr);
 9605   ins_cost(255);
 9606   format %{ "UCOMISD $src1, $src2\n\t"
 9607             "MOV     $dst, #-1\n\t"
 9608             "JP,s    done\n\t"
 9609             "JB,s    done\n\t"
 9610             "SETNE   $dst\n\t"
 9611             "MOVZB   $dst, $dst\n"
 9612     "done:" %}
 9613   ins_encode %{
 9614     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9615     emit_cmpfp3(_masm, $dst$$Register);
 9616   %}
 9617   ins_pipe( pipe_slow );
 9618 %}
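// In the emit_cmpfp3() sequence above the preloaded -1 survives when the
// compare is unordered (JP) or below (JB); otherwise SETNE produces 0 for
// equal and 1 for above, zero-extended by MOVZB, so NaN operands yield the
// unordered result -1.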
 9619 
 9620 // Compare into -1,0,1 in XMM and memory
 9621 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9622   predicate(UseSSE>=2);
 9623   match(Set dst (CmpD3 src1 (LoadD src2)));
 9624   effect(KILL cr);
 9625   ins_cost(275);
 9626   format %{ "UCOMISD $src1, $src2\n\t"
 9627             "MOV     $dst, #-1\n\t"
 9628             "JP,s    done\n\t"
 9629             "JB,s    done\n\t"
 9630             "SETNE   $dst\n\t"
 9631             "MOVZB   $dst, $dst\n"
 9632     "done:" %}
 9633   ins_encode %{
 9634     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9635     emit_cmpfp3(_masm, $dst$$Register);
 9636   %}
 9637   ins_pipe( pipe_slow );
 9638 %}
 9639 
 9640 
 9641 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9642   predicate (UseSSE <=1);
 9643   match(Set dst (SubD dst src));
 9644 
 9645   format %{ "FLD    $src\n\t"
 9646             "DSUBp  $dst,ST" %}
 9647   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9648   ins_cost(150);
 9649   ins_encode( Push_Reg_DPR(src),
 9650               OpcP, RegOpc(dst) );
 9651   ins_pipe( fpu_reg_reg );
 9652 %}
 9653 
 9654 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9655   predicate (UseSSE <=1);
 9656   match(Set dst (RoundDouble (SubD src1 src2)));
 9657   ins_cost(250);
 9658 
 9659   format %{ "FLD    $src2\n\t"
 9660             "DSUB   ST,$src1\n\t"
 9661             "FSTP_D $dst\t# D-round" %}
 9662   opcode(0xD8, 0x5);
 9663   ins_encode( Push_Reg_DPR(src2),
 9664               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9665   ins_pipe( fpu_mem_reg_reg );
 9666 %}
 9667 
 9668 
 9669 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9670   predicate (UseSSE <=1);
 9671   match(Set dst (SubD dst (LoadD src)));
 9672   ins_cost(150);
 9673 
 9674   format %{ "FLD    $src\n\t"
 9675             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9677   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9678               OpcP, RegOpc(dst) );
 9679   ins_pipe( fpu_reg_mem );
 9680 %}
 9681 
 9682 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9683   predicate (UseSSE<=1);
 9684   match(Set dst (AbsD src));
 9685   ins_cost(100);
 9686   format %{ "FABS" %}
 9687   opcode(0xE1, 0xD9);
 9688   ins_encode( OpcS, OpcP );
 9689   ins_pipe( fpu_reg_reg );
 9690 %}
 9691 
 9692 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9693   predicate(UseSSE<=1);
 9694   match(Set dst (NegD src));
 9695   ins_cost(100);
 9696   format %{ "FCHS" %}
 9697   opcode(0xE0, 0xD9);
 9698   ins_encode( OpcS, OpcP );
 9699   ins_pipe( fpu_reg_reg );
 9700 %}
 9701 
 9702 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9703   predicate(UseSSE<=1);
 9704   match(Set dst (AddD dst src));
 9705   format %{ "FLD    $src\n\t"
 9706             "DADD   $dst,ST" %}
 9707   size(4);
 9708   ins_cost(150);
 9709   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9710   ins_encode( Push_Reg_DPR(src),
 9711               OpcP, RegOpc(dst) );
 9712   ins_pipe( fpu_reg_reg );
 9713 %}
 9714 
 9715 
 9716 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9717   predicate(UseSSE<=1);
 9718   match(Set dst (RoundDouble (AddD src1 src2)));
 9719   ins_cost(250);
 9720 
 9721   format %{ "FLD    $src2\n\t"
 9722             "DADD   ST,$src1\n\t"
 9723             "FSTP_D $dst\t# D-round" %}
 9724   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9725   ins_encode( Push_Reg_DPR(src2),
 9726               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9727   ins_pipe( fpu_mem_reg_reg );
 9728 %}
 9729 
 9730 
 9731 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9732   predicate(UseSSE<=1);
 9733   match(Set dst (AddD dst (LoadD src)));
 9734   ins_cost(150);
 9735 
 9736   format %{ "FLD    $src\n\t"
 9737             "DADDp  $dst,ST" %}
 9738   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9739   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9740               OpcP, RegOpc(dst) );
 9741   ins_pipe( fpu_reg_mem );
 9742 %}
 9743 
 9744 // add-to-memory
 9745 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9746   predicate(UseSSE<=1);
 9747   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9748   ins_cost(150);
 9749 
 9750   format %{ "FLD_D  $dst\n\t"
 9751             "DADD   ST,$src\n\t"
 9752             "FST_D  $dst" %}
 9753   opcode(0xDD, 0x0);
 9754   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9755               Opcode(0xD8), RegOpc(src),
 9756               set_instruction_start,
 9757               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9758   ins_pipe( fpu_reg_mem );
 9759 %}
 9760 
 9761 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9762   predicate(UseSSE<=1);
 9763   match(Set dst (AddD dst con));
 9764   ins_cost(125);
 9765   format %{ "FLD1\n\t"
 9766             "DADDp  $dst,ST" %}
 9767   ins_encode %{
 9768     __ fld1();
 9769     __ faddp($dst$$reg);
 9770   %}
 9771   ins_pipe(fpu_reg);
 9772 %}
 9773 
 9774 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9775   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9776   match(Set dst (AddD dst con));
 9777   ins_cost(200);
 9778   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9779             "DADDp  $dst,ST" %}
 9780   ins_encode %{
 9781     __ fld_d($constantaddress($con));
 9782     __ faddp($dst$$reg);
 9783   %}
 9784   ins_pipe(fpu_reg_mem);
 9785 %}
 9786 
 9787 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9788   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9789   match(Set dst (RoundDouble (AddD src con)));
 9790   ins_cost(200);
 9791   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9792             "DADD   ST,$src\n\t"
 9793             "FSTP_D $dst\t# D-round" %}
 9794   ins_encode %{
 9795     __ fld_d($constantaddress($con));
 9796     __ fadd($src$$reg);
 9797     __ fstp_d(Address(rsp, $dst$$disp));
 9798   %}
 9799   ins_pipe(fpu_mem_reg_con);
 9800 %}
 9801 
 9802 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9803   predicate(UseSSE<=1);
 9804   match(Set dst (MulD dst src));
 9805   format %{ "FLD    $src\n\t"
 9806             "DMULp  $dst,ST" %}
 9807   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9808   ins_cost(150);
 9809   ins_encode( Push_Reg_DPR(src),
 9810               OpcP, RegOpc(dst) );
 9811   ins_pipe( fpu_reg_reg );
 9812 %}
 9813 
 9814 // Strict FP instruction biases argument before multiply then
 9815 // biases result to avoid double rounding of subnormals.
 9816 //
 9817 // scale arg1 by multiplying arg1 by 2^(-15360)
 9818 // load arg2
 9819 // multiply scaled arg1 by arg2
 9820 // rescale product by 2^(15360)
 9821 //
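// The bias constants are 2^(-15360) and 2^(+15360); 15360 = 16383 - 1023 is
// the difference between the x87 extended-precision exponent bias and the
// IEEE double exponent bias.  Pre-scaling pushes a product that would be a
// double subnormal into the extended-precision subnormal range, so its
// significand is rounded only once, at the correct bit position; the final
// multiply restores the magnitude.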
 9822 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9823   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9824   match(Set dst (MulD dst src));
 9825   ins_cost(1);   // Select this instruction for all FP double multiplies
 9826 
 9827   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9828             "DMULp  $dst,ST\n\t"
 9829             "FLD    $src\n\t"
 9830             "DMULp  $dst,ST\n\t"
 9831             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9832             "DMULp  $dst,ST\n\t" %}
 9833   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9834   ins_encode( strictfp_bias1(dst),
 9835               Push_Reg_DPR(src),
 9836               OpcP, RegOpc(dst),
 9837               strictfp_bias2(dst) );
 9838   ins_pipe( fpu_reg_reg );
 9839 %}
 9840 
 9841 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9842   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9843   match(Set dst (MulD dst con));
 9844   ins_cost(200);
 9845   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9846             "DMULp  $dst,ST" %}
 9847   ins_encode %{
 9848     __ fld_d($constantaddress($con));
 9849     __ fmulp($dst$$reg);
 9850   %}
 9851   ins_pipe(fpu_reg_mem);
 9852 %}
 9853 
 9854 
 9855 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9856   predicate( UseSSE<=1 );
 9857   match(Set dst (MulD dst (LoadD src)));
 9858   ins_cost(200);
 9859   format %{ "FLD_D  $src\n\t"
 9860             "DMULp  $dst,ST" %}
 9861   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9862   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9863               OpcP, RegOpc(dst) );
 9864   ins_pipe( fpu_reg_mem );
 9865 %}
 9866 
 9867 //
 9868 // Cisc-alternate to reg-reg multiply
 9869 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9870   predicate( UseSSE<=1 );
 9871   match(Set dst (MulD src (LoadD mem)));
 9872   ins_cost(250);
 9873   format %{ "FLD_D  $mem\n\t"
 9874             "DMUL   ST,$src\n\t"
 9875             "FSTP_D $dst" %}
 9876   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9877   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9878               OpcReg_FPR(src),
 9879               Pop_Reg_DPR(dst) );
 9880   ins_pipe( fpu_reg_reg_mem );
 9881 %}
 9882 
 9883 
 9884 // MACRO3 -- addDPR a mulDPR
 9885 // This instruction is a '2-address' instruction in that the result goes
 9886 // back to src2.  This eliminates a move from the macro; possibly the
 9887 // register allocator will have to add it back (and maybe not).
 9888 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9889   predicate( UseSSE<=1 );
 9890   match(Set src2 (AddD (MulD src0 src1) src2));
 9891   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9892             "DMUL   ST,$src1\n\t"
 9893             "DADDp  $src2,ST" %}
 9894   ins_cost(250);
 9895   opcode(0xDD); /* LoadD DD /0 */
 9896   ins_encode( Push_Reg_FPR(src0),
 9897               FMul_ST_reg(src1),
 9898               FAddP_reg_ST(src2) );
 9899   ins_pipe( fpu_reg_reg_reg );
 9900 %}
 9901 
 9902 
 9903 // MACRO3 -- subDPR a mulDPR
 9904 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9905   predicate( UseSSE<=1 );
 9906   match(Set src2 (SubD (MulD src0 src1) src2));
 9907   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9908             "DMUL   ST,$src1\n\t"
 9909             "DSUBRp $src2,ST" %}
 9910   ins_cost(250);
 9911   ins_encode( Push_Reg_FPR(src0),
 9912               FMul_ST_reg(src1),
 9913               Opcode(0xDE), Opc_plus(0xE0,src2));
 9914   ins_pipe( fpu_reg_reg_reg );
 9915 %}
 9916 
 9917 
 9918 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9919   predicate( UseSSE<=1 );
 9920   match(Set dst (DivD dst src));
 9921 
 9922   format %{ "FLD    $src\n\t"
 9923             "FDIVp  $dst,ST" %}
 9924   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9925   ins_cost(150);
 9926   ins_encode( Push_Reg_DPR(src),
 9927               OpcP, RegOpc(dst) );
 9928   ins_pipe( fpu_reg_reg );
 9929 %}
 9930 
 9931 // Strict FP instruction biases argument before division then
 9932 // biases result, to avoid double rounding of subnormals.
 9933 //
 9934 // scale dividend by multiplying dividend by 2^(-15360)
 9935 // load divisor
 9936 // divide scaled dividend by divisor
 9937 // rescale quotient by 2^(15360)
 9938 //
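// (Same 2^(-15360)/2^(+15360) biasing as strictfp_mulDPR_reg above.)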
 9939 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divides
 9944 
 9945   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9946             "DMULp  $dst,ST\n\t"
 9947             "FLD    $src\n\t"
 9948             "FDIVp  $dst,ST\n\t"
 9949             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9950             "DMULp  $dst,ST\n\t" %}
 9951   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9952   ins_encode( strictfp_bias1(dst),
 9953               Push_Reg_DPR(src),
 9954               OpcP, RegOpc(dst),
 9955               strictfp_bias2(dst) );
 9956   ins_pipe( fpu_reg_reg );
 9957 %}
 9958 
 9959 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9960   predicate(UseSSE<=1);
 9961   match(Set dst (ModD dst src));
 9962   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9963 
 9964   format %{ "DMOD   $dst,$src" %}
 9965   ins_cost(250);
 9966   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9967               emitModDPR(),
 9968               Push_Result_Mod_DPR(src),
 9969               Pop_Reg_DPR(dst));
 9970   ins_pipe( pipe_slow );
 9971 %}
 9972 
 9973 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9974   predicate(UseSSE>=2);
 9975   match(Set dst (ModD src0 src1));
 9976   effect(KILL rax, KILL cr);
 9977 
 9978   format %{ "SUB    ESP,8\t # DMOD\n"
 9979           "\tMOVSD  [ESP+0],$src1\n"
 9980           "\tFLD_D  [ESP+0]\n"
 9981           "\tMOVSD  [ESP+0],$src0\n"
 9982           "\tFLD_D  [ESP+0]\n"
 9983      "loop:\tFPREM\n"
 9984           "\tFWAIT\n"
 9985           "\tFNSTSW AX\n"
 9986           "\tSAHF\n"
 9987           "\tJP     loop\n"
 9988           "\tFSTP_D [ESP+0]\n"
 9989           "\tMOVSD  $dst,[ESP+0]\n"
 9990           "\tADD    ESP,8\n"
 9991           "\tFSTP   ST0\t # Restore FPU Stack"
 9992     %}
 9993   ins_cost(250);
 9994   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9995   ins_pipe( pipe_slow );
 9996 %}
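// About the FPREM loop above: FPREM produces only a partial remainder and
// sets C2 in the FPU status word while the reduction is incomplete.
// FNSTSW AX / SAHF copies C0/C2/C3 into CF/PF/ZF, so JP re-runs FPREM until
// C2 clears and the remainder is exact.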
 9997 
 9998 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9999   predicate (UseSSE<=1);
10000   match(Set dst(AtanD dst src));
10001   format %{ "DATA   $dst,$src" %}
10002   opcode(0xD9, 0xF3);
10003   ins_encode( Push_Reg_DPR(src),
10004               OpcP, OpcS, RegOpc(dst) );
10005   ins_pipe( pipe_slow );
10006 %}
10007 
10008 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10009   predicate (UseSSE>=2);
10010   match(Set dst(AtanD dst src));
10011   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10012   format %{ "DATA   $dst,$src" %}
10013   opcode(0xD9, 0xF3);
10014   ins_encode( Push_SrcD(src),
10015               OpcP, OpcS, Push_ResultD(dst) );
10016   ins_pipe( pipe_slow );
10017 %}
10018 
10019 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10020   predicate (UseSSE<=1);
10021   match(Set dst (SqrtD src));
10022   format %{ "DSQRT  $dst,$src" %}
10023   opcode(0xFA, 0xD9);
10024   ins_encode( Push_Reg_DPR(src),
10025               OpcS, OpcP, Pop_Reg_DPR(dst) );
10026   ins_pipe( pipe_slow );
10027 %}
10028 
10029 //-------------Float Instructions-------------------------------
10030 // Float Math
10031 
10032 // Code for float compare:
10033 //     fcompp();
10034 //     fwait(); fnstsw_ax();
10035 //     sahf();
10036 //     movl(dst, unordered_result);
10037 //     jcc(Assembler::parity, exit);
10038 //     movl(dst, less_result);
10039 //     jcc(Assembler::below, exit);
10040 //     movl(dst, equal_result);
10041 //     jcc(Assembler::equal, exit);
10042 //     movl(dst, greater_result);
10043 //   exit:
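//   (After SAHF the x87 condition codes appear as CF=C0, PF=C2, ZF=C3; an
//   unordered compare sets all three, so the parity branch selects the
//   unordered result first.)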
10044 
10045 // P6 version of float compare, sets condition codes in EFLAGS
10046 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10047   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10048   match(Set cr (CmpF src1 src2));
10049   effect(KILL rax);
10050   ins_cost(150);
10051   format %{ "FLD    $src1\n\t"
10052             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10053             "JNP    exit\n\t"
10054             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10055             "SAHF\n"
10056      "exit:\tNOP               // avoid branch to branch" %}
10057   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10058   ins_encode( Push_Reg_DPR(src1),
10059               OpcP, RegOpc(src2),
10060               cmpF_P6_fixup );
10061   ins_pipe( pipe_slow );
10062 %}
10063 
10064 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10065   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10066   match(Set cr (CmpF src1 src2));
10067   ins_cost(100);
10068   format %{ "FLD    $src1\n\t"
10069             "FUCOMIP ST,$src2  // P6 instruction" %}
10070   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10071   ins_encode( Push_Reg_DPR(src1),
10072               OpcP, RegOpc(src2));
10073   ins_pipe( pipe_slow );
10074 %}
10075 
10076 
10077 // Compare & branch
10078 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10079   predicate(UseSSE == 0);
10080   match(Set cr (CmpF src1 src2));
10081   effect(KILL rax);
10082   ins_cost(200);
10083   format %{ "FLD    $src1\n\t"
10084             "FCOMp  $src2\n\t"
10085             "FNSTSW AX\n\t"
10086             "TEST   AX,0x400\n\t"
10087             "JZ,s   flags\n\t"
10088             "MOV    AH,1\t# unordered treat as LT\n"
10089     "flags:\tSAHF" %}
10090   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10091   ins_encode( Push_Reg_DPR(src1),
10092               OpcP, RegOpc(src2),
10093               fpu_flags);
10094   ins_pipe( pipe_slow );
10095 %}
10096 
10097 // Compare vs zero into -1,0,1
10098 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10099   predicate(UseSSE == 0);
10100   match(Set dst (CmpF3 src1 zero));
10101   effect(KILL cr, KILL rax);
10102   ins_cost(280);
10103   format %{ "FTSTF  $dst,$src1" %}
10104   opcode(0xE4, 0xD9);
10105   ins_encode( Push_Reg_DPR(src1),
10106               OpcS, OpcP, PopFPU,
10107               CmpF_Result(dst));
10108   ins_pipe( pipe_slow );
10109 %}
10110 
10111 // Compare into -1,0,1
10112 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10113   predicate(UseSSE == 0);
10114   match(Set dst (CmpF3 src1 src2));
10115   effect(KILL cr, KILL rax);
10116   ins_cost(300);
10117   format %{ "FCMPF  $dst,$src1,$src2" %}
10118   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10119   ins_encode( Push_Reg_DPR(src1),
10120               OpcP, RegOpc(src2),
10121               CmpF_Result(dst));
10122   ins_pipe( pipe_slow );
10123 %}
10124 
10125 // float compare and set condition codes in EFLAGS by XMM regs
10126 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10127   predicate(UseSSE>=1);
10128   match(Set cr (CmpF src1 src2));
10129   ins_cost(145);
10130   format %{ "UCOMISS $src1,$src2\n\t"
10131             "JNP,s   exit\n\t"
10132             "PUSHF\t# saw NaN, set CF\n\t"
10133             "AND     [rsp], #0xffffff2b\n\t"
10134             "POPF\n"
10135     "exit:" %}
10136   ins_encode %{
10137     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10138     emit_cmpfp_fixup(_masm);
10139   %}
10140   ins_pipe( pipe_slow );
10141 %}
10142 
10143 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10144   predicate(UseSSE>=1);
10145   match(Set cr (CmpF src1 src2));
10146   ins_cost(100);
10147   format %{ "UCOMISS $src1,$src2" %}
10148   ins_encode %{
10149     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10150   %}
10151   ins_pipe( pipe_slow );
10152 %}
10153 
10154 // float compare and set condition codes in EFLAGS by XMM regs
10155 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10156   predicate(UseSSE>=1);
10157   match(Set cr (CmpF src1 (LoadF src2)));
10158   ins_cost(165);
10159   format %{ "UCOMISS $src1,$src2\n\t"
10160             "JNP,s   exit\n\t"
10161             "PUSHF\t# saw NaN, set CF\n\t"
10162             "AND     [rsp], #0xffffff2b\n\t"
10163             "POPF\n"
10164     "exit:" %}
10165   ins_encode %{
10166     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10167     emit_cmpfp_fixup(_masm);
10168   %}
10169   ins_pipe( pipe_slow );
10170 %}
10171 
10172 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10173   predicate(UseSSE>=1);
10174   match(Set cr (CmpF src1 (LoadF src2)));
10175   ins_cost(100);
10176   format %{ "UCOMISS $src1,$src2" %}
10177   ins_encode %{
10178     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10179   %}
10180   ins_pipe( pipe_slow );
10181 %}
10182 
10183 // Compare into -1,0,1 in XMM
10184 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10185   predicate(UseSSE>=1);
10186   match(Set dst (CmpF3 src1 src2));
10187   effect(KILL cr);
10188   ins_cost(255);
10189   format %{ "UCOMISS $src1, $src2\n\t"
10190             "MOV     $dst, #-1\n\t"
10191             "JP,s    done\n\t"
10192             "JB,s    done\n\t"
10193             "SETNE   $dst\n\t"
10194             "MOVZB   $dst, $dst\n"
10195     "done:" %}
10196   ins_encode %{
10197     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10198     emit_cmpfp3(_masm, $dst$$Register);
10199   %}
10200   ins_pipe( pipe_slow );
10201 %}
10202 
10203 // Compare into -1,0,1 in XMM and memory
10204 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10205   predicate(UseSSE>=1);
10206   match(Set dst (CmpF3 src1 (LoadF src2)));
10207   effect(KILL cr);
10208   ins_cost(275);
10209   format %{ "UCOMISS $src1, $src2\n\t"
10210             "MOV     $dst, #-1\n\t"
10211             "JP,s    done\n\t"
10212             "JB,s    done\n\t"
10213             "SETNE   $dst\n\t"
10214             "MOVZB   $dst, $dst\n"
10215     "done:" %}
10216   ins_encode %{
10217     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10218     emit_cmpfp3(_masm, $dst$$Register);
10219   %}
10220   ins_pipe( pipe_slow );
10221 %}
10222 
10223 // Spill to obtain 24-bit precision
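// Storing the x87 result through a 32-bit stack slot (FSTP_S) forces it to be
// rounded to single precision even when the FPU control word is left at a
// wider precision; the "does not round to 24-bits" forms keep the result on
// the FPU stack and skip that store.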
10224 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10225   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10226   match(Set dst (SubF src1 src2));
10227 
10228   format %{ "FSUB   $dst,$src1 - $src2" %}
10229   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10230   ins_encode( Push_Reg_FPR(src1),
10231               OpcReg_FPR(src2),
10232               Pop_Mem_FPR(dst) );
10233   ins_pipe( fpu_mem_reg_reg );
10234 %}
10235 //
10236 // This instruction does not round to 24-bits
10237 instruct subFPR_reg(regFPR dst, regFPR src) %{
10238   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10239   match(Set dst (SubF dst src));
10240 
10241   format %{ "FSUB   $dst,$src" %}
10242   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10243   ins_encode( Push_Reg_FPR(src),
10244               OpcP, RegOpc(dst) );
10245   ins_pipe( fpu_reg_reg );
10246 %}
10247 
10248 // Spill to obtain 24-bit precision
10249 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10250   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10251   match(Set dst (AddF src1 src2));
10252 
10253   format %{ "FADD   $dst,$src1,$src2" %}
10254   opcode(0xD8, 0x0); /* D8 C0+i */
10255   ins_encode( Push_Reg_FPR(src2),
10256               OpcReg_FPR(src1),
10257               Pop_Mem_FPR(dst) );
10258   ins_pipe( fpu_mem_reg_reg );
10259 %}
10260 //
10261 // This instruction does not round to 24-bits
10262 instruct addFPR_reg(regFPR dst, regFPR src) %{
10263   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10264   match(Set dst (AddF dst src));
10265 
10266   format %{ "FLD    $src\n\t"
10267             "FADDp  $dst,ST" %}
10268   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10269   ins_encode( Push_Reg_FPR(src),
10270               OpcP, RegOpc(dst) );
10271   ins_pipe( fpu_reg_reg );
10272 %}
10273 
10274 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10275   predicate(UseSSE==0);
10276   match(Set dst (AbsF src));
10277   ins_cost(100);
10278   format %{ "FABS" %}
10279   opcode(0xE1, 0xD9);
10280   ins_encode( OpcS, OpcP );
10281   ins_pipe( fpu_reg_reg );
10282 %}
10283 
10284 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10285   predicate(UseSSE==0);
10286   match(Set dst (NegF src));
10287   ins_cost(100);
10288   format %{ "FCHS" %}
10289   opcode(0xE0, 0xD9);
10290   ins_encode( OpcS, OpcP );
10291   ins_pipe( fpu_reg_reg );
10292 %}
10293 
10294 // Cisc-alternate to addFPR_reg
10295 // Spill to obtain 24-bit precision
10296 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10297   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10298   match(Set dst (AddF src1 (LoadF src2)));
10299 
10300   format %{ "FLD    $src2\n\t"
10301             "FADD   ST,$src1\n\t"
10302             "FSTP_S $dst" %}
10303   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10304   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10305               OpcReg_FPR(src1),
10306               Pop_Mem_FPR(dst) );
10307   ins_pipe( fpu_mem_reg_mem );
10308 %}
10309 //
10310 // Cisc-alternate to addFPR_reg
10311 // This instruction does not round to 24-bits
10312 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10313   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10314   match(Set dst (AddF dst (LoadF src)));
10315 
10316   format %{ "FADD   $dst,$src" %}
10317   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10318   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10319               OpcP, RegOpc(dst) );
10320   ins_pipe( fpu_reg_mem );
10321 %}
10322 
// Following two instructions for _222_mpegaudio
10324 // Spill to obtain 24-bit precision
10325 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10326   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10327   match(Set dst (AddF src1 src2));
10328 
10329   format %{ "FADD   $dst,$src1,$src2" %}
10330   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10331   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10332               OpcReg_FPR(src2),
10333               Pop_Mem_FPR(dst) );
10334   ins_pipe( fpu_mem_reg_mem );
10335 %}
10336 
10337 // Cisc-spill variant
10338 // Spill to obtain 24-bit precision
10339 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10340   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10341   match(Set dst (AddF src1 (LoadF src2)));
10342 
10343   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10344   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10345   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10346               set_instruction_start,
10347               OpcP, RMopc_Mem(secondary,src1),
10348               Pop_Mem_FPR(dst) );
10349   ins_pipe( fpu_mem_mem_mem );
10350 %}
10351 
10352 // Spill to obtain 24-bit precision
10353 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10354   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10355   match(Set dst (AddF src1 src2));
10356 
10357   format %{ "FADD   $dst,$src1,$src2" %}
10358   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10359   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10360               set_instruction_start,
10361               OpcP, RMopc_Mem(secondary,src1),
10362               Pop_Mem_FPR(dst) );
10363   ins_pipe( fpu_mem_mem_mem );
10364 %}
10365 
10366 
10367 // Spill to obtain 24-bit precision
10368 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10369   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10370   match(Set dst (AddF src con));
10371   format %{ "FLD    $src\n\t"
10372             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10373             "FSTP_S $dst"  %}
10374   ins_encode %{
10375     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10376     __ fadd_s($constantaddress($con));
10377     __ fstp_s(Address(rsp, $dst$$disp));
10378   %}
10379   ins_pipe(fpu_mem_reg_con);
10380 %}
10381 //
10382 // This instruction does not round to 24-bits
10383 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10384   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10385   match(Set dst (AddF src con));
10386   format %{ "FLD    $src\n\t"
10387             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10388             "FSTP   $dst"  %}
10389   ins_encode %{
10390     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10391     __ fadd_s($constantaddress($con));
10392     __ fstp_d($dst$$reg);
10393   %}
10394   ins_pipe(fpu_reg_reg_con);
10395 %}
10396 
10397 // Spill to obtain 24-bit precision
10398 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10399   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10400   match(Set dst (MulF src1 src2));
10401 
10402   format %{ "FLD    $src1\n\t"
10403             "FMUL   $src2\n\t"
10404             "FSTP_S $dst"  %}
10405   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10406   ins_encode( Push_Reg_FPR(src1),
10407               OpcReg_FPR(src2),
10408               Pop_Mem_FPR(dst) );
10409   ins_pipe( fpu_mem_reg_reg );
10410 %}
10411 //
10412 // This instruction does not round to 24-bits
10413 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10414   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10415   match(Set dst (MulF src1 src2));
10416 
10417   format %{ "FLD    $src1\n\t"
10418             "FMUL   $src2\n\t"
            "FSTP   $dst"  %}
10420   opcode(0xD8, 0x1); /* D8 C8+i */
10421   ins_encode( Push_Reg_FPR(src2),
10422               OpcReg_FPR(src1),
10423               Pop_Reg_FPR(dst) );
10424   ins_pipe( fpu_reg_reg_reg );
10425 %}
10426 
10427 
10428 // Spill to obtain 24-bit precision
10429 // Cisc-alternate to reg-reg multiply
10430 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10431   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10432   match(Set dst (MulF src1 (LoadF src2)));
10433 
10434   format %{ "FLD_S  $src2\n\t"
10435             "FMUL   $src1\n\t"
10436             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10438   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10439               OpcReg_FPR(src1),
10440               Pop_Mem_FPR(dst) );
10441   ins_pipe( fpu_mem_reg_mem );
10442 %}
10443 //
10444 // This instruction does not round to 24-bits
10445 // Cisc-alternate to reg-reg multiply
10446 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10447   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10448   match(Set dst (MulF src1 (LoadF src2)));
10449 
10450   format %{ "FMUL   $dst,$src1,$src2" %}
10451   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10452   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10453               OpcReg_FPR(src1),
10454               Pop_Reg_FPR(dst) );
10455   ins_pipe( fpu_reg_reg_mem );
10456 %}
10457 
10458 // Spill to obtain 24-bit precision
10459 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10460   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10461   match(Set dst (MulF src1 src2));
10462 
10463   format %{ "FMUL   $dst,$src1,$src2" %}
10464   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10465   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10466               set_instruction_start,
10467               OpcP, RMopc_Mem(secondary,src1),
10468               Pop_Mem_FPR(dst) );
10469   ins_pipe( fpu_mem_mem_mem );
10470 %}
10471 
10472 // Spill to obtain 24-bit precision
10473 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10474   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10475   match(Set dst (MulF src con));
10476 
10477   format %{ "FLD    $src\n\t"
10478             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10479             "FSTP_S $dst"  %}
10480   ins_encode %{
10481     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10482     __ fmul_s($constantaddress($con));
10483     __ fstp_s(Address(rsp, $dst$$disp));
10484   %}
10485   ins_pipe(fpu_mem_reg_con);
10486 %}
10487 //
10488 // This instruction does not round to 24-bits
10489 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10490   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10491   match(Set dst (MulF src con));
10492 
10493   format %{ "FLD    $src\n\t"
10494             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10495             "FSTP   $dst"  %}
10496   ins_encode %{
10497     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10498     __ fmul_s($constantaddress($con));
10499     __ fstp_d($dst$$reg);
10500   %}
10501   ins_pipe(fpu_reg_reg_con);
10502 %}
10503 
10504 
10505 //
10506 // MACRO1 -- subsume unshared load into mulFPR
10507 // This instruction does not round to 24-bits
10508 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10509   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10510   match(Set dst (MulF (LoadF mem1) src));
10511 
10512   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10513             "FMUL   ST,$src\n\t"
10514             "FSTP   $dst" %}
10515   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10516   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10517               OpcReg_FPR(src),
10518               Pop_Reg_FPR(dst) );
10519   ins_pipe( fpu_reg_reg_mem );
10520 %}
10521 //
10522 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10523 // This instruction does not round to 24-bits
10524 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10525   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10526   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10527   ins_cost(95);
10528 
10529   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10530             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10531             "FADD   ST,$src2\n\t"
10532             "FSTP   $dst" %}
10533   opcode(0xD9); /* LoadF D9 /0 */
10534   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10535               FMul_ST_reg(src1),
10536               FAdd_ST_reg(src2),
10537               Pop_Reg_FPR(dst) );
10538   ins_pipe( fpu_reg_mem_reg_reg );
10539 %}
10540 
10541 // MACRO3 -- addFPR a mulFPR
10542 // This instruction does not round to 24-bits.  It is a '2-address'
10543 // instruction in that the result goes back to src2.  This eliminates
10544 // a move from the macro; possibly the register allocator will have
10545 // to add it back (and maybe not).
10546 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10547   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10548   match(Set src2 (AddF (MulF src0 src1) src2));
10549 
10550   format %{ "FLD    $src0     ===MACRO3===\n\t"
10551             "FMUL   ST,$src1\n\t"
10552             "FADDP  $src2,ST" %}
10553   opcode(0xD9); /* LoadF D9 /0 */
10554   ins_encode( Push_Reg_FPR(src0),
10555               FMul_ST_reg(src1),
10556               FAddP_reg_ST(src2) );
10557   ins_pipe( fpu_reg_reg_reg );
10558 %}
10559 
10560 // MACRO4 -- divFPR subFPR
10561 // This instruction does not round to 24-bits
10562 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10563   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10564   match(Set dst (DivF (SubF src2 src1) src3));
10565 
10566   format %{ "FLD    $src2   ===MACRO4===\n\t"
10567             "FSUB   ST,$src1\n\t"
10568             "FDIV   ST,$src3\n\t"
10569             "FSTP  $dst" %}
10570   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10571   ins_encode( Push_Reg_FPR(src2),
10572               subFPR_divFPR_encode(src1,src3),
10573               Pop_Reg_FPR(dst) );
10574   ins_pipe( fpu_reg_reg_reg_reg );
10575 %}
10576 
10577 // Spill to obtain 24-bit precision
10578 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10579   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10580   match(Set dst (DivF src1 src2));
10581 
10582   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10584   ins_encode( Push_Reg_FPR(src1),
10585               OpcReg_FPR(src2),
10586               Pop_Mem_FPR(dst) );
10587   ins_pipe( fpu_mem_reg_reg );
10588 %}
10589 //
10590 // This instruction does not round to 24-bits
10591 instruct divFPR_reg(regFPR dst, regFPR src) %{
10592   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10593   match(Set dst (DivF dst src));
10594 
10595   format %{ "FDIV   $dst,$src" %}
10596   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10597   ins_encode( Push_Reg_FPR(src),
10598               OpcP, RegOpc(dst) );
10599   ins_pipe( fpu_reg_reg );
10600 %}
10601 
10602 
10603 // Spill to obtain 24-bit precision
10604 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10605   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10606   match(Set dst (ModF src1 src2));
10607   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10608 
10609   format %{ "FMOD   $dst,$src1,$src2" %}
10610   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10611               emitModDPR(),
10612               Push_Result_Mod_DPR(src2),
10613               Pop_Mem_FPR(dst));
10614   ins_pipe( pipe_slow );
10615 %}
10616 //
10617 // This instruction does not round to 24-bits
10618 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10619   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10620   match(Set dst (ModF dst src));
10621   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10622 
10623   format %{ "FMOD   $dst,$src" %}
10624   ins_encode(Push_Reg_Mod_DPR(dst, src),
10625               emitModDPR(),
10626               Push_Result_Mod_DPR(src),
10627               Pop_Reg_FPR(dst));
10628   ins_pipe( pipe_slow );
10629 %}
10630 
10631 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10632   predicate(UseSSE>=1);
10633   match(Set dst (ModF src0 src1));
10634   effect(KILL rax, KILL cr);
10635   format %{ "SUB    ESP,4\t # FMOD\n"
10636           "\tMOVSS  [ESP+0],$src1\n"
10637           "\tFLD_S  [ESP+0]\n"
10638           "\tMOVSS  [ESP+0],$src0\n"
10639           "\tFLD_S  [ESP+0]\n"
10640      "loop:\tFPREM\n"
10641           "\tFWAIT\n"
10642           "\tFNSTSW AX\n"
10643           "\tSAHF\n"
10644           "\tJP     loop\n"
10645           "\tFSTP_S [ESP+0]\n"
10646           "\tMOVSS  $dst,[ESP+0]\n"
10647           "\tADD    ESP,4\n"
10648           "\tFSTP   ST0\t # Restore FPU Stack"
10649     %}
10650   ins_cost(250);
10651   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10652   ins_pipe( pipe_slow );
10653 %}
10654 
10655 
10656 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted.  Please keep it that way!
10658 
10659 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10660   predicate(UseSSE==0);
10661   match(Set dst (RoundFloat src));
10662   ins_cost(125);
10663   format %{ "FST_S  $dst,$src\t# F-round" %}
10664   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10665   ins_pipe( fpu_mem_reg );
10666 %}
10667 
10668 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10669   predicate(UseSSE<=1);
10670   match(Set dst (RoundDouble src));
10671   ins_cost(125);
10672   format %{ "FST_D  $dst,$src\t# D-round" %}
10673   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10674   ins_pipe( fpu_mem_reg );
10675 %}
10676 
// Force rounding to 24-bit precision and 8-bit exponent
10678 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10679   predicate(UseSSE==0);
10680   match(Set dst (ConvD2F src));
10681   format %{ "FST_S  $dst,$src\t# F-round" %}
10682   expand %{
10683     roundFloat_mem_reg(dst,src);
10684   %}
10685 %}
10686 
// Force rounding to 24-bit precision and 8-bit exponent
10688 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10689   predicate(UseSSE==1);
10690   match(Set dst (ConvD2F src));
10691   effect( KILL cr );
10692   format %{ "SUB    ESP,4\n\t"
10693             "FST_S  [ESP],$src\t# F-round\n\t"
10694             "MOVSS  $dst,[ESP]\n\t"
            "ADD    ESP,4" %}
10696   ins_encode %{
10697     __ subptr(rsp, 4);
10698     if ($src$$reg != FPR1L_enc) {
10699       __ fld_s($src$$reg-1);
10700       __ fstp_s(Address(rsp, 0));
10701     } else {
10702       __ fst_s(Address(rsp, 0));
10703     }
10704     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10705     __ addptr(rsp, 4);
10706   %}
10707   ins_pipe( pipe_slow );
10708 %}
10709 
10710 // Force rounding double precision to single precision
10711 instruct convD2F_reg(regF dst, regD src) %{
10712   predicate(UseSSE>=2);
10713   match(Set dst (ConvD2F src));
10714   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10715   ins_encode %{
10716     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10717   %}
10718   ins_pipe( pipe_slow );
10719 %}
10720 
10721 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10722   predicate(UseSSE==0);
10723   match(Set dst (ConvF2D src));
10724   format %{ "FST_S  $dst,$src\t# D-round" %}
10725   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10726   ins_pipe( fpu_reg_reg );
10727 %}
10728 
10729 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10730   predicate(UseSSE==1);
10731   match(Set dst (ConvF2D src));
10732   format %{ "FST_D  $dst,$src\t# D-round" %}
10733   expand %{
10734     roundDouble_mem_reg(dst,src);
10735   %}
10736 %}
10737 
10738 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10739   predicate(UseSSE==1);
10740   match(Set dst (ConvF2D src));
10741   effect( KILL cr );
10742   format %{ "SUB    ESP,4\n\t"
            "MOVSS  [ESP],$src\n\t"
10744             "FLD_S  [ESP]\n\t"
10745             "ADD    ESP,4\n\t"
10746             "FSTP   $dst\t# D-round" %}
10747   ins_encode %{
10748     __ subptr(rsp, 4);
10749     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10750     __ fld_s(Address(rsp, 0));
10751     __ addptr(rsp, 4);
10752     __ fstp_d($dst$$reg);
10753   %}
10754   ins_pipe( pipe_slow );
10755 %}
10756 
10757 instruct convF2D_reg(regD dst, regF src) %{
10758   predicate(UseSSE>=2);
10759   match(Set dst (ConvF2D src));
10760   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10761   ins_encode %{
10762     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10763   %}
10764   ins_pipe( pipe_slow );
10765 %}
10766 
10767 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10768 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10769   predicate(UseSSE<=1);
10770   match(Set dst (ConvD2I src));
10771   effect( KILL tmp, KILL cr );
10772   format %{ "FLD    $src\t# Convert double to int \n\t"
10773             "FLDCW  trunc mode\n\t"
10774             "SUB    ESP,4\n\t"
10775             "FISTp  [ESP + #0]\n\t"
10776             "FLDCW  std/24-bit mode\n\t"
10777             "POP    EAX\n\t"
10778             "CMP    EAX,0x80000000\n\t"
10779             "JNE,s  fast\n\t"
10780             "FLD_D  $src\n\t"
10781             "CALL   d2i_wrapper\n"
10782       "fast:" %}
10783   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10784   ins_pipe( pipe_slow );
10785 %}
10786 
10787 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10788 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10789   predicate(UseSSE>=2);
10790   match(Set dst (ConvD2I src));
10791   effect( KILL tmp, KILL cr );
10792   format %{ "CVTTSD2SI $dst, $src\n\t"
10793             "CMP    $dst,0x80000000\n\t"
10794             "JNE,s  fast\n\t"
10795             "SUB    ESP, 8\n\t"
10796             "MOVSD  [ESP], $src\n\t"
10797             "FLD_D  [ESP]\n\t"
10798             "ADD    ESP, 8\n\t"
10799             "CALL   d2i_wrapper\n"
10800       "fast:" %}
10801   ins_encode %{
10802     Label fast;
10803     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10804     __ cmpl($dst$$Register, 0x80000000);
10805     __ jccb(Assembler::notEqual, fast);
10806     __ subptr(rsp, 8);
10807     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10808     __ fld_d(Address(rsp, 0));
10809     __ addptr(rsp, 8);
10810     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10811     __ post_call_nop();
10812     __ bind(fast);
10813   %}
10814   ins_pipe( pipe_slow );
10815 %}
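// CVTTSD2SI returns the "integer indefinite" value 0x80000000 when the input
// is NaN or outside int range, so that value routes the conversion to the
// slow path, where d2i_wrapper applies the Java result (0 for NaN, saturation
// at Integer.MIN_VALUE/MAX_VALUE otherwise).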
10816 
10817 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10818   predicate(UseSSE<=1);
10819   match(Set dst (ConvD2L src));
10820   effect( KILL cr );
10821   format %{ "FLD    $src\t# Convert double to long\n\t"
10822             "FLDCW  trunc mode\n\t"
10823             "SUB    ESP,8\n\t"
10824             "FISTp  [ESP + #0]\n\t"
10825             "FLDCW  std/24-bit mode\n\t"
10826             "POP    EAX\n\t"
10827             "POP    EDX\n\t"
10828             "CMP    EDX,0x80000000\n\t"
10829             "JNE,s  fast\n\t"
10830             "TEST   EAX,EAX\n\t"
10831             "JNE,s  fast\n\t"
10832             "FLD    $src\n\t"
10833             "CALL   d2l_wrapper\n"
10834       "fast:" %}
10835   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10836   ins_pipe( pipe_slow );
10837 %}
10838 
10839 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10840 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10841   predicate (UseSSE>=2);
10842   match(Set dst (ConvD2L src));
10843   effect( KILL cr );
10844   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10845             "MOVSD  [ESP],$src\n\t"
10846             "FLD_D  [ESP]\n\t"
10847             "FLDCW  trunc mode\n\t"
10848             "FISTp  [ESP + #0]\n\t"
10849             "FLDCW  std/24-bit mode\n\t"
10850             "POP    EAX\n\t"
10851             "POP    EDX\n\t"
10852             "CMP    EDX,0x80000000\n\t"
10853             "JNE,s  fast\n\t"
10854             "TEST   EAX,EAX\n\t"
10855             "JNE,s  fast\n\t"
10856             "SUB    ESP,8\n\t"
10857             "MOVSD  [ESP],$src\n\t"
10858             "FLD_D  [ESP]\n\t"
10859             "ADD    ESP,8\n\t"
10860             "CALL   d2l_wrapper\n"
10861       "fast:" %}
10862   ins_encode %{
10863     Label fast;
10864     __ subptr(rsp, 8);
10865     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10866     __ fld_d(Address(rsp, 0));
10867     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10868     __ fistp_d(Address(rsp, 0));
10869     // Restore the rounding mode, mask the exception
10870     if (Compile::current()->in_24_bit_fp_mode()) {
10871       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10872     } else {
10873       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10874     }
10875     // Load the converted long, adjust CPU stack
10876     __ pop(rax);
10877     __ pop(rdx);
10878     __ cmpl(rdx, 0x80000000);
10879     __ jccb(Assembler::notEqual, fast);
10880     __ testl(rax, rax);
10881     __ jccb(Assembler::notEqual, fast);
10882     __ subptr(rsp, 8);
10883     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10884     __ fld_d(Address(rsp, 0));
10885     __ addptr(rsp, 8);
10886     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10887     __ post_call_nop();
10888     __ bind(fast);
10889   %}
10890   ins_pipe( pipe_slow );
10891 %}
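// On overflow or NaN, FISTP stores the 64-bit integer indefinite value
// 0x8000000000000000, so the EDX==0x80000000 && EAX==0 test above catches
// both a genuine Long.MIN_VALUE result and the trouble cases and lets
// d2l_wrapper sort them out.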
10892 
// Convert a float or double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned value down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and go
// the slow path if needed.
10899 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10900   predicate(UseSSE==0);
10901   match(Set dst (ConvF2I src));
10902   effect( KILL tmp, KILL cr );
10903   format %{ "FLD    $src\t# Convert float to int \n\t"
10904             "FLDCW  trunc mode\n\t"
10905             "SUB    ESP,4\n\t"
10906             "FISTp  [ESP + #0]\n\t"
10907             "FLDCW  std/24-bit mode\n\t"
10908             "POP    EAX\n\t"
10909             "CMP    EAX,0x80000000\n\t"
10910             "JNE,s  fast\n\t"
10911             "FLD    $src\n\t"
10912             "CALL   d2i_wrapper\n"
10913       "fast:" %}
10914   // DPR2I_encoding works for FPR2I
10915   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10916   ins_pipe( pipe_slow );
10917 %}
10918 
10919 // Convert a float in xmm to an int reg.
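// CVTTSS2SI also reports NaN/overflow by writing the integer-indefinite value 0x80000000,
// so a single compare against that pattern is enough to select the slow-path wrapper.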
10920 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10921   predicate(UseSSE>=1);
10922   match(Set dst (ConvF2I src));
10923   effect( KILL tmp, KILL cr );
10924   format %{ "CVTTSS2SI $dst, $src\n\t"
10925             "CMP    $dst,0x80000000\n\t"
10926             "JNE,s  fast\n\t"
10927             "SUB    ESP, 4\n\t"
10928             "MOVSS  [ESP], $src\n\t"
10929             "FLD    [ESP]\n\t"
10930             "ADD    ESP, 4\n\t"
10931             "CALL   d2i_wrapper\n"
10932       "fast:" %}
10933   ins_encode %{
10934     Label fast;
10935     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10936     __ cmpl($dst$$Register, 0x80000000);
10937     __ jccb(Assembler::notEqual, fast);
10938     __ subptr(rsp, 4);
10939     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10940     __ fld_s(Address(rsp, 0));
10941     __ addptr(rsp, 4);
10942     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10943     __ post_call_nop();
10944     __ bind(fast);
10945   %}
10946   ins_pipe( pipe_slow );
10947 %}
10948 
10949 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10950   predicate(UseSSE==0);
10951   match(Set dst (ConvF2L src));
10952   effect( KILL cr );
10953   format %{ "FLD    $src\t# Convert float to long\n\t"
10954             "FLDCW  trunc mode\n\t"
10955             "SUB    ESP,8\n\t"
10956             "FISTp  [ESP + #0]\n\t"
10957             "FLDCW  std/24-bit mode\n\t"
10958             "POP    EAX\n\t"
10959             "POP    EDX\n\t"
10960             "CMP    EDX,0x80000000\n\t"
10961             "JNE,s  fast\n\t"
10962             "TEST   EAX,EAX\n\t"
10963             "JNE,s  fast\n\t"
10964             "FLD    $src\n\t"
10965             "CALL   d2l_wrapper\n"
10966       "fast:" %}
10967   // DPR2L_encoding works for FPR2L
10968   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10969   ins_pipe( pipe_slow );
10970 %}
10971 
10972 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10973 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10974   predicate (UseSSE>=1);
10975   match(Set dst (ConvF2L src));
10976   effect( KILL cr );
10977   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10978             "MOVSS  [ESP],$src\n\t"
10979             "FLD_S  [ESP]\n\t"
10980             "FLDCW  trunc mode\n\t"
10981             "FISTp  [ESP + #0]\n\t"
10982             "FLDCW  std/24-bit mode\n\t"
10983             "POP    EAX\n\t"
10984             "POP    EDX\n\t"
10985             "CMP    EDX,0x80000000\n\t"
10986             "JNE,s  fast\n\t"
10987             "TEST   EAX,EAX\n\t"
10988             "JNE,s  fast\n\t"
10989             "SUB    ESP,4\t# Convert float to long\n\t"
10990             "MOVSS  [ESP],$src\n\t"
10991             "FLD_S  [ESP]\n\t"
10992             "ADD    ESP,4\n\t"
10993             "CALL   d2l_wrapper\n"
10994       "fast:" %}
10995   ins_encode %{
10996     Label fast;
10997     __ subptr(rsp, 8);
10998     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10999     __ fld_s(Address(rsp, 0));
11000     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
11001     __ fistp_d(Address(rsp, 0));
11002     // Restore the rounding mode, mask the exception
11003     if (Compile::current()->in_24_bit_fp_mode()) {
11004       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
11005     } else {
11006       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
11007     }
11008     // Load the converted long, adjust CPU stack
11009     __ pop(rax);
11010     __ pop(rdx);
11011     __ cmpl(rdx, 0x80000000);
11012     __ jccb(Assembler::notEqual, fast);
11013     __ testl(rax, rax);
11014     __ jccb(Assembler::notEqual, fast);
11015     __ subptr(rsp, 4);
11016     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11017     __ fld_s(Address(rsp, 0));
11018     __ addptr(rsp, 4);
11019     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11020     __ post_call_nop();
11021     __ bind(fast);
11022   %}
11023   ins_pipe( pipe_slow );
11024 %}
11025 
11026 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11027   predicate( UseSSE<=1 );
11028   match(Set dst (ConvI2D src));
11029   format %{ "FILD   $src\n\t"
11030             "FSTP   $dst" %}
11031   opcode(0xDB, 0x0);  /* DB /0 */
11032   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11033   ins_pipe( fpu_reg_mem );
11034 %}
11035 
11036 instruct convI2D_reg(regD dst, rRegI src) %{
11037   predicate( UseSSE>=2 && !UseXmmI2D );
11038   match(Set dst (ConvI2D src));
11039   format %{ "CVTSI2SD $dst,$src" %}
11040   ins_encode %{
11041     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11042   %}
11043   ins_pipe( pipe_slow );
11044 %}
11045 
11046 instruct convI2D_mem(regD dst, memory mem) %{
11047   predicate( UseSSE>=2 );
11048   match(Set dst (ConvI2D (LoadI mem)));
11049   format %{ "CVTSI2SD $dst,$mem" %}
11050   ins_encode %{
11051     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11052   %}
11053   ins_pipe( pipe_slow );
11054 %}
11055 
11056 instruct convXI2D_reg(regD dst, rRegI src)
11057 %{
11058   predicate( UseSSE>=2 && UseXmmI2D );
11059   match(Set dst (ConvI2D src));
11060 
11061   format %{ "MOVD  $dst,$src\n\t"
11062             "CVTDQ2PD $dst,$dst\t# i2d" %}
11063   ins_encode %{
11064     __ movdl($dst$$XMMRegister, $src$$Register);
11065     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11066   %}
11067   ins_pipe(pipe_slow); // XXX
11068 %}
11069 
11070 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11071   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11072   match(Set dst (ConvI2D (LoadI mem)));
11073   format %{ "FILD   $mem\n\t"
11074             "FSTP   $dst" %}
11075   opcode(0xDB);      /* DB /0 */
11076   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11077               Pop_Reg_DPR(dst));
11078   ins_pipe( fpu_reg_mem );
11079 %}
11080 
11081 // Convert a byte to a float; no rounding step needed.
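// The predicate only accepts an input masked with 0xFF (AndI with 255), so the value fits
// in 8 bits and is exactly representable even under 24-bit rounding; no store/reload needed.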
11082 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11083   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11084   match(Set dst (ConvI2F src));
11085   format %{ "FILD   $src\n\t"
11086             "FSTP   $dst" %}
11087 
11088   opcode(0xDB, 0x0);  /* DB /0 */
11089   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11090   ins_pipe( fpu_reg_mem );
11091 %}
11092 
11093 // In 24-bit mode, force exponent rounding by storing back out
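// (FSTP_S through a 32-bit stack slot rounds the x87 result to single precision, which is
// what the 24-bit mode of this compile expects subsequent uses to observe.)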
11094 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11095   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11096   match(Set dst (ConvI2F src));
11097   ins_cost(200);
11098   format %{ "FILD   $src\n\t"
11099             "FSTP_S $dst" %}
11100   opcode(0xDB, 0x0);  /* DB /0 */
11101   ins_encode( Push_Mem_I(src),
11102               Pop_Mem_FPR(dst));
11103   ins_pipe( fpu_mem_mem );
11104 %}
11105 
11106 // In 24-bit mode, force exponent rounding by storing back out
11107 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11108   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11109   match(Set dst (ConvI2F (LoadI mem)));
11110   ins_cost(200);
11111   format %{ "FILD   $mem\n\t"
11112             "FSTP_S $dst" %}
11113   opcode(0xDB);  /* DB /0 */
11114   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11115               Pop_Mem_FPR(dst));
11116   ins_pipe( fpu_mem_mem );
11117 %}
11118 
// This instruction does not round to 24 bits
11120 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11121   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11122   match(Set dst (ConvI2F src));
11123   format %{ "FILD   $src\n\t"
11124             "FSTP   $dst" %}
11125   opcode(0xDB, 0x0);  /* DB /0 */
11126   ins_encode( Push_Mem_I(src),
11127               Pop_Reg_FPR(dst));
11128   ins_pipe( fpu_reg_mem );
11129 %}
11130 
// This instruction does not round to 24 bits
11132 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11133   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11134   match(Set dst (ConvI2F (LoadI mem)));
11135   format %{ "FILD   $mem\n\t"
11136             "FSTP   $dst" %}
11137   opcode(0xDB);      /* DB /0 */
11138   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11139               Pop_Reg_FPR(dst));
11140   ins_pipe( fpu_reg_mem );
11141 %}
11142 
11143 // Convert an int to a float in xmm; no rounding step needed.
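// (CVTSI2SS already produces a correctly rounded single-precision value, so there is no
// 24-bit store/reload step as in the x87 rules above.)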
11144 instruct convI2F_reg(regF dst, rRegI src) %{
11145   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11146   match(Set dst (ConvI2F src));
11147   format %{ "CVTSI2SS $dst, $src" %}
11148   ins_encode %{
11149     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11150   %}
11151   ins_pipe( pipe_slow );
11152 %}
11153 
instruct convXI2F_reg(regF dst, rRegI src)
11155 %{
11156   predicate( UseSSE>=2 && UseXmmI2F );
11157   match(Set dst (ConvI2F src));
11158 
11159   format %{ "MOVD  $dst,$src\n\t"
11160             "CVTDQ2PS $dst,$dst\t# i2f" %}
11161   ins_encode %{
11162     __ movdl($dst$$XMMRegister, $src$$Register);
11163     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11164   %}
11165   ins_pipe(pipe_slow); // XXX
11166 %}
11167 
11168 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11169   match(Set dst (ConvI2L src));
11170   effect(KILL cr);
11171   ins_cost(375);
11172   format %{ "MOV    $dst.lo,$src\n\t"
11173             "MOV    $dst.hi,$src\n\t"
11174             "SAR    $dst.hi,31" %}
11175   ins_encode(convert_int_long(dst,src));
11176   ins_pipe( ialu_reg_reg_long );
11177 %}
11178 
11179 // Zero-extend convert int to long
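// Matching the AndL of the conversion with the 0xFFFFFFFF mask lets the zero-extension be
// done as a plain move plus clearing the high half, instead of the sign-extending
// MOV/MOV/SAR sequence above.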
11180 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11181   match(Set dst (AndL (ConvI2L src) mask) );
11182   effect( KILL flags );
11183   ins_cost(250);
11184   format %{ "MOV    $dst.lo,$src\n\t"
11185             "XOR    $dst.hi,$dst.hi" %}
11186   opcode(0x33); // XOR
11187   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11188   ins_pipe( ialu_reg_reg_long );
11189 %}
11190 
11191 // Zero-extend long
11192 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11193   match(Set dst (AndL src mask) );
11194   effect( KILL flags );
11195   ins_cost(250);
11196   format %{ "MOV    $dst.lo,$src.lo\n\t"
11197             "XOR    $dst.hi,$dst.hi\n\t" %}
11198   opcode(0x33); // XOR
11199   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11200   ins_pipe( ialu_reg_reg_long );
11201 %}
11202 
11203 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11204   predicate (UseSSE<=1);
11205   match(Set dst (ConvL2D src));
11206   effect( KILL cr );
11207   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11208             "PUSH   $src.lo\n\t"
11209             "FILD   ST,[ESP + #0]\n\t"
11210             "ADD    ESP,8\n\t"
11211             "FSTP_D $dst\t# D-round" %}
11212   opcode(0xDF, 0x5);  /* DF /5 */
11213   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11214   ins_pipe( pipe_slow );
11215 %}
11216 
11217 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11218   predicate (UseSSE>=2);
11219   match(Set dst (ConvL2D src));
11220   effect( KILL cr );
11221   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11222             "PUSH   $src.lo\n\t"
11223             "FILD_D [ESP]\n\t"
11224             "FSTP_D [ESP]\n\t"
11225             "MOVSD  $dst,[ESP]\n\t"
11226             "ADD    ESP,8" %}
11227   opcode(0xDF, 0x5);  /* DF /5 */
11228   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11229   ins_pipe( pipe_slow );
11230 %}
11231 
11232 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11233   predicate (UseSSE>=1);
11234   match(Set dst (ConvL2F src));
11235   effect( KILL cr );
11236   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11237             "PUSH   $src.lo\n\t"
11238             "FILD_D [ESP]\n\t"
11239             "FSTP_S [ESP]\n\t"
11240             "MOVSS  $dst,[ESP]\n\t"
11241             "ADD    ESP,8" %}
11242   opcode(0xDF, 0x5);  /* DF /5 */
11243   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11244   ins_pipe( pipe_slow );
11245 %}
11246 
11247 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11248   match(Set dst (ConvL2F src));
11249   effect( KILL cr );
11250   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11251             "PUSH   $src.lo\n\t"
11252             "FILD   ST,[ESP + #0]\n\t"
11253             "ADD    ESP,8\n\t"
11254             "FSTP_S $dst\t# F-round" %}
11255   opcode(0xDF, 0x5);  /* DF /5 */
11256   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11257   ins_pipe( pipe_slow );
11258 %}
11259 
11260 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11261   match(Set dst (ConvL2I src));
11262   effect( DEF dst, USE src );
11263   format %{ "MOV    $dst,$src.lo" %}
11264   ins_encode(enc_CopyL_Lo(dst,src));
11265   ins_pipe( ialu_reg_reg );
11266 %}
11267 
11268 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11269   match(Set dst (MoveF2I src));
11270   effect( DEF dst, USE src );
11271   ins_cost(100);
11272   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11273   ins_encode %{
11274     __ movl($dst$$Register, Address(rsp, $src$$disp));
11275   %}
11276   ins_pipe( ialu_reg_mem );
11277 %}
11278 
11279 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11280   predicate(UseSSE==0);
11281   match(Set dst (MoveF2I src));
11282   effect( DEF dst, USE src );
11283 
11284   ins_cost(125);
11285   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11286   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11287   ins_pipe( fpu_mem_reg );
11288 %}
11289 
11290 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11291   predicate(UseSSE>=1);
11292   match(Set dst (MoveF2I src));
11293   effect( DEF dst, USE src );
11294 
11295   ins_cost(95);
11296   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11297   ins_encode %{
11298     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11299   %}
11300   ins_pipe( pipe_slow );
11301 %}
11302 
11303 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11304   predicate(UseSSE>=2);
11305   match(Set dst (MoveF2I src));
11306   effect( DEF dst, USE src );
11307   ins_cost(85);
11308   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11309   ins_encode %{
11310     __ movdl($dst$$Register, $src$$XMMRegister);
11311   %}
11312   ins_pipe( pipe_slow );
11313 %}
11314 
11315 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11316   match(Set dst (MoveI2F src));
11317   effect( DEF dst, USE src );
11318 
11319   ins_cost(100);
11320   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11321   ins_encode %{
11322     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11323   %}
11324   ins_pipe( ialu_mem_reg );
11325 %}
11326 
11327 
11328 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11329   predicate(UseSSE==0);
11330   match(Set dst (MoveI2F src));
11331   effect(DEF dst, USE src);
11332 
11333   ins_cost(125);
11334   format %{ "FLD_S  $src\n\t"
11335             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11336   opcode(0xD9);               /* D9 /0, FLD m32real */
11337   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11338               Pop_Reg_FPR(dst) );
11339   ins_pipe( fpu_reg_mem );
11340 %}
11341 
11342 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11343   predicate(UseSSE>=1);
11344   match(Set dst (MoveI2F src));
11345   effect( DEF dst, USE src );
11346 
11347   ins_cost(95);
11348   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11349   ins_encode %{
11350     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11351   %}
11352   ins_pipe( pipe_slow );
11353 %}
11354 
11355 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11356   predicate(UseSSE>=2);
11357   match(Set dst (MoveI2F src));
11358   effect( DEF dst, USE src );
11359 
11360   ins_cost(85);
11361   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11362   ins_encode %{
11363     __ movdl($dst$$XMMRegister, $src$$Register);
11364   %}
11365   ins_pipe( pipe_slow );
11366 %}
11367 
11368 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11369   match(Set dst (MoveD2L src));
11370   effect(DEF dst, USE src);
11371 
11372   ins_cost(250);
11373   format %{ "MOV    $dst.lo,$src\n\t"
11374             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11375   opcode(0x8B, 0x8B);
11376   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11377   ins_pipe( ialu_mem_long_reg );
11378 %}
11379 
11380 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11381   predicate(UseSSE<=1);
11382   match(Set dst (MoveD2L src));
11383   effect(DEF dst, USE src);
11384 
11385   ins_cost(125);
11386   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11387   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11388   ins_pipe( fpu_mem_reg );
11389 %}
11390 
11391 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11392   predicate(UseSSE>=2);
11393   match(Set dst (MoveD2L src));
11394   effect(DEF dst, USE src);
11395   ins_cost(95);
11396   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11397   ins_encode %{
11398     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11399   %}
11400   ins_pipe( pipe_slow );
11401 %}
11402 
11403 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11404   predicate(UseSSE>=2);
11405   match(Set dst (MoveD2L src));
11406   effect(DEF dst, USE src, TEMP tmp);
11407   ins_cost(85);
11408   format %{ "MOVD   $dst.lo,$src\n\t"
11409             "PSHUFLW $tmp,$src,0x4E\n\t"
11410             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11411   ins_encode %{
11412     __ movdl($dst$$Register, $src$$XMMRegister);
11413     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11414     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11415   %}
11416   ins_pipe( pipe_slow );
11417 %}
11418 
11419 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11420   match(Set dst (MoveL2D src));
11421   effect(DEF dst, USE src);
11422 
11423   ins_cost(200);
11424   format %{ "MOV    $dst,$src.lo\n\t"
11425             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11426   opcode(0x89, 0x89);
11427   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11428   ins_pipe( ialu_mem_long_reg );
11429 %}
11430 
11431 
11432 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11433   predicate(UseSSE<=1);
11434   match(Set dst (MoveL2D src));
11435   effect(DEF dst, USE src);
11436   ins_cost(125);
11437 
11438   format %{ "FLD_D  $src\n\t"
11439             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11440   opcode(0xDD);               /* DD /0, FLD m64real */
11441   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11442               Pop_Reg_DPR(dst) );
11443   ins_pipe( fpu_reg_mem );
11444 %}
11445 
11446 
11447 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11448   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11449   match(Set dst (MoveL2D src));
11450   effect(DEF dst, USE src);
11451 
11452   ins_cost(95);
11453   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11454   ins_encode %{
11455     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11456   %}
11457   ins_pipe( pipe_slow );
11458 %}
11459 
11460 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11461   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11462   match(Set dst (MoveL2D src));
11463   effect(DEF dst, USE src);
11464 
11465   ins_cost(95);
11466   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11467   ins_encode %{
11468     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11469   %}
11470   ins_pipe( pipe_slow );
11471 %}
11472 
11473 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11474   predicate(UseSSE>=2);
11475   match(Set dst (MoveL2D src));
11476   effect(TEMP dst, USE src, TEMP tmp);
11477   ins_cost(85);
11478   format %{ "MOVD   $dst,$src.lo\n\t"
11479             "MOVD   $tmp,$src.hi\n\t"
11480             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11481   ins_encode %{
11482     __ movdl($dst$$XMMRegister, $src$$Register);
11483     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11484     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11485   %}
11486   ins_pipe( pipe_slow );
11487 %}
11488 
11489 //----------------------------- CompressBits/ExpandBits ------------------------
11490 
11491 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11492   predicate(n->bottom_type()->isa_long());
11493   match(Set dst (CompressBits src mask));
11494   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11495   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11496   ins_encode %{
    Label exit, partial_result;
    // Extract the lower and upper 32 bits of the source into the destination register pair in parallel.
    // Then merge the two results so that the bits extracted into the upper destination register are
    // laid out contiguously after those in the lower destination register.
11501     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11502     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11503     __ popcntl($rtmp$$Register, $mask$$Register);
11504     // Skip merging if bit count of lower mask register is equal to 32 (register size).
11505     __ cmpl($rtmp$$Register, 32);
11506     __ jccb(Assembler::equal, exit);
    // Due to the limited number of GPRs on a 32-bit target, use an XMM register as a spill slot.
11508     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11509     // Shift left the contents of upper destination register by true bit count of lower mask register
11510     // and merge with lower destination register.
11511     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11512     __ orl($dst$$Register, $rtmp$$Register);
11513     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11514     // Zero out upper destination register if true bit count of lower 32 bit mask is zero
11515     // since contents of upper destination have already been copied to lower destination
11516     // register.
11517     __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
11519     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11520     __ jmp(exit);
    __ bind(partial_result);
11522     // Perform right shift over upper destination register to move out bits already copied
11523     // to lower destination register.
11524     __ subl($rtmp$$Register, 32);
11525     __ negl($rtmp$$Register);
11526     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11527     __ bind(exit);
11528   %}
11529   ins_pipe( pipe_slow );
11530 %}
11531 
11532 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11533   predicate(n->bottom_type()->isa_long());
11534   match(Set dst (ExpandBits src mask));
11535   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11536   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11537   ins_encode %{
    // The expand (deposit) operation reads bits sequentially from the source register, starting
    // at the LSB, and places them in the destination register at the bit positions corresponding
    // to the set bits of the mask register. Thus the number of source bits consumed equals the
    // combined set-bit count of the mask register pair.
11542     Label exit, mask_clipping;
11543     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11544     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11545     __ popcntl($rtmp$$Register, $mask$$Register);
    // If the set-bit count of the lower mask register is 32, then no bits of the lower source
    // register feed into the upper destination register.
11548     __ cmpl($rtmp$$Register, 32);
11549     __ jccb(Assembler::equal, exit);
    // Due to the limited number of GPRs on a 32-bit target, use an XMM register as a spill slot.
11551     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11552     // Shift right the contents of lower source register to remove already consumed bits.
11553     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11554     // Extract the bits from lower source register starting from LSB under the influence
11555     // of upper mask register.
11556     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11557     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11558     __ subl($rtmp$$Register, 32);
11559     __ negl($rtmp$$Register);
11560     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11561     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
11562     // Clear the set bits in upper mask register which have been used to extract the contents
11563     // from lower source register.
11564     __ bind(mask_clipping);
11565     __ blsrl($mask$$Register, $mask$$Register);
11566     __ decrementl($rtmp$$Register, 1);
11567     __ jccb(Assembler::greater, mask_clipping);
11568     // Starting from LSB extract the bits from upper source register under the influence of
11569     // remaining set bits in upper mask register.
11570     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11571     // Merge the partial results extracted from lower and upper source register bits.
11572     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11573     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11574     __ bind(exit);
11575   %}
11576   ins_pipe( pipe_slow );
11577 %}
11578 
11579 // =======================================================================
11580 // fast clearing of an array
11581 // Small ClearArray non-AVX512.
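// clear_mem() chooses among REP STOSB, an XMM/YMM zeroing loop, or REP STOS at code-emit
// time (UseFastStosb / UseXMMForObjInit), which is why the format string below lists all
// three variants.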
11582 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11583   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11584   match(Set dummy (ClearArray cnt base));
11585   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11586 
11587   format %{ $$template
11588     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11589     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11590     $$emit$$"JG     LARGE\n\t"
11591     $$emit$$"SHL    ECX, 1\n\t"
11592     $$emit$$"DEC    ECX\n\t"
11593     $$emit$$"JS     DONE\t# Zero length\n\t"
11594     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11595     $$emit$$"DEC    ECX\n\t"
11596     $$emit$$"JGE    LOOP\n\t"
11597     $$emit$$"JMP    DONE\n\t"
11598     $$emit$$"# LARGE:\n\t"
11599     if (UseFastStosb) {
11600        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11601        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11602     } else if (UseXMMForObjInit) {
11603        $$emit$$"MOV     RDI,RAX\n\t"
11604        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11605        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11606        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11607        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11608        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11609        $$emit$$"ADD     0x40,RAX\n\t"
11610        $$emit$$"# L_zero_64_bytes:\n\t"
11611        $$emit$$"SUB     0x8,RCX\n\t"
11612        $$emit$$"JGE     L_loop\n\t"
11613        $$emit$$"ADD     0x4,RCX\n\t"
11614        $$emit$$"JL      L_tail\n\t"
11615        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11616        $$emit$$"ADD     0x20,RAX\n\t"
11617        $$emit$$"SUB     0x4,RCX\n\t"
11618        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11619        $$emit$$"ADD     0x4,RCX\n\t"
11620        $$emit$$"JLE     L_end\n\t"
11621        $$emit$$"DEC     RCX\n\t"
11622        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11623        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11624        $$emit$$"ADD     0x8,RAX\n\t"
11625        $$emit$$"DEC     RCX\n\t"
11626        $$emit$$"JGE     L_sloop\n\t"
11627        $$emit$$"# L_end:\n\t"
11628     } else {
11629        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11630        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11631     }
11632     $$emit$$"# DONE"
11633   %}
11634   ins_encode %{
11635     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11636                  $tmp$$XMMRegister, false, knoreg);
11637   %}
11638   ins_pipe( pipe_slow );
11639 %}
11640 
11641 // Small ClearArray AVX512 non-constant length.
11642 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11643   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11644   match(Set dummy (ClearArray cnt base));
11645   ins_cost(125);
11646   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11647 
11648   format %{ $$template
11649     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11650     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11651     $$emit$$"JG     LARGE\n\t"
11652     $$emit$$"SHL    ECX, 1\n\t"
11653     $$emit$$"DEC    ECX\n\t"
11654     $$emit$$"JS     DONE\t# Zero length\n\t"
11655     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11656     $$emit$$"DEC    ECX\n\t"
11657     $$emit$$"JGE    LOOP\n\t"
11658     $$emit$$"JMP    DONE\n\t"
11659     $$emit$$"# LARGE:\n\t"
11660     if (UseFastStosb) {
11661        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11662        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11663     } else if (UseXMMForObjInit) {
11664        $$emit$$"MOV     RDI,RAX\n\t"
11665        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11666        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11667        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11668        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11669        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11670        $$emit$$"ADD     0x40,RAX\n\t"
11671        $$emit$$"# L_zero_64_bytes:\n\t"
11672        $$emit$$"SUB     0x8,RCX\n\t"
11673        $$emit$$"JGE     L_loop\n\t"
11674        $$emit$$"ADD     0x4,RCX\n\t"
11675        $$emit$$"JL      L_tail\n\t"
11676        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11677        $$emit$$"ADD     0x20,RAX\n\t"
11678        $$emit$$"SUB     0x4,RCX\n\t"
11679        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11680        $$emit$$"ADD     0x4,RCX\n\t"
11681        $$emit$$"JLE     L_end\n\t"
11682        $$emit$$"DEC     RCX\n\t"
11683        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11684        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11685        $$emit$$"ADD     0x8,RAX\n\t"
11686        $$emit$$"DEC     RCX\n\t"
11687        $$emit$$"JGE     L_sloop\n\t"
11688        $$emit$$"# L_end:\n\t"
11689     } else {
11690        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11691        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11692     }
11693     $$emit$$"# DONE"
11694   %}
11695   ins_encode %{
11696     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11697                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11698   %}
11699   ins_pipe( pipe_slow );
11700 %}
11701 
11702 // Large ClearArray non-AVX512.
11703 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11704   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11705   match(Set dummy (ClearArray cnt base));
11706   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11707   format %{ $$template
11708     if (UseFastStosb) {
11709        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11710        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11711        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11712     } else if (UseXMMForObjInit) {
11713        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11714        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11715        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11716        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11717        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11718        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11719        $$emit$$"ADD     0x40,RAX\n\t"
11720        $$emit$$"# L_zero_64_bytes:\n\t"
11721        $$emit$$"SUB     0x8,RCX\n\t"
11722        $$emit$$"JGE     L_loop\n\t"
11723        $$emit$$"ADD     0x4,RCX\n\t"
11724        $$emit$$"JL      L_tail\n\t"
11725        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11726        $$emit$$"ADD     0x20,RAX\n\t"
11727        $$emit$$"SUB     0x4,RCX\n\t"
11728        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11729        $$emit$$"ADD     0x4,RCX\n\t"
11730        $$emit$$"JLE     L_end\n\t"
11731        $$emit$$"DEC     RCX\n\t"
11732        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11733        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11734        $$emit$$"ADD     0x8,RAX\n\t"
11735        $$emit$$"DEC     RCX\n\t"
11736        $$emit$$"JGE     L_sloop\n\t"
11737        $$emit$$"# L_end:\n\t"
11738     } else {
11739        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11740        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11741        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11742     }
11743     $$emit$$"# DONE"
11744   %}
11745   ins_encode %{
11746     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11747                  $tmp$$XMMRegister, true, knoreg);
11748   %}
11749   ins_pipe( pipe_slow );
11750 %}
11751 
11752 // Large ClearArray AVX512.
11753 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11754   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11755   match(Set dummy (ClearArray cnt base));
11756   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11757   format %{ $$template
11758     if (UseFastStosb) {
11759        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11760        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11761        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11762     } else if (UseXMMForObjInit) {
11763        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11764        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11765        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11766        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11767        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11768        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11769        $$emit$$"ADD     0x40,RAX\n\t"
11770        $$emit$$"# L_zero_64_bytes:\n\t"
11771        $$emit$$"SUB     0x8,RCX\n\t"
11772        $$emit$$"JGE     L_loop\n\t"
11773        $$emit$$"ADD     0x4,RCX\n\t"
11774        $$emit$$"JL      L_tail\n\t"
11775        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11776        $$emit$$"ADD     0x20,RAX\n\t"
11777        $$emit$$"SUB     0x4,RCX\n\t"
11778        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11779        $$emit$$"ADD     0x4,RCX\n\t"
11780        $$emit$$"JLE     L_end\n\t"
11781        $$emit$$"DEC     RCX\n\t"
11782        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11783        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11784        $$emit$$"ADD     0x8,RAX\n\t"
11785        $$emit$$"DEC     RCX\n\t"
11786        $$emit$$"JGE     L_sloop\n\t"
11787        $$emit$$"# L_end:\n\t"
11788     } else {
11789        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11790        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11791        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11792     }
11793     $$emit$$"# DONE"
11794   %}
11795   ins_encode %{
11796     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11797                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11798   %}
11799   ins_pipe( pipe_slow );
11800 %}
11801 
11802 // Small ClearArray AVX512 constant length.
11803 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11804 %{
11805   predicate(!((ClearArrayNode*)n)->is_large() &&
11806                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11807   match(Set dummy (ClearArray cnt base));
11808   ins_cost(100);
11809   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11810   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11811   ins_encode %{
11812    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11813   %}
11814   ins_pipe(pipe_slow);
11815 %}
11816 
11817 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11818                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11819   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11820   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11821   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11822 
11823   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11824   ins_encode %{
11825     __ string_compare($str1$$Register, $str2$$Register,
11826                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11827                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11828   %}
11829   ins_pipe( pipe_slow );
11830 %}
11831 
11832 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11833                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11834   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11835   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11836   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11837 
11838   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11839   ins_encode %{
11840     __ string_compare($str1$$Register, $str2$$Register,
11841                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11842                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11843   %}
11844   ins_pipe( pipe_slow );
11845 %}
11846 
11847 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11848                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11849   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11850   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11851   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11852 
11853   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11854   ins_encode %{
11855     __ string_compare($str1$$Register, $str2$$Register,
11856                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11857                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11858   %}
11859   ins_pipe( pipe_slow );
11860 %}
11861 
11862 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11863                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11864   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11865   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11866   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11867 
11868   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11869   ins_encode %{
11870     __ string_compare($str1$$Register, $str2$$Register,
11871                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11872                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11873   %}
11874   ins_pipe( pipe_slow );
11875 %}
11876 
11877 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11878                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11879   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11880   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11881   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11882 
11883   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11884   ins_encode %{
11885     __ string_compare($str1$$Register, $str2$$Register,
11886                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11887                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11888   %}
11889   ins_pipe( pipe_slow );
11890 %}
11891 
11892 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11893                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11894   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11895   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11896   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11897 
11898   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11899   ins_encode %{
11900     __ string_compare($str1$$Register, $str2$$Register,
11901                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11902                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11903   %}
11904   ins_pipe( pipe_slow );
11905 %}
11906 
11907 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11908                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11909   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11910   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11911   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11912 
11913   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11914   ins_encode %{
11915     __ string_compare($str2$$Register, $str1$$Register,
11916                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11917                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11918   %}
11919   ins_pipe( pipe_slow );
11920 %}
11921 
11922 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11923                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11924   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11925   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11926   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11927 
11928   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11929   ins_encode %{
11930     __ string_compare($str2$$Register, $str1$$Register,
11931                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11932                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11933   %}
11934   ins_pipe( pipe_slow );
11935 %}
11936 
11937 // fast string equals
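// The same arrays_equals() macro backs the AryEq rules further down; the leading boolean
// selects the array form (true) versus this string form (false), and the trailing kReg
// argument is knoreg unless the AVX-512 variant supplies a mask register.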
11938 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11939                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11940   predicate(!VM_Version::supports_avx512vlbw());
11941   match(Set result (StrEquals (Binary str1 str2) cnt));
11942   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11943 
11944   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11945   ins_encode %{
11946     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11947                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11948                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11949   %}
11950 
11951   ins_pipe( pipe_slow );
11952 %}
11953 
11954 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11955                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11956   predicate(VM_Version::supports_avx512vlbw());
11957   match(Set result (StrEquals (Binary str1 str2) cnt));
11958   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11959 
11960   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11961   ins_encode %{
11962     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11963                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11964                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11965   %}
11966 
11967   ins_pipe( pipe_slow );
11968 %}
11969 
11970 
11971 // fast search of substring with known size.
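// When the constant needle is long enough (>= 16 bytes for LL, >= 8 chars for UU/UL) it never
// needs the page-boundary stack copy, so the specialized string_indexofC8 path is taken;
// shorter constants fall back to the general string_indexof code.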
11972 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11973                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11974   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11975   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11976   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11977 
11978   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11979   ins_encode %{
11980     int icnt2 = (int)$int_cnt2$$constant;
11981     if (icnt2 >= 16) {
11982       // IndexOf for constant substrings with size >= 16 elements
11983       // which don't need to be loaded through stack.
11984       __ string_indexofC8($str1$$Register, $str2$$Register,
11985                           $cnt1$$Register, $cnt2$$Register,
11986                           icnt2, $result$$Register,
11987                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11988     } else {
11989       // Small strings are loaded through stack if they cross page boundary.
11990       __ string_indexof($str1$$Register, $str2$$Register,
11991                         $cnt1$$Register, $cnt2$$Register,
11992                         icnt2, $result$$Register,
11993                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11994     }
11995   %}
11996   ins_pipe( pipe_slow );
11997 %}
11998 
11999 // fast search of substring with known size.
12000 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12001                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12002   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12003   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12004   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12005 
12006   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12007   ins_encode %{
12008     int icnt2 = (int)$int_cnt2$$constant;
12009     if (icnt2 >= 8) {
12010       // IndexOf for constant substrings with size >= 8 elements
12011       // which don't need to be loaded through stack.
12012       __ string_indexofC8($str1$$Register, $str2$$Register,
12013                           $cnt1$$Register, $cnt2$$Register,
12014                           icnt2, $result$$Register,
12015                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12016     } else {
12017       // Small strings are loaded through stack if they cross page boundary.
12018       __ string_indexof($str1$$Register, $str2$$Register,
12019                         $cnt1$$Register, $cnt2$$Register,
12020                         icnt2, $result$$Register,
12021                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12022     }
12023   %}
12024   ins_pipe( pipe_slow );
12025 %}
12026 
12027 // fast search of substring with known size.
12028 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12029                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12030   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12031   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12032   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12033 
12034   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12035   ins_encode %{
12036     int icnt2 = (int)$int_cnt2$$constant;
12037     if (icnt2 >= 8) {
12038       // IndexOf for constant substrings with size >= 8 elements
12039       // which don't need to be loaded through stack.
12040       __ string_indexofC8($str1$$Register, $str2$$Register,
12041                           $cnt1$$Register, $cnt2$$Register,
12042                           icnt2, $result$$Register,
12043                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12044     } else {
12045       // Small strings are loaded through stack if they cross page boundary.
12046       __ string_indexof($str1$$Register, $str2$$Register,
12047                         $cnt1$$Register, $cnt2$$Register,
12048                         icnt2, $result$$Register,
12049                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12050     }
12051   %}
12052   ins_pipe( pipe_slow );
12053 %}
12054 
12055 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12056                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12057   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12058   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12059   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12060 
12061   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12062   ins_encode %{
12063     __ string_indexof($str1$$Register, $str2$$Register,
12064                       $cnt1$$Register, $cnt2$$Register,
12065                       (-1), $result$$Register,
12066                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12067   %}
12068   ins_pipe( pipe_slow );
12069 %}
12070 
12071 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12072                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12073   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12074   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12075   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12076 
12077   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12078   ins_encode %{
12079     __ string_indexof($str1$$Register, $str2$$Register,
12080                       $cnt1$$Register, $cnt2$$Register,
12081                       (-1), $result$$Register,
12082                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12083   %}
12084   ins_pipe( pipe_slow );
12085 %}
12086 
12087 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12088                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12089   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12090   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12091   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12092 
12093   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12094   ins_encode %{
12095     __ string_indexof($str1$$Register, $str2$$Register,
12096                       $cnt1$$Register, $cnt2$$Register,
12097                       (-1), $result$$Register,
12098                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12099   %}
12100   ins_pipe( pipe_slow );
12101 %}
12102 
12103 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12104                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12105   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12106   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12107   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12108   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12109   ins_encode %{
12110     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12111                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12112   %}
12113   ins_pipe( pipe_slow );
12114 %}
12115 
12116 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12117                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12118   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12119   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12120   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12121   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12122   ins_encode %{
12123     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12124                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12125   %}
12126   ins_pipe( pipe_slow );
12127 %}
12128 
12129 
12130 // fast array equals
12131 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12132                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12133 %{
12134   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12135   match(Set result (AryEq ary1 ary2));
12136   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12137   //ins_cost(300);
12138 
12139   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12140   ins_encode %{
12141     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12142                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12143                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12144   %}
12145   ins_pipe( pipe_slow );
12146 %}
12147 
12148 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12149                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12150 %{
12151   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12152   match(Set result (AryEq ary1 ary2));
12153   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12154   //ins_cost(300);
12155 
12156   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12157   ins_encode %{
12158     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12159                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12160                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12161   %}
12162   ins_pipe( pipe_slow );
12163 %}
12164 
12165 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12166                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12167 %{
12168   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12169   match(Set result (AryEq ary1 ary2));
12170   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12171   //ins_cost(300);
12172 
12173   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12174   ins_encode %{
12175     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12176                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12177                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12178   %}
12179   ins_pipe( pipe_slow );
12180 %}
12181 
12182 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12183                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12184 %{
12185   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12186   match(Set result (AryEq ary1 ary2));
12187   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12188   //ins_cost(300);
12189 
12190   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12191   ins_encode %{
12192     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12193                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12194                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12195   %}
12196   ins_pipe( pipe_slow );
12197 %}
12198 
12199 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12200                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12201 %{
12202   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12203   match(Set result (CountPositives ary1 len));
12204   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12205 
12206   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12207   ins_encode %{
12208     __ count_positives($ary1$$Register, $len$$Register,
12209                        $result$$Register, $tmp3$$Register,
12210                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12211   %}
12212   ins_pipe( pipe_slow );
12213 %}
12214 
12215 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12216                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12217 %{
12218   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12219   match(Set result (CountPositives ary1 len));
12220   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12221 
12222   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12223   ins_encode %{
12224     __ count_positives($ary1$$Register, $len$$Register,
12225                        $result$$Register, $tmp3$$Register,
12226                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12227   %}
12228   ins_pipe( pipe_slow );
12229 %}
12230 
12231 
12232 // fast char[] to byte[] compression
12233 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12234                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12235   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12236   match(Set result (StrCompressedCopy src (Binary dst len)));
12237   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12238 
12239   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12240   ins_encode %{
12241     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12242                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12243                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12244                            knoreg, knoreg);
12245   %}
12246   ins_pipe( pipe_slow );
12247 %}
12248 
12249 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12250                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12251   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12252   match(Set result (StrCompressedCopy src (Binary dst len)));
12253   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12254 
12255   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12256   ins_encode %{
12257     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12258                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12259                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12260                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12261   %}
12262   ins_pipe( pipe_slow );
12263 %}
12264 
12265 // fast byte[] to char[] inflation
12266 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12267                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12268   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12269   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12270   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12271 
12272   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12273   ins_encode %{
12274     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12275                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12276   %}
12277   ins_pipe( pipe_slow );
12278 %}
12279 
12280 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12281                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12282   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12283   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12284   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12285 
12286   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12287   ins_encode %{
12288     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12289                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12290   %}
12291   ins_pipe( pipe_slow );
12292 %}
12293 
12294 // encode char[] to byte[] in ISO_8859_1
12295 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12296                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12297                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12298   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12299   match(Set result (EncodeISOArray src (Binary dst len)));
12300   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12301 
12302   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12303   ins_encode %{
12304     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12305                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12306                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12307   %}
12308   ins_pipe( pipe_slow );
12309 %}
12310 
12311 // encode char[] to byte[] in ASCII
12312 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12313                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12314                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12315   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12316   match(Set result (EncodeISOArray src (Binary dst len)));
12317   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12318 
12319   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12320   ins_encode %{
12321     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12322                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12323                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12324   %}
12325   ins_pipe( pipe_slow );
12326 %}
12327 
12328 //----------Control Flow Instructions------------------------------------------
12329 // Signed compare Instructions
12330 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12331   match(Set cr (CmpI op1 op2));
12332   effect( DEF cr, USE op1, USE op2 );
12333   format %{ "CMP    $op1,$op2" %}
12334   opcode(0x3B);  /* Opcode 3B /r */
12335   ins_encode( OpcP, RegReg( op1, op2) );
12336   ins_pipe( ialu_cr_reg_reg );
12337 %}
12338 
12339 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12340   match(Set cr (CmpI op1 op2));
12341   effect( DEF cr, USE op1 );
12342   format %{ "CMP    $op1,$op2" %}
12343   opcode(0x81,0x07);  /* Opcode 81 /7 */
12344   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12345   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12346   ins_pipe( ialu_cr_reg_imm );
12347 %}
12348 
12349 // Cisc-spilled version of cmpI_eReg
12350 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12351   match(Set cr (CmpI op1 (LoadI op2)));
12352 
12353   format %{ "CMP    $op1,$op2" %}
12354   ins_cost(500);
12355   opcode(0x3B);  /* Opcode 3B /r */
12356   ins_encode( OpcP, RegMem( op1, op2) );
12357   ins_pipe( ialu_cr_reg_mem );
12358 %}
12359 
12360 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12361   match(Set cr (CmpI src zero));
12362   effect( DEF cr, USE src );
12363 
12364   format %{ "TEST   $src,$src" %}
12365   opcode(0x85);
12366   ins_encode( OpcP, RegReg( src, src ) );
12367   ins_pipe( ialu_cr_reg_imm );
12368 %}
12369 
12370 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12371   match(Set cr (CmpI (AndI src con) zero));
12372 
12373   format %{ "TEST   $src,$con" %}
12374   opcode(0xF7,0x00);
12375   ins_encode( OpcP, RegOpc(src), Con32(con) );
12376   ins_pipe( ialu_cr_reg_imm );
12377 %}
12378 
12379 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12380   match(Set cr (CmpI (AndI src mem) zero));
12381 
12382   format %{ "TEST   $src,$mem" %}
12383   opcode(0x85);
12384   ins_encode( OpcP, RegMem( src, mem ) );
12385   ins_pipe( ialu_cr_reg_mem );
12386 %}
12387 
12388 // Unsigned compare Instructions; really, same as signed except they
12389 // produce an eFlagsRegU instead of eFlagsReg.
12390 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12391   match(Set cr (CmpU op1 op2));
12392 
12393   format %{ "CMPu   $op1,$op2" %}
12394   opcode(0x3B);  /* Opcode 3B /r */
12395   ins_encode( OpcP, RegReg( op1, op2) );
12396   ins_pipe( ialu_cr_reg_reg );
12397 %}
12398 
12399 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12400   match(Set cr (CmpU op1 op2));
12401 
12402   format %{ "CMPu   $op1,$op2" %}
12403   opcode(0x81,0x07);  /* Opcode 81 /7 */
12404   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12405   ins_pipe( ialu_cr_reg_imm );
12406 %}
12407 
// Cisc-spilled version of cmpU_eReg
12409 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12410   match(Set cr (CmpU op1 (LoadI op2)));
12411 
12412   format %{ "CMPu   $op1,$op2" %}
12413   ins_cost(500);
12414   opcode(0x3B);  /* Opcode 3B /r */
12415   ins_encode( OpcP, RegMem( op1, op2) );
12416   ins_pipe( ialu_cr_reg_mem );
12417 %}
12418 
12419 // // Cisc-spilled version of cmpU_eReg
12420 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12421 //  match(Set cr (CmpU (LoadI op1) op2));
12422 //
12423 //  format %{ "CMPu   $op1,$op2" %}
12424 //  ins_cost(500);
12425 //  opcode(0x39);  /* Opcode 39 /r */
12426 //  ins_encode( OpcP, RegMem( op1, op2) );
12427 //%}
12428 
12429 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12430   match(Set cr (CmpU src zero));
12431 
12432   format %{ "TESTu  $src,$src" %}
12433   opcode(0x85);
12434   ins_encode( OpcP, RegReg( src, src ) );
12435   ins_pipe( ialu_cr_reg_imm );
12436 %}
12437 
12438 // Unsigned pointer compare Instructions
12439 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12440   match(Set cr (CmpP op1 op2));
12441 
12442   format %{ "CMPu   $op1,$op2" %}
12443   opcode(0x3B);  /* Opcode 3B /r */
12444   ins_encode( OpcP, RegReg( op1, op2) );
12445   ins_pipe( ialu_cr_reg_reg );
12446 %}
12447 
12448 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12449   match(Set cr (CmpP op1 op2));
12450 
12451   format %{ "CMPu   $op1,$op2" %}
12452   opcode(0x81,0x07);  /* Opcode 81 /7 */
12453   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12454   ins_pipe( ialu_cr_reg_imm );
12455 %}
12456 
// Cisc-spilled version of cmpP_eReg
12458 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12459   match(Set cr (CmpP op1 (LoadP op2)));
12460 
12461   format %{ "CMPu   $op1,$op2" %}
12462   ins_cost(500);
12463   opcode(0x3B);  /* Opcode 3B /r */
12464   ins_encode( OpcP, RegMem( op1, op2) );
12465   ins_pipe( ialu_cr_reg_mem );
12466 %}
12467 
12468 // // Cisc-spilled version of cmpP_eReg
12469 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12470 //  match(Set cr (CmpP (LoadP op1) op2));
12471 //
12472 //  format %{ "CMPu   $op1,$op2" %}
12473 //  ins_cost(500);
12474 //  opcode(0x39);  /* Opcode 39 /r */
12475 //  ins_encode( OpcP, RegMem( op1, op2) );
12476 //%}
12477 
12478 // Compare raw pointer (used in out-of-heap check).
12479 // Only works because non-oop pointers must be raw pointers
12480 // and raw pointers have no anti-dependencies.
12481 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12482   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12483   match(Set cr (CmpP op1 (LoadP op2)));
12484 
12485   format %{ "CMPu   $op1,$op2" %}
12486   opcode(0x3B);  /* Opcode 3B /r */
12487   ins_encode( OpcP, RegMem( op1, op2) );
12488   ins_pipe( ialu_cr_reg_mem );
12489 %}
12490 
12491 //
12492 // This will generate a signed flags result. This should be ok
// since any compare against zero should be eq/neq.
12494 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12495   match(Set cr (CmpP src zero));
12496 
12497   format %{ "TEST   $src,$src" %}
12498   opcode(0x85);
12499   ins_encode( OpcP, RegReg( src, src ) );
12500   ins_pipe( ialu_cr_reg_imm );
12501 %}
12502 
12503 // Cisc-spilled version of testP_reg
12504 // This will generate a signed flags result. This should be ok
// since any compare against zero should be eq/neq.
12506 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12507   match(Set cr (CmpP (LoadP op) zero));
12508 
12509   format %{ "TEST   $op,0xFFFFFFFF" %}
12510   ins_cost(500);
12511   opcode(0xF7);               /* Opcode F7 /0 */
12512   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12513   ins_pipe( ialu_cr_reg_imm );
12514 %}
12515 
12516 // Yanked all unsigned pointer compare operations.
12517 // Pointer compares are done with CmpP which is already unsigned.
12518 
12519 //----------Max and Min--------------------------------------------------------
12520 // Min Instructions
12521 ////
12522 //   *** Min and Max using the conditional move are slower than the
12523 //   *** branch version on a Pentium III.
12524 // // Conditional move for min
12525 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12526 //  effect( USE_DEF op2, USE op1, USE cr );
12527 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12528 //  opcode(0x4C,0x0F);
12529 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12530 //  ins_pipe( pipe_cmov_reg );
12531 //%}
12532 //
12533 //// Min Register with Register (P6 version)
12534 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12535 //  predicate(VM_Version::supports_cmov() );
12536 //  match(Set op2 (MinI op1 op2));
12537 //  ins_cost(200);
12538 //  expand %{
12539 //    eFlagsReg cr;
12540 //    compI_eReg(cr,op1,op2);
12541 //    cmovI_reg_lt(op2,op1,cr);
12542 //  %}
12543 //%}
12544 
12545 // Min Register with Register (generic version)
12546 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12547   match(Set dst (MinI dst src));
12548   effect(KILL flags);
12549   ins_cost(300);
12550 
12551   format %{ "MIN    $dst,$src" %}
12552   opcode(0xCC);
12553   ins_encode( min_enc(dst,src) );
12554   ins_pipe( pipe_slow );
12555 %}
12556 
12557 // Max Register with Register
12558 //   *** Min and Max using the conditional move are slower than the
12559 //   *** branch version on a Pentium III.
12560 // // Conditional move for max
12561 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12562 //  effect( USE_DEF op2, USE op1, USE cr );
12563 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12564 //  opcode(0x4F,0x0F);
12565 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12566 //  ins_pipe( pipe_cmov_reg );
12567 //%}
12568 //
12569 // // Max Register with Register (P6 version)
12570 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12571 //  predicate(VM_Version::supports_cmov() );
12572 //  match(Set op2 (MaxI op1 op2));
12573 //  ins_cost(200);
12574 //  expand %{
12575 //    eFlagsReg cr;
12576 //    compI_eReg(cr,op1,op2);
12577 //    cmovI_reg_gt(op2,op1,cr);
12578 //  %}
12579 //%}
12580 
12581 // Max Register with Register (generic version)
12582 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12583   match(Set dst (MaxI dst src));
12584   effect(KILL flags);
12585   ins_cost(300);
12586 
12587   format %{ "MAX    $dst,$src" %}
12588   opcode(0xCC);
12589   ins_encode( max_enc(dst,src) );
12590   ins_pipe( pipe_slow );
12591 %}
12592 
12593 // ============================================================================
// Counted Loop limit node, which represents the exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have a limit check for overflow.
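// Illustrative example (not part of the instruct below): with init = 0,
// limit = 10 and stride = 3 the trip count is (10 - 0 + 3 - 1) / 3 = 4,
// so the exact limit produced is 0 + 3 * 4 = 12, the first value the
// induction variable never takes.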
12597 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12598   match(Set limit (LoopLimit (Binary init limit) stride));
12599   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12600   ins_cost(300);
12601 
12602   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12603   ins_encode %{
12604     int strd = (int)$stride$$constant;
12605     assert(strd != 1 && strd != -1, "sanity");
12606     int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EDX:EAX, hi:lo)
    __ cdql();
    // Convert init to long (tmp:init, hi:lo)
12610     __ movl($tmp$$Register, $init$$Register);
12611     __ sarl($tmp$$Register, 31);
12612     // $limit - $init
12613     __ subl($limit$$Register, $init$$Register);
12614     __ sbbl($limit_hi$$Register, $tmp$$Register);
12615     // + ($stride - 1)
12616     if (strd > 0) {
12617       __ addl($limit$$Register, (strd - 1));
12618       __ adcl($limit_hi$$Register, 0);
12619       __ movl($tmp$$Register, strd);
12620     } else {
12621       __ addl($limit$$Register, (strd + 1));
12622       __ adcl($limit_hi$$Register, -1);
12623       __ lneg($limit_hi$$Register, $limit$$Register);
12624       __ movl($tmp$$Register, -strd);
12625     }
    // signed division: (EDX:EAX) / pos_stride
12627     __ idivl($tmp$$Register);
12628     if (strd < 0) {
12629       // restore sign
12630       __ negl($tmp$$Register);
12631     }
12632     // (EAX) * stride
12633     __ mull($tmp$$Register);
12634     // + init (ignore upper bits)
12635     __ addl($limit$$Register, $init$$Register);
12636   %}
12637   ins_pipe( pipe_slow );
12638 %}
12639 
12640 // ============================================================================
12641 // Branch Instructions
12642 // Jump Table
12643 instruct jumpXtnd(rRegI switch_val) %{
12644   match(Jump switch_val);
12645   ins_cost(350);
12646   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12647   ins_encode %{
12648     // Jump to Address(table_base + switch_reg)
12649     Address index(noreg, $switch_val$$Register, Address::times_1);
12650     __ jump(ArrayAddress($constantaddress, index), noreg);
12651   %}
12652   ins_pipe(pipe_jmp);
12653 %}
12654 
12655 // Jump Direct - Label defines a relative address from JMP+1
12656 instruct jmpDir(label labl) %{
12657   match(Goto);
12658   effect(USE labl);
12659 
12660   ins_cost(300);
12661   format %{ "JMP    $labl" %}
12662   size(5);
12663   ins_encode %{
12664     Label* L = $labl$$label;
12665     __ jmp(*L, false); // Always long jump
12666   %}
12667   ins_pipe( pipe_jmp );
12668 %}
12669 
12670 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12671 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12672   match(If cop cr);
12673   effect(USE labl);
12674 
12675   ins_cost(300);
12676   format %{ "J$cop    $labl" %}
12677   size(6);
12678   ins_encode %{
12679     Label* L = $labl$$label;
12680     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12681   %}
12682   ins_pipe( pipe_jcc );
12683 %}
12684 
12685 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12686 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12687   match(CountedLoopEnd cop cr);
12688   effect(USE labl);
12689 
12690   ins_cost(300);
12691   format %{ "J$cop    $labl\t# Loop end" %}
12692   size(6);
12693   ins_encode %{
12694     Label* L = $labl$$label;
12695     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12696   %}
12697   ins_pipe( pipe_jcc );
12698 %}
12699 
12700 // Jump Direct Conditional - using unsigned comparison
12701 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12702   match(If cop cmp);
12703   effect(USE labl);
12704 
12705   ins_cost(300);
12706   format %{ "J$cop,u  $labl" %}
12707   size(6);
12708   ins_encode %{
12709     Label* L = $labl$$label;
12710     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12711   %}
12712   ins_pipe(pipe_jcc);
12713 %}
12714 
12715 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12716   match(If cop cmp);
12717   effect(USE labl);
12718 
12719   ins_cost(200);
12720   format %{ "J$cop,u  $labl" %}
12721   size(6);
12722   ins_encode %{
12723     Label* L = $labl$$label;
12724     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12725   %}
12726   ins_pipe(pipe_jcc);
12727 %}
12728 
12729 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12730   match(If cop cmp);
12731   effect(USE labl);
12732 
12733   ins_cost(200);
12734   format %{ $$template
12735     if ($cop$$cmpcode == Assembler::notEqual) {
12736       $$emit$$"JP,u   $labl\n\t"
12737       $$emit$$"J$cop,u   $labl"
12738     } else {
12739       $$emit$$"JP,u   done\n\t"
12740       $$emit$$"J$cop,u   $labl\n\t"
12741       $$emit$$"done:"
12742     }
12743   %}
12744   ins_encode %{
12745     Label* l = $labl$$label;
12746     if ($cop$$cmpcode == Assembler::notEqual) {
12747       __ jcc(Assembler::parity, *l, false);
12748       __ jcc(Assembler::notEqual, *l, false);
12749     } else if ($cop$$cmpcode == Assembler::equal) {
12750       Label done;
12751       __ jccb(Assembler::parity, done);
12752       __ jcc(Assembler::equal, *l, false);
12753       __ bind(done);
12754     } else {
12755        ShouldNotReachHere();
12756     }
12757   %}
12758   ins_pipe(pipe_jcc);
12759 %}
12760 
12761 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden internal
// cache on a hit (the cache is checked with exposed code in gen_subtype_check()).
// Return NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12766 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12767   match(Set result (PartialSubtypeCheck sub super));
12768   effect( KILL rcx, KILL cr );
12769 
12770   ins_cost(1100);  // slightly larger than the next version
12771   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12772             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12773             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12774             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12775             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12776             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12777             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12778      "miss:\t" %}
12779 
12780   opcode(0x1); // Force a XOR of EDI
12781   ins_encode( enc_PartialSubtypeCheck() );
12782   ins_pipe( pipe_slow );
12783 %}
12784 
12785 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12786   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12787   effect( KILL rcx, KILL result );
12788 
12789   ins_cost(1000);
12790   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12791             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12792             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12793             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12794             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12795             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12796      "miss:\t" %}
12797 
12798   opcode(0x0);  // No need to XOR EDI
12799   ins_encode( enc_PartialSubtypeCheck() );
12800   ins_pipe( pipe_slow );
12801 %}
12802 
12803 // ============================================================================
12804 // Branch Instructions -- short offset versions
12805 //
12806 // These instructions are used to replace jumps of a long offset (the default
12807 // match) with jumps of a shorter offset.  These instructions are all tagged
12808 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12809 // match rules in general matching.  Instead, the ADLC generates a conversion
12810 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether the
// short variant can be used via the is_short_branch_offset() predicate in the
// machine-specific code section of this file.
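
// For reference: the short forms are two bytes (opcode + rel8, reaching
// -128..+127 from the next instruction), while the long JMP is five bytes
// (E9 rel32) and the long Jcc is six bytes (0F 8x rel32); this is what the
// size() attributes of the plain variants above and the short variants
// below reflect.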
12814 
12815 // Jump Direct - Label defines a relative address from JMP+1
12816 instruct jmpDir_short(label labl) %{
12817   match(Goto);
12818   effect(USE labl);
12819 
12820   ins_cost(300);
12821   format %{ "JMP,s  $labl" %}
12822   size(2);
12823   ins_encode %{
12824     Label* L = $labl$$label;
12825     __ jmpb(*L);
12826   %}
12827   ins_pipe( pipe_jmp );
12828   ins_short_branch(1);
12829 %}
12830 
12831 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12832 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12833   match(If cop cr);
12834   effect(USE labl);
12835 
12836   ins_cost(300);
12837   format %{ "J$cop,s  $labl" %}
12838   size(2);
12839   ins_encode %{
12840     Label* L = $labl$$label;
12841     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12842   %}
12843   ins_pipe( pipe_jcc );
12844   ins_short_branch(1);
12845 %}
12846 
12847 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12848 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12849   match(CountedLoopEnd cop cr);
12850   effect(USE labl);
12851 
12852   ins_cost(300);
12853   format %{ "J$cop,s  $labl\t# Loop end" %}
12854   size(2);
12855   ins_encode %{
12856     Label* L = $labl$$label;
12857     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12858   %}
12859   ins_pipe( pipe_jcc );
12860   ins_short_branch(1);
12861 %}
12862 
12863 // Jump Direct Conditional - using unsigned comparison
12864 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12865   match(If cop cmp);
12866   effect(USE labl);
12867 
12868   ins_cost(300);
12869   format %{ "J$cop,us $labl" %}
12870   size(2);
12871   ins_encode %{
12872     Label* L = $labl$$label;
12873     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12874   %}
12875   ins_pipe( pipe_jcc );
12876   ins_short_branch(1);
12877 %}
12878 
12879 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12880   match(If cop cmp);
12881   effect(USE labl);
12882 
12883   ins_cost(300);
12884   format %{ "J$cop,us $labl" %}
12885   size(2);
12886   ins_encode %{
12887     Label* L = $labl$$label;
12888     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12889   %}
12890   ins_pipe( pipe_jcc );
12891   ins_short_branch(1);
12892 %}
12893 
12894 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12895   match(If cop cmp);
12896   effect(USE labl);
12897 
12898   ins_cost(300);
12899   format %{ $$template
12900     if ($cop$$cmpcode == Assembler::notEqual) {
12901       $$emit$$"JP,u,s   $labl\n\t"
12902       $$emit$$"J$cop,u,s   $labl"
12903     } else {
12904       $$emit$$"JP,u,s   done\n\t"
12905       $$emit$$"J$cop,u,s  $labl\n\t"
12906       $$emit$$"done:"
12907     }
12908   %}
12909   size(4);
12910   ins_encode %{
12911     Label* l = $labl$$label;
12912     if ($cop$$cmpcode == Assembler::notEqual) {
12913       __ jccb(Assembler::parity, *l);
12914       __ jccb(Assembler::notEqual, *l);
12915     } else if ($cop$$cmpcode == Assembler::equal) {
12916       Label done;
12917       __ jccb(Assembler::parity, done);
12918       __ jccb(Assembler::equal, *l);
12919       __ bind(done);
12920     } else {
12921        ShouldNotReachHere();
12922     }
12923   %}
12924   ins_pipe(pipe_jcc);
12925   ins_short_branch(1);
12926 %}
12927 
12928 // ============================================================================
12929 // Long Compare
12930 //
12931 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12932 // is tricky.  The flavor of compare used depends on whether we are testing
12933 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12934 // The GE test is the negated LT test.  The LE test can be had by commuting
12935 // the operands (yielding a GE test) and then negating; negate again for the
12936 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12937 // NE test is negated from that.
12938 
12939 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12940 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12941 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12942 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12943 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12944 // foo match ends up with the wrong leaf.  One fix is to not match both
12945 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12946 // both forms beat the trinary form of long-compare and both are very useful
12947 // on Intel which has so few registers.
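
// Illustrative sketch (pseudo-assembly, not an instruct) of the two
// flag-setting idioms encoded by the instructs below:
//   LT/GE, reg vs. reg:   CMP src1.lo,src2.lo; MOV tmp,src1.hi; SBB tmp,src2.hi
//                         ! SF/OF now encode the signed 64-bit src1 < src2
//   EQ/NE, reg vs. zero:  MOV tmp,src.lo; OR tmp,src.hi
//                         ! ZF is set iff the whole long is zero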
12948 
12949 // Manifest a CmpL result in an integer register.  Very painful.
12950 // This is the test to avoid.
12951 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12952   match(Set dst (CmpL3 src1 src2));
12953   effect( KILL flags );
12954   ins_cost(1000);
12955   format %{ "XOR    $dst,$dst\n\t"
12956             "CMP    $src1.hi,$src2.hi\n\t"
12957             "JLT,s  m_one\n\t"
12958             "JGT,s  p_one\n\t"
12959             "CMP    $src1.lo,$src2.lo\n\t"
12960             "JB,s   m_one\n\t"
12961             "JEQ,s  done\n"
12962     "p_one:\tINC    $dst\n\t"
12963             "JMP,s  done\n"
12964     "m_one:\tDEC    $dst\n"
12965      "done:" %}
12966   ins_encode %{
12967     Label p_one, m_one, done;
12968     __ xorptr($dst$$Register, $dst$$Register);
12969     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12970     __ jccb(Assembler::less,    m_one);
12971     __ jccb(Assembler::greater, p_one);
12972     __ cmpl($src1$$Register, $src2$$Register);
12973     __ jccb(Assembler::below,   m_one);
12974     __ jccb(Assembler::equal,   done);
12975     __ bind(p_one);
12976     __ incrementl($dst$$Register);
12977     __ jmpb(done);
12978     __ bind(m_one);
12979     __ decrementl($dst$$Register);
12980     __ bind(done);
12981   %}
12982   ins_pipe( pipe_slow );
12983 %}
12984 
12985 //======
12986 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12987 // compares.  Can be used for LE or GT compares by reversing arguments.
12988 // NOT GOOD FOR EQ/NE tests.
12989 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12990   match( Set flags (CmpL src zero ));
12991   ins_cost(100);
12992   format %{ "TEST   $src.hi,$src.hi" %}
12993   opcode(0x85);
12994   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12995   ins_pipe( ialu_cr_reg_reg );
12996 %}
12997 
12998 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12999 // compares.  Can be used for LE or GT compares by reversing arguments.
13000 // NOT GOOD FOR EQ/NE tests.
13001 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13002   match( Set flags (CmpL src1 src2 ));
13003   effect( TEMP tmp );
13004   ins_cost(300);
13005   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13006             "MOV    $tmp,$src1.hi\n\t"
13007             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13008   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13009   ins_pipe( ialu_cr_reg_reg );
13010 %}
13011 
// Long compares reg < zero/reg OR reg >= zero/reg.
13013 // Just a wrapper for a normal branch, plus the predicate test.
13014 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13015   match(If cmp flags);
13016   effect(USE labl);
13017   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13018   expand %{
13019     jmpCon(cmp,flags,labl);    // JLT or JGE...
13020   %}
13021 %}
13022 
13023 //======
13024 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13025 // compares.  Can be used for LE or GT compares by reversing arguments.
13026 // NOT GOOD FOR EQ/NE tests.
13027 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13028   match(Set flags (CmpUL src zero));
13029   ins_cost(100);
13030   format %{ "TEST   $src.hi,$src.hi" %}
13031   opcode(0x85);
13032   ins_encode(OpcP, RegReg_Hi2(src, src));
13033   ins_pipe(ialu_cr_reg_reg);
13034 %}
13035 
13036 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13037 // compares.  Can be used for LE or GT compares by reversing arguments.
13038 // NOT GOOD FOR EQ/NE tests.
13039 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13040   match(Set flags (CmpUL src1 src2));
13041   effect(TEMP tmp);
13042   ins_cost(300);
13043   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13044             "MOV    $tmp,$src1.hi\n\t"
13045             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13046   ins_encode(long_cmp_flags2(src1, src2, tmp));
13047   ins_pipe(ialu_cr_reg_reg);
13048 %}
13049 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13051 // Just a wrapper for a normal branch, plus the predicate test.
13052 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13053   match(If cmp flags);
13054   effect(USE labl);
13055   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13056   expand %{
13057     jmpCon(cmp, flags, labl);    // JLT or JGE...
13058   %}
13059 %}
13060 
13061 // Compare 2 longs and CMOVE longs.
13062 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13063   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13064   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13065   ins_cost(400);
13066   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13067             "CMOV$cmp $dst.hi,$src.hi" %}
13068   opcode(0x0F,0x40);
13069   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13070   ins_pipe( pipe_cmov_reg_long );
13071 %}
13072 
13073 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13074   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13075   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13076   ins_cost(500);
13077   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13078             "CMOV$cmp $dst.hi,$src.hi" %}
13079   opcode(0x0F,0x40);
13080   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13081   ins_pipe( pipe_cmov_reg_long );
13082 %}
13083 
13084 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
13085   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13086   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13087   ins_cost(400);
13088   expand %{
13089     cmovLL_reg_LTGE(cmp, flags, dst, src);
13090   %}
13091 %}
13092 
13093 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
13094   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13095   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13096   ins_cost(500);
13097   expand %{
13098     cmovLL_mem_LTGE(cmp, flags, dst, src);
13099   %}
13100 %}
13101 
13102 // Compare 2 longs and CMOVE ints.
13103 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13104   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13105   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13106   ins_cost(200);
13107   format %{ "CMOV$cmp $dst,$src" %}
13108   opcode(0x0F,0x40);
13109   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13110   ins_pipe( pipe_cmov_reg );
13111 %}
13112 
13113 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13114   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13115   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13116   ins_cost(250);
13117   format %{ "CMOV$cmp $dst,$src" %}
13118   opcode(0x0F,0x40);
13119   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13120   ins_pipe( pipe_cmov_mem );
13121 %}
13122 
13123 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
13124   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13125   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13126   ins_cost(200);
13127   expand %{
13128     cmovII_reg_LTGE(cmp, flags, dst, src);
13129   %}
13130 %}
13131 
13132 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
13133   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13134   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13135   ins_cost(250);
13136   expand %{
13137     cmovII_mem_LTGE(cmp, flags, dst, src);
13138   %}
13139 %}
13140 
13141 // Compare 2 longs and CMOVE ptrs.
13142 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13143   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13144   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13145   ins_cost(200);
13146   format %{ "CMOV$cmp $dst,$src" %}
13147   opcode(0x0F,0x40);
13148   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13149   ins_pipe( pipe_cmov_reg );
13150 %}
13151 
13152 // Compare 2 unsigned longs and CMOVE ptrs.
13153 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13154   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13155   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13156   ins_cost(200);
13157   expand %{
13158     cmovPP_reg_LTGE(cmp,flags,dst,src);
13159   %}
13160 %}
13161 
13162 // Compare 2 longs and CMOVE doubles
13163 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13165   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13166   ins_cost(200);
13167   expand %{
13168     fcmovDPR_regS(cmp,flags,dst,src);
13169   %}
13170 %}
13171 
13172 // Compare 2 longs and CMOVE doubles
13173 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13175   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13176   ins_cost(200);
13177   expand %{
13178     fcmovD_regS(cmp,flags,dst,src);
13179   %}
13180 %}
13181 
13182 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13184   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13185   ins_cost(200);
13186   expand %{
13187     fcmovFPR_regS(cmp,flags,dst,src);
13188   %}
13189 %}
13190 
13191 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13193   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13194   ins_cost(200);
13195   expand %{
13196     fcmovF_regS(cmp,flags,dst,src);
13197   %}
13198 %}
13199 
13200 //======
13201 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13202 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13203   match( Set flags (CmpL src zero ));
13204   effect(TEMP tmp);
13205   ins_cost(200);
13206   format %{ "MOV    $tmp,$src.lo\n\t"
13207             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13208   ins_encode( long_cmp_flags0( src, tmp ) );
13209   ins_pipe( ialu_reg_reg_long );
13210 %}
13211 
13212 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13213 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13214   match( Set flags (CmpL src1 src2 ));
13215   ins_cost(200+300);
13216   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13217             "JNE,s  skip\n\t"
13218             "CMP    $src1.hi,$src2.hi\n\t"
13219      "skip:\t" %}
13220   ins_encode( long_cmp_flags1( src1, src2 ) );
13221   ins_pipe( ialu_cr_reg_reg );
13222 %}
13223 
13224 // Long compare reg == zero/reg OR reg != zero/reg
13225 // Just a wrapper for a normal branch, plus the predicate test.
13226 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13227   match(If cmp flags);
13228   effect(USE labl);
13229   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13230   expand %{
13231     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13232   %}
13233 %}
13234 
13235 //======
13236 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13237 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13238   match(Set flags (CmpUL src zero));
13239   effect(TEMP tmp);
13240   ins_cost(200);
13241   format %{ "MOV    $tmp,$src.lo\n\t"
13242             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13243   ins_encode(long_cmp_flags0(src, tmp));
13244   ins_pipe(ialu_reg_reg_long);
13245 %}
13246 
13247 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13248 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13249   match(Set flags (CmpUL src1 src2));
13250   ins_cost(200+300);
13251   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13252             "JNE,s  skip\n\t"
13253             "CMP    $src1.hi,$src2.hi\n\t"
13254      "skip:\t" %}
13255   ins_encode(long_cmp_flags1(src1, src2));
13256   ins_pipe(ialu_cr_reg_reg);
13257 %}
13258 
13259 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13260 // Just a wrapper for a normal branch, plus the predicate test.
13261 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13262   match(If cmp flags);
13263   effect(USE labl);
13264   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13265   expand %{
13266     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13267   %}
13268 %}
13269 
13270 // Compare 2 longs and CMOVE longs.
13271 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13272   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13273   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13274   ins_cost(400);
13275   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13276             "CMOV$cmp $dst.hi,$src.hi" %}
13277   opcode(0x0F,0x40);
13278   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13279   ins_pipe( pipe_cmov_reg_long );
13280 %}
13281 
13282 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13283   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13284   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13285   ins_cost(500);
13286   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13287             "CMOV$cmp $dst.hi,$src.hi" %}
13288   opcode(0x0F,0x40);
13289   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13290   ins_pipe( pipe_cmov_reg_long );
13291 %}
13292 
13293 // Compare 2 longs and CMOVE ints.
13294 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13295   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13296   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13297   ins_cost(200);
13298   format %{ "CMOV$cmp $dst,$src" %}
13299   opcode(0x0F,0x40);
13300   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13301   ins_pipe( pipe_cmov_reg );
13302 %}
13303 
13304 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13305   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13306   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13307   ins_cost(250);
13308   format %{ "CMOV$cmp $dst,$src" %}
13309   opcode(0x0F,0x40);
13310   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13311   ins_pipe( pipe_cmov_mem );
13312 %}
13313 
13314 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13315   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13316   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13317   ins_cost(200);
13318   expand %{
13319     cmovII_reg_EQNE(cmp, flags, dst, src);
13320   %}
13321 %}
13322 
13323 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13324   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13325   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13326   ins_cost(250);
13327   expand %{
13328     cmovII_mem_EQNE(cmp, flags, dst, src);
13329   %}
13330 %}
13331 
13332 // Compare 2 longs and CMOVE ptrs.
13333 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13334   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13335   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13336   ins_cost(200);
13337   format %{ "CMOV$cmp $dst,$src" %}
13338   opcode(0x0F,0x40);
13339   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13340   ins_pipe( pipe_cmov_reg );
13341 %}
13342 
13343 // Compare 2 unsigned longs and CMOVE ptrs.
13344 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13345   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13346   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13347   ins_cost(200);
13348   expand %{
13349     cmovPP_reg_EQNE(cmp,flags,dst,src);
13350   %}
13351 %}
13352 
13353 // Compare 2 longs and CMOVE doubles
13354 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13356   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13357   ins_cost(200);
13358   expand %{
13359     fcmovDPR_regS(cmp,flags,dst,src);
13360   %}
13361 %}
13362 
13363 // Compare 2 longs and CMOVE doubles
13364 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13366   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13367   ins_cost(200);
13368   expand %{
13369     fcmovD_regS(cmp,flags,dst,src);
13370   %}
13371 %}
13372 
13373 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13375   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13376   ins_cost(200);
13377   expand %{
13378     fcmovFPR_regS(cmp,flags,dst,src);
13379   %}
13380 %}
13381 
13382 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13384   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13385   ins_cost(200);
13386   expand %{
13387     fcmovF_regS(cmp,flags,dst,src);
13388   %}
13389 %}
13390 
13391 //======
13392 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13393 // Same as cmpL_reg_flags_LEGT except must negate src
13394 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13395   match( Set flags (CmpL src zero ));
13396   effect( TEMP tmp );
13397   ins_cost(300);
13398   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13399             "CMP    $tmp,$src.lo\n\t"
13400             "SBB    $tmp,$src.hi\n\t" %}
13401   ins_encode( long_cmp_flags3(src, tmp) );
13402   ins_pipe( ialu_reg_reg_long );
13403 %}
13404 
13405 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13406 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13407 // requires a commuted test to get the same result.
13408 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13409   match( Set flags (CmpL src1 src2 ));
13410   effect( TEMP tmp );
13411   ins_cost(300);
13412   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13413             "MOV    $tmp,$src2.hi\n\t"
13414             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13415   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13416   ins_pipe( ialu_cr_reg_reg );
13417 %}
13418 
// Long compares reg < zero/reg OR reg >= zero/reg.
13420 // Just a wrapper for a normal branch, plus the predicate test
13421 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13422   match(If cmp flags);
13423   effect(USE labl);
13424   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13425   ins_cost(300);
13426   expand %{
13427     jmpCon(cmp,flags,labl);    // JGT or JLE...
13428   %}
13429 %}
13430 
13431 //======
13432 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13433 // Same as cmpUL_reg_flags_LEGT except must negate src
13434 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13435   match(Set flags (CmpUL src zero));
13436   effect(TEMP tmp);
13437   ins_cost(300);
13438   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13439             "CMP    $tmp,$src.lo\n\t"
13440             "SBB    $tmp,$src.hi" %}
13441   ins_encode(long_cmp_flags3(src, tmp));
13442   ins_pipe(ialu_reg_reg_long);
13443 %}
13444 
13445 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13446 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13447 // requires a commuted test to get the same result.
13448 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13449   match(Set flags (CmpUL src1 src2));
13450   effect(TEMP tmp);
13451   ins_cost(300);
13452   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13453             "MOV    $tmp,$src2.hi\n\t"
13454             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13455   ins_encode(long_cmp_flags2( src2, src1, tmp));
13456   ins_pipe(ialu_cr_reg_reg);
13457 %}
13458 
13459 // Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13460 // Just a wrapper for a normal branch, plus the predicate test
13461 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13462   match(If cmp flags);
13463   effect(USE labl);
13464   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13465   ins_cost(300);
13466   expand %{
13467     jmpCon(cmp, flags, labl);    // JGT or JLE...
13468   %}
13469 %}
13470 
13471 // Compare 2 longs and CMOVE longs.
13472 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13473   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13474   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13475   ins_cost(400);
13476   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13477             "CMOV$cmp $dst.hi,$src.hi" %}
13478   opcode(0x0F,0x40);
13479   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13480   ins_pipe( pipe_cmov_reg_long );
13481 %}
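
// Note: the 64-bit conditional move above is synthesized as two 32-bit CMOVs
// on the same condition, e.g. (illustration only):
//
//   CMOVcc dst.lo,src.lo
//   CMOVcc dst.hi,src.hi   // flags are unchanged by the first CMOV
//
// This is safe because CMOVcc reads EFLAGS but never writes them.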
13482 
13483 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13484   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13485   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13486   ins_cost(500);
13487   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13488             "CMOV$cmp $dst.hi,$src.hi+4" %}
13489   opcode(0x0F,0x40);
13490   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13491   ins_pipe( pipe_cmov_reg_long );
13492 %}
13493 
13494 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13495   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13496   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13497   ins_cost(400);
13498   expand %{
13499     cmovLL_reg_LEGT(cmp, flags, dst, src);
13500   %}
13501 %}
13502 
13503 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13504   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13505   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13506   ins_cost(500);
13507   expand %{
13508     cmovLL_mem_LEGT(cmp, flags, dst, src);
13509   %}
13510 %}
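
// The *_U rules above differ only in taking the unsigned operands
// (cmpOpU_commute, flagsReg_ulong_LEGT); they expand to the signed cmov
// encodings because the emitted CMOVcc bytes differ only in the
// condition-code field, which comes from the cmp operand itself.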
13511 
13512 // Compare 2 longs and CMOVE ints.
13513 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13514   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13515   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13516   ins_cost(200);
13517   format %{ "CMOV$cmp $dst,$src" %}
13518   opcode(0x0F,0x40);
13519   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13520   ins_pipe( pipe_cmov_reg );
13521 %}
13522 
13523 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13524   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13525   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13526   ins_cost(250);
13527   format %{ "CMOV$cmp $dst,$src" %}
13528   opcode(0x0F,0x40);
13529   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13530   ins_pipe( pipe_cmov_mem );
13531 %}
13532 
13533 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13534   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13535   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13536   ins_cost(200);
13537   expand %{
13538     cmovII_reg_LEGT(cmp, flags, dst, src);
13539   %}
13540 %}
13541 
13542 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13543   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13544   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13545   ins_cost(250);
13546   expand %{
13547     cmovII_mem_LEGT(cmp, flags, dst, src);
13548   %}
13549 %}
13550 
13551 // Compare 2 longs and CMOVE ptrs.
13552 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13553   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13554   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13555   ins_cost(200);
13556   format %{ "CMOV$cmp $dst,$src" %}
13557   opcode(0x0F,0x40);
13558   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13559   ins_pipe( pipe_cmov_reg );
13560 %}
13561 
13562 // Compare 2 unsigned longs and CMOVE ptrs.
13563 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13564   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13565   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13566   ins_cost(200);
13567   expand %{
13568     cmovPP_reg_LEGT(cmp,flags,dst,src);
13569   %}
13570 %}
13571 
13572 // Compare 2 longs and CMOVE doubles
13573 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13574   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13575   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13576   ins_cost(200);
13577   expand %{
13578     fcmovDPR_regS(cmp,flags,dst,src);
13579   %}
13580 %}
13581 
13582 // Compare 2 longs and CMOVE doubles
13583 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13584   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13585   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13586   ins_cost(200);
13587   expand %{
13588     fcmovD_regS(cmp,flags,dst,src);
13589   %}
13590 %}
13591 
13592 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13593   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13594   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13595   ins_cost(200);
13596   expand %{
13597     fcmovFPR_regS(cmp,flags,dst,src);
13598   %}
13599 %}
13600 
13601 
13602 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13603   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
13604   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13605   ins_cost(200);
13606   expand %{
13607     fcmovF_regS(cmp,flags,dst,src);
13608   %}
13609 %}
13610 
13611 
13612 // ============================================================================
13613 // Procedure Call/Return Instructions
13614 // Call Java Static Instruction
13615 // Note: If this code changes, the corresponding ret_addr_offset() and
13616 //       compute_padding() functions will have to be adjusted.
13617 instruct CallStaticJavaDirect(method meth) %{
13618   match(CallStaticJava);
13619   effect(USE meth);
13620 
13621   ins_cost(300);
13622   format %{ "CALL,static " %}
13623   opcode(0xE8); /* E8 cd */
13624   ins_encode( pre_call_resets,
13625               Java_Static_Call( meth ),
13626               call_epilog,
13627               post_call_FPU );
13628   ins_pipe( pipe_slow );
13629   ins_alignment(4);
13630 %}
13631 
13632 // Call Java Dynamic Instruction
13633 // Note: If this code changes, the corresponding ret_addr_offset() and
13634 //       compute_padding() functions will have to be adjusted.
13635 instruct CallDynamicJavaDirect(method meth) %{
13636   match(CallDynamicJava);
13637   effect(USE meth);
13638 
13639   ins_cost(300);
13640   format %{ "MOV    EAX,(oop)-1\n\t"
13641             "CALL,dynamic" %}
13642   opcode(0xE8); /* E8 cd */
13643   ins_encode( pre_call_resets,
13644               Java_Dynamic_Call( meth ),
13645               call_epilog,
13646               post_call_FPU );
13647   ins_pipe( pipe_slow );
13648   ins_alignment(4);
13649 %}
13650 
13651 // Call Runtime Instruction
13652 instruct CallRuntimeDirect(method meth) %{
13653   match(CallRuntime );
13654   effect(USE meth);
13655 
13656   ins_cost(300);
13657   format %{ "CALL,runtime " %}
13658   opcode(0xE8); /* E8 cd */
13659   // Use FFREEs to clear entries in float stack
13660   ins_encode( pre_call_resets,
13661               FFree_Float_Stack_All,
13662               Java_To_Runtime( meth ),
13663               post_call_FPU );
13664   ins_pipe( pipe_slow );
13665 %}
13666 
13667 // Call runtime without safepoint
13668 instruct CallLeafDirect(method meth) %{
13669   match(CallLeaf);
13670   effect(USE meth);
13671 
13672   ins_cost(300);
13673   format %{ "CALL_LEAF,runtime " %}
13674   opcode(0xE8); /* E8 cd */
13675   ins_encode( pre_call_resets,
13676               FFree_Float_Stack_All,
13677               Java_To_Runtime( meth ),
13678               Verify_FPU_For_Leaf, post_call_FPU );
13679   ins_pipe( pipe_slow );
13680 %}
13681 
13682 instruct CallLeafNoFPDirect(method meth) %{
13683   match(CallLeafNoFP);
13684   effect(USE meth);
13685 
13686   ins_cost(300);
13687   format %{ "CALL_LEAF_NOFP,runtime " %}
13688   opcode(0xE8); /* E8 cd */
13689   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13690   ins_pipe( pipe_slow );
13691 %}
13692 
13693 
13694 // Return Instruction
13695 // Remove the return address & jump to it.
13696 instruct Ret() %{
13697   match(Return);
13698   format %{ "RET" %}
13699   opcode(0xC3);
13700   ins_encode(OpcP);
13701   ins_pipe( pipe_jmp );
13702 %}
13703 
13704 // Tail Call; Jump from runtime stub to Java code.
13705 // Also known as an 'interprocedural jump'.
13706 // Target of jump will eventually return to caller.
13707 // TailJump below removes the return address.
13708 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13709   match(TailCall jump_target method_ptr);
13710   ins_cost(300);
13711   format %{ "JMP    $jump_target \t# EBX holds method" %}
13712   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13713   ins_encode( OpcP, RegOpc(jump_target) );
13714   ins_pipe( pipe_jmp );
13715 %}
13716 
13717 
13718 // Tail Jump; remove the return address; jump to target.
13719 // TailCall above leaves the return address around.
13720 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13721   match( TailJump jump_target ex_oop );
13722   ins_cost(300);
13723   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13724             "JMP    $jump_target " %}
13725   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13726   ins_encode( enc_pop_rdx,
13727               OpcP, RegOpc(jump_target) );
13728   ins_pipe( pipe_jmp );
13729 %}
13730 
13731 // Create exception oop: created by stack-crawling runtime code.
13732 // Created exception is now available to this handler, and is setup
13733 // just prior to jumping to this handler.  No code emitted.
13734 instruct CreateException( eAXRegP ex_oop )
13735 %{
13736   match(Set ex_oop (CreateEx));
13737 
13738   size(0);
13739   // use the following format syntax
13740   format %{ "# exception oop is in EAX; no code emitted" %}
13741   ins_encode();
13742   ins_pipe( empty );
13743 %}
13744 
13745 
13746 // Rethrow exception:
13747 // The exception oop will come in the first argument position.
13748 // Then JUMP (not call) to the rethrow stub code.
13749 instruct RethrowException()
13750 %{
13751   match(Rethrow);
13752 
13753   // use the following format syntax
13754   format %{ "JMP    rethrow_stub" %}
13755   ins_encode(enc_rethrow);
13756   ins_pipe( pipe_jmp );
13757 %}
13758 
13759 // inlined locking and unlocking
13760 
13761 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
13762   predicate(Compile::current()->use_rtm());
13763   match(Set cr (FastLock object box));
13764   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
13765   ins_cost(300);
13766   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13767   ins_encode %{
13768     __ get_thread($thread$$Register);
13769     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13770                  $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
13771                  _rtm_counters, _stack_rtm_counters,
13772                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13773                  true, ra_->C->profile_rtm());
13774   %}
13775   ins_pipe(pipe_slow);
13776 %}
13777 
13778 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
13779   predicate(!Compile::current()->use_rtm());
13780   match(Set cr (FastLock object box));
13781   effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
13782   ins_cost(300);
13783   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13784   ins_encode %{
13785     __ get_thread($thread$$Register);
13786     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13787                  $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
13788   %}
13789   ins_pipe(pipe_slow);
13790 %}
13791 
13792 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13793   match(Set cr (FastUnlock object box));
13794   effect(TEMP tmp, USE_KILL box);
13795   ins_cost(300);
13796   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13797   ins_encode %{
13798     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13799   %}
13800   ins_pipe(pipe_slow);
13801 %}
13802 
13803 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13804   predicate(Matcher::vector_length(n) <= 32);
13805   match(Set dst (MaskAll src));
13806   format %{ "mask_all_evexL_LT32 $dst, $src \t" %}
13807   ins_encode %{
13808     int mask_len = Matcher::vector_length(this);
13809     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13810   %}
13811   ins_pipe( pipe_slow );
13812 %}
13813 
13814 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13815   predicate(Matcher::vector_length(n) > 32);
13816   match(Set dst (MaskAll src));
13817   effect(TEMP ktmp);
13818   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13819   ins_encode %{
13820     int mask_len = Matcher::vector_length(this);
13821     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13822   %}
13823   ins_pipe( pipe_slow );
13824 %}
13825 
13826 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13827   predicate(Matcher::vector_length(n) > 32);
13828   match(Set dst (MaskAll src));
13829   effect(TEMP ktmp);
13830   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13831   ins_encode %{
13832     int mask_len = Matcher::vector_length(this);
13833     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13834   %}
13835   ins_pipe( pipe_slow );
13836 %}
13837 
13838 // ============================================================================
13839 // Safepoint Instruction
13840 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13841   match(SafePoint poll);
13842   effect(KILL cr, USE poll);
13843 
13844   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13845   ins_cost(125);
13846   // EBP would need size(3)
13847   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13848   ins_encode %{
13849     __ relocate(relocInfo::poll_type);
13850     address pre_pc = __ pc();
13851     __ testl(rax, Address($poll$$Register, 0));
13852     address post_pc = __ pc();
13853     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13854   %}
13855   ins_pipe(ialu_reg_mem);
13856 %}
13857 
13858 
13859 // ============================================================================
13860 // This name is KNOWN by the ADLC and cannot be changed.
13861 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13862 // for this guy.
13863 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13864   match(Set dst (ThreadLocal));
13865   effect(DEF dst, KILL cr);
13866 
13867   format %{ "MOV    $dst, Thread::current()" %}
13868   ins_encode %{
13869     Register dstReg = as_Register($dst$$reg);
13870     __ get_thread(dstReg);
13871   %}
13872   ins_pipe( ialu_reg_fat );
13873 %}
13874 
13875 
13876 
13877 //----------PEEPHOLE RULES-----------------------------------------------------
13878 // These must follow all instruction definitions as they use the names
13879 // defined in the instructions definitions.
13880 //
13881 // peepmatch ( root_instr_name [preceding_instruction]* );
13882 //
13883 // peepconstraint %{
13884 // (instruction_number.operand_name relational_op instruction_number.operand_name
13885 //  [, ...] );
13886 // // instruction numbers are zero-based using left to right order in peepmatch
13887 //
13888 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13889 // // provide an instruction_number.operand_name for each operand that appears
13890 // // in the replacement instruction's match rule
13891 //
13892 // ---------VM FLAGS---------------------------------------------------------
13893 //
13894 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13895 //
13896 // Each peephole rule is given an identifying number starting with zero and
13897 // increasing by one in the order seen by the parser.  An individual peephole
13898 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13899 // on the command-line.
13900 //
13901 // ---------CURRENT LIMITATIONS----------------------------------------------
13902 //
13903 // Only match adjacent instructions in same basic block
13904 // Only equality constraints
13905 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13906 // Only one replacement instruction
13907 //
13908 // ---------EXAMPLE----------------------------------------------------------
13909 //
13910 // // pertinent parts of existing instructions in architecture description
13911 // instruct movI(rRegI dst, rRegI src) %{
13912 //   match(Set dst (CopyI src));
13913 // %}
13914 //
13915 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13916 //   match(Set dst (AddI dst src));
13917 //   effect(KILL cr);
13918 // %}
13919 //
13920 // // Change (inc mov) to lea
13921 // peephole %{
13922 //   // increment preceded by register-register move
13923 //   peepmatch ( incI_eReg movI );
13924 //   // require that the destination register of the increment
13925 //   // match the destination register of the move
13926 //   peepconstraint ( 0.dst == 1.dst );
13927 //   // construct a replacement instruction that sets
13928 //   // the destination to ( move's source register + one )
13929 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13930 // %}
13931 //
13932 // Implementation no longer uses movX instructions since
13933 // machine-independent system no longer uses CopyX nodes.
13934 //
13935 // peephole %{
13936 //   peepmatch ( incI_eReg movI );
13937 //   peepconstraint ( 0.dst == 1.dst );
13938 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13939 // %}
13940 //
13941 // peephole %{
13942 //   peepmatch ( decI_eReg movI );
13943 //   peepconstraint ( 0.dst == 1.dst );
13944 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13945 // %}
13946 //
13947 // peephole %{
13948 //   peepmatch ( addI_eReg_imm movI );
13949 //   peepconstraint ( 0.dst == 1.dst );
13950 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13951 // %}
13952 //
13953 // peephole %{
13954 //   peepmatch ( addP_eReg_imm movP );
13955 //   peepconstraint ( 0.dst == 1.dst );
13956 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13957 // %}
13958 
13959 // // Change load of spilled value to only a spill
13960 // instruct storeI(memory mem, rRegI src) %{
13961 //   match(Set mem (StoreI mem src));
13962 // %}
13963 //
13964 // instruct loadI(rRegI dst, memory mem) %{
13965 //   match(Set dst (LoadI mem));
13966 // %}
13967 //
13968 peephole %{
13969   peepmatch ( loadI storeI );
13970   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13971   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13972 %}
13973 
13974 //----------SMARTSPILL RULES---------------------------------------------------
13975 // These must follow all instruction definitions as they use the names
13976 // defined in the instructions definitions.