1 //
    2 // Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding, VM register );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
   64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   81 // Here is the trick: FPR1 is really st(0), except in the midst
   82 // of emission of assembly for a machnode. During the emission the fpu stack
   83 // is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
   84 // the stack will not have this element, so FPR1 == st(0) from the
   85 // oopMap viewpoint. This same weirdness with numbering causes
   86 // instruction encoding to have to play games with the register
   87 // encoding to correct for this 0/1 issue. See MachSpillCopyNode::implementation,
   88 // where it does flt->flt moves, for an example.
   89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP (high:low; the high encoding is the low encoding + 2).
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
  143 // Dynamic register class that selects at runtime between register classes
  144 // any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer).
  145 // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg_no_ebp and int_reg_with_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg_no_ebp and ncx_reg_with_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg_no_ebp and nabx_reg_with_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
  208 // Class of integer register pairs (excluding the EBP,EDI pair)
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg_no_ebp and long_reg_with_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Classes of integer register pairs that align with the calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  220 // Not AX or DX (nor EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  222 // Dynamic register class that selects between nadx_reg_no_ebp and nadx_reg_with_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
  225 // Floating point registers.  Notice FPR0 is not a choice.
  226 // FPR0 is never allocated; we use clever encodings to fake
  227 // 2-address instructions out of Intel's FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
  247 // Declared in the header section because it must be visible to the DFA in dfa_x86_32.cpp
  248 extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
  252 #define   RELOC_IMM32    Assembler::imm_operand   // shorthand for the Assembler imm_operand constant
  253 #define   RELOC_DISP32   Assembler::disp32_operand   // shorthand for the Assembler disp32_operand constant
  254 
  255 #define __ masm->   // "__ foo()" expands to "masm->foo()"; requires a variable named masm in scope
  256 
  257 // How to find the high register of a Long pair, given the low register:
  257 // the high half has the low half's encoding + 2 (EAX=0/EDX=2, ECX=1/EBX=3, EBP=5/EDI=7).
  258 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
  259 #define   HIGH_FROM_LOW_ENC(x) ((x)+2)
  260 
  // The 128-bit aligned bitmasks handed to the XMM instructions (for
  // sign-masking or sign-bit flipping, i.e. fast NegF/NegD and AbsF/AbsD)
  // are built by the static initializers that follow further below.

  // Register-mask initialization hook; it has nothing to do here, so the
  // body is deliberately left empty.
  void reg_mask_init() {
  }
  266 
  267 // Note: 'double' and 'long long' have 32-bits alignment on x86.
  268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  269   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  270   // of 128-bits operands for SSE instructions.
  271   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  272   // Store the value to a 128-bits operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
  277 
  278 // Buffer for 128-bits masks used by SSE instructions.
  279 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  280 
  281 // Static initialization during VM startup.
  282 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  284 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
  299 
  300 // !!!!! Special hack to get all type of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned to
  323 // ensure that it does not span a cache line so that it can be patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned to
  331 // ensure that it does not span a cache line so that it can be patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
  338 
  339 // EMIT_RM()
  340 void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   __ emit_int8(c);
  343 }
  344 
  345 // EMIT_CC()
  346 void emit_cc(C2_MacroAssembler *masm, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   __ emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(C2_MacroAssembler *masm, int code) {
  353   __ emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) {
  358   __ relocate(__ inst_mark() + offset, reloc);
  359   emit_opcode(masm, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(C2_MacroAssembler *masm, int d8) {
  364   __ emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(C2_MacroAssembler *masm, int d16) {
  369   __ emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(C2_MacroAssembler *masm, int d32) {
  374   __ emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   __ relocate(__ inst_mark(), reloc, format);
  381   __ emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   __ relocate(__ inst_mark(), rspec, format);
  393   __ emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) {
  398   emit_opcode( masm, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( masm, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
  402     emit_d8 (masm, disp);     // Displacement  // R/M byte
  403   } else {
  404     emit_rm( masm, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( masm, 0x00, ESP_enc, ESP_enc);    // SIB byte
  406     emit_d32(masm, disp);     // Displacement  // R/M byte
  407   }
  408 }
  409 
  410    // rRegI ereg, memory mem) %{    // emit_reg_mem
  411 void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  412   // There is no index & no scale, use form without SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(masm, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(masm, 0x1, reg_encoding, base);
  423         emit_d8(masm, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(masm, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(masm, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (masm, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(masm, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(masm, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (masm, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(masm, 0x0, reg_encoding, 0x4);
  450       emit_rm(masm, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(masm, 0x1, reg_encoding, 0x4);
  456         emit_rm(masm, scale, index, base);
  457         emit_d8(masm, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(masm, 0x2, reg_encoding, 0x4);
  462           emit_rm(masm, scale, index, 0x04);
  463         } else {
  464           emit_rm(masm, 0x2, reg_encoding, 0x4);
  465           emit_rm(masm, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(masm, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (masm, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
  476 
  477 
  478 void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( masm, 0x8B );
  483     emit_rm(masm, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
  486 
  487 void emit_cmpfp_fixup(MacroAssembler* masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss instructions set ZF,PF,CF flags and
  493   // zero OF,AF,SF for NaN values.
  494   // Fixup flags by zeroing ZF,PF so that compare of NaN
  495   // values returns 'less than' result (CF is set).
  496   // Leave the rest of flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
  506 
  507 static void emit_cmpfp3(MacroAssembler* masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
  516 
  517 
  518 //=============================================================================
  519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612 
  613   int framesize = C->output()->frame_size_in_bytes();
  614   int bangsize = C->output()->bang_size_in_bytes();
  615 
  616   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != nullptr);
  617 
  618   C->output()->set_frame_complete(__ offset());
  619 
  620   if (C->has_mach_constant_base_node()) {
  621     // NOTE: We set the table base offset here because users might be
  622     // emitted before MachConstantBaseNode.
  623     ConstantTable& constant_table = C->output()->constant_table();
  624     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  625   }
  626 }
  627 
  628 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  629   return MachNode::size(ra_); // too many variables; just compute it the hard way
  630 }
  631 
  632 int MachPrologNode::reloc() const {
  633   return 0; // a large enough number
  634 }
  635 
  636 //=============================================================================
  637 #ifndef PRODUCT
  638 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  639   Compile *C = ra_->C;
  640   int framesize = C->output()->frame_size_in_bytes();
  641   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  642   // Remove two words for return addr and rbp,
  643   framesize -= 2*wordSize;
  644 
  645   if (C->max_vector_size() > 16) {
  646     st->print("VZEROUPPER");
  647     st->cr(); st->print("\t");
  648   }
  649   if (C->in_24_bit_fp_mode()) {
  650     st->print("FLDCW  standard control word");
  651     st->cr(); st->print("\t");
  652   }
  653   if (framesize) {
  654     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  655     st->cr(); st->print("\t");
  656   }
  657   st->print_cr("POPL   EBP"); st->print("\t");
  658   if (do_polling() && C->is_method_compilation()) {
  659     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  660               "JA      #safepoint_stub\t"
  661               "# Safepoint: poll for GC");
  662   }
  663 }
  664 #endif
  665 
  666 void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  667   Compile *C = ra_->C;
  668 
  669   if (C->max_vector_size() > 16) {
  670     // Clear upper bits of YMM registers when current compiled code uses
  671     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  672     __ vzeroupper();
  673   }
  674   // If method set FPU control word, restore to standard control word
  675   if (C->in_24_bit_fp_mode()) {
  676     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  677   }
  678 
  679   int framesize = C->output()->frame_size_in_bytes();
  680   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  681   // Remove two words for return addr and rbp,
  682   framesize -= 2*wordSize;
  683 
  684   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  685 
  686   if (framesize >= 128) {
  687     emit_opcode(masm, 0x81); // add  SP, #framesize
  688     emit_rm(masm, 0x3, 0x00, ESP_enc);
  689     emit_d32(masm, framesize);
  690   } else if (framesize) {
  691     emit_opcode(masm, 0x83); // add  SP, #framesize
  692     emit_rm(masm, 0x3, 0x00, ESP_enc);
  693     emit_d8(masm, framesize);
  694   }
  695 
  696   emit_opcode(masm, 0x58 | EBP_enc);
  697 
  698   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  699     __ reserved_stack_check();
  700   }
  701 
  702   if (do_polling() && C->is_method_compilation()) {
  703     Register thread = as_Register(EBX_enc);
  704     __ get_thread(thread);
  705     Label dummy_label;
  706     Label* code_stub = &dummy_label;
  707     if (!C->output()->in_scratch_emit_size()) {
  708       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  709       C->output()->add_stub(stub);
  710       code_stub = &stub->entry();
  711     }
  712     __ set_inst_mark();
  713     __ relocate(relocInfo::poll_return_type);
  714     __ clear_inst_mark();
  715     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  716   }
  717 }
  718 
  719 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  720   return MachNode::size(ra_); // too many variables; just compute it
  721                               // the hard way
  722 }
  723 
  724 int MachEpilogNode::reloc() const {
  725   return 0; // a large enough number
  726 }
  727 
  728 const Pipeline * MachEpilogNode::pipeline() const {
  729   return MachNode::pipeline_class();
  730 }
  731 
  732 //=============================================================================
  733 
  734 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  735 static enum RC rc_class( OptoReg::Name reg ) {
  736 
  737   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  738   if (OptoReg::is_stack(reg)) return rc_stack;
  739 
  740   VMReg r = OptoReg::as_VMReg(reg);
  741   if (r->is_Register()) return rc_int;
  742   if (r->is_FloatRegister()) {
  743     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  744     return rc_float;
  745   }
  746   if (r->is_KRegister()) return rc_kreg;
  747   assert(r->is_XMMRegister(), "must be");
  748   return rc_xmm;
  749 }
  750 
  751 static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg,
  752                         int opcode, const char *op_str, int size, outputStream* st ) {
  753   if( masm ) {
  754     masm->set_inst_mark();
  755     emit_opcode  (masm, opcode );
  756     encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  757     masm->clear_inst_mark();
  758 #ifndef PRODUCT
  759   } else if( !do_size ) {
  760     if( size != 0 ) st->print("\n\t");
  761     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  762       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  763       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  764     } else { // FLD, FST, PUSH, POP
  765       st->print("%s [ESP + #%d]",op_str,offset);
  766     }
  767 #endif
  768   }
  769   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  770   return size+3+offset_size;
  771 }
  772 
  773 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  774 static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load,
  775                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  776   int in_size_in_bits = Assembler::EVEX_32bit;
  777   int evex_encoding = 0;
  778   if (reg_lo+1 == reg_hi) {
  779     in_size_in_bits = Assembler::EVEX_64bit;
  780     evex_encoding = Assembler::VEX_W;
  781   }
  782   if (masm) {
  783     // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
  784     //                          it maps more cases to single byte displacement
  785     __ set_managed();
  786     if (reg_lo+1 == reg_hi) { // double move?
  787       if (is_load) {
  788         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  789       } else {
  790         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  791       }
  792     } else {
  793       if (is_load) {
  794         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  795       } else {
  796         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  797       }
  798     }
  799 #ifndef PRODUCT
  800   } else if (!do_size) {
  801     if (size != 0) st->print("\n\t");
  802     if (reg_lo+1 == reg_hi) { // double move?
  803       if (is_load) st->print("%s %s,[ESP + #%d]",
  804                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  805                               Matcher::regName[reg_lo], offset);
  806       else         st->print("MOVSD  [ESP + #%d],%s",
  807                               offset, Matcher::regName[reg_lo]);
  808     } else {
  809       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  810                               Matcher::regName[reg_lo], offset);
  811       else         st->print("MOVSS  [ESP + #%d],%s",
  812                               offset, Matcher::regName[reg_lo]);
  813     }
  814 #endif
  815   }
  816   bool is_single_byte = false;
  817   if ((UseAVX > 2) && (offset != 0)) {
  818     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  819   }
  820   int offset_size = 0;
  821   if (UseAVX > 2 ) {
  822     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  823   } else {
  824     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  825   }
  826   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  827   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  828   return size+5+offset_size;
  829 }
  830 
  831 
  832 static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
  833                             int src_hi, int dst_hi, int size, outputStream* st ) {
  834   if (masm) {
  835     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  836     __ set_managed();
  837     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  838       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  839                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  840     } else {
  841       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  842                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  843     }
  844 #ifndef PRODUCT
  845   } else if (!do_size) {
  846     if (size != 0) st->print("\n\t");
  847     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
  848       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  849         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  850       } else {
  851         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  852       }
  853     } else {
  854       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  855         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  856       } else {
  857         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  858       }
  859     }
  860 #endif
  861   }
  862   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  863   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  864   int sz = (UseAVX > 2) ? 6 : 4;
  865   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  866       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  867   return size + sz;
  868 }
  869 
  870 static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
  871                             int src_hi, int dst_hi, int size, outputStream* st ) {
  872   // 32-bit
  873   if (masm) {
  874     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  875     __ set_managed();
  876     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  877              as_Register(Matcher::_regEncode[src_lo]));
  878 #ifndef PRODUCT
  879   } else if (!do_size) {
  880     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  881 #endif
  882   }
  883   return (UseAVX> 2) ? 6 : 4;
  884 }
  885 
  886 
  887 static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo,
  888                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  889   // 32-bit
  890   if (masm) {
  891     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  892     __ set_managed();
  893     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  894              as_XMMRegister(Matcher::_regEncode[src_lo]));
  895 #ifndef PRODUCT
  896   } else if (!do_size) {
  897     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  898 #endif
  899   }
  900   return (UseAVX> 2) ? 6 : 4;
  901 }
  902 
  903 static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) {
  904   if( masm ) {
  905     emit_opcode(masm, 0x8B );
  906     emit_rm    (masm, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  907 #ifndef PRODUCT
  908   } else if( !do_size ) {
  909     if( size != 0 ) st->print("\n\t");
  910     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  911 #endif
  912   }
  913   return size+2;
  914 }
  915 
  916 static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  917                                  int offset, int size, outputStream* st ) {
  918   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  919     if( masm ) {
  920       emit_opcode( masm, 0xD9 );  // FLD (i.e., push it)
  921       emit_d8( masm, 0xC0-1+Matcher::_regEncode[src_lo] );
  922 #ifndef PRODUCT
  923     } else if( !do_size ) {
  924       if( size != 0 ) st->print("\n\t");
  925       st->print("FLD    %s",Matcher::regName[src_lo]);
  926 #endif
  927     }
  928     size += 2;
  929   }
  930 
  931   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  932   const char *op_str;
  933   int op;
  934   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  935     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  936     op = 0xDD;
  937   } else {                   // 32-bit store
  938     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  939     op = 0xD9;
  940     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  941   }
  942 
  943   return impl_helper(masm,do_size,false,offset,st_op,op,op_str,size, st);
  944 }
  945 
  946 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  947 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo,
  948                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  949 
  950 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load,
  951                             int stack_offset, int reg, uint ireg, outputStream* st);
  952 
  953 static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset,
  954                                      int dst_offset, uint ireg, outputStream* st) {
  955   if (masm) {
  956     switch (ireg) {
  957     case Op_VecS:
  958       __ pushl(Address(rsp, src_offset));
  959       __ popl (Address(rsp, dst_offset));
  960       break;
  961     case Op_VecD:
  962       __ pushl(Address(rsp, src_offset));
  963       __ popl (Address(rsp, dst_offset));
  964       __ pushl(Address(rsp, src_offset+4));
  965       __ popl (Address(rsp, dst_offset+4));
  966       break;
  967     case Op_VecX:
  968       __ movdqu(Address(rsp, -16), xmm0);
  969       __ movdqu(xmm0, Address(rsp, src_offset));
  970       __ movdqu(Address(rsp, dst_offset), xmm0);
  971       __ movdqu(xmm0, Address(rsp, -16));
  972       break;
  973     case Op_VecY:
  974       __ vmovdqu(Address(rsp, -32), xmm0);
  975       __ vmovdqu(xmm0, Address(rsp, src_offset));
  976       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  977       __ vmovdqu(xmm0, Address(rsp, -32));
  978       break;
  979     case Op_VecZ:
  980       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  981       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  982       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  983       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  984       break;
  985     default:
  986       ShouldNotReachHere();
  987     }
  988 #ifndef PRODUCT
  989   } else {
  990     switch (ireg) {
  991     case Op_VecS:
  992       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  993                 "popl    [rsp + #%d]",
  994                 src_offset, dst_offset);
  995       break;
  996     case Op_VecD:
  997       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
  998                 "popq    [rsp + #%d]\n\t"
  999                 "pushl   [rsp + #%d]\n\t"
 1000                 "popq    [rsp + #%d]",
 1001                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1002       break;
 1003      case Op_VecX:
 1004       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1005                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1006                 "movdqu  [rsp + #%d], xmm0\n\t"
 1007                 "movdqu  xmm0, [rsp - #16]",
 1008                 src_offset, dst_offset);
 1009       break;
 1010     case Op_VecY:
 1011       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1012                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1013                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1014                 "vmovdqu xmm0, [rsp - #32]",
 1015                 src_offset, dst_offset);
 1016       break;
 1017     case Op_VecZ:
 1018       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1019                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1020                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1021                 "vmovdqu xmm0, [rsp - #64]",
 1022                 src_offset, dst_offset);
 1023       break;
 1024     default:
 1025       ShouldNotReachHere();
 1026     }
 1027 #endif
 1028   }
 1029 }
 1030 
// Shared worker behind format() and emit() for a spill copy.
//
// The source location is the register (pair) assigned to in(1), the
// destination is the register (pair) assigned to this node.  Each half is
// classified with rc_class() and the matching move is produced:
//
//   masm != nullptr  - the move is emitted through masm
//   masm == nullptr  - the move is printed to 'st' (non-PRODUCT builds only;
//                      most paths additionally require !do_size)
//
// Return value: for the general-register, x87 and scalar XMM paths, the byte
// count accumulated by the helpers; the vector and opmask (K register) paths
// return 0.  A self copy returns 0 without doing anything.
//
// Control-flow note: the three integer cases for the first word (reg-reg,
// store, load) only update 'size' and fall through to the "second bits"
// section at the bottom; every other case returns directly.
uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // Vector values that are not vector masks are handled entirely by the
  // vec_* helpers; this path always returns 0.
  if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(masm, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(masm, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    // When the source's second slot is the destination's first slot, copying
    // the first word first would overwrite the not-yet-copied second word, so
    // the second word is moved before the first.
    if( src_second == dst_first ) { // overlapping stack copy ranges
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // The next three integer cases do not return: they record the first-word
  // move in 'size' and rely on the second-word handling at the end.
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( masm ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (masm, 0xD9 );           // FLD    ST(i)
        emit_d8      (masm, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
        emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
     } else {
        emit_opcode  (masm, 0xDD );           // FST    ST(i)
        emit_d8      (masm, 0xD0+Matcher::_regEncode[dst_first]-1 );
     }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
#endif
    }
    // FLD + FSTP is two 2-byte instructions; a lone FST is one.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( masm ) {
      masm->set_inst_mark();
      emit_opcode  (masm, op );
      encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (masm, 0xDD );           // FSTP   ST(i)
      emit_d8      (masm, 0xD8+Matcher::_regEncode[dst_first] );
      masm->clear_inst_mark();
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // Load from memory (3 bytes + displacement) followed by a 2-byte FSTP.
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  // The value is bounced through 8 bytes of temporary stack space: reserve
  // the space, store the x87 value at [ESP], load it into the XMM register,
  // then release the space again.
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( masm ) {
      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(masm, 0x1, ESP_enc, 0x04);
      emit_rm(masm, 0x0, 0x04, ESP_enc);
      emit_d8(masm,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st);

    if( masm ) {
      emit_opcode(masm,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(masm, 0x1, ESP_enc, 0x04);
      emit_rm(masm, 0x0, 0x04, ESP_enc);
      emit_d8(masm,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.
  // All opmask paths below return 0 rather than a byte count, and the
  // register <-> general-register forms are not implemented.
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(src_first);
    if (masm != nullptr) {
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
    } else {
      st->print("KMOV    %s, [ESP + %d]", Matcher::regName[dst_first], offset);
#endif
    }
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    int offset = ra_->reg2offset(dst_first);
    if (masm != nullptr) {
      __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV    [ESP + %d], %s", offset, Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    if (masm != nullptr) {
      __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
    } else {
      st->print("KMOV    %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]);
#endif
    }
    return 0;
  }

  // Only the integer first-word cases can reach this point; each of them
  // leaves a non-zero size behind.
  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(masm,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}
 1315 
 1316 #ifndef PRODUCT
 1317 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1318   implementation( nullptr, ra_, false, st );
 1319 }
 1320 #endif
 1321 
 1322 void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1323   implementation( masm, ra_, false, nullptr );
 1324 }
 1325 
 1326 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1327   return MachNode::size(ra_);
 1328 }
 1329 
 1330 
 1331 //=============================================================================
 1332 #ifndef PRODUCT
 1333 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1334   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1335   int reg = ra_->get_reg_first(this);
 1336   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1337 }
 1338 #endif
 1339 
 1340 void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1341   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1342   int reg = ra_->get_encode(this);
 1343   if( offset >= 128 ) {
 1344     emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
 1345     emit_rm(masm, 0x2, reg, 0x04);
 1346     emit_rm(masm, 0x0, 0x04, ESP_enc);
 1347     emit_d32(masm, offset);
 1348   }
 1349   else {
 1350     emit_opcode(masm, 0x8D);      // LEA  reg,[SP+offset]
 1351     emit_rm(masm, 0x1, reg, 0x04);
 1352     emit_rm(masm, 0x0, 0x04, ESP_enc);
 1353     emit_d8(masm, offset);
 1354   }
 1355 }
 1356 
 1357 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1358   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1359   if( offset >= 128 ) {
 1360     return 7;
 1361   }
 1362   else {
 1363     return 4;
 1364   }
 1365 }
 1366 
 1367 //=============================================================================
 1368 #ifndef PRODUCT
 1369 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1370   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1371   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1372   st->print_cr("\tNOP");
 1373   st->print_cr("\tNOP");
 1374   if( !OptoBreakpoint )
 1375     st->print_cr("\tNOP");
 1376 }
 1377 #endif
 1378 
 1379 void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
 1380   __ ic_check(CodeEntryAlignment);
 1381 }
 1382 
 1383 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
 1384   return MachNode::size(ra_); // too many variables; just compute it
 1385                               // the hard way
 1386 }
 1387 
 1388 
 1389 //=============================================================================
 1390 
 1391 // Vector calling convention not supported.
 1392 bool Matcher::supports_vector_calling_convention() {
 1393   return false;
 1394 }
 1395 
 1396 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1397   Unimplemented();
 1398   return OptoRegPair(0, 0);
 1399 }
 1400 
 1401 // Is this branch offset short enough that a short branch can be used?
 1402 //
 1403 // NOTE: If the platform does not provide any short branch variants, then
 1404 //       this method should return false for offset 0.
 1405 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1406   // The passed offset is relative to address of the branch.
 1407   // On 86 a branch displacement is calculated relative to address
 1408   // of a next instruction.
 1409   offset -= br_size;
 1410 
 1411   // the short version of jmpConUCF2 contains multiple branches,
 1412   // making the reach slightly less
 1413   if (rule == jmpConUCF2_rule)
 1414     return (-126 <= offset && offset <= 125);
 1415   return (-128 <= offset && offset <= 127);
 1416 }
 1417 
 1418 // Return whether or not this register is ever used as an argument.  This
 1419 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1420 // Registers not mentioned will be killed by the VM call in the trampoline, and
 1421 // arguments in those registers not be available to the callee.
 1422 bool Matcher::can_be_java_arg( int reg ) {
 1423   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1424   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1425   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1426   return false;
 1427 }
 1428 
 1429 bool Matcher::is_spillable_arg( int reg ) {
 1430   return can_be_java_arg(reg);
 1431 }
 1432 
 1433 uint Matcher::int_pressure_limit()
 1434 {
 1435   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1436 }
 1437 
 1438 uint Matcher::float_pressure_limit()
 1439 {
 1440   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1441 }
 1442 
 1443 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
 1444   // Use hardware integer DIV instruction when
 1445   // it is faster than a code which use multiply.
 1446   // Only when constant divisor fits into 32 bit
 1447   // (min_jint is excluded to get only correct
 1448   // positive 32 bit values from negative).
 1449   return VM_Version::has_fast_idiv() &&
 1450          (divisor == (int)divisor && divisor != min_jint);
 1451 }
 1452 
 1453 // Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  // The quotient projection uses the EAX register mask.
  return EAX_REG_mask();
}
 1457 
 1458 // Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  // The remainder projection uses the EDX register mask.
  return EDX_REG_mask();
}
 1462 
 1463 // Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  // Not expected to be called here: ShouldNotReachHere() fires first, and
  // the empty mask only satisfies the return type.
  ShouldNotReachHere();
  return RegMask();
}
 1468 
 1469 // Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  // Not expected to be called here: ShouldNotReachHere() fires first, and
  // the empty mask only satisfies the return type.
  ShouldNotReachHere();
  return RegMask();
}
 1474 
// Register mask used to save SP across a method handle invoke.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  // This description answers with the NO_REG mask.
  return NO_REG_mask();
}
 1478 
 1479 // Returns true if the high 32 bits of the value is known to be zero.
 1480 bool is_operand_hi32_zero(Node* n) {
 1481   int opc = n->Opcode();
 1482   if (opc == Op_AndL) {
 1483     Node* o2 = n->in(2);
 1484     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1485       return true;
 1486     }
 1487   }
 1488   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1489     return true;
 1490   }
 1491   return false;
 1492 }
 1493 
 1494 %}
 1495 
 1496 //----------ENCODING BLOCK-----------------------------------------------------
 1497 // This block specifies the encoding classes used by the compiler to output
 1498 // byte streams.  Encoding classes generate functions which are called by
 1499 // Machine Instruction Nodes in order to generate the bit encoding of the
 1500 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
 1502 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1503 // operand to generate a function which returns its register number when
 1504 // queried.   CONST_INTER causes an operand to generate a function which
 1505 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1506 // operand to generate four functions which return the Base Register, the
 1507 // Index Register, the Scale Value, and the Offset Value of the operand when
 1508 // queried.  COND_INTER causes an operand to generate six functions which
 1509 // return the encoding code (ie - encoding bits for the instruction)
 1510 // associated with each basic boolean condition for a conditional instruction.
 1511 // Instructions specify two basic values for encoding.  They use the
 1512 // ins_encode keyword to specify their encoding class (which must be one of
 1513 // the class names specified in the encoding block), and they use the
 1514 // opcode keyword to specify, in order, their primary, secondary, and
 1515 // tertiary opcode.  Only the opcode sections which a particular instruction
 1516 // needs for encoding need to be specified.
 1517 encode %{
 1518   // Build emit functions for each basic byte or larger field in the intel
 1519   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1520   // code in the enc_class source block.  Emit functions will live in the
 1521   // main source block for now.  In future, we can generalize this by
 1522   // adding a syntax that specifies the sizes of fields in an order,
 1523   // so that the adlc can build the emit functions automagically
 1524 
  // Set instruction mark in MacroAssembler. This is used only in
  // instructions that emit bytes directly to the CodeBuffer wrapped
  // in the MacroAssembler. Should go away once all "instruct" are
  // patched to emit bytes only using methods in MacroAssembler.
  enc_class SetInstMark %{
    // Record the instruction mark in the MacroAssembler.
    __ set_inst_mark();
  %}
 1532 
  enc_class ClearInstMark %{
    // Drop the instruction mark previously recorded in the MacroAssembler.
    __ clear_inst_mark();
  %}
 1536 
  // Emit the primary opcode byte of the instruction using this encoding.
  enc_class OpcP %{
    emit_opcode(masm, $primary);
  %}
 1541 
  // Emit the secondary opcode byte of the instruction using this encoding.
  enc_class OpcS %{
    emit_opcode(masm, $secondary);
  %}
 1546 
  // Emit an opcode byte given directly as the constant operand d8.
  enc_class Opcode(immI d8) %{
    emit_opcode(masm, $d8$$constant);
  %}
 1551 
  // Emit the 0x66 prefix byte (the same byte other encodings in this block
  // emit to select 16-bit mode).
  enc_class SizePrefix %{
    emit_opcode(masm,0x66);
  %}
 1555 
  // Emit only the r/m byte for a register-register form (mod field 0x3),
  // naming dst and src.  The opcode is emitted by another encoding.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}
 1559 
  // Emit the opcode byte passed as a constant operand, followed by the
  // register-register r/m byte (mod field 0x3) naming dst and src.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(masm,$opcode$$constant);
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}
 1564 
 1565   enc_class mov_r32_imm0( rRegI dst ) %{
 1566     emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1567     emit_d32   ( masm, 0x0  );             //                         imm32==0x0
 1568   %}
 1569 
  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : eax: dividend                         min_int
    //         ecx: divisor                          -1
    //
    // output: eax: quotient  (= eax idiv ecx)       min_int
    //         edx: remainder (= eax irem ecx)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,0FFh   (8-bit immediate, i.e. -1)
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        ecx
    //                  done:
    //
    // Both branch displacements are hard-coded.  0x0B skips the xor/cmp/je
    // bytes; 0x03 skips the cdq plus the two-byte idiv, which is not emitted
    // here and must directly follow this encoding.
    emit_opcode(masm,0x81); emit_d8(masm,0xF8);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x80);                     // cmp eax,80000000h
    emit_opcode(masm,0x0F); emit_d8(masm,0x85);
    emit_opcode(masm,0x0B); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // jne normal_case
    emit_opcode(masm,0x33); emit_d8(masm,0xD2);                     // xor edx,edx
    emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF); // cmp ecx,0FFh
    emit_opcode(masm,0x0F); emit_d8(masm,0x84);
    emit_opcode(masm,0x03); emit_d8(masm,0x00);
    emit_opcode(masm,0x00); emit_d8(masm,0x00);                     // je done
    // normal_case:
    emit_opcode(masm,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // done:
  %}
 1610 
  // Dense encoding for older common ops: the register number is added to
  // the opcode constant and the sum is emitted as a single opcode byte.
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(masm, $opcode$$constant + $reg$$reg);
  %}
 1615 
 1616 
 1617   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
 1618   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1619     // Check for 8-bit immediate, and set sign extend bit in opcode
 1620     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1621       emit_opcode(masm, $primary | 0x02);
 1622     }
 1623     else {                          // If 32-bit immediate
 1624       emit_opcode(masm, $primary);
 1625     }
 1626   %}
 1627 
 1628   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1629     // Emit primary opcode and set sign-extend bit
 1630     // Check for 8-bit immediate, and set sign extend bit in opcode
 1631     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1632       emit_opcode(masm, $primary | 0x02);    }
 1633     else {                          // If 32-bit immediate
 1634       emit_opcode(masm, $primary);
 1635     }
 1636     // Emit r/m byte with secondary opcode, after primary opcode.
 1637     emit_rm(masm, 0x3, $secondary, $dst$$reg);
 1638   %}
 1639 
 1640   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1641     // Check for 8-bit immediate, and set sign extend bit in opcode
 1642     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1643       $$$emit8$imm$$constant;
 1644     }
 1645     else {                          // If 32-bit immediate
 1646       // Output immediate
 1647       $$$emit32$imm$$constant;
 1648     }
 1649   %}
 1650 
 1651   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1652     // Emit primary opcode and set sign-extend bit
 1653     // Check for 8-bit immediate, and set sign extend bit in opcode
 1654     int con = (int)$imm$$constant; // Throw away top bits
 1655     emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1656     // Emit r/m byte with secondary opcode, after primary opcode.
 1657     emit_rm(masm, 0x3, $secondary, $dst$$reg);
 1658     if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
 1659     else                               emit_d32(masm,con);
 1660   %}
 1661 
 1662   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1663     // Emit primary opcode and set sign-extend bit
 1664     // Check for 8-bit immediate, and set sign extend bit in opcode
 1665     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1666     emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1667     // Emit r/m byte with tertiary opcode, after primary opcode.
 1668     emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1669     if ((con >= -128) && (con <= 127)) emit_d8 (masm,con);
 1670     else                               emit_d32(masm,con);
 1671   %}
 1672 
  // Emit the secondary opcode combined with the dst register through
  // emit_cc (used for BSWAP).
  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(masm, $secondary, $dst$$reg );
  %}
 1676 
 1677   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1678     int destlo = $dst$$reg;
 1679     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1680     // bswap lo
 1681     emit_opcode(masm, 0x0F);
 1682     emit_cc(masm, 0xC8, destlo);
 1683     // bswap hi
 1684     emit_opcode(masm, 0x0F);
 1685     emit_cc(masm, 0xC8, desthi);
 1686     // xchg lo and hi
 1687     emit_opcode(masm, 0x87);
 1688     emit_rm(masm, 0x3, destlo, desthi);
 1689   %}
 1690 
  // Emit an r/m byte (mod field 0x3) whose reg field carries the secondary
  // opcode and whose r/m field names the operand register.
  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(masm, 0x3, $secondary, $div$$reg );
  %}
 1694 
  // Emit the primary opcode byte, then the secondary opcode combined with
  // the condition code of cop.
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(masm, $secondary, $cop$$cmpcode);
  %}
 1699 
 1700   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1701     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1702     emit_d8(masm, op >> 8 );
 1703     emit_d8(masm, op & 255);
 1704   %}
 1705 
 1706   // emulate a CMOV with a conditional branch around a MOV
 1707   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1708     // Invert sense of branch from sense of CMOV
 1709     emit_cc( masm, 0x70, ($cop$$cmpcode^1) );
 1710     emit_d8( masm, $brOffs$$constant );
 1711   %}
 1712 
 1713   enc_class enc_PartialSubtypeCheck( ) %{
 1714     Register Redi = as_Register(EDI_enc); // result register
 1715     Register Reax = as_Register(EAX_enc); // super class
 1716     Register Recx = as_Register(ECX_enc); // killed
 1717     Register Resi = as_Register(ESI_enc); // sub class
 1718     Label miss;
 1719 
 1720     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1721                                      nullptr, &miss,
 1722                                      /*set_cond_codes:*/ true);
 1723     if ($primary) {
 1724       __ xorptr(Redi, Redi);
 1725     }
 1726     __ bind(miss);
 1727   %}
 1728 
 1729   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1730     int start = __ offset();
 1731     if (UseSSE >= 2) {
 1732       if (VerifyFPU) {
 1733         __ verify_FPU(0, "must be empty in SSE2+ mode");
 1734       }
 1735     } else {
 1736       // External c_calling_convention expects the FPU stack to be 'clean'.
 1737       // Compiled code leaves it dirty.  Do cleanup now.
 1738       __ empty_FPU_stack();
 1739     }
 1740     if (sizeof_FFree_Float_Stack_All == -1) {
 1741       sizeof_FFree_Float_Stack_All = __ offset() - start;
 1742     } else {
 1743       assert(__ offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1744     }
 1745   %}
 1746 
  // With VerifyFPU enabled, emit an FPU verification after returning from
  // a runtime leaf call; otherwise emit nothing.
  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      __ verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}
 1752 
 1753   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1754     // This is the instruction starting address for relocation info.
 1755     __ set_inst_mark();
 1756     $$$emit8$primary;
 1757     // CALL directly to the runtime
 1758     emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
 1759                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1760     __ clear_inst_mark();
 1761     __ post_call_nop();
 1762 
 1763     if (UseSSE >= 2) {
 1764       BasicType rt = tf()->return_type();
 1765 
 1766       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1767         // A C runtime call where the return value is unused.  In SSE2+
 1768         // mode the result needs to be removed from the FPU stack.  It's
 1769         // likely that this function call could be removed by the
 1770         // optimizer if the C function is a pure function.
 1771         __ ffree(0);
 1772       } else if (rt == T_FLOAT) {
 1773         __ lea(rsp, Address(rsp, -4));
 1774         __ fstp_s(Address(rsp, 0));
 1775         __ movflt(xmm0, Address(rsp, 0));
 1776         __ lea(rsp, Address(rsp,  4));
 1777       } else if (rt == T_DOUBLE) {
 1778         __ lea(rsp, Address(rsp, -8));
 1779         __ fstp_d(Address(rsp, 0));
 1780         __ movdbl(xmm0, Address(rsp, 0));
 1781         __ lea(rsp, Address(rsp,  8));
 1782       }
 1783     }
 1784   %}
 1785 
  // State resets emitted in front of a call.  The amount of code emitted
  // must equal pre_call_resets_size(); this is asserted at the end.
  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = __ offset());
    if (ra_->C->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
    // off0/off1 exist only in debug builds and only feed the size check.
    debug_only(int off1 = __ offset());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}
 1798 
  // After a call: when the current compilation runs in 24-bit FP mode,
  // load the 24-bit FPU control word again.
  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    }
  %}
 1805 
  // JAVA STATIC CALL.  Emits the call opcode and a relocated 32-bit
  // displacement.  Without a resolved method this is a plain runtime call;
  // with one, a static or optimized-virtual relocation is used and a stub
  // to the interpreter is either shared or emitted.
  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    __ set_inst_mark();
    $$$emit8$primary;

    if (!_method) {
      // No method: runtime call relocation.
      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
      __ clear_inst_mark();
      __ post_call_nop();
    } else {
      int method_index = resolved_method_index(masm);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4),
                     rspec, RELOC_DISP32);
      __ post_call_nop();
      // The mark set above is read back here, before it is cleared below.
      address mark = __ inst_mark();
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        __ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off());
        __ clear_inst_mark();
      } else {
        // Emit stubs for static call.
        address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark);
        __ clear_inst_mark();
        if (stub == nullptr) {
          // No stub could be emitted: record the failure and stop encoding.
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
  %}
 1842 
  // JAVA DYNAMIC CALL: emitted through ic_call() with the resolved method
  // index, followed by the post-call nop.
  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    __ ic_call((address)$meth$$method, resolved_method_index(masm));
    __ post_call_nop();
  %}
 1847 
  // JAVA COMPILED CALL: indirect call through a field of the Method held
  // in EAX, addressed with an 8-bit displacement.
  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    // The encoding below only has room for a one-byte displacement.
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
    __ set_inst_mark();
    $$$emit8$primary;
    emit_rm(masm, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(masm, disp);             // Displacement
    __ clear_inst_mark();
    __ post_call_nop();
  %}
 1860 
  // Shift by an 8-bit immediate: primary opcode, r/m byte carrying the
  // secondary opcode and dst, then the shift count.
  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(masm, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}
 1866 
  // Load a 32-bit immediate into dst: opcode 0xB8 + register number,
  // followed by the 32-bit constant.
  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(masm, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}
 1873 
  // Load a 32-bit immediate into dst like LdImmI, except that the base
  // opcode comes from the instruction's primary opcode.
  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(masm, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}
 1880 
 1881   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1882     // Load immediate does not have a zero or sign extended version
 1883     // for 8-bit immediates
 1884     int dst_enc = $dst$$reg;
 1885     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1886     if (src_con == 0) {
 1887       // xor dst, dst
 1888       emit_opcode(masm, 0x33);
 1889       emit_rm(masm, 0x3, dst_enc, dst_enc);
 1890     } else {
 1891       emit_opcode(masm, $primary + dst_enc);
 1892       emit_d32(masm, src_con);
 1893     }
 1894   %}
 1895 
 1896   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1897     // Load immediate does not have a zero or sign extended version
 1898     // for 8-bit immediates
 1899     int dst_enc = $dst$$reg + 2;
 1900     int src_con = ((julong)($src$$constant)) >> 32;
 1901     if (src_con == 0) {
 1902       // xor dst, dst
 1903       emit_opcode(masm, 0x33);
 1904       emit_rm(masm, 0x3, dst_enc, dst_enc);
 1905     } else {
 1906       emit_opcode(masm, $primary + dst_enc);
 1907       emit_d32(masm, src_con);
 1908     }
 1909   %}
 1910 
 1911 
  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  // The work is delegated to encode_Copy().
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( masm, $dst$$reg, $src$$reg );
  %}
 1916 
  // Copy into dst from the register that src reports, i.e. the low
  // register of the long pair; delegated to encode_Copy().
  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( masm, $dst$$reg, $src$$reg );
  %}
 1920 
  // Emit only the r/m byte for a register-register form (mod field 0x3).
  // NOTE(review): an enc_class with this same name and body is already
  // defined earlier in this encode block; this is a repeat of it.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}
 1924 
  // Low half of a long register-register operation: primary opcode, then
  // the r/m byte naming the low registers of dst and src.
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}
 1929 
  // High half of a long register-register operation: secondary opcode,
  // then the r/m byte naming the high registers of dst and src.
  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}
 1934 
  // Like RegReg_Lo but without an opcode: only the r/m byte naming the low
  // registers of dst and src.
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
  %}
 1938 
  // Like RegReg_Hi but without an opcode: only the r/m byte naming the
  // high registers of dst and src.
  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
  %}
 1942 
  // r/m byte pairing the int register dst with the high register of the
  // long pair src.  Note the parameter order: src comes first.
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}
 1946 
  // Con32(storeImmI): output the constant as a 32-bit immediate.
  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}
 1951 
 1952   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1953     // Output Float immediate bits
 1954     jfloat jf = $src$$constant;
 1955     int    jf_as_bits = jint_cast( jf );
 1956     emit_d32(masm, jf_as_bits);
 1957   %}
 1958 
 1959   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1960     // Output Float immediate bits
 1961     jfloat jf = $src$$constant;
 1962     int    jf_as_bits = jint_cast( jf );
 1963     emit_d32(masm, jf_as_bits);
 1964   %}
 1965 
  // Con16(storeImmI): output the constant as a 16-bit immediate.
  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}
 1970 
  // Output the constant as 32 bits by calling emit_d32 directly.
  enc_class Con_d32(immI src) %{
    emit_d32(masm,$src$$constant);
  %}
 1974 
  // Output an immediate memory reference: an r/m byte with mod 0x00 and
  // r/m 0x05, carrying t1 in the reg field, followed by a 32-bit zero.
  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(masm, 0x00, $t1$$reg, 0x05 );
    emit_d32(masm, 0x00);
  %}
 1980 
  // Emit the LOCK prefix byte (0xF0).
  enc_class lock_prefix( ) %{
    emit_opcode(masm,0xF0);         // [Lock]
  %}
 1984 
  // Cmp-xchg long value.
  // Note: we need to swap EBX and ECX before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       ECX as the high order word of the new value to store but
  //       our register encoding uses EBX.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  ebx,ecx
    emit_opcode(masm,0x87);
    emit_opcode(masm,0xD9);
    // [Lock]
    emit_opcode(masm,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xC7);
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
    // XCHG  ebx,ecx (swap back)
    emit_opcode(masm,0x87);
    emit_opcode(masm,0xD9);
  %}
 2005 
  // Locked compare-and-exchange on the memory addressed by mem_ptr:
  // LOCK prefix, opcode 0F B1, then an r/m byte with mod 0x0.
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(masm,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xB1);
    // The reg field is hard-coded to 1.
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  %}
 2015 
  // Byte variant of the locked compare-and-exchange: LOCK prefix, opcode
  // 0F B0, then an r/m byte with mod 0x0.
  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(masm,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xB0);
    // The reg field is hard-coded to 1.
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  %}
 2025 
  // 16-bit variant of the locked compare-and-exchange: LOCK prefix, the
  // 0x66 prefix, opcode 0F B1, then an r/m byte with mod 0x0.
  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(masm,0xF0);

    // 16-bit mode
    emit_opcode(masm, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0xB1);
    // The reg field is hard-coded to 1.
    emit_rm( masm, 0x0, 1, $mem_ptr$$reg );
  %}
 2038 
 2039   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2040     int res_encoding = $res$$reg;
 2041 
 2042     // MOV  res,0
 2043     emit_opcode( masm, 0xB8 + res_encoding);
 2044     emit_d32( masm, 0 );
 2045     // JNE,s  fail
 2046     emit_opcode(masm,0x75);
 2047     emit_d8(masm, 5 );
 2048     // MOV  res,1
 2049     emit_opcode( masm, 0xB8 + res_encoding);
 2050     emit_d32( masm, 1 );
 2051     // fail:
 2052   %}
 2053 
 2054   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2055     int reg_encoding = $ereg$$reg;
 2056     int base  = $mem$$base;
 2057     int index = $mem$$index;
 2058     int scale = $mem$$scale;
 2059     int displace = $mem$$disp;
 2060     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2061     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2062   %}
 2063 
 2064   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2065     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2066     int base  = $mem$$base;
 2067     int index = $mem$$index;
 2068     int scale = $mem$$scale;
 2069     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2070     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2071     encode_RegMem(masm, reg_encoding, base, index, scale, displace, relocInfo::none);
 2072   %}
 2073 
 2074   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2075     int r1, r2;
 2076     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2077     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2078     emit_opcode(masm,0x0F);
 2079     emit_opcode(masm,$tertiary);
 2080     emit_rm(masm, 0x3, r1, r2);
 2081     emit_d8(masm,$cnt$$constant);
 2082     emit_d8(masm,$primary);
 2083     emit_rm(masm, 0x3, $secondary, r1);
 2084     emit_d8(masm,$cnt$$constant);
 2085   %}
 2086 
 2087   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2088     emit_opcode( masm, 0x8B ); // Move
 2089     emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2090     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2091       emit_d8(masm,$primary);
 2092       emit_rm(masm, 0x3, $secondary, $dst$$reg);
 2093       emit_d8(masm,$cnt$$constant-32);
 2094     }
 2095     emit_d8(masm,$primary);
 2096     emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2097     emit_d8(masm,31);
 2098   %}
 2099 
 2100   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2101     int r1, r2;
 2102     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2103     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2104 
 2105     emit_opcode( masm, 0x8B ); // Move r1,r2
 2106     emit_rm(masm, 0x3, r1, r2);
 2107     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2108       emit_opcode(masm,$primary);
 2109       emit_rm(masm, 0x3, $secondary, r1);
 2110       emit_d8(masm,$cnt$$constant-32);
 2111     }
 2112     emit_opcode(masm,0x33);  // XOR r2,r2
 2113     emit_rm(masm, 0x3, r2, r2);
 2114   %}
 2115 
 2116   // Clone of RegMem but accepts an extra parameter to access each
 2117   // half of a double in memory; it never needs relocation info.
 2118   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2119     emit_opcode(masm,$opcode$$constant);
 2120     int reg_encoding = $rm_reg$$reg;
 2121     int base     = $mem$$base;
 2122     int index    = $mem$$index;
 2123     int scale    = $mem$$scale;
 2124     int displace = $mem$$disp + $disp_for_half$$constant;
 2125     relocInfo::relocType disp_reloc = relocInfo::none;
 2126     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2127   %}
 2128 
 2129   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2130   //
 2131   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2132   // and it never needs relocation information.
 2133   // Frequently used to move data between FPU's Stack Top and memory.
 2134   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2135     int rm_byte_opcode = $rm_opcode$$constant;
 2136     int base     = $mem$$base;
 2137     int index    = $mem$$index;
 2138     int scale    = $mem$$scale;
 2139     int displace = $mem$$disp;
 2140     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2141     encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2142   %}
 2143 
 2144   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2145     int rm_byte_opcode = $rm_opcode$$constant;
 2146     int base     = $mem$$base;
 2147     int index    = $mem$$index;
 2148     int scale    = $mem$$scale;
 2149     int displace = $mem$$disp;
 2150     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2151     encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2152   %}
 2153 
 2154   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2155     int reg_encoding = $dst$$reg;
 2156     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2157     int index        = 0x04;            // 0x04 indicates no index
 2158     int scale        = 0x00;            // 0x00 indicates no scale
 2159     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2160     relocInfo::relocType disp_reloc = relocInfo::none;
 2161     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2162   %}
 2163 
 2164   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2165     // Compare dst,src
 2166     emit_opcode(masm,0x3B);
 2167     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 2168     // jmp dst < src around move
 2169     emit_opcode(masm,0x7C);
 2170     emit_d8(masm,2);
 2171     // move dst,src
 2172     emit_opcode(masm,0x8B);
 2173     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 2174   %}
 2175 
 2176   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2177     // Compare dst,src
 2178     emit_opcode(masm,0x3B);
 2179     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 2180     // jmp dst > src around move
 2181     emit_opcode(masm,0x7F);
 2182     emit_d8(masm,2);
 2183     // move dst,src
 2184     emit_opcode(masm,0x8B);
 2185     emit_rm(masm, 0x3, $dst$$reg, $src$$reg);
 2186   %}
 2187 
 2188   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2189     // If src is FPR1, we can just FST to store it.
 2190     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2191     int reg_encoding = 0x2; // Just store
 2192     int base  = $mem$$base;
 2193     int index = $mem$$index;
 2194     int scale = $mem$$scale;
 2195     int displace = $mem$$disp;
 2196     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2197     if( $src$$reg != FPR1L_enc ) {
 2198       reg_encoding = 0x3;  // Store & pop
 2199       emit_opcode( masm, 0xD9 ); // FLD (i.e., push it)
 2200       emit_d8( masm, 0xC0-1+$src$$reg );
 2201     }
 2202     __ set_inst_mark();       // Mark start of opcode for reloc info in mem operand
 2203     emit_opcode(masm,$primary);
 2204     encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc);
 2205     __ clear_inst_mark();
 2206   %}
 2207 
  // Negate dst: opcode 0xF7 with 0x03 in the reg/opcode field of the
  // r/m byte.
  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(masm,0xF7);
    emit_rm(masm, 0x3, 0x03, $dst$$reg );
  %}
 2213 
  // Set dst on the "less than" condition: opcode bytes 0F 9C followed by
  // an r/m byte with 0x4 in the reg field.
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(masm,0x0F);
    emit_opcode(masm,0x9C);
    emit_rm( masm, 0x3, 0x4, $dst$$reg );
  %}
 2220 
 2221   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2222     int tmpReg = $tmp$$reg;
 2223 
 2224     // SUB $p,$q
 2225     emit_opcode(masm,0x2B);
 2226     emit_rm(masm, 0x3, $p$$reg, $q$$reg);
 2227     // SBB $tmp,$tmp
 2228     emit_opcode(masm,0x1B);
 2229     emit_rm(masm, 0x3, tmpReg, tmpReg);
 2230     // AND $tmp,$y
 2231     emit_opcode(masm,0x23);
 2232     emit_rm(masm, 0x3, tmpReg, $y$$reg);
 2233     // ADD $p,$tmp
 2234     emit_opcode(masm,0x03);
 2235     emit_rm(masm, 0x3, $p$$reg, tmpReg);
 2236   %}
 2237 
 2238   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2239     // TEST shift,32
 2240     emit_opcode(masm,0xF7);
 2241     emit_rm(masm, 0x3, 0, ECX_enc);
 2242     emit_d32(masm,0x20);
 2243     // JEQ,s small
 2244     emit_opcode(masm, 0x74);
 2245     emit_d8(masm, 0x04);
 2246     // MOV    $dst.hi,$dst.lo
 2247     emit_opcode( masm, 0x8B );
 2248     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2249     // CLR    $dst.lo
 2250     emit_opcode(masm, 0x33);
 2251     emit_rm(masm, 0x3, $dst$$reg, $dst$$reg);
 2252 // small:
 2253     // SHLD   $dst.hi,$dst.lo,$shift
 2254     emit_opcode(masm,0x0F);
 2255     emit_opcode(masm,0xA5);
 2256     emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2257     // SHL    $dst.lo,$shift"
 2258     emit_opcode(masm,0xD3);
 2259     emit_rm(masm, 0x3, 0x4, $dst$$reg );
 2260   %}
 2261 
 2262   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2263     // TEST shift,32
 2264     emit_opcode(masm,0xF7);
 2265     emit_rm(masm, 0x3, 0, ECX_enc);
 2266     emit_d32(masm,0x20);
 2267     // JEQ,s small
 2268     emit_opcode(masm, 0x74);
 2269     emit_d8(masm, 0x04);
 2270     // MOV    $dst.lo,$dst.hi
 2271     emit_opcode( masm, 0x8B );
 2272     emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2273     // CLR    $dst.hi
 2274     emit_opcode(masm, 0x33);
 2275     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2276 // small:
 2277     // SHRD   $dst.lo,$dst.hi,$shift
 2278     emit_opcode(masm,0x0F);
 2279     emit_opcode(masm,0xAD);
 2280     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
 2281     // SHR    $dst.hi,$shift"
 2282     emit_opcode(masm,0xD3);
 2283     emit_rm(masm, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2284   %}
 2285 
 2286   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2287     // TEST shift,32
 2288     emit_opcode(masm,0xF7);
 2289     emit_rm(masm, 0x3, 0, ECX_enc);
 2290     emit_d32(masm,0x20);
 2291     // JEQ,s small
 2292     emit_opcode(masm, 0x74);
 2293     emit_d8(masm, 0x05);
 2294     // MOV    $dst.lo,$dst.hi
 2295     emit_opcode( masm, 0x8B );
 2296     emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2297     // SAR    $dst.hi,31
 2298     emit_opcode(masm, 0xC1);
 2299     emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2300     emit_d8(masm, 0x1F );
 2301 // small:
 2302     // SHRD   $dst.lo,$dst.hi,$shift
 2303     emit_opcode(masm,0x0F);
 2304     emit_opcode(masm,0xAD);
 2305     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
 2306     // SAR    $dst.hi,$shift"
 2307     emit_opcode(masm,0xD3);
 2308     emit_rm(masm, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2309   %}
 2310 
 2311 
 2312   // ----------------- Encodings for floating point unit -----------------
 2313   // May leave result in FPU-TOS or FPU reg depending on opcodes
  // Generic two-byte x87 register form: the instruction's $primary opcode
  // byte followed by a register-direct ModRM byte built from $secondary
  // (reg field) and $src (r/m field).
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(masm, 0x3, $secondary, $src$$reg );
  %}
 2318 
  // Pop argument in FPR0 with FSTP ST(0)
  // (bytes DD D8: store the top of stack onto itself and pop, discarding it).
  enc_class PopFPU() %{
    emit_opcode( masm, 0xDD );
    emit_d8( masm, 0xD8 );
  %}
 2324 
  // Store the FPU top-of-stack into $dst and pop (FSTP ST(i)).
  // NOTE: emits exactly the same bytes as Pop_Reg_FPR; only the operand
  // class (regDPR vs. regFPR) differs.
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( masm, 0xDD );           // FSTP   ST(i)
    emit_d8( masm, 0xD8+$dst$$reg );
  %}
 2330 
  // Push a copy of $dst onto the FPU stack (FLD).  The ST index emitted is
  // the register encoding minus one.
  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( masm, 0xD9 );
    emit_d8( masm, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}
 2335 
  // Multiply $dst by the 80-bit constant stored at
  // StubRoutines::x86::addr_fpu_subnormal_bias1(): the constant is pushed
  // with FLD m80real and folded into $dst with the popping FMULP form.
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( masm, 0xDB );           // FLD m80real
    emit_opcode( masm, 0x2D );           // ModRM: /5 with absolute disp32
    emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
    emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( masm, 0xC8+$dst$$reg );
  %}
 2343 
  // Multiply $dst by the 80-bit constant stored at
  // StubRoutines::x86::addr_fpu_subnormal_bias2(): the constant is pushed
  // with FLD m80real and folded into $dst with the popping FMULP form.
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( masm, 0xDB );           // FLD m80real
    emit_opcode( masm, 0x2D );           // ModRM: /5 with absolute disp32
    emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
    emit_opcode( masm, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( masm, 0xC8+$dst$$reg );
  %}
 2351 
  // Special case for moving an integer register to a stack slot.
  // Delegates to store_to_stackslot with the instruction's $primary opcode,
  // the encoding of $src and the displacement of stack slot $dst.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( masm, $primary, $src$$reg, $dst$$disp );
  %}
 2356 
  // Special case for moving a register to a stack slot.
  // Emits only the operand bytes; the opcode byte must already have been
  // emitted by a preceding encoding.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( masm, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( masm, 0x00, ESP_enc, ESP_enc);          // SIB byte
    emit_d32(masm, $dst$$disp);   // Displacement
  %}
 2364 
  // Push the integer in stackSlot 'src' onto FP-stack
  // The opcode and opcode extension come from the instruction's $primary and
  // $secondary; only the displacement of $src is used for addressing.
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( masm, $primary, $secondary, $src$$disp );
  %}
 2369 
  // Store the FPU's TOS float to stack slot 'dst' and pop the FPU stack
  // (opcode D9 with opcode extension /3).
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( masm, 0xD9, 0x03, $dst$$disp );
  %}
 2374 
  // Same as Pop_Mem_FPR except for the opcode (DD instead of D9).
  // Store the FPU's TOS double to stack slot 'dst' and pop the FPU stack.
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( masm, 0xDD, 0x03, $dst$$disp );
  %}
 2380 
  // Store the FPU top-of-stack into $dst and pop (FSTP ST(i)).
  // Emits the same bytes as Pop_Reg_DPR; only the operand class differs.
  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( masm, 0xDD );           // FSTP   ST(i)
    emit_d8( masm, 0xD8+$dst$$reg );
  %}
 2385 
  // Push a copy of $dst onto the FPU stack (FLD).  The ST index emitted is
  // the register encoding minus one.
  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( masm, 0xD9 );           // FLD    ST(i-1)
    emit_d8( masm, 0xC0-1+$dst$$reg );
  %}
 2390 
 2391   // Push FPU's float to a stack-slot, and pop FPU-stack
 2392   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2393     int pop = 0x02;
 2394     if ($src$$reg != FPR1L_enc) {
 2395       emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
 2396       emit_d8( masm, 0xC0-1+$src$$reg );
 2397       pop = 0x03;
 2398     }
 2399     store_to_stackslot( masm, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2400   %}
 2401 
 2402   // Push FPU's double to a stack-slot, and pop FPU-stack
 2403   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2404     int pop = 0x02;
 2405     if ($src$$reg != FPR1L_enc) {
 2406       emit_opcode( masm, 0xD9 );         // FLD    ST(i-1)
 2407       emit_d8( masm, 0xC0-1+$src$$reg );
 2408       pop = 0x03;
 2409     }
 2410     store_to_stackslot( masm, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2411   %}
 2412 
 2413   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2414   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2415     int pop = 0xD0 - 1; // -1 since we skip FLD
 2416     if ($src$$reg != FPR1L_enc) {
 2417       emit_opcode( masm, 0xD9 );         // FLD    ST(src-1)
 2418       emit_d8( masm, 0xC0-1+$src$$reg );
 2419       pop = 0xD8;
 2420     }
 2421     emit_opcode( masm, 0xDD );
 2422     emit_d8( masm, pop+$dst$$reg );      // FST<P> ST(i)
 2423   %}
 2424 
 2425 
  // Operand setup for an x87 remainder: pushes $dst onto the FPU stack and,
  // when $src is not FPR1, exchanges $src with the register just below the
  // new top.  The exchange is done by rotating the stack pointer up one
  // (FINCSTP), doing an FXCH, and rotating back (FDECSTP).
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( masm, 0xD9 );
    emit_d8( masm, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(masm, 0xD9);
      emit_d8(masm, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF6);
    }
  %}
 2443 
 2444   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2445     __ subptr(rsp, 8);
 2446     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2447     __ fld_d(Address(rsp, 0));
 2448     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2449     __ fld_d(Address(rsp, 0));
 2450   %}
 2451 
 2452   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2453     __ subptr(rsp, 4);
 2454     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2455     __ fld_s(Address(rsp, 0));
 2456     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2457     __ fld_s(Address(rsp, 0));
 2458   %}
 2459 
 2460   enc_class Push_ResultD(regD dst) %{
 2461     __ fstp_d(Address(rsp, 0));
 2462     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2463     __ addptr(rsp, 8);
 2464   %}
 2465 
 2466   enc_class Push_ResultF(regF dst, immI d8) %{
 2467     __ fstp_s(Address(rsp, 0));
 2468     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2469     __ addptr(rsp, $d8$$constant);
 2470   %}
 2471 
 2472   enc_class Push_SrcD(regD src) %{
 2473     __ subptr(rsp, 8);
 2474     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2475     __ fld_d(Address(rsp, 0));
 2476   %}
 2477 
  // Reserve an 8-byte temporary on the stack (rsp -= 8).
  enc_class push_stack_temp_qword() %{
    __ subptr(rsp, 8);
  %}
 2481 
  // Release an 8-byte stack temporary (rsp += 8).
  enc_class pop_stack_temp_qword() %{
    __ addptr(rsp, 8);
  %}
 2485 
 2486   enc_class push_xmm_to_fpr1(regD src) %{
 2487     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2488     __ fld_d(Address(rsp, 0));
 2489   %}
 2490 
  // When $src is not FPR1, exchange it with the register just below the FPU
  // top-of-stack: rotate the stack pointer up one (FINCSTP), FXCH, and
  // rotate back (FDECSTP).  Emits nothing when $src is FPR1.
  // Same byte sequence as the conditional part of Push_Reg_Mod_DPR.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(masm, 0xD9);
      emit_d8(masm, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (masm, 0xD9);
      emit_opcode (masm, 0xF6);
    }
  %}
 2504 
  // Copy the FPU status flags into EFLAGS (FNSTSW AX; SAHF) and then emit a
  // short JNP with displacement 5.  The 5 bytes being skipped are not emitted
  // here; they come from whatever encoding follows this one.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( masm, 0xDF );
    emit_opcode( masm, 0xE0 );
    // sahf
    emit_opcode( masm, 0x9E );
    // jnp  ::skip
    emit_opcode( masm, 0x7B );
    emit_opcode( masm, 0x05 );
  %}
 2515 
  // Iterated x87 partial remainder: repeats FPREM until the parity flag
  // (loaded from the FPU status word via FNSTSW AX / SAHF) is clear.
  // Overwrites AX and the flags through FNSTSW/SAHF.
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( masm, 0xD9 );
    emit_opcode( masm, 0xF8 );
    // wait
    emit_opcode( masm, 0x9b );
    // fnstsw ax
    emit_opcode( masm, 0xDF );
    emit_opcode( masm, 0xE0 );
    // sahf
    emit_opcode( masm, 0x9E );
    // jp  ::loop
    // Near JP (0F 8A) with rel32 = 0xFFFFFFF4 = -12: back over the 12 bytes
    // of this sequence (2 + 1 + 2 + 1 + 6) to the FPREM.
    emit_opcode( masm, 0x0F );
    emit_opcode( masm, 0x8A );
    emit_opcode( masm, 0xF4 );
    emit_opcode( masm, 0xFF );
    emit_opcode( masm, 0xFF );
    emit_opcode( masm, 0xFF );
  %}
 2537 
  // Transfer the result of an x87 compare into EFLAGS.  Bit 0x0400 of the
  // FPU status word is tested; when it is set (unordered), AH is forced to 1
  // so that SAHF sets the carry flag and the result is treated as "less than".
  // The JZ skips only the 2-byte MOV AH,1; SAHF runs on both paths.
  // Overwrites AX.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( masm, 0xDF);
    emit_opcode( masm, 0xE0);
    // test ax,0x0400
    emit_opcode( masm, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( masm, 0xA9 );
    emit_d16   ( masm, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( masm, 0xA9 );
    // emit_d32   ( masm, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( masm, 0x74 );
    emit_d8    ( masm, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( masm, 0xB4 );
    emit_d8    ( masm, 0x01 );
    // sahf
    emit_opcode( masm, 0x9E);
  %}
 2560 
  // Post-compare fixup for compares that set EFLAGS directly: if the parity
  // flag reports an unordered result, rewrite the flags via AH/SAHF so the
  // outcome reads as "less than".  Overwrites AH on the unordered path.
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    // Displacement 3 skips MOV AH,1 (2 bytes) + SAHF (1 byte), landing on the NOP.
    emit_opcode( masm, 0x7B );
    emit_d8    ( masm, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( masm, 0xB4 );
    emit_d8    ( masm, 0x01 );
    // SAHF
    emit_opcode( masm, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( masm, 0x90);
  %}
 2575 
 2576 //     fnstsw_ax();
 2577 //     sahf();
 2578 //     movl(dst, nan_result);
 2579 //     jcc(Assembler::parity, exit);
 2580 //     movl(dst, less_result);
 2581 //     jcc(Assembler::below, exit);
 2582 //     movl(dst, equal_result);
 2583 //     jcc(Assembler::equal, exit);
 2584 //     movl(dst, greater_result);
 2585 
// less_result     = -1;
// greater_result  =  1;
// equal_result    =  0;
// nan_result      = -1;
 2590 
 2591   enc_class CmpF_Result(rRegI dst) %{
 2592     // fnstsw_ax();
 2593     emit_opcode( masm, 0xDF);
 2594     emit_opcode( masm, 0xE0);
 2595     // sahf
 2596     emit_opcode( masm, 0x9E);
 2597     // movl(dst, nan_result);
 2598     emit_opcode( masm, 0xB8 + $dst$$reg);
 2599     emit_d32( masm, -1 );
 2600     // jcc(Assembler::parity, exit);
 2601     emit_opcode( masm, 0x7A );
 2602     emit_d8    ( masm, 0x13 );
 2603     // movl(dst, less_result);
 2604     emit_opcode( masm, 0xB8 + $dst$$reg);
 2605     emit_d32( masm, -1 );
 2606     // jcc(Assembler::below, exit);
 2607     emit_opcode( masm, 0x72 );
 2608     emit_d8    ( masm, 0x0C );
 2609     // movl(dst, equal_result);
 2610     emit_opcode( masm, 0xB8 + $dst$$reg);
 2611     emit_d32( masm, 0 );
 2612     // jcc(Assembler::equal, exit);
 2613     emit_opcode( masm, 0x74 );
 2614     emit_d8    ( masm, 0x05 );
 2615     // movl(dst, greater_result);
 2616     emit_opcode( masm, 0xB8 + $dst$$reg);
 2617     emit_d32( masm, 1 );
 2618   %}
 2619 
 2620 
  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  // Emits: compare the high words; if they differ, skip the 2-byte compare
  // of the low words so the flags reflect the high-word compare.
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
    // JNE,s  done
    emit_opcode(masm,0x75);
    emit_d8(masm, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( masm, 0x3B );
    emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
// done:
  %}
 2635 
 2636   enc_class convert_int_long( regL dst, rRegI src ) %{
 2637     // mov $dst.lo,$src
 2638     int dst_encoding = $dst$$reg;
 2639     int src_encoding = $src$$reg;
 2640     encode_Copy( masm, dst_encoding  , src_encoding );
 2641     // mov $dst.hi,$src
 2642     encode_Copy( masm, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2643     // sar $dst.hi,31
 2644     emit_opcode( masm, 0xC1 );
 2645     emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2646     emit_d8(masm, 0x1F );
 2647   %}
 2648 
 2649   enc_class convert_long_double( eRegL src ) %{
 2650     // push $src.hi
 2651     emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2652     // push $src.lo
 2653     emit_opcode(masm, 0x50+$src$$reg  );
 2654     // fild 64-bits at [SP]
 2655     emit_opcode(masm,0xdf);
 2656     emit_d8(masm, 0x6C);
 2657     emit_d8(masm, 0x24);
 2658     emit_d8(masm, 0x00);
 2659     // pop stack
 2660     emit_opcode(masm, 0x83); // add  SP, #8
 2661     emit_rm(masm, 0x3, 0x00, ESP_enc);
 2662     emit_d8(masm, 0x8);
 2663   %}
 2664 
 2665   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2666     // IMUL   EDX:EAX,$src1
 2667     emit_opcode( masm, 0xF7 );
 2668     emit_rm( masm, 0x3, 0x5, $src1$$reg );
 2669     // SAR    EDX,$cnt-32
 2670     int shift_count = ((int)$cnt$$constant) - 32;
 2671     if (shift_count > 0) {
 2672       emit_opcode(masm, 0xC1);
 2673       emit_rm(masm, 0x3, 7, $dst$$reg );
 2674       emit_d8(masm, shift_count);
 2675     }
 2676   %}
 2677 
 2678   // this version doesn't have add sp, 8
 2679   enc_class convert_long_double2( eRegL src ) %{
 2680     // push $src.hi
 2681     emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2682     // push $src.lo
 2683     emit_opcode(masm, 0x50+$src$$reg  );
 2684     // fild 64-bits at [SP]
 2685     emit_opcode(masm,0xdf);
 2686     emit_d8(masm, 0x6C);
 2687     emit_d8(masm, 0x24);
 2688     emit_d8(masm, 0x00);
 2689   %}
 2690 
  // Signed widening multiply: one-operand IMUL (F7 /5) by $src.
  // $dst is not referenced in the body; per the comment below the result
  // lands in EDX:EAX.
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x5, $src$$reg);
  %}
 2697 
  // Unsigned widening multiply: one-operand MUL (F7 /4) by $src.
  // $dst is not referenced in the body; per the comment below the result
  // lands in EDX:EAX.
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( masm, 0xF7 );
    emit_rm( masm, 0x3, 0x4, $src$$reg);
  %}
 2704 
 2705   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2706     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2707     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2708     // MOV    $tmp,$src.lo
 2709     encode_Copy( masm, $tmp$$reg, $src$$reg );
 2710     // IMUL   $tmp,EDX
 2711     emit_opcode( masm, 0x0F );
 2712     emit_opcode( masm, 0xAF );
 2713     emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2714     // MOV    EDX,$src.hi
 2715     encode_Copy( masm, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2716     // IMUL   EDX,EAX
 2717     emit_opcode( masm, 0x0F );
 2718     emit_opcode( masm, 0xAF );
 2719     emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2720     // ADD    $tmp,EDX
 2721     emit_opcode( masm, 0x03 );
 2722     emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2723     // MUL   EDX:EAX,$src.lo
 2724     emit_opcode( masm, 0xF7 );
 2725     emit_rm( masm, 0x3, 0x4, $src$$reg );
 2726     // ADD    EDX,ESI
 2727     emit_opcode( masm, 0x03 );
 2728     emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2729   %}
 2730 
 2731   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2732     // Basic idea: lo(result) = lo(src * y_lo)
 2733     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2734     // IMUL   $tmp,EDX,$src
 2735     emit_opcode( masm, 0x6B );
 2736     emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2737     emit_d8( masm, (int)$src$$constant );
 2738     // MOV    EDX,$src
 2739     emit_opcode(masm, 0xB8 + EDX_enc);
 2740     emit_d32( masm, (int)$src$$constant );
 2741     // MUL   EDX:EAX,EDX
 2742     emit_opcode( masm, 0xF7 );
 2743     emit_rm( masm, 0x3, 0x4, EDX_enc );
 2744     // ADD    EDX,ESI
 2745     emit_opcode( masm, 0x03 );
 2746     emit_rm( masm, 0x3, EDX_enc, $tmp$$reg );
 2747   %}
 2748 
 2749   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2750     // PUSH src1.hi
 2751     emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2752     // PUSH src1.lo
 2753     emit_opcode(masm,               0x50+$src1$$reg  );
 2754     // PUSH src2.hi
 2755     emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2756     // PUSH src2.lo
 2757     emit_opcode(masm,               0x50+$src2$$reg  );
 2758     // CALL directly to the runtime
 2759     __ set_inst_mark();
 2760     emit_opcode(masm,0xE8);       // Call into runtime
 2761     emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2762     __ clear_inst_mark();
 2763     __ post_call_nop();
 2764     // Restore stack
 2765     emit_opcode(masm, 0x83); // add  SP, #framesize
 2766     emit_rm(masm, 0x3, 0x00, ESP_enc);
 2767     emit_d8(masm, 4*4);
 2768   %}
 2769 
 2770   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2771     // PUSH src1.hi
 2772     emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2773     // PUSH src1.lo
 2774     emit_opcode(masm,               0x50+$src1$$reg  );
 2775     // PUSH src2.hi
 2776     emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2777     // PUSH src2.lo
 2778     emit_opcode(masm,               0x50+$src2$$reg  );
 2779     // CALL directly to the runtime
 2780     __ set_inst_mark();
 2781     emit_opcode(masm,0xE8);       // Call into runtime
 2782     emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2783     __ clear_inst_mark();
 2784     __ post_call_nop();
 2785     // Restore stack
 2786     emit_opcode(masm, 0x83); // add  SP, #framesize
 2787     emit_rm(masm, 0x3, 0x00, ESP_enc);
 2788     emit_d8(masm, 4*4);
 2789   %}
 2790 
 2791   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2792     // MOV   $tmp,$src.lo
 2793     emit_opcode(masm, 0x8B);
 2794     emit_rm(masm, 0x3, $tmp$$reg, $src$$reg);
 2795     // OR    $tmp,$src.hi
 2796     emit_opcode(masm, 0x0B);
 2797     emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2798   %}
 2799 
 2800   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2801     // CMP    $src1.lo,$src2.lo
 2802     emit_opcode( masm, 0x3B );
 2803     emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
 2804     // JNE,s  skip
 2805     emit_cc(masm, 0x70, 0x5);
 2806     emit_d8(masm,2);
 2807     // CMP    $src1.hi,$src2.hi
 2808     emit_opcode( masm, 0x3B );
 2809     emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2810   %}
 2811 
 2812   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2813     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2814     emit_opcode( masm, 0x3B );
 2815     emit_rm(masm, 0x3, $src1$$reg, $src2$$reg );
 2816     // MOV    $tmp,$src1.hi
 2817     emit_opcode( masm, 0x8B );
 2818     emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
 2819     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2820     emit_opcode( masm, 0x1B );
 2821     emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2822   %}
 2823 
 2824   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2825     // XOR    $tmp,$tmp
 2826     emit_opcode(masm,0x33);  // XOR
 2827     emit_rm(masm,0x3, $tmp$$reg, $tmp$$reg);
 2828     // CMP    $tmp,$src.lo
 2829     emit_opcode( masm, 0x3B );
 2830     emit_rm(masm, 0x3, $tmp$$reg, $src$$reg );
 2831     // SBB    $tmp,$src.hi
 2832     emit_opcode( masm, 0x1B );
 2833     emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2834   %}
 2835 
 2836  // Sniff, sniff... smells like Gnu Superoptimizer
 2837   enc_class neg_long( eRegL dst ) %{
 2838     emit_opcode(masm,0xF7);    // NEG hi
 2839     emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2840     emit_opcode(masm,0xF7);    // NEG lo
 2841     emit_rm    (masm,0x3, 0x3,               $dst$$reg );
 2842     emit_opcode(masm,0x83);    // SBB hi,0
 2843     emit_rm    (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2844     emit_d8    (masm,0 );
 2845   %}
 2846 
  // Emits the single byte 0x5A (POP EDX).
  enc_class enc_pop_rdx() %{
    emit_opcode(masm,0x5A);
  %}
 2850 
  // Jump to OptoRuntime::rethrow_stub() with a near JMP rel32 carrying a
  // runtime-call relocation.
  enc_class enc_rethrow() %{
    __ set_inst_mark();
    emit_opcode(masm, 0xE9);        // jmp    entry
    // rel32 is relative to the end of the 4-byte displacement, hence the "-4"
    emit_d32_reloc(masm, (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
  %}
 2859 
 2860 
  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The fast path pops the converted value
  // into EAX; if it equals 0x80000000 (the value the hardware stores in the
  // corner cases), $src is pushed again and the d2i_wrapper stub is called.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(masm,0xD9);            // FLDCW  trunc
    emit_opcode(masm,0x2D);
    emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(masm,0x83);            // SUB ESP,4
    emit_opcode(masm,0xEC);
    emit_d8(masm,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(masm,0xDB);            // FISTP [ESP]
    emit_opcode(masm,0x1C);
    emit_d8(masm,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(masm,0xD9);            // FLDCW   std/24-bit mode
    emit_opcode(masm,0x2D);
    emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(masm,0x58);       // POP EAX
    emit_opcode(masm,0x3D);       // CMP EAX,imm
    emit_d32   (masm,0x80000000); //         0x80000000
    emit_opcode(masm,0x75);       // JNE around_slow_call
    emit_d8    (masm,0x07);       // Size of slow_call: FLD (2 bytes) + CALL rel32 (5 bytes)
    // Push src onto stack slow-path
    emit_opcode(masm,0xD9 );      // FLD     ST(i)
    emit_d8    (masm,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Carry on here...
  %}
 2910 
  // Convert a double to a long.  Same scheme as DPR2I_encoding: switch to
  // round-to-zero, FISTP the value as a 64-bit integer into a stack temp,
  // restore the control word, and pop the result into EDX:EAX.  If the
  // result is 0x80000000:00000000 (high word 0x80000000, low word zero),
  // $src is pushed again and the d2l_wrapper stub is called.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(masm,0xD9);            // FLDCW  trunc
    emit_opcode(masm,0x2D);
    emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes)
    emit_opcode(masm,0x83);            // SUB ESP,8
    emit_opcode(masm,0xEC);
    emit_d8(masm,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(masm,0xDF);            // FISTP [ESP]
    emit_opcode(masm,0x3C);
    emit_d8(masm,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(masm,0xD9);            // FLDCW   std/24-bit mode
    emit_opcode(masm,0x2D);
    emit_d32( masm, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted long; adjust CPU stack
    emit_opcode(masm,0x58);       // POP EAX
    emit_opcode(masm,0x5A);       // POP EDX
    emit_opcode(masm,0x81);       // CMP EDX,imm
    emit_d8    (masm,0xFA);       // rdx
    emit_d32   (masm,0x80000000); //         0x80000000
    emit_opcode(masm,0x75);       // JNE around_slow_call
    emit_d8    (masm,0x07+4);     // Size of slow_call plus the TEST (2) and JNE (2) below
    emit_opcode(masm,0x85);       // TEST EAX,EAX
    emit_opcode(masm,0xC0);       // ModRM: EAX,EAX
    emit_opcode(masm,0x75);       // JNE around_slow_call
    emit_d8    (masm,0x07);       // Size of slow_call: FLD (2 bytes) + CALL rel32 (5 bytes)
    // Push src onto stack slow-path
    emit_opcode(masm,0xD9 );      // FLD     ST(i)
    emit_d8    (masm,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    __ set_inst_mark();
    emit_opcode(masm,0xE8);       // Call into runtime
    emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ clear_inst_mark();
    __ post_call_nop();
    // Carry on here...
  %}
 2954 
  // Multiply the FPU top-of-stack by $src1 (non-popping FMUL, result in ST).
  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC8 + $src1$$reg);
  %}
 2961 
  // Add $src2 to the FPU top-of-stack (non-popping FADD, result in ST).
  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}
 2968 
  // Add the FPU top-of-stack into $src2 and pop the FPU stack (FADDP).
  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(masm, 0xDE);
    emit_opcode(masm, 0xC0 + $src2$$reg);
  %}
 2974 
  // Fused sequence on the FPU top-of-stack: ST = (ST - $src1) / $src2.
  // Both instructions are the non-popping D8 forms.
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
      // FSUB   ST,$src1  /* D8 E0+i */
      emit_opcode(masm, 0xD8);
      emit_opcode(masm, 0xE0 + $src1$$reg);

      // FDIV   ST,$src2  /* D8 F0+i */
      emit_opcode(masm, 0xD8);
      emit_opcode(masm, 0xF0 + $src2$$reg);
  %}
 2985 
  // Fused sequence on the FPU top-of-stack: ST = (ST + $src1) * $src2.
  // Both instructions are the non-popping D8 forms, so the result stays in ST.
  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C8+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC8 + $src2$$reg);
  %}
 2996 
 2997 
  // Like MulFAddF, but the multiply is the popping form: ST += $src1, then
  // $src2 = $src2 * ST and the FPU stack is popped, leaving the result in
  // $src2 instead of on the stack top.
  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(masm, 0xD8);
    emit_opcode(masm, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(masm, 0xDE);
    emit_opcode(masm, 0xC8 + $src2$$reg);
  %}
 3008 
  // Atomically load the volatile long
  // The 64-bit value is moved through the x87 unit: opcode DF with extension
  // /5 loads it from $mem onto the FPU stack, and DF with extension /7 then
  // stores it to stack slot $dst and pops.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(masm,0xDF);
    int rm_byte_opcode = 0x05;    // opcode extension /5
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( masm, 0x0DF, 0x07, $dst$$disp );
  %}
 3021 
  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  // In this encoding: opcode DF with extension /5 loads the value from stack
  // slot $src, and DF with extension /7 stores it to $mem and pops.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( masm, 0x0DF, 0x05, $src$$disp );
    __ set_inst_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(masm,0xDF);
    int rm_byte_opcode = 0x07;    // opcode extension /7
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    __ clear_inst_mark();
  %}
 3039 
 3040 %}
 3041 
 3042 
 3043 //----------FRAME--------------------------------------------------------------
 3044 // Definition of frame structure and management information.
 3045 //
 3046 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3047 //                             |   (to get allocators register number
 3048 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3049 //  r   CALLER     |        |
 3050 //  o     |        +--------+      pad to even-align allocators stack-slot
 3051 //  w     V        |  pad0  |        numbers; owned by CALLER
 3052 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3053 //  h     ^        |   in   |  5
 3054 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3055 //  |     |        |        |  3
 3056 //  |     |        +--------+
 3057 //  V     |        | old out|      Empty on Intel, window on Sparc
 3058 //        |    old |preserve|      Must be even aligned.
 3059 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3060 //        |        |   in   |  3   area for Intel ret address
 3061 //     Owned by    |preserve|      Empty on Sparc.
 3062 //       SELF      +--------+
 3063 //        |        |  pad2  |  2   pad to align old SP
 3064 //        |        +--------+  1
 3065 //        |        | locks  |  0
 3066 //        |        +--------+----> OptoReg::stack0(), even aligned
 3067 //        |        |  pad1  | 11   pad to align new SP
 3068 //        |        +--------+
 3069 //        |        |        | 10
 3070 //        |        | spills |  9   spills
 3071 //        V        |        |  8   (pad0 slot for callee)
 3072 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3073 //        ^        |  out   |  7
 3074 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3075 //     Owned by    +--------+
 3076 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3077 //        |    new |preserve|      Must be even-aligned.
 3078 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3079 //        |        |        |
 3080 //
 3081 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3082 //         known from SELF's arguments and the Java calling convention.
 3083 //         Region 6-7 is determined per call site.
 3084 // Note 2: If the calling convention leaves holes in the incoming argument
 3085 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3086 //         are owned by the CALLEE.  Holes should not be necessary in the
 3087 //         incoming area, as the Java calling convention is completely under
 3088 //         the control of the AD file.  Doubles can be sorted and packed to
 3089 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3090 //         varargs C calling conventions.
 3091 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3092 //         even aligned with pad0 as needed.
 3093 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3094 //         region 6-11 is even aligned; it may be padded out more so that
 3095 //         the region from SP to FP meets the minimum stack alignment.
 3096 
 3097 frame %{
 3098   // This register defines part of the calling convention
 3099   // between compiled code and the interpreter.
 3100   inline_cache_reg(EAX);                // Inline Cache Register
 3101 
 3102   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3103   cisc_spilling_operand_name(indOffset32);
 3104 
 3105   // Number of stack slots consumed by locking an object
 3106   sync_stack_slots(1);
 3107 
 3108   // Compiled code's Frame Pointer
 3109   frame_pointer(ESP);
 3110   // Interpreter stores its frame pointer in a register which is
 3111   // stored to the stack by I2CAdaptors.
 3112   // I2CAdaptors convert from interpreted java to compiled java.
 3113   interpreter_frame_pointer(EBP);
 3114 
 3115   // Stack alignment requirement
 3116   // Alignment size in bytes (128-bit -> 16 bytes)
 3117   stack_alignment(StackAlignmentInBytes);
 3118 
 3119   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3120   // for calls to C.  Supports the var-args backing area for register parms.
 3121   varargs_C_out_slots_killed(0);
 3122 
 3123   // The after-PROLOG location of the return address.  Location of
 3124   // return address specifies a type (REG or STACK) and a number
 3125   // representing the register number (i.e. - use a register name) or
 3126   // stack slot.
 3127   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3128   // Otherwise, it is above the locks and verification slot and alignment word
 3129   return_addr(STACK - 1 +
 3130               align_up((Compile::current()->in_preserve_stack_slots() +
 3131                         Compile::current()->fixed_slots()),
 3132                        stack_alignment_in_slots()));
 3133 
 3134   // Location of C & interpreter return values
 3135   c_return_value %{
 3136     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
            // lo/hi are indexed by ideal_reg; the assert above limits lookups to
            // the Op_RegI..Op_RegL entries.  OptoReg::Bad marks an unused half.
 3137     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3138     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3139 
 3140     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3141     // that C functions return float and double results in XMM0.
 3142     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3143       return OptoRegPair(XMM0b_num,XMM0_num);
 3144     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3145       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3146 
 3147     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3148   %}
 3149 
 3150   // Location of return values
 3151   return_value %{
 3152     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3153     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3154     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
            // Unlike c_return_value, floats already use XMM0 when UseSSE>=1;
            // doubles use XMM0 only when UseSSE>=2.
 3155     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3156       return OptoRegPair(XMM0b_num,XMM0_num);
 3157     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3158       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3159     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3160   %}
 3161 
 3162 %}
 3163 
 3164 //----------ATTRIBUTES---------------------------------------------------------
 3165 //----------Operand Attributes-------------------------------------------------
 3166 op_attrib op_cost(0);        // Required cost attribute; 0 is the default for operands that do not set op_cost
 3167 
 3168 //----------Instruction Attributes---------------------------------------------
 3169 ins_attrib ins_cost(100);       // Required cost attribute
 3170 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3171 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3172                                 // non-matching short branch variant of some
 3173                                 // long branch?
 3174 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3175                                 // specifies the alignment that some part of the instruction (not
 3176                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3177                                 // function must be provided for the instruction
 3178 
 3179 //----------OPERANDS-----------------------------------------------------------
 3180 // Operand definitions must precede instruction definitions for correct parsing
 3181 // in the ADLC because operands constitute user defined types which are used in
 3182 // instruction definitions.
 3183 
 3184 //----------Simple Operands----------------------------------------------------
 3185 // Immediate Operands
 3186 // Integer Immediate: matches any ConI node (no predicate), op_cost 10
 3187 operand immI() %{
 3188   match(ConI);
 3189 
 3190   op_cost(10);
 3191   format %{ %}
 3192   interface(CONST_INTER);
 3193 %}
 3194 
 3195 // Constant for test vs zero: ConI whose value is exactly 0
 3196 operand immI_0() %{
 3197   predicate(n->get_int() == 0);
 3198   match(ConI);
 3199 
 3200   op_cost(0);
 3201   format %{ %}
 3202   interface(CONST_INTER);
 3203 %}
 3204 
 3205 // Constant for increment: ConI whose value is exactly 1
 3206 operand immI_1() %{
 3207   predicate(n->get_int() == 1);
 3208   match(ConI);
 3209 
 3210   op_cost(0);
 3211   format %{ %}
 3212   interface(CONST_INTER);
 3213 %}
 3214 
 3215 // Constant for decrement: ConI whose value is exactly -1
 3216 operand immI_M1() %{
 3217   predicate(n->get_int() == -1);
 3218   match(ConI);
 3219 
 3220   op_cost(0);
 3221   format %{ %}
 3222   interface(CONST_INTER);
 3223 %}
 3224 
 3225 // Valid scale values for addressing modes: ConI in the range [0, 3]
 3226 operand immI2() %{
 3227   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3228   match(ConI);
 3229 
 3230   format %{ %}
 3231   interface(CONST_INTER);
 3232 %}
 3233 
 3234 operand immI8() %{
        // ConI that fits in a signed 8-bit immediate: [-128, 127]
 3235   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3236   match(ConI);
 3237 
 3238   op_cost(5);
 3239   format %{ %}
 3240   interface(CONST_INTER);
 3241 %}
 3242 
 3243 operand immU8() %{
        // ConI that fits in an unsigned 8-bit immediate: [0, 255]
 3244   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3245   match(ConI);
 3246 
 3247   op_cost(5);
 3248   format %{ %}
 3249   interface(CONST_INTER);
 3250 %}
 3251 
 3252 operand immI16() %{
        // ConI that fits in a signed 16-bit immediate: [-32768, 32767]
 3253   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3254   match(ConI);
 3255 
 3256   op_cost(10);
 3257   format %{ %}
 3258   interface(CONST_INTER);
 3259 %}
 3260 
 3261 // Int Immediate non-negative: ConI with value >= 0 (sign bit clear)
 3262 operand immU31()
 3263 %{
 3264   predicate(n->get_int() >= 0);
 3265   match(ConI);
 3266 
 3267   op_cost(0);
 3268   format %{ %}
 3269   interface(CONST_INTER);
 3270 %}
 3271 
 3272 // Constant for long shifts: ConI whose value is exactly 32
 3273 operand immI_32() %{
 3274   predicate( n->get_int() == 32 );
 3275   match(ConI);
 3276 
 3277   op_cost(0);
 3278   format %{ %}
 3279   interface(CONST_INTER);
 3280 %}
 3281 
 3282 operand immI_1_31() %{
        // ConI in the range [1, 31]
 3283   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3284   match(ConI);
 3285 
 3286   op_cost(0);
 3287   format %{ %}
 3288   interface(CONST_INTER);
 3289 %}
 3290 
 3291 operand immI_32_63() %{
        // ConI in the range [32, 63]
 3292   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3293   match(ConI);
 3294   op_cost(0);
 3295 
 3296   format %{ %}
 3297   interface(CONST_INTER);
 3298 %}
 3299 
 3300 operand immI_2() %{
        // ConI whose value is exactly 2
 3301   predicate( n->get_int() == 2 );
 3302   match(ConI);
 3303 
 3304   op_cost(0);
 3305   format %{ %}
 3306   interface(CONST_INTER);
 3307 %}
 3308 
 3309 operand immI_3() %{
        // ConI whose value is exactly 3
 3310   predicate( n->get_int() == 3 );
 3311   match(ConI);
 3312 
 3313   op_cost(0);
 3314   format %{ %}
 3315   interface(CONST_INTER);
 3316 %}
 3317 
 3318 operand immI_4()
 3319 %{
        // ConI whose value is exactly 4
 3320   predicate(n->get_int() == 4);
 3321   match(ConI);
 3322 
 3323   op_cost(0);
 3324   format %{ %}
 3325   interface(CONST_INTER);
 3326 %}
 3327 
 3328 operand immI_8()
 3329 %{
        // ConI whose value is exactly 8
 3330   predicate(n->get_int() == 8);
 3331   match(ConI);
 3332 
 3333   op_cost(0);
 3334   format %{ %}
 3335   interface(CONST_INTER);
 3336 %}
 3337 
 3338 // Pointer Immediate: matches any ConP node (no predicate), op_cost 10
 3339 operand immP() %{
 3340   match(ConP);
 3341 
 3342   op_cost(10);
 3343   format %{ %}
 3344   interface(CONST_INTER);
 3345 %}
 3346 
 3347 // Null Pointer Immediate: ConP whose pointer value is 0
 3348 operand immP0() %{
 3349   predicate( n->get_ptr() == 0 );
 3350   match(ConP);
 3351   op_cost(0);
 3352 
 3353   format %{ %}
 3354   interface(CONST_INTER);
 3355 %}
 3356 
 3357 // Long Immediate: matches any ConL node (no predicate), op_cost 20
 3358 operand immL() %{
 3359   match(ConL);
 3360 
 3361   op_cost(20);
 3362   format %{ %}
 3363   interface(CONST_INTER);
 3364 %}
 3365 
 3366 // Long Immediate zero: ConL whose value is exactly 0
 3367 operand immL0() %{
 3368   predicate( n->get_long() == 0L );
 3369   match(ConL);
 3370   op_cost(0);
 3371 
 3372   format %{ %}
 3373   interface(CONST_INTER);
 3374 %}
 3375 
 3376 // Long Immediate minus one: ConL whose value is exactly -1
 3377 operand immL_M1() %{
 3378   predicate( n->get_long() == -1L );
 3379   match(ConL);
 3380   op_cost(0);
 3381 
 3382   format %{ %}
 3383   interface(CONST_INTER);
 3384 %}
 3385 
 3386 // Long immediate from 0 to 127 (inclusive on both ends).
 3387 // Used for a shorter form of long mul by 10.
 3388 operand immL_127() %{
 3389   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3390   match(ConL);
 3391   op_cost(0);
 3392 
 3393   format %{ %}
 3394   interface(CONST_INTER);
 3395 %}
 3396 
 3397 // Long Immediate: low 32-bit mask, i.e. ConL equal to 0xFFFFFFFF
 3398 operand immL_32bits() %{
 3399   predicate(n->get_long() == 0xFFFFFFFFL);
 3400   match(ConL);
 3401   op_cost(0);
 3402 
 3403   format %{ %}
 3404   interface(CONST_INTER);
 3405 %}
 3406 
 3407 // Long Immediate that fits in 32 bits: ConL unchanged by a round-trip through (int)
 3408 operand immL32() %{
 3409   predicate(n->get_long() == (int)(n->get_long()));
 3410   match(ConL);
 3411   op_cost(20);
 3412 
 3413   format %{ %}
 3414   interface(CONST_INTER);
 3415 %}
 3416 
 3417 // Double Immediate zero; only considered when UseSSE<=1
 3418 operand immDPR0() %{
 3419   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3420   // bug that generates code such that NaNs compare equal to 0.0
 3421   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3422   match(ConD);
 3423 
 3424   op_cost(5);
 3425   format %{ %}
 3426   interface(CONST_INTER);
 3427 %}
 3428 
 3429 // Double Immediate one: ConD equal to 1.0; only considered when UseSSE<=1
 3430 operand immDPR1() %{
 3431   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3432   match(ConD);
 3433 
 3434   op_cost(5);
 3435   format %{ %}
 3436   interface(CONST_INTER);
 3437 %}
 3438 
 3439 // Double Immediate: any ConD, but only when UseSSE<=1
 3440 operand immDPR() %{
 3441   predicate(UseSSE<=1);
 3442   match(ConD);
 3443 
 3444   op_cost(5);
 3445   format %{ %}
 3446   interface(CONST_INTER);
 3447 %}
 3448 
 3449 operand immD() %{
        // Double Immediate: any ConD, but only when UseSSE>=2
        // (the complementary case to immDPR)
 3450   predicate(UseSSE>=2);
 3451   match(ConD);
 3452 
 3453   op_cost(5);
 3454   format %{ %}
 3455   interface(CONST_INTER);
 3456 %}
 3457 
 3458 // Double Immediate zero; only considered when UseSSE>=2
 3459 operand immD0() %{
 3460   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3461   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3462   // compare equal to -0.0.
        // The test compares jlong_cast(value) with 0 instead of using a
        // floating-point ==.  NOTE(review): presumably jlong_cast reinterprets
        // the bits, so only +0.0 passes - confirm against its definition.
 3463   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3464   match(ConD);
 3465 
 3466   format %{ %}
 3467   interface(CONST_INTER);
 3468 %}
 3469 
 3470 // Float Immediate zero: ConF comparing equal to 0.0F; only when UseSSE == 0
 3471 operand immFPR0() %{
 3472   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3473   match(ConF);
 3474 
 3475   op_cost(5);
 3476   format %{ %}
 3477   interface(CONST_INTER);
 3478 %}
 3479 
 3480 // Float Immediate one: ConF equal to 1.0F; only when UseSSE == 0
 3481 operand immFPR1() %{
 3482   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3483   match(ConF);
 3484 
 3485   op_cost(5);
 3486   format %{ %}
 3487   interface(CONST_INTER);
 3488 %}
 3489 
 3490 // Float Immediate: any ConF, but only when UseSSE == 0
 3491 operand immFPR() %{
 3492   predicate( UseSSE == 0 );
 3493   match(ConF);
 3494 
 3495   op_cost(5);
 3496   format %{ %}
 3497   interface(CONST_INTER);
 3498 %}
 3499 
 3500 // Float Immediate: any ConF, but only when UseSSE >= 1 (complement of immFPR)
 3501 operand immF() %{
 3502   predicate(UseSSE >= 1);
 3503   match(ConF);
 3504 
 3505   op_cost(5);
 3506   format %{ %}
 3507   interface(CONST_INTER);
 3508 %}
 3509 
 3510 // Float Immediate zero.  Zero and not -0.0; only when UseSSE >= 1.
 3511 operand immF0() %{
        // The test compares jint_cast(value) with 0 rather than using a
        // floating-point ==, which is what excludes -0.0.
 3512   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3513   match(ConF);
 3514 
 3515   op_cost(5);
 3516   format %{ %}
 3517   interface(CONST_INTER);
 3518 %}
 3519 
 3520 // Immediates for special shifts (sign extend)
 3521 
 3522 // Constant 16: ConI whose value is exactly 16 (one of the special shift counts)
 3523 operand immI_16() %{
 3524   predicate( n->get_int() == 16 );
 3525   match(ConI);
 3526 
 3527   format %{ %}
 3528   interface(CONST_INTER);
 3529 %}
 3530 
 3531 operand immI_24() %{
        // ConI whose value is exactly 24
 3532   predicate( n->get_int() == 24 );
 3533   match(ConI);
 3534 
 3535   format %{ %}
 3536   interface(CONST_INTER);
 3537 %}
 3538 
 3539 // Constant for byte-wide masking: ConI equal to 255 (0xFF)
 3540 operand immI_255() %{
 3541   predicate( n->get_int() == 255 );
 3542   match(ConI);
 3543 
 3544   format %{ %}
 3545   interface(CONST_INTER);
 3546 %}
 3547 
 3548 // Constant for short-wide masking: ConI equal to 65535 (0xFFFF)
 3549 operand immI_65535() %{
 3550   predicate(n->get_int() == 65535);
 3551   match(ConI);
 3552 
 3553   format %{ %}
 3554   interface(CONST_INTER);
 3555 %}
 3556 
 3557 operand kReg()
 3558 %{
        // Register operand for RegVectMask values, allocated from vectmask_reg
 3559   constraint(ALLOC_IN_RC(vectmask_reg));
 3560   match(RegVectMask);
 3561   format %{%}
 3562   interface(REG_INTER);
 3563 %}
 3564 
 3565 // Register Operands
 3566 // Integer Register: allocated from int_reg; also accepts each of the more
      // specific integer register operands listed below
 3567 operand rRegI() %{
 3568   constraint(ALLOC_IN_RC(int_reg));
 3569   match(RegI);
 3570   match(xRegI);
 3571   match(eAXRegI);
 3572   match(eBXRegI);
 3573   match(eCXRegI);
 3574   match(eDXRegI);
 3575   match(eDIRegI);
 3576   match(eSIRegI);
 3577 
 3578   format %{ %}
 3579   interface(REG_INTER);
 3580 %}
 3581 
 3582 // Subset of Integer Register: allocated from int_x_reg; accepts the
      // EAX/EBX/ECX/EDX operands but, unlike rRegI, not the EDI/ESI ones
 3583 operand xRegI(rRegI reg) %{
 3584   constraint(ALLOC_IN_RC(int_x_reg));
 3585   match(reg);
 3586   match(eAXRegI);
 3587   match(eBXRegI);
 3588   match(eCXRegI);
 3589   match(eDXRegI);
 3590 
 3591   format %{ %}
 3592   interface(REG_INTER);
 3593 %}
 3594 
 3595 // Special Registers: integer operand pinned to the eax_reg class
 3596 operand eAXRegI(xRegI reg) %{
 3597   constraint(ALLOC_IN_RC(eax_reg));
 3598   match(reg);
 3599   match(rRegI);
 3600 
 3601   format %{ "EAX" %}
 3602   interface(REG_INTER);
 3603 %}
 3604 
 3605 // Special Registers: integer operand pinned to the ebx_reg class
 3606 operand eBXRegI(xRegI reg) %{
 3607   constraint(ALLOC_IN_RC(ebx_reg));
 3608   match(reg);
 3609   match(rRegI);
 3610 
 3611   format %{ "EBX" %}
 3612   interface(REG_INTER);
 3613 %}
 3614 
 3615 operand eCXRegI(xRegI reg) %{
        // Integer operand pinned to the ecx_reg class
 3616   constraint(ALLOC_IN_RC(ecx_reg));
 3617   match(reg);
 3618   match(rRegI);
 3619 
 3620   format %{ "ECX" %}
 3621   interface(REG_INTER);
 3622 %}
 3623 
 3624 operand eDXRegI(xRegI reg) %{
        // Integer operand pinned to the edx_reg class
 3625   constraint(ALLOC_IN_RC(edx_reg));
 3626   match(reg);
 3627   match(rRegI);
 3628 
 3629   format %{ "EDX" %}
 3630   interface(REG_INTER);
 3631 %}
 3632 
 3633 operand eDIRegI(xRegI reg) %{
        // Integer operand pinned to the edi_reg class
 3634   constraint(ALLOC_IN_RC(edi_reg));
 3635   match(reg);
 3636   match(rRegI);
 3637 
 3638   format %{ "EDI" %}
 3639   interface(REG_INTER);
 3640 %}
 3641 
 3642 operand nadxRegI() %{
        // Integer register from nadx_reg; the match list accepts the
        // EBX/ECX/ESI/EDI operands and omits the EAX and EDX ones
 3643   constraint(ALLOC_IN_RC(nadx_reg));
 3644   match(RegI);
 3645   match(eBXRegI);
 3646   match(eCXRegI);
 3647   match(eSIRegI);
 3648   match(eDIRegI);
 3649 
 3650   format %{ %}
 3651   interface(REG_INTER);
 3652 %}
 3653 
 3654 operand ncxRegI() %{
        // Integer register from ncx_reg; the match list accepts the
        // EAX/EDX/ESI/EDI operands and omits the ECX one
 3655   constraint(ALLOC_IN_RC(ncx_reg));
 3656   match(RegI);
 3657   match(eAXRegI);
 3658   match(eDXRegI);
 3659   match(eSIRegI);
 3660   match(eDIRegI);
 3661 
 3662   format %{ %}
 3663   interface(REG_INTER);
 3664 %}
 3665 
 3666 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3667 // //
      // Integer operand pinned to the esi_reg class
 3668 operand eSIRegI(xRegI reg) %{
 3669    constraint(ALLOC_IN_RC(esi_reg));
 3670    match(reg);
 3671    match(rRegI);
 3672 
 3673    format %{ "ESI" %}
 3674    interface(REG_INTER);
 3675 %}
 3676 
 3677 // Pointer Register: allocated from any_reg; also accepts eRegP and the
      // EAX/EBX/ECX/EDI pointer operands
 3678 operand anyRegP() %{
 3679   constraint(ALLOC_IN_RC(any_reg));
 3680   match(RegP);
 3681   match(eAXRegP);
 3682   match(eBXRegP);
 3683   match(eCXRegP);
 3684   match(eDIRegP);
 3685   match(eRegP);
 3686 
 3687   format %{ %}
 3688   interface(REG_INTER);
 3689 %}
 3690 
 3691 operand eRegP() %{
        // Pointer register allocated from int_reg; also accepts the
        // EAX/EBX/ECX/EDI pointer operands
 3692   constraint(ALLOC_IN_RC(int_reg));
 3693   match(RegP);
 3694   match(eAXRegP);
 3695   match(eBXRegP);
 3696   match(eCXRegP);
 3697   match(eDIRegP);
 3698 
 3699   format %{ %}
 3700   interface(REG_INTER);
 3701 %}
 3702 
 3703 operand rRegP() %{
        // Pointer register allocated from int_reg; the constraint and match
        // rules are the same as those of eRegP
 3704   constraint(ALLOC_IN_RC(int_reg));
 3705   match(RegP);
 3706   match(eAXRegP);
 3707   match(eBXRegP);
 3708   match(eCXRegP);
 3709   match(eDIRegP);
 3710 
 3711   format %{ %}
 3712   interface(REG_INTER);
 3713 %}
 3714 
 3715 // On windows95, EBP is not safe to use for implicit null tests.
      // Pointer register allocated from int_reg_no_ebp, with op_cost 100.
 3716 operand eRegP_no_EBP() %{
 3717   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3718   match(RegP);
 3719   match(eAXRegP);
 3720   match(eBXRegP);
 3721   match(eCXRegP);
 3722   match(eDIRegP);
 3723 
 3724   op_cost(100);
 3725   format %{ %}
 3726   interface(REG_INTER);
 3727 %}
 3728 
 3729 operand pRegP() %{
        // Pointer register allocated from p_reg; the match list accepts the
        // EBX/EDX/ESI/EDI pointer operands
 3730   constraint(ALLOC_IN_RC(p_reg));
 3731   match(RegP);
 3732   match(eBXRegP);
 3733   match(eDXRegP);
 3734   match(eSIRegP);
 3735   match(eDIRegP);
 3736 
 3737   format %{ %}
 3738   interface(REG_INTER);
 3739 %}
 3740 
 3741 // Special Registers
 3742 // Return a pointer value: pointer operand pinned to the eax_reg class
 3743 operand eAXRegP(eRegP reg) %{
 3744   constraint(ALLOC_IN_RC(eax_reg));
 3745   match(reg);
 3746   format %{ "EAX" %}
 3747   interface(REG_INTER);
 3748 %}
 3749 
 3750 // Used in AtomicAdd: pointer operand pinned to the ebx_reg class
 3751 operand eBXRegP(eRegP reg) %{
 3752   constraint(ALLOC_IN_RC(ebx_reg));
 3753   match(reg);
 3754   format %{ "EBX" %}
 3755   interface(REG_INTER);
 3756 %}
 3757 
 3758 // Tail-call (interprocedural jump) to interpreter: pointer operand pinned to ecx_reg
 3759 operand eCXRegP(eRegP reg) %{
 3760   constraint(ALLOC_IN_RC(ecx_reg));
 3761   match(reg);
 3762   format %{ "ECX" %}
 3763   interface(REG_INTER);
 3764 %}
 3765 
 3766 operand eDXRegP(eRegP reg) %{
        // Pointer operand pinned to the edx_reg class
 3767   constraint(ALLOC_IN_RC(edx_reg));
 3768   match(reg);
 3769   format %{ "EDX" %}
 3770   interface(REG_INTER);
 3771 %}
 3772 
 3773 operand eSIRegP(eRegP reg) %{
        // Pointer operand pinned to the esi_reg class
 3774   constraint(ALLOC_IN_RC(esi_reg));
 3775   match(reg);
 3776   format %{ "ESI" %}
 3777   interface(REG_INTER);
 3778 %}
 3779 
 3780 // Used in rep stosw: pointer operand pinned to the edi_reg class
 3781 operand eDIRegP(eRegP reg) %{
 3782   constraint(ALLOC_IN_RC(edi_reg));
 3783   match(reg);
 3784   format %{ "EDI" %}
 3785   interface(REG_INTER);
 3786 %}
 3787 
 3788 operand eRegL() %{
        // Long register operand allocated from long_reg; also accepts eADXRegL
 3789   constraint(ALLOC_IN_RC(long_reg));
 3790   match(RegL);
 3791   match(eADXRegL);
 3792 
 3793   format %{ %}
 3794   interface(REG_INTER);
 3795 %}
 3796 
 3797 operand eADXRegL( eRegL reg ) %{
        // Long operand pinned to the eadx_reg class (printed as EDX:EAX)
 3798   constraint(ALLOC_IN_RC(eadx_reg));
 3799   match(reg);
 3800 
 3801   format %{ "EDX:EAX" %}
 3802   interface(REG_INTER);
 3803 %}
 3804 
 3805 operand eBCXRegL( eRegL reg ) %{
        // Long operand pinned to the ebcx_reg class (printed as EBX:ECX)
 3806   constraint(ALLOC_IN_RC(ebcx_reg));
 3807   match(reg);
 3808 
 3809   format %{ "EBX:ECX" %}
 3810   interface(REG_INTER);
 3811 %}
 3812 
 3813 operand eBDPRegL( eRegL reg ) %{
        // Long operand pinned to the ebpd_reg class (printed as EBP:EDI)
 3814   constraint(ALLOC_IN_RC(ebpd_reg));
 3815   match(reg);
 3816 
 3817   format %{ "EBP:EDI" %}
 3818   interface(REG_INTER);
 3819 %}
 3820 // Special case for integer high multiply: same eadx_reg class as eADXRegL,
      // but matched directly against RegL and printed as "EAX" only
 3821 operand eADXRegL_low_only() %{
 3822   constraint(ALLOC_IN_RC(eadx_reg));
 3823   match(RegL);
 3824 
 3825   format %{ "EAX" %}
 3826   interface(REG_INTER);
 3827 %}
 3828 
 3829 // Flags register, used as output of compare instructions (int_flags class)
 3830 operand rFlagsReg() %{
 3831   constraint(ALLOC_IN_RC(int_flags));
 3832   match(RegFlags);
 3833 
 3834   format %{ "EFLAGS" %}
 3835   interface(REG_INTER);
 3836 %}
 3837 
 3838 // Flags register, used as output of compare instructions; the body is
      // the same as rFlagsReg's (int_flags class, printed as "EFLAGS")
 3839 operand eFlagsReg() %{
 3840   constraint(ALLOC_IN_RC(int_flags));
 3841   match(RegFlags);
 3842 
 3843   format %{ "EFLAGS" %}
 3844   interface(REG_INTER);
 3845 %}
 3846 
 3847 // Flags register, used as output of FLOATING POINT compare instructions;
      // same int_flags class as eFlagsReg, but a distinct operand type
 3848 operand eFlagsRegU() %{
 3849   constraint(ALLOC_IN_RC(int_flags));
 3850   match(RegFlags);
 3851 
 3852   format %{ "EFLAGS_U" %}
 3853   interface(REG_INTER);
 3854 %}
 3855 
 3856 operand eFlagsRegUCF() %{
        // Flags operand whose predicate is the constant false.
        // NOTE(review): presumably it is only named explicitly by instruction
        // definitions and never selected by matching - confirm against its users.
 3857   constraint(ALLOC_IN_RC(int_flags));
 3858   match(RegFlags);
 3859   predicate(false);
 3860 
 3861   format %{ "EFLAGS_U_CF" %}
 3862   interface(REG_INTER);
 3863 %}
 3864 
 3865 // Condition Code Register used by long compare.  The three operands below
      // have identical constraints and differ only in name and format string,
      // giving a separate operand type per comparison family (LT/GE, EQ/NE, LE/GT).
 3866 operand flagsReg_long_LTGE() %{
 3867   constraint(ALLOC_IN_RC(int_flags));
 3868   match(RegFlags);
 3869   format %{ "FLAGS_LTGE" %}
 3870   interface(REG_INTER);
 3871 %}
 3872 operand flagsReg_long_EQNE() %{
 3873   constraint(ALLOC_IN_RC(int_flags));
 3874   match(RegFlags);
 3875   format %{ "FLAGS_EQNE" %}
 3876   interface(REG_INTER);
 3877 %}
 3878 operand flagsReg_long_LEGT() %{
 3879   constraint(ALLOC_IN_RC(int_flags));
 3880   match(RegFlags);
 3881   format %{ "FLAGS_LEGT" %}
 3882   interface(REG_INTER);
 3883 %}
 3884 
 3885 // Condition Code Register used by unsigned long compare.  As with the signed
      // variants, the three operands differ only in name and format string.
 3886 operand flagsReg_ulong_LTGE() %{
 3887   constraint(ALLOC_IN_RC(int_flags));
 3888   match(RegFlags);
 3889   format %{ "FLAGS_U_LTGE" %}
 3890   interface(REG_INTER);
 3891 %}
 3892 operand flagsReg_ulong_EQNE() %{
 3893   constraint(ALLOC_IN_RC(int_flags));
 3894   match(RegFlags);
 3895   format %{ "FLAGS_U_EQNE" %}
 3896   interface(REG_INTER);
 3897 %}
 3898 operand flagsReg_ulong_LEGT() %{
 3899   constraint(ALLOC_IN_RC(int_flags));
 3900   match(RegFlags);
 3901   format %{ "FLAGS_U_LEGT" %}
 3902   interface(REG_INTER);
 3903 %}
 3904 
 3905 // Double register operand allocated from fp_dbl_reg; only when UseSSE < 2
 3906 operand regDPR() %{
 3907   predicate( UseSSE < 2 );
 3908   constraint(ALLOC_IN_RC(fp_dbl_reg));
 3909   match(RegD);
 3910   match(regDPR1);
 3911   match(regDPR2);
 3912   format %{ %}
 3913   interface(REG_INTER);
 3914 %}
 3915 
 3916 operand regDPR1(regDPR reg) %{
        // Double operand pinned to the fp_dbl_reg0 class (printed as FPR1)
 3917   predicate( UseSSE < 2 );
 3918   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 3919   match(reg);
 3920   format %{ "FPR1" %}
 3921   interface(REG_INTER);
 3922 %}
 3923 
 3924 operand regDPR2(regDPR reg) %{
        // Double operand pinned to the fp_dbl_reg1 class (printed as FPR2)
 3925   predicate( UseSSE < 2 );
 3926   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 3927   match(reg);
 3928   format %{ "FPR2" %}
 3929   interface(REG_INTER);
 3930 %}
 3931 
 3932 operand regnotDPR1(regDPR reg) %{
        // Double operand allocated from the fp_dbl_notreg0 class
 3933   predicate( UseSSE < 2 );
 3934   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 3935   match(reg);
 3936   format %{ %}
 3937   interface(REG_INTER);
 3938 %}
 3939 
 3940 // Float register operand allocated from fp_flt_reg; predicate is UseSSE < 2
 3941 operand regFPR() %{
 3942   predicate( UseSSE < 2 );
 3943   constraint(ALLOC_IN_RC(fp_flt_reg));
 3944   match(RegF);
 3945   match(regFPR1);
 3946   format %{ %}
 3947   interface(REG_INTER);
 3948 %}
 3949 
 3950 // Float operand pinned to the fp_flt_reg0 class (printed as FPR1)
 3951 operand regFPR1(regFPR reg) %{
 3952   predicate( UseSSE < 2 );
 3953   constraint(ALLOC_IN_RC(fp_flt_reg0));
 3954   match(reg);
 3955   format %{ "FPR1" %}
 3956   interface(REG_INTER);
 3957 %}
 3958 
 3959 // XMM Float register operand (float_reg_legacy class); only when UseSSE>=1
 3960 operand regF() %{
 3961   predicate( UseSSE>=1 );
 3962   constraint(ALLOC_IN_RC(float_reg_legacy));
 3963   match(RegF);
 3964   format %{ %}
 3965   interface(REG_INTER);
 3966 %}
 3967 
 3968 operand legRegF() %{
        // Same predicate and register class (float_reg_legacy) as regF
 3969   predicate( UseSSE>=1 );
 3970   constraint(ALLOC_IN_RC(float_reg_legacy));
 3971   match(RegF);
 3972   format %{ %}
 3973   interface(REG_INTER);
 3974 %}
 3975 
 3976 // Float register operand allocated from float_reg_vl; has no UseSSE predicate
 3977 operand vlRegF() %{
 3978    constraint(ALLOC_IN_RC(float_reg_vl));
 3979    match(RegF);
 3980 
 3981    format %{ %}
 3982    interface(REG_INTER);
 3983 %}
 3984 
 3985 // XMM Double register operand (double_reg_legacy class); only when UseSSE>=2
 3986 operand regD() %{
 3987   predicate( UseSSE>=2 );
 3988   constraint(ALLOC_IN_RC(double_reg_legacy));
 3989   match(RegD);
 3990   format %{ %}
 3991   interface(REG_INTER);
 3992 %}
 3993 
 3994 // Double register operand; same predicate and class (double_reg_legacy) as regD
 3995 operand legRegD() %{
 3996   predicate( UseSSE>=2 );
 3997   constraint(ALLOC_IN_RC(double_reg_legacy));
 3998   match(RegD);
 3999   format %{ %}
 4000   interface(REG_INTER);
 4001 %}
 4002 
 4003 operand vlRegD() %{
         // Double register operand allocated from double_reg_vl; has no UseSSE predicate
 4004    constraint(ALLOC_IN_RC(double_reg_vl));
 4005    match(RegD);
 4006 
 4007    format %{ %}
 4008    interface(REG_INTER);
 4009 %}
 4010 
 4011 //----------Memory Operands----------------------------------------------------
 4012 // Direct Memory Operand: the address is the pointer constant itself
 4013 operand direct(immP addr) %{
 4014   match(addr);
 4015 
 4016   format %{ "[$addr]" %}
 4017   interface(MEMORY_INTER) %{
 4018     base(0xFFFFFFFF);   // NOTE(review): presumably the "no base register" marker - confirm
 4019     index(0x4);         // NOTE(review): presumably the "no index register" encoding - confirm
 4020     scale(0x0);
 4021     disp($addr);
 4022   %}
 4023 %}
 4024 
 4025 // Indirect Memory Operand: [$reg] with zero scale and zero displacement
 4026 operand indirect(eRegP reg) %{
 4027   constraint(ALLOC_IN_RC(int_reg));
 4028   match(reg);
 4029 
 4030   format %{ "[$reg]" %}
 4031   interface(MEMORY_INTER) %{
 4032     base($reg);
 4033     index(0x4);   // same fixed index value as every operand here that has no index register
 4034     scale(0x0);
 4035     disp(0x0);
 4036   %}
 4037 %}
 4038 
 4039 // Indirect Memory Plus Short Offset Operand: [$reg + $off], $off an immI8
 4040 operand indOffset8(eRegP reg, immI8 off) %{
 4041   match(AddP reg off);
 4042 
 4043   format %{ "[$reg + $off]" %}
 4044   interface(MEMORY_INTER) %{
 4045     base($reg);
 4046     index(0x4);
 4047     scale(0x0);
 4048     disp($off);
 4049   %}
 4050 %}
 4051 
 4052 // Indirect Memory Plus Long Offset Operand: [$reg + $off], $off any immI
 4053 operand indOffset32(eRegP reg, immI off) %{
 4054   match(AddP reg off);
 4055 
 4056   format %{ "[$reg + $off]" %}
 4057   interface(MEMORY_INTER) %{
 4058     base($reg);
 4059     index(0x4);
 4060     scale(0x0);
 4061     disp($off);
 4062   %}
 4063 %}
 4064 
 4065 // Pointer Constant Plus Integer Register Operand: matches (AddP off reg),
      // using the integer register as base and the pointer constant as displacement
 4066 operand indOffset32X(rRegI reg, immP off) %{
 4067   match(AddP off reg);
 4068 
 4069   format %{ "[$reg + $off]" %}
 4070   interface(MEMORY_INTER) %{
 4071     base($reg);
 4072     index(0x4);
 4073     scale(0x0);
 4074     disp($off);
 4075   %}
 4076 %}
 4077 
 4078 // Indirect Memory Plus Index Register Plus Offset Operand: [$reg + $ireg + $off], unscaled
 4079 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4080   match(AddP (AddP reg ireg) off);
 4081 
 4082   op_cost(10);
 4083   format %{"[$reg + $off + $ireg]" %}
 4084   interface(MEMORY_INTER) %{
 4085     base($reg);
 4086     index($ireg);
 4087     scale(0x0);
 4088     disp($off);
 4089   %}
 4090 %}
 4091 
 4092 // Indirect Memory Plus Index Register Operand: [$reg + $ireg], no scale, no offset
 4093 operand indIndex(eRegP reg, rRegI ireg) %{
 4094   match(AddP reg ireg);
 4095 
 4096   op_cost(10);
 4097   format %{"[$reg + $ireg]" %}
 4098   interface(MEMORY_INTER) %{
 4099     base($reg);
 4100     index($ireg);
 4101     scale(0x0);
 4102     disp(0x0);
 4103   %}
 4104 %}
 4105 
 4106 // // -------------------------------------------------------------------------
 4107 // // 486 architecture doesn't support "scale * index + offset" with out a base
 4108 // // -------------------------------------------------------------------------
 4109 // // Scaled Memory Operands
 4110 // // Indirect Memory Times Scale Plus Offset Operand
 4111 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4112 //   match(AddP off (LShiftI ireg scale));
 4113 //
 4114 //   op_cost(10);
 4115 //   format %{"[$off + $ireg << $scale]" %}
 4116 //   interface(MEMORY_INTER) %{
 4117 //     base(0x4);
 4118 //     index($ireg);
 4119 //     scale($scale);
 4120 //     disp($off);
 4121 //   %}
 4122 // %}
 4123 
 4124 // Indirect Memory Times Scale Plus Index Register: [$reg + ($ireg << $scale)];
      // $scale is an immI2, i.e. a shift count in [0, 3]
 4125 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4126   match(AddP reg (LShiftI ireg scale));
 4127 
 4128   op_cost(10);
 4129   format %{"[$reg + $ireg << $scale]" %}
 4130   interface(MEMORY_INTER) %{
 4131     base($reg);
 4132     index($ireg);
 4133     scale($scale);
 4134     disp(0x0);
 4135   %}
 4136 %}
 4137 
 4138 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand:
      // [$reg + ($ireg << $scale) + $off]
 4139 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4140   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4141 
 4142   op_cost(10);
 4143   format %{"[$reg + $off + $ireg << $scale]" %}
 4144   interface(MEMORY_INTER) %{
 4145     base($reg);
 4146     index($ireg);
 4147     scale($scale);
 4148     disp($off);
 4149   %}
 4150 %}
 4151 
 4152 //----------Load Long Memory Operands------------------------------------------
 4153 // The load-long idiom will use its address expression again after loading
 4154 // the first word of the long.  If the load-long destination overlaps with
 4155 // registers used in the addressing expression, the 2nd half will be loaded
 4156 // from a clobbered address.  Fix this by requiring that load-long use
 4157 // address registers that do not overlap with the load-long target.
 4158 
 4159 // load-long support: pointer register operand whose allocation is constrained to esi_reg
 4160 operand load_long_RegP() %{
 4161   constraint(ALLOC_IN_RC(esi_reg));
 4162   match(RegP);
 4163   match(eSIRegP);
 4164   op_cost(100);
 4165   format %{  %}
 4166   interface(REG_INTER);
 4167 %}
 4168 
 4169 // Indirect Memory Operand Long: [$reg] where $reg is a load_long_RegP
 4170 operand load_long_indirect(load_long_RegP reg) %{
 4171   constraint(ALLOC_IN_RC(esi_reg));
 4172   match(reg);
 4173
 4174   format %{ "[$reg]" %}
 4175   interface(MEMORY_INTER) %{
 4176     base($reg);
 4177     index(0x4);  // no index
 4178     scale(0x0);  // no scale
 4179     disp(0x0);   // no displacement
 4180   %}
 4181 %}
 4182 
 4183 // Indirect Memory Plus Long Offset Operand (load-long form): [$reg + $off], $reg is a load_long_RegP
 4184 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4185   match(AddP reg off);
 4186
 4187   format %{ "[$reg + $off]" %}
 4188   interface(MEMORY_INTER) %{
 4189     base($reg);
 4190     index(0x4);  // no index
 4191     scale(0x0);  // no scale
 4192     disp($off);  // the immediate offset becomes the displacement
 4193   %}
 4194 %}
 4195 
 4196 opclass load_long_memory(load_long_indirect, load_long_indOffset32);  // the two load_long_* memory operands
 4197 
 4198 
 4199 //----------Special Memory Operands--------------------------------------------
 4200 // Stack Slot Operand - This operand is used for loading and storing temporary
 4201 //                      values on the stack where a match requires a value to
 4202 //                      flow through memory.
 4203 operand stackSlotP(sRegP reg) %{
 4204   constraint(ALLOC_IN_RC(stack_slots));
 4205   // No match rule: this sRegP stack-slot operand is only generated during matching
 4206   format %{ "[$reg]" %}
 4207   interface(MEMORY_INTER) %{
 4208     base(0x4);   // ESP
 4209     index(0x4);  // No Index
 4210     scale(0x0);  // No Scale
 4211     disp($reg);  // Stack Offset (supplied through $reg)
 4212   %}
 4213 %}
 4214 
 4215 operand stackSlotI(sRegI reg) %{
 4216   constraint(ALLOC_IN_RC(stack_slots));
 4217   // No match rule: this sRegI stack-slot operand is only generated during matching
 4218   format %{ "[$reg]" %}
 4219   interface(MEMORY_INTER) %{
 4220     base(0x4);   // ESP
 4221     index(0x4);  // No Index
 4222     scale(0x0);  // No Scale
 4223     disp($reg);  // Stack Offset (supplied through $reg)
 4224   %}
 4225 %}
 4226 
 4227 operand stackSlotF(sRegF reg) %{
 4228   constraint(ALLOC_IN_RC(stack_slots));
 4229   // No match rule: this sRegF stack-slot operand is only generated during matching
 4230   format %{ "[$reg]" %}
 4231   interface(MEMORY_INTER) %{
 4232     base(0x4);   // ESP
 4233     index(0x4);  // No Index
 4234     scale(0x0);  // No Scale
 4235     disp($reg);  // Stack Offset (supplied through $reg)
 4236   %}
 4237 %}
 4238 
 4239 operand stackSlotD(sRegD reg) %{
 4240   constraint(ALLOC_IN_RC(stack_slots));
 4241   // No match rule: this sRegD stack-slot operand is only generated during matching
 4242   format %{ "[$reg]" %}
 4243   interface(MEMORY_INTER) %{
 4244     base(0x4);   // ESP
 4245     index(0x4);  // No Index
 4246     scale(0x0);  // No Scale
 4247     disp($reg);  // Stack Offset (supplied through $reg)
 4248   %}
 4249 %}
 4250 
 4251 operand stackSlotL(sRegL reg) %{
 4252   constraint(ALLOC_IN_RC(stack_slots));
 4253   // No match rule: this sRegL stack-slot operand is only generated during matching
 4254   format %{ "[$reg]" %}
 4255   interface(MEMORY_INTER) %{
 4256     base(0x4);   // ESP
 4257     index(0x4);  // No Index
 4258     scale(0x0);  // No Scale
 4259     disp($reg);  // Stack Offset (supplied through $reg)
 4260   %}
 4261 %}
 4262 
 4263 //----------Conditional Branch Operands----------------------------------------
 4264 // Comparison Op  - This is the operation of the comparison, and is limited to
 4265 //                  the following set of codes:
 4266 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4267 //
 4268 // Other attributes of the comparison, such as unsignedness, are specified
 4269 // by the comparison instruction that sets a condition code flags register.
 4270 // That result is represented by a flags operand whose subtype is appropriate
 4271 // to the unsignedness (etc.) of the comparison.
 4272 //
 4273 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4274 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4275 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4276 
 4277 // Comparison Code: each Bool test maps to an encoding plus a signed-compare mnemonic (l, ge, le, g)
 4278 operand cmpOp() %{
 4279   match(Bool);
 4280
 4281   format %{ "" %}
 4282   interface(COND_INTER) %{
 4283     equal(0x4, "e");
 4284     not_equal(0x5, "ne");
 4285     less(0xC, "l");
 4286     greater_equal(0xD, "ge");
 4287     less_equal(0xE, "le");
 4288     greater(0xF, "g");
 4289     overflow(0x0, "o");
 4290     no_overflow(0x1, "no");
 4291   %}
 4292 %}
 4293 
 4294 // Comparison Code, unsigned compare.  Used by FP also, with
 4295 // C2 (unordered) turned into GT or LT already.  The other bits
 4296 // C0 and C3 are turned into Carry & Zero flags.
 4297 operand cmpOpU() %{
 4298   match(Bool);
 4299
 4300   format %{ "" %}
 4301   interface(COND_INTER) %{
 4302     equal(0x4, "e");
 4303     not_equal(0x5, "ne");
 4304     less(0x2, "b");            // below
 4305     greater_equal(0x3, "nb");  // not below
 4306     less_equal(0x6, "be");     // below or equal
 4307     greater(0x7, "nbe");       // not below or equal
 4308     overflow(0x0, "o");
 4309     no_overflow(0x1, "no");
 4310   %}
 4311 %}
 4312 
 4313 // Floating comparisons that don't require any fixup for the unordered case (lt, ge, le, gt only)
 4314 operand cmpOpUCF() %{
 4315   match(Bool);
 4316   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4317             n->as_Bool()->_test._test == BoolTest::ge ||
 4318             n->as_Bool()->_test._test == BoolTest::le ||
 4319             n->as_Bool()->_test._test == BoolTest::gt);
 4320   format %{ "" %}
 4321   interface(COND_INTER) %{
 4322     equal(0x4, "e");
 4323     not_equal(0x5, "ne");
 4324     less(0x2, "b");            // same unsigned-style encodings as cmpOpU
 4325     greater_equal(0x3, "nb");
 4326     less_equal(0x6, "be");
 4327     greater(0x7, "nbe");
 4328     overflow(0x0, "o");
 4329     no_overflow(0x1, "no");
 4330   %}
 4331 %}
 4332 
 4333 
 4334 // Floating comparisons that can be fixed up with extra conditional jumps (ne and eq only)
 4335 operand cmpOpUCF2() %{
 4336   match(Bool);
 4337   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4338             n->as_Bool()->_test._test == BoolTest::eq);
 4339   format %{ "" %}
 4340   interface(COND_INTER) %{
 4341     equal(0x4, "e");
 4342     not_equal(0x5, "ne");
 4343     less(0x2, "b");            // same unsigned-style encodings as cmpOpU
 4344     greater_equal(0x3, "nb");
 4345     less_equal(0x6, "be");
 4346     greater(0x7, "nbe");
 4347     overflow(0x0, "o");
 4348     no_overflow(0x1, "no");
 4349   %}
 4350 %}
 4351 
 4352 // Comparison Code for FP conditional move; the predicate rejects overflow / no_overflow tests
 4353 operand cmpOp_fcmov() %{
 4354   match(Bool);
 4355
 4356   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4357             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4358   format %{ "" %}
 4359   interface(COND_INTER) %{
 4360     equal        (0x0C8);
 4361     not_equal    (0x1C8);
 4362     less         (0x0C0);
 4363     greater_equal(0x1C0);
 4364     less_equal   (0x0D0);
 4365     greater      (0x1D0);
 4366     overflow(0x0, "o"); // not really supported by the instruction (excluded by the predicate)
 4367     no_overflow(0x1, "no"); // not really supported by the instruction (excluded by the predicate)
 4368   %}
 4369 %}
 4370 
 4371 // Comparison Code used in long compares: less/greater and their or-equal forms swap encodings
 4372 operand cmpOp_commute() %{
 4373   match(Bool);
 4374
 4375   format %{ "" %}
 4376   interface(COND_INTER) %{
 4377     equal(0x4, "e");
 4378     not_equal(0x5, "ne");
 4379     less(0xF, "g");            // less is emitted as "g"
 4380     greater_equal(0xE, "le");  // greater_equal is emitted as "le"
 4381     less_equal(0xD, "ge");     // less_equal is emitted as "ge"
 4382     greater(0xC, "l");         // greater is emitted as "l"
 4383     overflow(0x0, "o");
 4384     no_overflow(0x1, "no");
 4385   %}
 4386 %}
 4387 
 4388 // Comparison Code used in unsigned long compares: less/greater and their or-equal forms swap encodings
 4389 operand cmpOpU_commute() %{
 4390   match(Bool);
 4391
 4392   format %{ "" %}
 4393   interface(COND_INTER) %{
 4394     equal(0x4, "e");
 4395     not_equal(0x5, "ne");
 4396     less(0x7, "nbe");          // less is emitted as "nbe"
 4397     greater_equal(0x6, "be");  // greater_equal is emitted as "be"
 4398     less_equal(0x3, "nb");     // less_equal is emitted as "nb"
 4399     greater(0x2, "b");         // greater is emitted as "b"
 4400     overflow(0x0, "o");
 4401     no_overflow(0x1, "no");
 4402   %}
 4403 %}
 4404 
 4405 //----------OPERAND CLASSES----------------------------------------------------
 4406 // Operand Classes are groups of operands that are used to simplify
 4407 // instruction definitions by not requiring the AD writer to specify separate
 4408 // instructions for every form of operand when the instruction accepts
 4409 // multiple operand types with the same basic encoding and format.  The classic
 4410 // case of this is memory operands.
 4411 
 4412 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4413                indIndex, indIndexScale, indIndexScaleOffset);  // long_memory's operands plus indOffset32X
 4414 
 4415 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4416 // This means some kind of offset is always required and you cannot use
 4417 // an oop as the offset (done when working on static globals).
 4418 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4419                     indIndex, indIndexScale, indIndexScaleOffset);  // same list as memory, minus indOffset32X
 4420 
 4421 
 4422 //----------PIPELINE-----------------------------------------------------------
 4423 // Rules which define the behavior of the target architecture's pipeline.
 4424 pipeline %{
 4425 
 4426 //----------ATTRIBUTES---------------------------------------------------------
 4427 attributes %{
 4428   variable_size_instructions;        // Variable size instructions
 4429   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
 4430   instruction_unit_size = 1;         // An instruction unit is 1 byte long
 4431   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4432   instruction_fetch_units = 1;       // of 16 bytes
 4433
 4434   // List of nop instructions
 4435   nops( MachNop );
 4436 %}
 4437 
 4438 //----------RESOURCES----------------------------------------------------------
 4439 // Resources are the functional units available to the machine
 4440 
 4441 // Generic P2/P3 pipeline
 4442 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4443 // 3 instructions decoded per cycle.
 4444 // 2 load/store ops per cycle, 1 branch, 1 FPU,
 4445 // 2 ALU ops, only ALU0 handles mul/div instructions.
 4446 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4447            MS0, MS1, MEM = MS0 | MS1,
 4448            BR, FPU,
 4449            ALU0, ALU1, ALU = ALU0 | ALU1 );  // DECODE, MEM and ALU name groups of the units listed before them
 4450 
 4451 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4452 // Pipeline Description specifies the stages in the machine's pipeline
 4453 
 4454 // Generic P2/P3 pipeline: six stages, S0 through S5
 4455 pipe_desc(S0, S1, S2, S3, S4, S5);
 4456 
 4457 //----------PIPELINE CLASSES---------------------------------------------------
 4458 // Pipeline Classes describe the stages in which input and output are
 4459 // referenced by the hardware pipeline.
 4460 
 4461 // Naming convention: ialu or fpu
 4462 // Then: _reg
 4463 // Then: _reg if there is a 2nd register
 4464 // Then: _long if it's a pair of instructions implementing a long
 4465 // Then: _fat if it requires the big decoder
 4466 //   Or: _mem if it requires the big decoder and a memory unit.
 4467 
 4468 // Integer ALU reg operation: dst is read in S3 and written in S4
 4469 pipe_class ialu_reg(rRegI dst) %{
 4470     single_instruction;
 4471     dst    : S4(write);
 4472     dst    : S3(read);
 4473     DECODE : S0;        // any decoder
 4474     ALU    : S3;        // any alu
 4475 %}
 4476 
 4477 // Long ALU reg operation: a pair of instructions implementing a long
 4478 pipe_class ialu_reg_long(eRegL dst) %{
 4479     instruction_count(2);
 4480     dst    : S4(write);
 4481     dst    : S3(read);
 4482     DECODE : S0(2);     // any 2 decoders
 4483     ALU    : S3(2);     // both alus
 4484 %}
 4485 
 4486 // Integer ALU reg operation using big decoder (D0 instead of any decoder)
 4487 pipe_class ialu_reg_fat(rRegI dst) %{
 4488     single_instruction;
 4489     dst    : S4(write);
 4490     dst    : S3(read);
 4491     D0     : S0;        // big decoder only
 4492     ALU    : S3;        // any alu
 4493 %}
 4494 
 4495 // Long ALU reg operation using big decoder for both instructions of the pair
 4496 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4497     instruction_count(2);
 4498     dst    : S4(write);
 4499     dst    : S3(read);
 4500     D0     : S0(2);     // big decoder only; twice
 4501     ALU    : S3(2);     // any 2 alus
 4502 %}
 4503 
 4504 // Integer ALU reg-reg operation: src is read in S3, dst is written in S4
 4505 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4506     single_instruction;
 4507     dst    : S4(write);
 4508     src    : S3(read);
 4509     DECODE : S0;        // any decoder
 4510     ALU    : S3;        // any alu
 4511 %}
 4512 
 4513 // Long ALU reg-reg operation: a pair of instructions implementing a long
 4514 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4515     instruction_count(2);
 4516     dst    : S4(write);
 4517     src    : S3(read);
 4518     DECODE : S0(2);     // any 2 decoders
 4519     ALU    : S3(2);     // both alus
 4520 %}
 4521 
 4522 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand)
 4523 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4524     single_instruction;
 4525     dst    : S4(write);
 4526     src    : S3(read);
 4527     D0     : S0;        // big decoder only
 4528     ALU    : S3;        // any alu
 4529 %}
 4530 
 4531 // Long ALU reg-reg operation using big decoder for both instructions of the pair
 4532 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4533     instruction_count(2);
 4534     dst    : S4(write);
 4535     src    : S3(read);
 4536     D0     : S0(2);     // big decoder only; twice
 4537     ALU    : S3(2);     // both alus
 4538 %}
 4539 
 4540 // Integer ALU reg-mem operation: mem read in S3, ALU in S4, dst written in S5
 4541 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4542     single_instruction;
 4543     dst    : S5(write);
 4544     mem    : S3(read);
 4545     D0     : S0;        // big decoder only
 4546     ALU    : S4;        // any alu
 4547     MEM    : S3;        // any mem
 4548 %}
 4549 
 4550 // Long ALU reg-mem operation: a pair of instructions; mem is a load_long_memory operand
 4551 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4552     instruction_count(2);
 4553     dst    : S5(write);
 4554     mem    : S3(read);
 4555     D0     : S0(2);     // big decoder only; twice
 4556     ALU    : S4(2);     // any 2 alus
 4557     MEM    : S3(2);     // both mems
 4558 %}
 4559 
 4560 // Integer mem operation (prefetch): big decoder plus a memory unit, no ALU and no register result
 4561 pipe_class ialu_mem(memory mem)
 4562 %{
 4563     single_instruction;
 4564     mem    : S3(read);
 4565     D0     : S0;        // big decoder only
 4566     MEM    : S3;        // any mem
 4567 %}
 4568 
 4569 // Integer Store to Memory from a register: src is read in S5
 4570 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4571     single_instruction;
 4572     mem    : S3(read);
 4573     src    : S5(read);
 4574     D0     : S0;        // big decoder only
 4575     ALU    : S4;        // any alu
 4576     MEM    : S3;        // any mem
 4577 %}
 4578 
 4579 // Long Store to Memory: a pair of instructions implementing a long
 4580 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4581     instruction_count(2);
 4582     mem    : S3(read);
 4583     src    : S5(read);
 4584     D0     : S0(2);     // big decoder only; twice
 4585     ALU    : S4(2);     // any 2 alus
 4586     MEM    : S3(2);     // Both mems
 4587 %}
 4588 
 4589 // Integer Store to Memory, _imm variant: only the memory operand is declared (no register source)
 4590 pipe_class ialu_mem_imm(memory mem) %{
 4591     single_instruction;
 4592     mem    : S3(read);
 4593     D0     : S0;        // big decoder only
 4594     ALU    : S4;        // any alu
 4595     MEM    : S3;        // any mem
 4596 %}
 4597 
 4598 // Integer ALU0 reg-reg operation: restricted to ALU0 and the big decoder
 4599 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4600     single_instruction;
 4601     dst    : S4(write);
 4602     src    : S3(read);
 4603     D0     : S0;        // Big decoder only
 4604     ALU0   : S3;        // only alu0
 4605 %}
 4606 
 4607 // Integer ALU0 reg-mem operation: same stages as ialu_reg_mem, but restricted to ALU0
 4608 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4609     single_instruction;
 4610     dst    : S5(write);
 4611     mem    : S3(read);
 4612     D0     : S0;        // big decoder only
 4613     ALU0   : S4;        // ALU0 only
 4614     MEM    : S3;        // any mem
 4615 %}
 4616 
 4617 // Integer ALU reg-reg operation whose result is the flags register: cr is written in S4
 4618 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4619     single_instruction;
 4620     cr     : S4(write);
 4621     src1   : S3(read);
 4622     src2   : S3(read);
 4623     DECODE : S0;        // any decoder
 4624     ALU    : S3;        // any alu
 4625 %}
 4626 
 4627 // Integer ALU reg-imm operation whose result is the flags register: cr is written in S4
 4628 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4629     single_instruction;
 4630     cr     : S4(write);
 4631     src1   : S3(read);
 4632     DECODE : S0;        // any decoder
 4633     ALU    : S3;        // any alu
 4634 %}
 4635 
 4636 // Integer ALU reg-mem operation whose result is the flags register: cr is written in S4
 4637 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4638     single_instruction;
 4639     cr     : S4(write);
 4640     src1   : S3(read);
 4641     src2   : S3(read);
 4642     D0     : S0;        // big decoder only
 4643     ALU    : S4;        // any alu
 4644     MEM    : S3;        // any mem
 4645 %}
 4646 
 4647 // Conditional move reg-reg (4-instruction sequence): reads p, q and y; declares no register write
 4648 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4649     instruction_count(4);
 4650     y      : S4(read);
 4651     q      : S3(read);
 4652     p      : S3(read);
 4653     DECODE : S0(4);     // any decoder
 4654 %}
 4655 
 4656 // Conditional move reg-reg: src and the flags are read in S3, dst is written in S4
 4657 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4658     single_instruction;
 4659     dst    : S4(write);
 4660     src    : S3(read);
 4661     cr     : S3(read);
 4662     DECODE : S0;        // any decoder
 4663 %}
 4664 
 4665 // Conditional move reg-mem: src and the flags are read in S3, dst is written in S4
 4666 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4667     single_instruction;
 4668     dst    : S4(write);
 4669     src    : S3(read);
 4670     cr     : S3(read);
 4671     DECODE : S0;        // any decoder
 4672     MEM    : S3;        // any mem
 4673 %}
 4674 
 4675 // Conditional move reg-reg long: declared single_instruction, yet reserves 2 decoders
 4676 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4677     single_instruction;
 4678     dst    : S4(write);
 4679     src    : S3(read);
 4680     cr     : S3(read);
 4681     DECODE : S0(2);     // any 2 decoders
 4682 %}
 4683 
 4684 // Conditional move double reg-reg: dst is a regDPR1, src any regDPR
 4685 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4686     single_instruction;
 4687     dst    : S4(write);
 4688     src    : S3(read);
 4689     cr     : S3(read);
 4690     DECODE : S0;        // any decoder
 4691 %}
 4692 
 4693 // Float single-register operation (2 instructions): dst is only declared as read, in S3
 4694 pipe_class fpu_reg(regDPR dst) %{
 4695     instruction_count(2);
 4696     dst    : S3(read);
 4697     DECODE : S0(2);     // any 2 decoders
 4698     FPU    : S3;
 4699 %}
 4700 
 4701 // Float reg-reg operation (2 instructions): src read in S3, dst written in S4
 4702 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4703     instruction_count(2);
 4704     dst    : S4(write);
 4705     src    : S3(read);
 4706     DECODE : S0(2);     // any 2 decoders
 4707     FPU    : S3;
 4708 %}
 4709 
 4710 // Float reg-reg-reg operation (3 instructions): two register sources, one register result
 4711 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4712     instruction_count(3);
 4713     dst    : S4(write);
 4714     src1   : S3(read);
 4715     src2   : S3(read);
 4716     DECODE : S0(3);     // any 3 decoders
 4717     FPU    : S3(2);
 4718 %}
 4719 
 4720 // Float reg-reg-reg-reg operation (4 instructions): three register sources, one register result
 4721 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4722     instruction_count(4);
 4723     dst    : S4(write);
 4724     src1   : S3(read);
 4725     src2   : S3(read);
 4726     src3   : S3(read);
 4727     DECODE : S0(4);     // any decoder, count 4
 4728     FPU    : S3(2);
 4729 %}
 4730 
 4731 // Float reg-mem-reg-reg operation (4 instructions): src1 is a memory operand
 4732 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4733     instruction_count(4);
 4734     dst    : S4(write);
 4735     src1   : S3(read);
 4736     src2   : S3(read);
 4737     src3   : S3(read);
 4738     DECODE : S1(3);     // any 3 decoders
 4739     D0     : S0;        // Big decoder only
 4740     FPU    : S3(2);
 4741     MEM    : S3;        // any mem
 4742 %}
 4743 
 4744 // Float reg-mem operation (2 instructions): mem read in S3, FPU in S4, dst written in S5
 4745 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4746     instruction_count(2);
 4747     dst    : S5(write);
 4748     mem    : S3(read);
 4749     D0     : S0;        // big decoder only
 4750     DECODE : S1;        // any decoder for FPU POP
 4751     FPU    : S4;
 4752     MEM    : S3;        // any mem
 4753 %}
 4754 
 4755 // Float reg-reg-mem operation (3 instructions): one register source plus one memory source
 4756 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4757     instruction_count(3);
 4758     dst    : S5(write);
 4759     src1   : S3(read);
 4760     mem    : S3(read);
 4761     D0     : S0;        // big decoder only
 4762     DECODE : S1(2);     // any decoder for FPU POP
 4763     FPU    : S4;
 4764     MEM    : S3;        // any mem
 4765 %}
 4766 
 4767 // Float mem-reg operation (2 instructions): src is read in S5, no register result
 4768 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4769     instruction_count(2);
 4770     src    : S5(read);
 4771     mem    : S3(read);
 4772     DECODE : S0;        // any decoder for FPU PUSH
 4773     D0     : S1;        // big decoder only
 4774     FPU    : S4;
 4775     MEM    : S3;        // any mem
 4776 %}
 4777 
 4778 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4779     instruction_count(3);  // Float mem-reg-reg operation: two register sources, memory operand
 4780     src1   : S3(read);
 4781     src2   : S3(read);
 4782     mem    : S3(read);
 4783     DECODE : S0(2);     // any decoder for FPU PUSH
 4784     D0     : S1;        // big decoder only
 4785     FPU    : S4;
 4786     MEM    : S3;        // any mem
 4787 %}
 4788 
 4789 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4790     instruction_count(3);  // Float mem-reg-mem operation: register and memory sources, memory operand
 4791     src1   : S3(read);
 4792     src2   : S3(read);
 4793     mem    : S4(read);
 4794     DECODE : S0;        // any decoder for FPU PUSH
 4795     D0     : S0(2);     // big decoder only
 4796     FPU    : S4;
 4797     MEM    : S3(2);     // any mem
 4798 %}
 4799 
 4800 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4801     instruction_count(2);  // Float mem-mem operation; no FPU resource is reserved
 4802     src1   : S3(read);
 4803     dst    : S4(read);
 4804     D0     : S0(2);     // big decoder only
 4805     MEM    : S3(2);     // any mem
 4806 %}
 4807 
 4808 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4809     instruction_count(3);  // Float mem-mem-mem operation: two memory sources, memory dst
 4810     src1   : S3(read);
 4811     src2   : S3(read);
 4812     dst    : S4(read);
 4813     D0     : S0(3);     // big decoder only
 4814     FPU    : S4;
 4815     MEM    : S3(3);     // any mem
 4816 %}
 4817 
 4818 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4819     instruction_count(3);  // Float mem-reg-con operation; presumably "con" is a constant operand - confirm at use sites
 4820     src1   : S4(read);
 4821     mem    : S4(read);
 4822     DECODE : S0;        // any decoder for FPU PUSH
 4823     D0     : S0(2);     // big decoder only
 4824     FPU    : S4;
 4825     MEM    : S3(2);     // any mem
 4826 %}
 4827 
 4828 // Float load constant (2 instructions): no register inputs, dst written in S5
 4829 pipe_class fpu_reg_con(regDPR dst) %{
 4830     instruction_count(2);
 4831     dst    : S5(write);
 4832     D0     : S0;        // big decoder only for the load
 4833     DECODE : S1;        // any decoder for FPU POP
 4834     FPU    : S4;
 4835     MEM    : S3;        // any mem
 4836 %}
 4837 
 4838 // Float load constant with an additional register source (3 instructions)
 4839 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4840     instruction_count(3);
 4841     dst    : S5(write);
 4842     src    : S3(read);
 4843     D0     : S0;        // big decoder only for the load
 4844     DECODE : S1(2);     // any decoder for FPU POP
 4845     FPU    : S4;
 4846     MEM    : S3;        // any mem
 4847 %}
 4848 
 4849 // Unconditional branch: reserves only the branch unit, in S3
 4850 pipe_class pipe_jmp( label labl ) %{
 4851     single_instruction;
 4852     BR   : S3;
 4853 %}
 4854 
 4855 // Conditional branch: the flags are read in S1, the branch unit is used in S3
 4856 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4857     single_instruction;
 4858     cr    : S1(read);
 4859     BR    : S3;
 4860 %}
 4861 
 4862 // Allocation idiom (cmpxchg): one serialized instruction with a fixed latency of 6
 4863 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4864     instruction_count(1); force_serialization;
 4865     fixed_latency(6);
 4866     heap_ptr : S3(read);
 4867     DECODE   : S0(3);
 4868     D0       : S2;
 4869     MEM      : S3;
 4870     ALU      : S3(2);
 4871     dst      : S5(write);
 4872     BR       : S5;
 4873 %}
 4874 
 4875 // Generic big/slow expanded idiom: 10 instructions over multiple bundles, serialized, latency 100
 4876 pipe_class pipe_slow(  ) %{
 4877     instruction_count(10); multiple_bundles; force_serialization;
 4878     fixed_latency(100);
 4879     D0  : S0(2);
 4880     MEM : S3(2);
 4881 %}
 4882 
 4883 // The real do-nothing guy: zero instructions and no resources reserved
 4884 pipe_class empty( ) %{
 4885     instruction_count(0);
 4886 %}
 4887 
 4888 // Define the class for the Nop node: MachNop is given the empty pipe_class
 4889 define %{
 4890    MachNop = empty;
 4891 %}
 4892 
 4893 %}
 4894 
 4895 //----------INSTRUCTIONS-------------------------------------------------------
 4896 //
 4897 // match      -- States which machine-independent subtree may be replaced
 4898 //               by this instruction.
 4899 // ins_cost   -- The estimated cost of this instruction is used by instruction
 4900 //               selection to identify a minimum cost tree of machine
 4901 //               instructions that matches a tree of machine-independent
 4902 //               instructions.
 4903 // format     -- A string providing the disassembly for this instruction.
 4904 //               The value of an instruction's operand may be inserted
 4905 //               by referring to it with a '$' prefix.
 4906 // opcode     -- Three instruction opcodes may be provided.  These are referred
 4907 //               to within an encode class as $primary, $secondary, and $tertiary
 4908 //               respectively.  The primary opcode is commonly used to
 4909 //               indicate the type of machine instruction, while secondary
 4910 //               and tertiary are often used for prefix options or addressing
 4911 //               modes.
 4912 // ins_encode -- A list of encode classes with parameters. The encode class
 4913 //               name must have been defined in an 'enc_class' specification
 4914 //               in the encode section of the architecture description.
 4915 
 4916 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 4917 // Load Float: dummy regF -> legRegF move; its encoding must never be reached
 4918 instruct MoveF2LEG(legRegF dst, regF src) %{
 4919   match(Set dst src);
 4920   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4921   ins_encode %{
 4922     ShouldNotReachHere();
 4923   %}
 4924   ins_pipe( fpu_reg_reg );
 4925 %}
 4926 
 4927 // Load Float: dummy legRegF -> regF move; its encoding must never be reached
 4928 instruct MoveLEG2F(regF dst, legRegF src) %{
 4929   match(Set dst src);
 4930   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 4931   ins_encode %{
 4932     ShouldNotReachHere();
 4933   %}
 4934   ins_pipe( fpu_reg_reg );
 4935 %}
 4936 
 4937 // Load Float: dummy regF -> vlRegF move; its encoding must never be reached
 4938 instruct MoveF2VL(vlRegF dst, regF src) %{
 4939   match(Set dst src);
 4940   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4941   ins_encode %{
 4942     ShouldNotReachHere();
 4943   %}
 4944   ins_pipe( fpu_reg_reg );
 4945 %}
 4946 
 4947 // Load Float: dummy vlRegF -> regF move; its encoding must never be reached
 4948 instruct MoveVL2F(regF dst, vlRegF src) %{
 4949   match(Set dst src);
 4950   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 4951   ins_encode %{
 4952     ShouldNotReachHere();
 4953   %}
 4954   ins_pipe( fpu_reg_reg );
 4955 %}
 4956 
 4957 
 4958 
 4959 // Load Double: dummy regD -> legRegD move; its encoding must never be reached
 4960 instruct MoveD2LEG(legRegD dst, regD src) %{
 4961   match(Set dst src);
 4962   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4963   ins_encode %{
 4964     ShouldNotReachHere();
 4965   %}
 4966   ins_pipe( fpu_reg_reg );
 4967 %}
 4968 
 4969 // Load Double: dummy legRegD -> regD move; its encoding must never be reached
 4970 instruct MoveLEG2D(regD dst, legRegD src) %{
 4971   match(Set dst src);
 4972   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 4973   ins_encode %{
 4974     ShouldNotReachHere();
 4975   %}
 4976   ins_pipe( fpu_reg_reg );
 4977 %}
 4978 
 4979 // Load Double: dummy regD -> vlRegD move; its encoding must never be reached
 4980 instruct MoveD2VL(vlRegD dst, regD src) %{
 4981   match(Set dst src);
 4982   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4983   ins_encode %{
 4984     ShouldNotReachHere();
 4985   %}
 4986   ins_pipe( fpu_reg_reg );
 4987 %}
 4988 
 4989 // Load Double: dummy vlRegD -> regD move; its encoding must never be reached
 4990 instruct MoveVL2D(regD dst, vlRegD src) %{
 4991   match(Set dst src);
 4992   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 4993   ins_encode %{
 4994     ShouldNotReachHere();
 4995   %}
 4996   ins_pipe( fpu_reg_reg );
 4997 %}
 4998 
 4999 //----------BSWAP-Instruction--------------------------------------------------
 5000 instruct bytes_reverse_int(rRegI dst) %{
 5001   match(Set dst (ReverseBytesI dst));  // in place: dst is both the input and the result
 5002
 5003   format %{ "BSWAP  $dst" %}
 5004   opcode(0x0F, 0xC8);                  // primary 0x0F, secondary 0xC8
 5005   ins_encode( OpcP, OpcSReg(dst) );
 5006   ins_pipe( ialu_reg );
 5007 %}
 5008 
 5009 instruct bytes_reverse_long(eRegL dst) %{
 5010   match(Set dst (ReverseBytesL dst));  // in place: dst is both the input and the result
 5011
 5012   format %{ "BSWAP  $dst.lo\n\t"
 5013             "BSWAP  $dst.hi\n\t"
 5014             "XCHG   $dst.lo $dst.hi" %}
 5015
 5016   ins_cost(125);
 5017   ins_encode( bswap_long_bytes(dst) );  // encode class defined outside this section
 5018   ins_pipe( ialu_reg_reg);
 5019 %}
 5020 
 5021 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5022   match(Set dst (ReverseBytesUS dst));  // in place: dst is both the input and the result
 5023   effect(KILL cr);
 5024
 5025   format %{ "BSWAP  $dst\n\t"
 5026             "SHR    $dst,16\n\t" %}
 5027   ins_encode %{
 5028     __ bswapl($dst$$Register);     // reverse all four bytes: the swapped short ends up in the upper half
 5029     __ shrl($dst$$Register, 16);   // logical shift brings it down and zero-fills the upper half
 5030   %}
 5031   ins_pipe( ialu_reg );
 5032 %}
 5033 
 5034 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5035   match(Set dst (ReverseBytesS dst));  // in place: dst is both the input and the result
 5036   effect(KILL cr);
 5037
 5038   format %{ "BSWAP  $dst\n\t"
 5039             "SAR    $dst,16\n\t" %}
 5040   ins_encode %{
 5041     __ bswapl($dst$$Register);     // reverse all four bytes: the swapped short ends up in the upper half
 5042     __ sarl($dst$$Register, 16);   // arithmetic shift brings it down and sign-extends it
 5043   %}
 5044   ins_pipe( ialu_reg );
 5045 %}
 5046 
 5047 
 5048 //---------- Zeros Count Instructions ------------------------------------------
 5049 
 5050 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5051   predicate(UseCountLeadingZerosInstruction);  // LZCNT variant; countLeadingZerosI_bsr covers the opposite case
 5052   match(Set dst (CountLeadingZerosI src));
 5053   effect(KILL cr);
 5054
 5055   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5056   ins_encode %{
 5057     __ lzcntl($dst$$Register, $src$$Register);
 5058   %}
 5059   ins_pipe(ialu_reg);
 5060 %}
 5061 
 5062 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5063   predicate(!UseCountLeadingZerosInstruction);  // BSR fallback when the LZCNT variant is not selected
 5064   match(Set dst (CountLeadingZerosI src));
 5065   effect(KILL cr);
 5066
 5067   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5068             "JNZ    skip\n\t"
 5069             "MOV    $dst, -1\n"
 5070       "skip:\n\t"
 5071             "NEG    $dst\n\t"
 5072             "ADD    $dst, 31" %}
 5073   ins_encode %{
 5074     Register Rdst = $dst$$Register;
 5075     Register Rsrc = $src$$Register;
 5076     Label skip;
 5077     __ bsrl(Rdst, Rsrc);                 // index of the highest set bit; ZF is set when src == 0
 5078     __ jccb(Assembler::notZero, skip);
 5079     __ movl(Rdst, -1);                   // src == 0: use index -1 so the final result is 32
 5080     __ bind(skip);
 5081     __ negl(Rdst);
 5082     __ addl(Rdst, BitsPerInt - 1);       // result = 31 - index
 5083   %}
 5084   ins_pipe(ialu_reg);
 5085 %}
 5086 
 5087 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
        // Leading-zero count of a long held in a register pair, using LZCNT on
        // the high word first and falling back to the low word (+32) when the
        // carry flag is set after the first LZCNT.
        // NOTE(review): relies on LZCNT setting CF for an all-zero source
        // operand -- see the instruction reference.
 5088   predicate(UseCountLeadingZerosInstruction);
 5089   match(Set dst (CountLeadingZerosL src));
 5090   effect(TEMP dst, KILL cr);
 5091
 5092   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5093             "JNC    done\n\t"
 5094             "LZCNT  $dst, $src.lo\n\t"
 5095             "ADD    $dst, 32\n"
 5096       "done:" %}
 5097   ins_encode %{
 5098     Register Rresult = $dst$$Register;
 5099     Register Rlo     = $src$$Register;
          Register Rhi     = HIGH_FROM_LOW(Rlo);
 5100     Label finished;
 5101     __ lzcntl(Rresult, Rhi);
 5102     __ jccb(Assembler::carryClear, finished);
 5103     __ lzcntl(Rresult, Rlo);
 5104     __ addl(Rresult, BitsPerInt);
 5105     __ bind(finished);
 5106   %}
 5107   ins_pipe(ialu_reg);
 5108 %}
 5109 
 5110 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
        // Leading-zero count of a long (register pair) built from BSR, used
        // when UseCountLeadingZerosInstruction is off. The code computes the
        // index of the highest set bit over the 64-bit value (high-word index
        // + 32, or the low-word index, or -1 when both words are zero) and
        // converts it to a count as (BitsPerLong - 1) - index.
 5111   predicate(!UseCountLeadingZerosInstruction);
 5112   match(Set dst (CountLeadingZerosL src));
 5113   effect(TEMP dst, KILL cr);
 5114
        // The final format line has no trailing newline, matching the other
        // multi-line formats in this section.
 5115   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5116             "JZ     msw_is_zero\n\t"
 5117             "ADD    $dst, 32\n\t"
 5118             "JMP    not_zero\n"
 5119       "msw_is_zero:\n\t"
 5120             "BSR    $dst, $src.lo\n\t"
 5121             "JNZ    not_zero\n\t"
 5122             "MOV    $dst, -1\n"
 5123       "not_zero:\n\t"
 5124             "NEG    $dst\n\t"
 5125             "ADD    $dst, 63" %}
 5126   ins_encode %{
 5127     Register Rdst = $dst$$Register;
 5128     Register Rsrc = $src$$Register;
 5129     Label msw_is_zero;
 5130     Label not_zero;
 5131     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5132     __ jccb(Assembler::zero, msw_is_zero);
          // High word non-zero: bias its bit index into the 32..63 range.
 5133     __ addl(Rdst, BitsPerInt);
 5134     __ jmpb(not_zero);
 5135     __ bind(msw_is_zero);
 5136     __ bsrl(Rdst, Rsrc);
 5137     __ jccb(Assembler::notZero, not_zero);
          // Both words zero: index -1 makes the result below come out as 64.
 5138     __ movl(Rdst, -1);
 5139     __ bind(not_zero);
 5140     __ negl(Rdst);
 5141     __ addl(Rdst, BitsPerLong - 1);
 5142   %}
 5143   ins_pipe(ialu_reg);
 5144 %}
 5145 
 5146 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Trailing-zero count of an int using a single TZCNT. Selected only
        // when UseCountTrailingZerosInstruction is set; flags are declared killed.
 5147   predicate(UseCountTrailingZerosInstruction);
 5148   match(Set dst (CountTrailingZerosI src));
 5149   effect(KILL cr);
 5150
 5151   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5152   ins_encode %{
          Register Rresult = $dst$$Register;
          Register Rvalue  = $src$$Register;
 5153     __ tzcntl(Rresult, Rvalue);
 5154   %}
 5155   ins_pipe(ialu_reg);
 5156 %}
 5157 
 5158 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Trailing-zero count of an int built from BSF, used when
        // UseCountTrailingZerosInstruction is off. When the not-zero branch
        // after BSF is not taken, the result is set to BitsPerInt explicitly.
 5159   predicate(!UseCountTrailingZerosInstruction);
 5160   match(Set dst (CountTrailingZerosI src));
 5161   effect(KILL cr);
 5162
 5163   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5164             "JNZ    done\n\t"
 5165             "MOV    $dst, 32\n"
 5166       "done:" %}
 5167   ins_encode %{
 5168     Register Rresult = $dst$$Register;
 5169     Label found_bit;
 5170     __ bsfl(Rresult, $src$$Register);
 5171     __ jccb(Assembler::notZero, found_bit);
 5172     __ movl(Rresult, BitsPerInt);
 5173     __ bind(found_bit);
 5174   %}
 5175   ins_pipe(ialu_reg);
 5176 %}
 5177 
 5178 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
        // Trailing-zero count of a long held in a register pair, using TZCNT on
        // the low word first and falling back to the high word (+32) when the
        // carry flag is set after the first TZCNT.
        // NOTE(review): relies on TZCNT setting CF for an all-zero source
        // operand -- see the instruction reference.
 5179   predicate(UseCountTrailingZerosInstruction);
 5180   match(Set dst (CountTrailingZerosL src));
 5181   effect(TEMP dst, KILL cr);
 5182
 5183   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5184             "JNC    done\n\t"
 5185             "TZCNT  $dst, $src.hi\n\t"
 5186             "ADD    $dst, 32\n"
 5187             "done:" %}
 5188   ins_encode %{
 5189     Register Rresult = $dst$$Register;
 5190     Register Rlo     = $src$$Register;
          Register Rhi     = HIGH_FROM_LOW(Rlo);
 5191     Label finished;
 5192     __ tzcntl(Rresult, Rlo);
 5193     __ jccb(Assembler::carryClear, finished);
 5194     __ tzcntl(Rresult, Rhi);
 5195     __ addl(Rresult, BitsPerInt);
 5196     __ bind(finished);
 5197   %}
 5198   ins_pipe(ialu_reg);
 5199 %}
 5200 
 5201 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
        // Trailing-zero count of a long (register pair) built from BSF, used
        // when UseCountTrailingZerosInstruction is off. The low word is scanned
        // first; otherwise the high-word result, or BitsPerInt when that scan
        // also finds nothing, is biased by another BitsPerInt.
 5202   predicate(!UseCountTrailingZerosInstruction);
 5203   match(Set dst (CountTrailingZerosL src));
 5204   effect(TEMP dst, KILL cr);
 5205
 5206   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5207             "JNZ    done\n\t"
 5208             "BSF    $dst, $src.hi\n\t"
 5209             "JNZ    msw_not_zero\n\t"
 5210             "MOV    $dst, 32\n"
 5211       "msw_not_zero:\n\t"
 5212             "ADD    $dst, 32\n"
 5213       "done:" %}
 5214   ins_encode %{
 5215     Register Rresult = $dst$$Register;
 5216     Register Rlo     = $src$$Register;
          Register Rhi     = HIGH_FROM_LOW(Rlo);
 5217     Label hi_has_bit;
 5218     Label finished;
 5219     __ bsfl(Rresult, Rlo);
 5220     __ jccb(Assembler::notZero, finished);
 5221     __ bsfl(Rresult, Rhi);
 5222     __ jccb(Assembler::notZero, hi_has_bit);
          // Neither word has a set bit: 32 here plus 32 below gives 64.
 5223     __ movl(Rresult, BitsPerInt);
 5224     __ bind(hi_has_bit);
 5225     __ addl(Rresult, BitsPerInt);
 5226     __ bind(finished);
 5227   %}
 5228   ins_pipe(ialu_reg);
 5229 %}
 5230 
 5231 
 5232 //---------- Population Count Instructions -------------------------------------
 5233 
 5234 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Population count of an int register via POPCNT. Selected only when
        // UsePopCountInstruction is set; flags are declared killed.
 5235   predicate(UsePopCountInstruction);
 5236   match(Set dst (PopCountI src));
 5237   effect(KILL cr);
 5238
 5239   format %{ "POPCNT $dst, $src" %}
 5240   ins_encode %{
          Register Rcount = $dst$$Register;
          Register Rvalue = $src$$Register;
 5241     __ popcntl(Rcount, Rvalue);
 5242   %}
 5243   ins_pipe(ialu_reg);
 5244 %}
 5245 
 5246 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
        // Population count of an int loaded from memory: the LoadI is folded
        // into the POPCNT memory operand. Requires UsePopCountInstruction.
 5247   predicate(UsePopCountInstruction);
 5248   match(Set dst (PopCountI (LoadI mem)));
 5249   effect(KILL cr);
 5250
 5251   format %{ "POPCNT $dst, $mem" %}
 5252   ins_encode %{
          Register Rcount = $dst$$Register;
 5253     __ popcntl(Rcount, $mem$$Address);
 5254   %}
 5255   ins_pipe(ialu_reg);
 5256 %}
 5257 
 5258 // Population count of a long held in a register pair. The result is an int
      // (Long.bitCount(long) returns an int): the counts of the low and high
      // words are computed separately and summed.
 5259 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5260   predicate(UsePopCountInstruction);
 5261   match(Set dst (PopCountL src));
 5262   effect(KILL cr, TEMP tmp, TEMP dst);
 5263
 5264   format %{ "POPCNT $dst, $src.lo\n\t"
 5265             "POPCNT $tmp, $src.hi\n\t"
 5266             "ADD    $dst, $tmp" %}
 5267   ins_encode %{
          Register Rcount = $dst$$Register;
          Register Rextra = $tmp$$Register;
          Register Rlo    = $src$$Register;
 5268     __ popcntl(Rcount, Rlo);
 5269     __ popcntl(Rextra, HIGH_FROM_LOW(Rlo));
 5270     __ addl(Rcount, Rextra);
 5271   %}
 5272   ins_pipe(ialu_reg);
 5273 %}
 5274 
 5275 // Population count of a long loaded from memory. The result is an int
      // (Long.bitCount(long) returns an int): the two 32-bit halves at $mem and
      // $mem+4 are counted separately and summed.
 5276 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5277   predicate(UsePopCountInstruction);
 5278   match(Set dst (PopCountL (LoadL mem)));
 5279   effect(KILL cr, TEMP tmp, TEMP dst);
 5280
 5281   format %{ "POPCNT $dst, $mem\n\t"
 5282             "POPCNT $tmp, $mem+4\n\t"
 5283             "ADD    $dst, $tmp" %}
 5284   ins_encode %{
          // Addresses of the two halves, built from the operand's components.
 5285     Address lo_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5286     Address hi_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5287     __ popcntl($dst$$Register, lo_word);
 5288     __ popcntl($tmp$$Register, hi_word);
 5289     __ addl($dst$$Register, $tmp$$Register);
 5290   %}
 5291   ins_pipe(ialu_reg);
 5292 %}
 5293 
 5294 
 5295 //----------Load/Store/Move Instructions---------------------------------------
 5296 //----------Load Instructions--------------------------------------------------
 5297 // Load Byte (8 bit signed): sign-extending byte load into an int register.
 5298 instruct loadB(xRegI dst, memory mem) %{
 5299   match(Set dst (LoadB mem));
 5300
 5301   ins_cost(125);
 5302   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5303
 5304   ins_encode %{
          Register Rvalue = $dst$$Register;
 5305     __ movsbl(Rvalue, $mem$$Address);
 5306   %}
 5307
 5308   ins_pipe(ialu_reg_mem);
 5309 %}
 5310 
 5311 // Load Byte (8 bit signed) into Long Register: the sign-extended byte goes
      // into the low word and the high word is derived from it by copy + shift.
 5312 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5313   match(Set dst (ConvI2L (LoadB mem)));
 5314   effect(KILL cr);
 5315
 5316   ins_cost(375);
 5317   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5318             "MOV    $dst.hi,$dst.lo\n\t"
 5319             "SAR    $dst.hi,7" %}
 5320
 5321   ins_encode %{
          Register Rlo = $dst$$Register;
          Register Rhi = HIGH_FROM_LOW(Rlo);  // Always a different register than Rlo.
 5322     __ movsbl(Rlo, $mem$$Address);
 5323     __ movl(Rhi, Rlo);
          // The upper 24+1 bits are already sign bits, so shifting by 7 is enough.
 5324     __ sarl(Rhi, 7);
 5325   %}
 5326
 5327   ins_pipe(ialu_reg_mem);
 5328 %}
 5329 
 5330 // Load Unsigned Byte (8 bit unsigned): zero-extending byte load into an int
      // register.
 5331 instruct loadUB(xRegI dst, memory mem) %{
 5332   match(Set dst (LoadUB mem));
 5333
 5334   ins_cost(125);
 5335   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5336
 5337   ins_encode %{
          Register Rvalue = $dst$$Register;
 5338     __ movzbl(Rvalue, $mem$$Address);
 5339   %}
 5340
 5341   ins_pipe(ialu_reg_mem);
 5342 %}
 5343 
 5344 // Load Unsigned Byte (8 bit unsigned) into Long Register: zero-extended byte
      // in the low word, high word cleared with XOR (hence the flags kill).
 5345 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5346   match(Set dst (ConvI2L (LoadUB mem)));
 5347   effect(KILL cr);
 5348
 5349   ins_cost(250);
 5350   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5351             "XOR    $dst.hi,$dst.hi" %}
 5352
 5353   ins_encode %{
 5354     Register Rlo = $dst$$Register;
          Register Rhi = HIGH_FROM_LOW(Rlo);
 5355     __ movzbl(Rlo, $mem$$Address);
 5356     __ xorl(Rhi, Rhi);
 5357   %}
 5358
 5359   ins_pipe(ialu_reg_mem);
 5360 %}
 5361 
 5362 // Load Unsigned Byte (8 bit unsigned) with mask into Long Register. Only the
      // low 8 bits of the mask are applied, since the loaded value is a byte.
 5363 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5364   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5365   effect(KILL cr);
 5366
 5367   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5368             "XOR    $dst.hi,$dst.hi\n\t"
 5369             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5370   ins_encode %{
 5371     Register Rlo = $dst$$Register;
          Register Rhi = HIGH_FROM_LOW(Rlo);
 5372     __ movzbl(Rlo, $mem$$Address);
 5373     __ xorl(Rhi, Rhi);
 5374     __ andl(Rlo, $mask$$constant & right_n_bits(8));
 5375   %}
 5376   ins_pipe(ialu_reg_mem);
 5377 %}
 5378 
 5379 // Load Short (16 bit signed): sign-extending word load into an int register.
 5380 instruct loadS(rRegI dst, memory mem) %{
 5381   match(Set dst (LoadS mem));
 5382
 5383   ins_cost(125);
 5384   format %{ "MOVSX  $dst,$mem\t# short" %}
 5385
 5386   ins_encode %{
          Register Rvalue = $dst$$Register;
 5387     __ movswl(Rvalue, $mem$$Address);
 5388   %}
 5389
 5390   ins_pipe(ialu_reg_mem);
 5391 %}
 5392 
 5393 // Load Short (16 bit signed) to Byte (8 bit signed). The shift-left-24 /
      // shift-right-24 pair around the load is replaced by a single
      // sign-extending byte load from the same address.
 5394 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5395   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5396
 5397   ins_cost(125);
        // Printed as MOVSX8, the mnemonic this file uses for movsbl (see loadB).
 5398   format %{ "MOVSX8 $dst, $mem\t# short -> byte" %}
 5399   ins_encode %{
 5400     __ movsbl($dst$$Register, $mem$$Address);
 5401   %}
 5402   ins_pipe(ialu_reg_mem);
 5403 %}
 5404 
 5405 // Load Short (16 bit signed) into Long Register: the sign-extended short
      // goes into the low word and the high word is derived by copy + shift.
 5406 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5407   match(Set dst (ConvI2L (LoadS mem)));
 5408   effect(KILL cr);
 5409
 5410   ins_cost(375);
 5411   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5412             "MOV    $dst.hi,$dst.lo\n\t"
 5413             "SAR    $dst.hi,15" %}
 5414
 5415   ins_encode %{
          Register Rlo = $dst$$Register;
          Register Rhi = HIGH_FROM_LOW(Rlo);  // Always a different register than Rlo.
 5416     __ movswl(Rlo, $mem$$Address);
 5417     __ movl(Rhi, Rlo);
          // The upper 16+1 bits are already sign bits, so shifting by 15 is enough.
 5418     __ sarl(Rhi, 15);
 5419   %}
 5420
 5421   ins_pipe(ialu_reg_mem);
 5422 %}
 5423 
 5424 // Load Unsigned Short/Char (16 bit unsigned): zero-extending word load into
      // an int register.
 5425 instruct loadUS(rRegI dst, memory mem) %{
 5426   match(Set dst (LoadUS mem));
 5427
 5428   ins_cost(125);
 5429   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5430
 5431   ins_encode %{
          Register Rvalue = $dst$$Register;
 5432     __ movzwl(Rvalue, $mem$$Address);
 5433   %}
 5434
 5435   ins_pipe(ialu_reg_mem);
 5436 %}
 5437 
 5438 // Load Unsigned Short/Char (16 bit unsigned) to Byte (8 bit signed). The
      // shift-left-24 / shift-right-24 pair around the load is replaced by a
      // single sign-extending byte load from the same address.
 5439 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5440   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5441
 5442   ins_cost(125);
        // Printed as MOVSX8, the mnemonic this file uses for movsbl (see loadB).
 5443   format %{ "MOVSX8 $dst, $mem\t# ushort -> byte" %}
 5444   ins_encode %{
 5445     __ movsbl($dst$$Register, $mem$$Address);
 5446   %}
 5447   ins_pipe(ialu_reg_mem);
 5448 %}
 5449 
 5450 // Load Unsigned Short/Char (16 bit unsigned) into Long Register:
      // zero-extended value in the low word, high word cleared with XOR.
 5451 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5452   match(Set dst (ConvI2L (LoadUS mem)));
 5453   effect(KILL cr);
 5454
 5455   ins_cost(250);
 5456   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5457             "XOR    $dst.hi,$dst.hi" %}
 5458
 5459   ins_encode %{
          Register Rlo = $dst$$Register;
          Register Rhi = HIGH_FROM_LOW(Rlo);
 5460     __ movzwl(Rlo, $mem$$Address);
 5461     __ xorl(Rhi, Rhi);
 5462   %}
 5463
 5464   ins_pipe(ialu_reg_mem);
 5465 %}
 5466 
 5467 // Load Unsigned Short/Char (16 bit unsigned) with mask 0xFF into Long
      // Register: the AND with 0xFF is folded into a zero-extending byte load.
 5468 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5469   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5470   effect(KILL cr);
 5471
 5472   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5473             "XOR    $dst.hi,$dst.hi" %}
 5474   ins_encode %{
 5475     Register Rlo = $dst$$Register;
          Register Rhi = HIGH_FROM_LOW(Rlo);
 5476     __ movzbl(Rlo, $mem$$Address);
 5477     __ xorl(Rhi, Rhi);
 5478   %}
 5479   ins_pipe(ialu_reg_mem);
 5480 %}
 5481 
 5482 // Load Unsigned Short/Char (16 bit unsigned) with a 32-bit mask into Long
      // Register. Only the low 16 bits of the mask are applied.
 5483 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5484   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5485   effect(KILL cr);
 5486
 5487   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5488             "XOR    $dst.hi,$dst.hi\n\t"
 5489             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5490   ins_encode %{
 5491     Register Rlo = $dst$$Register;
          Register Rhi = HIGH_FROM_LOW(Rlo);
 5492     __ movzwl(Rlo, $mem$$Address);
 5493     __ xorl(Rhi, Rhi);
 5494     __ andl(Rlo, $mask$$constant & right_n_bits(16));
 5495   %}
 5496   ins_pipe(ialu_reg_mem);
 5497 %}
 5498 
 5499 // Load Integer: plain 32-bit load into an int register.
 5500 instruct loadI(rRegI dst, memory mem) %{
 5501   match(Set dst (LoadI mem));
 5502
 5503   ins_cost(125);
 5504   format %{ "MOV    $dst,$mem\t# int" %}
 5505
 5506   ins_encode %{
          Register Rvalue = $dst$$Register;
 5507     __ movl(Rvalue, $mem$$Address);
 5508   %}
 5509
 5510   ins_pipe(ialu_reg_mem);
 5511 %}
 5512 
 5513 // Load Integer (32 bit signed) to Byte (8 bit signed). The shift-left-24 /
      // shift-right-24 pair around the load is replaced by a single
      // sign-extending byte load from the same address.
 5514 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5515   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5516
 5517   ins_cost(125);
        // Printed as MOVSX8, the mnemonic this file uses for movsbl (see loadB).
 5518   format %{ "MOVSX8 $dst, $mem\t# int -> byte" %}
 5519   ins_encode %{
 5520     __ movsbl($dst$$Register, $mem$$Address);
 5521   %}
 5522   ins_pipe(ialu_reg_mem);
 5523 %}
 5524 
 5525 // Load Integer (32 bit signed) to Unsigned Byte (8 bit unsigned). The AND
      // with 0xFF is folded into a single zero-extending byte load.
 5526 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5527   match(Set dst (AndI (LoadI mem) mask));
 5528
 5529   ins_cost(125);
        // Printed as MOVZX8, the mnemonic this file uses for movzbl (see loadUB).
 5530   format %{ "MOVZX8 $dst, $mem\t# int -> ubyte" %}
 5531   ins_encode %{
 5532     __ movzbl($dst$$Register, $mem$$Address);
 5533   %}
 5534   ins_pipe(ialu_reg_mem);
 5535 %}
 5536 
 5537 // Load Integer (32 bit signed) to Short (16 bit signed). The shift-left-16 /
      // shift-right-16 pair around the load is replaced by a single
      // sign-extending word load.
 5538 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5539   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5540
 5541   ins_cost(125);
 5542   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5543   ins_encode %{
          Register Rvalue = $dst$$Register;
 5544     __ movswl(Rvalue, $mem$$Address);
 5545   %}
 5546   ins_pipe(ialu_reg_mem);
 5547 %}
 5548 
 5549 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit unsigned). The
      // AND with 0xFFFF is folded into a single zero-extending word load.
 5550 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5551   match(Set dst (AndI (LoadI mem) mask));
 5552
 5553   ins_cost(125);
 5554   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5555   ins_encode %{
          Register Rvalue = $dst$$Register;
 5556     __ movzwl(Rvalue, $mem$$Address);
 5557   %}
 5558   ins_pipe(ialu_reg_mem);
 5559 %}
 5560 
 5561 // Load Integer into Long Register: the int goes into the low word and the
      // high word is filled with its sign by copy + arithmetic shift by 31.
 5562 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5563   match(Set dst (ConvI2L (LoadI mem)));
 5564   effect(KILL cr);
 5565
 5566   ins_cost(375);
 5567   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5568             "MOV    $dst.hi,$dst.lo\n\t"
 5569             "SAR    $dst.hi,31" %}
 5570
 5571   ins_encode %{
          Register Rlo = $dst$$Register;
          Register Rhi = HIGH_FROM_LOW(Rlo);  // Always a different register than Rlo.
 5572     __ movl(Rlo, $mem$$Address);
 5573     __ movl(Rhi, Rlo);
 5574     __ sarl(Rhi, 31);
 5575   %}
 5576
 5577   ins_pipe(ialu_reg_mem);
 5578 %}
 5579 
 5580 // Load Integer with mask 0xFF into Long Register: the AND is folded into a
      // zero-extending byte load; the high word is cleared with XOR.
 5581 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5582   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5583   effect(KILL cr);
 5584
 5585   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5586             "XOR    $dst.hi,$dst.hi" %}
 5587   ins_encode %{
 5588     Register Rlo = $dst$$Register;
          Register Rhi = HIGH_FROM_LOW(Rlo);
 5589     __ movzbl(Rlo, $mem$$Address);
 5590     __ xorl(Rhi, Rhi);
 5591   %}
 5592   ins_pipe(ialu_reg_mem);
 5593 %}
 5594 
 5595 // Load Integer with mask 0xFFFF into Long Register: the AND is folded into a
      // zero-extending word load; the high word is cleared with XOR.
 5596 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5597   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5598   effect(KILL cr);
 5599
 5600   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5601             "XOR    $dst.hi,$dst.hi" %}
 5602   ins_encode %{
 5603     Register Rlo = $dst$$Register;
          Register Rhi = HIGH_FROM_LOW(Rlo);
 5604     __ movzwl(Rlo, $mem$$Address);
 5605     __ xorl(Rhi, Rhi);
 5606   %}
 5607   ins_pipe(ialu_reg_mem);
 5608 %}
 5609 
 5610 // Load Integer with 31-bit mask into Long Register. The high word is simply
      // cleared and the mask is applied to the low word.
 5611 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5612   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5613   effect(KILL cr);
 5614
 5615   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5616             "XOR    $dst.hi,$dst.hi\n\t"
 5617             "AND    $dst.lo,$mask" %}
 5618   ins_encode %{
 5619     Register Rlo = $dst$$Register;
          Register Rhi = HIGH_FROM_LOW(Rlo);
 5620     __ movl(Rlo, $mem$$Address);
 5621     __ xorl(Rhi, Rhi);
 5622     __ andl(Rlo, $mask$$constant);
 5623   %}
 5624   ins_pipe(ialu_reg_mem);
 5625 %}
 5626 
 5627 // Load Unsigned Integer into Long Register: matches an int load widened to
      // long and masked with the 32-bit mask operand; emitted as a plain load
      // into the low word with the high word cleared.
 5628 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5629   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5630   effect(KILL cr);
 5631
 5632   ins_cost(250);
 5633   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5634             "XOR    $dst.hi,$dst.hi" %}
 5635
 5636   ins_encode %{
          Register Rlo = $dst$$Register;
          Register Rhi = HIGH_FROM_LOW(Rlo);
 5637     __ movl(Rlo, $mem$$Address);
 5638     __ xorl(Rhi, Rhi);
 5639   %}
 5640
 5641   ins_pipe(ialu_reg_mem);
 5642 %}
 5643 
 5644 // Load Long (non-atomic case).  The address must not be clobbered while the
 5645 // two halves are being loaded, so the address register is restricted to ESI.
 5646 instruct loadL(eRegL dst, load_long_memory mem) %{
 5647   predicate(!((LoadLNode*)n)->require_atomic_access());
 5648   match(Set dst (LoadL mem));
 5649
 5650   ins_cost(250);
 5651   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5652             "MOV    $dst.hi,$mem+4" %}
 5653
 5654   ins_encode %{
          Register Rlo = $dst$$Register;
          Register Rhi = HIGH_FROM_LOW(Rlo);
          // Low word at $mem, high word four bytes above it.
 5655     Address lo_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5656     Address hi_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5657     __ movl(Rlo, lo_word);
 5658     __ movl(Rhi, hi_word);
 5659   %}
 5660
 5661   ins_pipe(ialu_reg_long_mem);
 5662 %}
 5663 
 5664 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5665 // then store it down to the stack and reload on the int
 5666 // side.
      // Selected when UseSSE<=1 and the LoadL node requires atomic access; the
      // destination is a long stack slot rather than a register pair.
 5667 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5668   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5669   match(Set dst (LoadL mem));
 5670
 5671   ins_cost(200);
 5672   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5673             "FISTp  $dst" %}
        // Encoding is delegated to the enc_loadL_volatile encoding class.
 5674   ins_encode(enc_loadL_volatile(mem,dst));
 5675   ins_pipe( fpu_reg_mem );
 5676 %}
 5677 
 5678 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
        // Atomic long load into a stack slot when UseSSE>=2: the 64-bit value is
        // moved through the XMM temp with two MOVSDs (memory -> tmp -> rsp+disp).
 5679   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5680   match(Set dst (LoadL mem));
 5681   effect(TEMP tmp);
 5682   ins_cost(180);
 5683   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5684             "MOVSD  $dst,$tmp" %}
 5685   ins_encode %{
          XMMRegister Xtmp = $tmp$$XMMRegister;
 5686     __ movdbl(Xtmp, $mem$$Address);
 5687     __ movdbl(Address(rsp, $dst$$disp), Xtmp);
 5688   %}
 5689   ins_pipe( pipe_slow );
 5690 %}
 5691 
 5692 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
        // Atomic long load into a register pair when UseSSE>=2: one 64-bit load
        // into the XMM temp, then the low and high 32 bits are extracted with
        // MOVD, shifting the temp right by 32 in between.
 5693   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5694   match(Set dst (LoadL mem));
 5695   effect(TEMP tmp);
 5696   ins_cost(160);
 5697   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5698             "MOVD   $dst.lo,$tmp\n\t"
 5699             "PSRLQ  $tmp,32\n\t"
 5700             "MOVD   $dst.hi,$tmp" %}
 5701   ins_encode %{
          XMMRegister Xtmp = $tmp$$XMMRegister;
          Register    Rlo  = $dst$$Register;
          Register    Rhi  = HIGH_FROM_LOW(Rlo);
 5702     __ movdbl(Xtmp, $mem$$Address);
 5703     __ movdl(Rlo, Xtmp);
 5704     __ psrlq(Xtmp, 32);
 5705     __ movdl(Rhi, Xtmp);
 5706   %}
 5707   ins_pipe( pipe_slow );
 5708 %}
 5709 
 5710 // Load Range
      // Matches LoadRange into an int register. Encoded with opcode 0x8B and a
      // register/memory operand pair, bracketed by SetInstMark/ClearInstMark.
 5711 instruct loadRange(rRegI dst, memory mem) %{
 5712   match(Set dst (LoadRange mem));
 5713
 5714   ins_cost(125);
 5715   format %{ "MOV    $dst,$mem" %}
 5716   opcode(0x8B);
 5717   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5718   ins_pipe( ialu_reg_mem );
 5719 %}
 5720 
 5721 
 5722 // Load Pointer
      // Matches LoadP into a pointer register. Encoded with opcode 0x8B and a
      // register/memory operand pair, bracketed by SetInstMark/ClearInstMark.
 5723 instruct loadP(eRegP dst, memory mem) %{
 5724   match(Set dst (LoadP mem));
 5725
 5726   ins_cost(125);
 5727   format %{ "MOV    $dst,$mem" %}
 5728   opcode(0x8B);
 5729   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5730   ins_pipe( ialu_reg_mem );
 5731 %}
 5732 
 5733 // Load Klass Pointer
      // Matches LoadKlass into a pointer register; same opcode (0x8B) and
      // encoding sequence as the plain pointer load.
 5734 instruct loadKlass(eRegP dst, memory mem) %{
 5735   match(Set dst (LoadKlass mem));
 5736
 5737   ins_cost(125);
 5738   format %{ "MOV    $dst,$mem" %}
 5739   opcode(0x8B);
 5740   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5741   ins_pipe( ialu_reg_mem );
 5742 %}
 5743 
 5744 // Load Double
      // x87 variant, selected when UseSSE<=1: the value is loaded from memory
      // with opcode 0xDD /0 and then popped into the destination register.
 5745 instruct loadDPR(regDPR dst, memory mem) %{
 5746   predicate(UseSSE<=1);
 5747   match(Set dst (LoadD mem));
 5748
 5749   ins_cost(150);
 5750   format %{ "FLD_D  ST,$mem\n\t"
 5751             "FSTP   $dst" %}
 5752   opcode(0xDD);               /* DD /0 */
 5753   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
 5754               Pop_Reg_DPR(dst), ClearInstMark );
 5755   ins_pipe( fpu_reg_mem );
 5756 %}
 5757 
 5758 // Load Double to XMM. Selected when UseSSE>=2 and UseXmmLoadAndClearUpper is
      // set; the complementary case is handled by loadD_partial.
 5759 instruct loadD(regD dst, memory mem) %{
 5760   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5761   match(Set dst (LoadD mem));
 5762   ins_cost(145);
 5763   format %{ "MOVSD  $dst,$mem" %}
 5764   ins_encode %{
          XMMRegister Xvalue = $dst$$XMMRegister;
 5765     __ movdbl(Xvalue, $mem$$Address);
 5766   %}
 5767   ins_pipe( pipe_slow );
 5768 %}
 5769 
 5770 instruct loadD_partial(regD dst, memory mem) %{
        // Load Double to XMM when UseSSE>=2 and UseXmmLoadAndClearUpper is off.
        // NOTE(review): the format prints MOVLPD while the encoding calls the
        // same movdbl helper as loadD -- presumably the helper picks the
        // instruction from the flag; confirm in the macro assembler.
 5771   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5772   match(Set dst (LoadD mem));
 5773   ins_cost(145);
 5774   format %{ "MOVLPD $dst,$mem" %}
 5775   ins_encode %{
          XMMRegister Xvalue = $dst$$XMMRegister;
 5776     __ movdbl(Xvalue, $mem$$Address);
 5777   %}
 5778   ins_pipe( pipe_slow );
 5779 %}
 5780 
 5781 // Load Float to XMM register (single-precision floating point).
 5782 // Printed as MOVSS; selected when UseSSE>=1.
 5783 instruct loadF(regF dst, memory mem) %{
 5784   predicate(UseSSE>=1);
 5785   match(Set dst (LoadF mem));
 5786   ins_cost(145);
 5787   format %{ "MOVSS  $dst,$mem" %}
 5788   ins_encode %{
          XMMRegister Xvalue = $dst$$XMMRegister;
 5789     __ movflt(Xvalue, $mem$$Address);
 5790   %}
 5791   ins_pipe( pipe_slow );
 5792 %}
 5793 
 5794 // Load Float
      // x87 variant, selected when UseSSE==0: the value is loaded from memory
      // with opcode 0xD9 /0 and then popped into the destination register.
 5795 instruct loadFPR(regFPR dst, memory mem) %{
 5796   predicate(UseSSE==0);
 5797   match(Set dst (LoadF mem));
 5798
 5799   ins_cost(150);
 5800   format %{ "FLD_S  ST,$mem\n\t"
 5801             "FSTP   $dst" %}
 5802   opcode(0xD9);               /* D9 /0 */
 5803   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
 5804               Pop_Reg_FPR(dst), ClearInstMark );
 5805   ins_pipe( fpu_reg_mem );
 5806 %}
 5807 
 5808 // Load Effective Address
      // Variant for the indOffset8 operand: the address expression itself is
      // the matched value and is materialized with opcode 0x8D (LEA).
 5809 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5810   match(Set dst mem);
 5811
 5812   ins_cost(110);
 5813   format %{ "LEA    $dst,$mem" %}
 5814   opcode(0x8D);
 5815   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5816   ins_pipe( ialu_reg_reg_fat );
 5817 %}
 5818 
 5819 instruct leaP32(eRegP dst, indOffset32 mem) %{
        // Load Effective Address for the indOffset32 operand; same opcode
        // (0x8D) and encoding sequence as the other LEA variants.
 5820   match(Set dst mem);
 5821
 5822   ins_cost(110);
 5823   format %{ "LEA    $dst,$mem" %}
 5824   opcode(0x8D);
 5825   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5826   ins_pipe( ialu_reg_reg_fat );
 5827 %}
 5828 
 5829 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
        // Load Effective Address for the indIndexOffset operand; same opcode
        // (0x8D) and encoding sequence as the other LEA variants.
 5830   match(Set dst mem);
 5831
 5832   ins_cost(110);
 5833   format %{ "LEA    $dst,$mem" %}
 5834   opcode(0x8D);
 5835   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5836   ins_pipe( ialu_reg_reg_fat );
 5837 %}
 5838 
 5839 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
        // Load Effective Address for the indIndexScale operand; same opcode
        // (0x8D) and encoding sequence as the other LEA variants.
 5840   match(Set dst mem);
 5841
 5842   ins_cost(110);
 5843   format %{ "LEA    $dst,$mem" %}
 5844   opcode(0x8D);
 5845   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5846   ins_pipe( ialu_reg_reg_fat );
 5847 %}
 5848 
 5849 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
        // Load Effective Address for the indIndexScaleOffset operand; same
        // opcode (0x8D) and encoding sequence as the other LEA variants.
 5850   match(Set dst mem);
 5851
 5852   ins_cost(110);
 5853   format %{ "LEA    $dst,$mem" %}
 5854   opcode(0x8D);
 5855   ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark);
 5856   ins_pipe( ialu_reg_reg_fat );
 5857 %}
 5858 
 5859 // Load Constant
      // General int immediate into an int register; encoding is delegated to the
      // LdImmI encoding class. No explicit cost is given, unlike loadConI0.
 5860 instruct loadConI(rRegI dst, immI src) %{
 5861   match(Set dst src);
 5862
 5863   format %{ "MOV    $dst,$src" %}
 5864   ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark );
 5865   ins_pipe( ialu_reg_fat );
 5866 %}
 5867 
 5868 // Load Constant zero
      // Zeroes the register by XOR-ing it with itself (opcode 0x33), so the
      // flags register is declared killed. Carries an explicit cost of 50.
 5869 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 5870   match(Set dst src);
 5871   effect(KILL cr);
 5872
 5873   ins_cost(50);
 5874   format %{ "XOR    $dst,$dst" %}
 5875   opcode(0x33);  /* + rd */
 5876   ins_encode( OpcP, RegReg( dst, dst ) );
 5877   ins_pipe( ialu_reg );
 5878 %}
 5879 
 5880 instruct loadConP(eRegP dst, immP src) %{
        // Load a pointer immediate into a pointer register; encoding is
        // delegated to the LdImmP encoding class with primary opcode 0xB8.
 5881   match(Set dst src);
 5882
 5883   format %{ "MOV    $dst,$src" %}
 5884   opcode(0xB8);  /* + rd */
 5885   ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark );
 5886   ins_pipe( ialu_reg_fat );
 5887 %}
 5888 
 5889 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
        // Load a long immediate into a register pair: the low and high halves
        // are emitted by the LdImmL_Lo and LdImmL_Hi encoding classes.
        // NOTE(review): flags are declared killed although the format shows only
        // MOVs -- confirm what the encoding classes emit.
 5890   match(Set dst src);
 5891   effect(KILL cr);
 5892   ins_cost(200);
 5893   format %{ "MOV    $dst.lo,$src.lo\n\t"
 5894             "MOV    $dst.hi,$src.hi" %}
 5895   opcode(0xB8);
 5896   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 5897   ins_pipe( ialu_reg_long_fat );
 5898 %}
 5899 
 5900 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
        // Load long zero into a register pair by XOR-ing each half with itself
        // (opcode 0x33 for both halves); flags are declared killed. Cheaper
        // (cost 150) than the general long constant load (cost 200).
 5901   match(Set dst src);
 5902   effect(KILL cr);
 5903   ins_cost(150);
 5904   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 5905             "XOR    $dst.hi,$dst.hi" %}
 5906   opcode(0x33,0x33);
 5907   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 5908   ins_pipe( ialu_reg_long );
 5909 %}
 5910 
 5911 // Float constant into an x87 register: loaded from the constant table, then
      // popped into $dst. Use of this instruction is guarded by the predicate in
      // operand immFPR().
 5912 instruct loadConFPR(regFPR dst, immFPR con) %{
 5913   match(Set dst con);
 5914   ins_cost(125);
 5915   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 5916             "FSTP   $dst" %}
 5917   ins_encode %{
          int dst_index = $dst$$reg;
 5918     __ fld_s($constantaddress($con));
 5919     __ fstp_d(dst_index);
 5920   %}
 5921   ins_pipe(fpu_reg_con);
 5922 %}
 5923 
 5924 // Float constant 0.0 into an x87 register via FLDZ, then popped into $dst.
      // Use of this instruction is guarded by the predicate in operand immFPR0().
 5925 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 5926   match(Set dst con);
 5927   ins_cost(125);
 5928   format %{ "FLDZ   ST\n\t"
 5929             "FSTP   $dst" %}
 5930   ins_encode %{
          int dst_index = $dst$$reg;
 5931     __ fldz();
 5932     __ fstp_d(dst_index);
 5933   %}
 5934   ins_pipe(fpu_reg_con);
 5935 %}
 5936 
 5937 // Float constant 1.0 into an x87 register via FLD1, then popped into $dst.
      // Use of this instruction is guarded by the predicate in operand immFPR1().
 5938 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 5939   match(Set dst con);
 5940   ins_cost(125);
 5941   format %{ "FLD1   ST\n\t"
 5942             "FSTP   $dst" %}
 5943   ins_encode %{
          int dst_index = $dst$$reg;
 5944     __ fld1();
 5945     __ fstp_d(dst_index);
 5946   %}
 5947   ins_pipe(fpu_reg_con);
 5948 %}
 5949 
 5950 // Float constant into an XMM register, loaded from the constant table.
      // Use of this instruction is guarded by the predicate in operand immF().
 5951 instruct loadConF(regF dst, immF con) %{
 5952   match(Set dst con);
 5953   ins_cost(125);
 5954   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 5955   ins_encode %{
          XMMRegister Xvalue = $dst$$XMMRegister;
 5956     __ movflt(Xvalue, $constantaddress($con));
 5957   %}
 5958   ins_pipe(pipe_slow);
 5959 %}
 5960 
 5961 // Float constant 0.0 into an XMM register by XOR-ing the register with
      // itself. Use of this instruction is guarded by the predicate in operand
      // immF0().
 5962 instruct loadConF0(regF dst, immF0 src) %{
 5963   match(Set dst src);
 5964   ins_cost(100);
 5965   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 5966   ins_encode %{
          XMMRegister Xvalue = $dst$$XMMRegister;
 5967     __ xorps(Xvalue, Xvalue);
 5968   %}
 5969   ins_pipe(pipe_slow);
 5970 %}
 5971 
 5972 // Double constant into an x87 register: loaded from the constant table, then
      // popped into $dst. Use of this instruction is guarded by the predicate in
      // operand immDPR().
 5973 instruct loadConDPR(regDPR dst, immDPR con) %{
 5974   match(Set dst con);
 5975   ins_cost(125);
 5976
 5977   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 5978             "FSTP   $dst" %}
 5979   ins_encode %{
          int dst_index = $dst$$reg;
 5980     __ fld_d($constantaddress($con));
 5981     __ fstp_d(dst_index);
 5982   %}
 5983   ins_pipe(fpu_reg_con);
 5984 %}
 5985 
 5986 // Double constant 0.0 into an x87 register via FLDZ, then popped into $dst.
      // Use of this instruction is guarded by the predicate in operand immDPR0().
 5987 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 5988   match(Set dst con);
 5989   ins_cost(125);
 5990
 5991   format %{ "FLDZ   ST\n\t"
 5992             "FSTP   $dst" %}
 5993   ins_encode %{
          int dst_index = $dst$$reg;
 5994     __ fldz();
 5995     __ fstp_d(dst_index);
 5996   %}
 5997   ins_pipe(fpu_reg_con);
 5998 %}
 5999 
 6000 // Double constant 1.0 into an x87 register via FLD1, then popped into $dst.
      // Use of this instruction is guarded by the predicate in operand immDPR1().
 6001 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6002   match(Set dst con);
 6003   ins_cost(125);
 6004
 6005   format %{ "FLD1   ST\n\t"
 6006             "FSTP   $dst" %}
 6007   ins_encode %{
          int dst_index = $dst$$reg;
 6008     __ fld1();
 6009     __ fstp_d(dst_index);
 6010   %}
 6011   ins_pipe(fpu_reg_con);
 6012 %}
 6013 
 6014 // Double constant into an XMM register, loaded from the constant table.
      // Use of this instruction is guarded by the predicate in operand immD().
 6015 instruct loadConD(regD dst, immD con) %{
 6016   match(Set dst con);
 6017   ins_cost(125);
 6018   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6019   ins_encode %{
          XMMRegister Xvalue = $dst$$XMMRegister;
 6020     __ movdbl(Xvalue, $constantaddress($con));
 6021   %}
 6022   ins_pipe(pipe_slow);
 6023 %}
 6024 
 6025 // The instruction usage is guarded by predicate in operand immD0().
 6026 instruct loadConD0(regD dst, immD0 src) %{
 6027   match(Set dst src);
 6028   ins_cost(100);
 6029   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6030   ins_encode %{
 6031     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6032   %}
 6033   ins_pipe( pipe_slow );
 6034 %}
 6035 
 6036 // Load Stack Slot
 6037 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6038   match(Set dst src);
 6039   ins_cost(125);
 6040 
 6041   format %{ "MOV    $dst,$src" %}
 6042   opcode(0x8B);
 6043   ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
 6044   ins_pipe( ialu_reg_mem );
 6045 %}
 6046 
 6047 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6048   match(Set dst src);
 6049 
 6050   ins_cost(200);
 6051   format %{ "MOV    $dst,$src.lo\n\t"
 6052             "MOV    $dst+4,$src.hi" %}
 6053   opcode(0x8B, 0x8B);
 6054   ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark );
 6055   ins_pipe( ialu_mem_long_reg );
 6056 %}
 6057 
 6058 // Load Stack Slot
 6059 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6060   match(Set dst src);
 6061   ins_cost(125);
 6062 
 6063   format %{ "MOV    $dst,$src" %}
 6064   opcode(0x8B);
 6065   ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark);
 6066   ins_pipe( ialu_reg_mem );
 6067 %}
 6068 
 6069 // Load Stack Slot
 6070 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6071   match(Set dst src);
 6072   ins_cost(125);
 6073 
 6074   format %{ "FLD_S  $src\n\t"
 6075             "FSTP   $dst" %}
 6076   opcode(0xD9);               /* D9 /0, FLD m32real */
 6077   ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
 6078               Pop_Reg_FPR(dst), ClearInstMark );
 6079   ins_pipe( fpu_reg_mem );
 6080 %}
 6081 
 6082 // Load Stack Slot
 6083 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6084   match(Set dst src);
 6085   ins_cost(125);
 6086 
 6087   format %{ "FLD_D  $src\n\t"
 6088             "FSTP   $dst" %}
 6089   opcode(0xDD);               /* DD /0, FLD m64real */
 6090   ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
 6091               Pop_Reg_DPR(dst), ClearInstMark );
 6092   ins_pipe( fpu_reg_mem );
 6093 %}
 6094 
 6095 // Prefetch instructions for allocation.
 6096 // Must be safe to execute with invalid address (cannot fault).
 6097 
 6098 instruct prefetchAlloc0( memory mem ) %{
 6099   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6100   match(PrefetchAllocation mem);
 6101   ins_cost(0);
 6102   size(0);
 6103   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6104   ins_encode();
 6105   ins_pipe(empty);
 6106 %}
 6107 
 6108 instruct prefetchAlloc( memory mem ) %{
 6109   predicate(AllocatePrefetchInstr==3);
 6110   match( PrefetchAllocation mem );
 6111   ins_cost(100);
 6112 
 6113   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6114   ins_encode %{
 6115     __ prefetchw($mem$$Address);
 6116   %}
 6117   ins_pipe(ialu_mem);
 6118 %}
 6119 
 6120 instruct prefetchAllocNTA( memory mem ) %{
 6121   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6122   match(PrefetchAllocation mem);
 6123   ins_cost(100);
 6124 
 6125   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6126   ins_encode %{
 6127     __ prefetchnta($mem$$Address);
 6128   %}
 6129   ins_pipe(ialu_mem);
 6130 %}
 6131 
 6132 instruct prefetchAllocT0( memory mem ) %{
 6133   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6134   match(PrefetchAllocation mem);
 6135   ins_cost(100);
 6136 
 6137   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6138   ins_encode %{
 6139     __ prefetcht0($mem$$Address);
 6140   %}
 6141   ins_pipe(ialu_mem);
 6142 %}
 6143 
 6144 instruct prefetchAllocT2( memory mem ) %{
 6145   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6146   match(PrefetchAllocation mem);
 6147   ins_cost(100);
 6148 
 6149   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6150   ins_encode %{
 6151     __ prefetcht2($mem$$Address);
 6152   %}
 6153   ins_pipe(ialu_mem);
 6154 %}
 6155 
 6156 //----------Store Instructions-------------------------------------------------
 6157 
 6158 // Store Byte
 6159 instruct storeB(memory mem, xRegI src) %{
 6160   match(Set mem (StoreB mem src));
 6161 
 6162   ins_cost(125);
 6163   format %{ "MOV8   $mem,$src" %}
 6164   opcode(0x88);
 6165   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
 6166   ins_pipe( ialu_mem_reg );
 6167 %}
 6168 
 6169 // Store Char/Short
 6170 instruct storeC(memory mem, rRegI src) %{
 6171   match(Set mem (StoreC mem src));
 6172 
 6173   ins_cost(125);
 6174   format %{ "MOV16  $mem,$src" %}
 6175   opcode(0x89, 0x66);
 6176   ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark );
 6177   ins_pipe( ialu_mem_reg );
 6178 %}
 6179 
 6180 // Store Integer
 6181 instruct storeI(memory mem, rRegI src) %{
 6182   match(Set mem (StoreI mem src));
 6183 
 6184   ins_cost(125);
 6185   format %{ "MOV    $mem,$src" %}
 6186   opcode(0x89);
 6187   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
 6188   ins_pipe( ialu_mem_reg );
 6189 %}
 6190 
 6191 // Store Long
 6192 instruct storeL(long_memory mem, eRegL src) %{
 6193   predicate(!((StoreLNode*)n)->require_atomic_access());
 6194   match(Set mem (StoreL mem src));
 6195 
 6196   ins_cost(200);
 6197   format %{ "MOV    $mem,$src.lo\n\t"
 6198             "MOV    $mem+4,$src.hi" %}
 6199   opcode(0x89, 0x89);
 6200   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark );
 6201   ins_pipe( ialu_mem_long_reg );
 6202 %}
 6203 
 6204 // Store Long to Integer
 6205 instruct storeL2I(memory mem, eRegL src) %{
 6206   match(Set mem (StoreI mem (ConvL2I src)));
 6207 
 6208   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6209   ins_encode %{
 6210     __ movl($mem$$Address, $src$$Register);
 6211   %}
 6212   ins_pipe(ialu_mem_reg);
 6213 %}
 6214 
 6215 // Volatile Store Long.  Must be atomic, so move it into
 6216 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6217 // target address before the store (for null-ptr checks)
 6218 // so the memory operand is used twice in the encoding.
 6219 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6220   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6221   match(Set mem (StoreL mem src));
 6222   effect( KILL cr );
 6223   ins_cost(400);
 6224   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6225             "FILD   $src\n\t"
 6226             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6227   opcode(0x3B);
 6228   ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark);
 6229   ins_pipe( fpu_reg_mem );
 6230 %}
 6231 
 6232 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6233   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6234   match(Set mem (StoreL mem src));
 6235   effect( TEMP tmp, KILL cr );
 6236   ins_cost(380);
 6237   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6238             "MOVSD  $tmp,$src\n\t"
 6239             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6240   ins_encode %{
 6241     __ cmpl(rax, $mem$$Address);
 6242     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6243     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6244   %}
 6245   ins_pipe( pipe_slow );
 6246 %}
 6247 
 6248 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6249   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6250   match(Set mem (StoreL mem src));
 6251   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6252   ins_cost(360);
 6253   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6254             "MOVD   $tmp,$src.lo\n\t"
 6255             "MOVD   $tmp2,$src.hi\n\t"
 6256             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6257             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6258   ins_encode %{
 6259     __ cmpl(rax, $mem$$Address);
 6260     __ movdl($tmp$$XMMRegister, $src$$Register);
 6261     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6262     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6263     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6264   %}
 6265   ins_pipe( pipe_slow );
 6266 %}
 6267 
 6268 // Store Pointer; for storing unknown oops and raw pointers
 6269 instruct storeP(memory mem, anyRegP src) %{
 6270   match(Set mem (StoreP mem src));
 6271 
 6272   ins_cost(125);
 6273   format %{ "MOV    $mem,$src" %}
 6274   opcode(0x89);
 6275   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
 6276   ins_pipe( ialu_mem_reg );
 6277 %}
 6278 
 6279 // Store Integer Immediate
 6280 instruct storeImmI(memory mem, immI src) %{
 6281   match(Set mem (StoreI mem src));
 6282 
 6283   ins_cost(150);
 6284   format %{ "MOV    $mem,$src" %}
 6285   opcode(0xC7);               /* C7 /0 */
 6286   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark);
 6287   ins_pipe( ialu_mem_imm );
 6288 %}
 6289 
 6290 // Store Short/Char Immediate
 6291 instruct storeImmI16(memory mem, immI16 src) %{
 6292   predicate(UseStoreImmI16);
 6293   match(Set mem (StoreC mem src));
 6294 
 6295   ins_cost(150);
 6296   format %{ "MOV16  $mem,$src" %}
 6297   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6298   ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark);
 6299   ins_pipe( ialu_mem_imm );
 6300 %}
 6301 
 6302 // Store Pointer Immediate; null pointers or constant oops that do not
 6303 // need card-mark barriers.
 6304 instruct storeImmP(memory mem, immP src) %{
 6305   match(Set mem (StoreP mem src));
 6306 
 6307   ins_cost(150);
 6308   format %{ "MOV    $mem,$src" %}
 6309   opcode(0xC7);               /* C7 /0 */
 6310   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark);
 6311   ins_pipe( ialu_mem_imm );
 6312 %}
 6313 
 6314 // Store Byte Immediate
 6315 instruct storeImmB(memory mem, immI8 src) %{
 6316   match(Set mem (StoreB mem src));
 6317 
 6318   ins_cost(150);
 6319   format %{ "MOV8   $mem,$src" %}
 6320   opcode(0xC6);               /* C6 /0 */
 6321   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark);
 6322   ins_pipe( ialu_mem_imm );
 6323 %}
 6324 
 6325 // Store Double
 6326 instruct storeDPR( memory mem, regDPR1 src) %{
 6327   predicate(UseSSE<=1);
 6328   match(Set mem (StoreD mem src));
 6329 
 6330   ins_cost(100);
 6331   format %{ "FST_D  $mem,$src" %}
 6332   opcode(0xDD);       /* DD /2 */
 6333   ins_encode( enc_FPR_store(mem,src) );
 6334   ins_pipe( fpu_mem_reg );
 6335 %}
 6336 
 6337 // Store double does rounding on x86
 6338 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6339   predicate(UseSSE<=1);
 6340   match(Set mem (StoreD mem (RoundDouble src)));
 6341 
 6342   ins_cost(100);
 6343   format %{ "FST_D  $mem,$src\t# round" %}
 6344   opcode(0xDD);       /* DD /2 */
 6345   ins_encode( enc_FPR_store(mem,src) );
 6346   ins_pipe( fpu_mem_reg );
 6347 %}
 6348 
 6349 // Store XMM register to memory (double-precision floating points)
 6350 // MOVSD instruction
 6351 instruct storeD(memory mem, regD src) %{
 6352   predicate(UseSSE>=2);
 6353   match(Set mem (StoreD mem src));
 6354   ins_cost(95);
 6355   format %{ "MOVSD  $mem,$src" %}
 6356   ins_encode %{
 6357     __ movdbl($mem$$Address, $src$$XMMRegister);
 6358   %}
 6359   ins_pipe( pipe_slow );
 6360 %}
 6361 
 6362 // Store XMM register to memory (single-precision floating point)
 6363 // MOVSS instruction
 6364 instruct storeF(memory mem, regF src) %{
 6365   predicate(UseSSE>=1);
 6366   match(Set mem (StoreF mem src));
 6367   ins_cost(95);
 6368   format %{ "MOVSS  $mem,$src" %}
 6369   ins_encode %{
 6370     __ movflt($mem$$Address, $src$$XMMRegister);
 6371   %}
 6372   ins_pipe( pipe_slow );
 6373 %}
 6374 
 6375 
 6376 // Store Float
 6377 instruct storeFPR( memory mem, regFPR1 src) %{
 6378   predicate(UseSSE==0);
 6379   match(Set mem (StoreF mem src));
 6380 
 6381   ins_cost(100);
 6382   format %{ "FST_S  $mem,$src" %}
 6383   opcode(0xD9);       /* D9 /2 */
 6384   ins_encode( enc_FPR_store(mem,src) );
 6385   ins_pipe( fpu_mem_reg );
 6386 %}
 6387 
 6388 // Store Float does rounding on x86
 6389 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6390   predicate(UseSSE==0);
 6391   match(Set mem (StoreF mem (RoundFloat src)));
 6392 
 6393   ins_cost(100);
 6394   format %{ "FST_S  $mem,$src\t# round" %}
 6395   opcode(0xD9);       /* D9 /2 */
 6396   ins_encode( enc_FPR_store(mem,src) );
 6397   ins_pipe( fpu_mem_reg );
 6398 %}
 6399 
 6400 // Store Float does rounding on x86
 6401 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6402   predicate(UseSSE<=1);
 6403   match(Set mem (StoreF mem (ConvD2F src)));
 6404 
 6405   ins_cost(100);
 6406   format %{ "FST_S  $mem,$src\t# D-round" %}
 6407   opcode(0xD9);       /* D9 /2 */
 6408   ins_encode( enc_FPR_store(mem,src) );
 6409   ins_pipe( fpu_mem_reg );
 6410 %}
 6411 
 6412 // Store immediate Float value (it is faster than store from FPU register)
 6413 // The instruction usage is guarded by predicate in operand immFPR().
 6414 instruct storeFPR_imm( memory mem, immFPR src) %{
 6415   match(Set mem (StoreF mem src));
 6416 
 6417   ins_cost(50);
 6418   format %{ "MOV    $mem,$src\t# store float" %}
 6419   opcode(0xC7);               /* C7 /0 */
 6420   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits(src), ClearInstMark);
 6421   ins_pipe( ialu_mem_imm );
 6422 %}
 6423 
 6424 // Store immediate Float value (it is faster than store from XMM register)
 6425 // The instruction usage is guarded by predicate in operand immF().
 6426 instruct storeF_imm( memory mem, immF src) %{
 6427   match(Set mem (StoreF mem src));
 6428 
 6429   ins_cost(50);
 6430   format %{ "MOV    $mem,$src\t# store float" %}
 6431   opcode(0xC7);               /* C7 /0 */
 6432   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits(src), ClearInstMark);
 6433   ins_pipe( ialu_mem_imm );
 6434 %}
 6435 
 6436 // Store Integer to stack slot
 6437 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6438   match(Set dst src);
 6439 
 6440   ins_cost(100);
 6441   format %{ "MOV    $dst,$src" %}
 6442   opcode(0x89);
 6443   ins_encode( OpcPRegSS( dst, src ) );
 6444   ins_pipe( ialu_mem_reg );
 6445 %}
 6446 
 6447 // Store Integer to stack slot
 6448 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6449   match(Set dst src);
 6450 
 6451   ins_cost(100);
 6452   format %{ "MOV    $dst,$src" %}
 6453   opcode(0x89);
 6454   ins_encode( OpcPRegSS( dst, src ) );
 6455   ins_pipe( ialu_mem_reg );
 6456 %}
 6457 
 6458 // Store Long to stack slot
 6459 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6460   match(Set dst src);
 6461 
 6462   ins_cost(200);
 6463   format %{ "MOV    $dst,$src.lo\n\t"
 6464             "MOV    $dst+4,$src.hi" %}
 6465   opcode(0x89, 0x89);
 6466   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark );
 6467   ins_pipe( ialu_mem_long_reg );
 6468 %}
 6469 
 6470 //----------MemBar Instructions-----------------------------------------------
 6471 // Memory barrier flavors
 6472 
 6473 instruct membar_acquire() %{
 6474   match(MemBarAcquire);
 6475   match(LoadFence);
 6476   ins_cost(400);
 6477 
 6478   size(0);
 6479   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6480   ins_encode();
 6481   ins_pipe(empty);
 6482 %}
 6483 
 6484 instruct membar_acquire_lock() %{
 6485   match(MemBarAcquireLock);
 6486   ins_cost(0);
 6487 
 6488   size(0);
 6489   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6490   ins_encode( );
 6491   ins_pipe(empty);
 6492 %}
 6493 
 6494 instruct membar_release() %{
 6495   match(MemBarRelease);
 6496   match(StoreFence);
 6497   ins_cost(400);
 6498 
 6499   size(0);
 6500   format %{ "MEMBAR-release ! (empty encoding)" %}
 6501   ins_encode( );
 6502   ins_pipe(empty);
 6503 %}
 6504 
 6505 instruct membar_release_lock() %{
 6506   match(MemBarReleaseLock);
 6507   ins_cost(0);
 6508 
 6509   size(0);
 6510   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6511   ins_encode( );
 6512   ins_pipe(empty);
 6513 %}
 6514 
// Volatile (StoreLoad) barrier. The encoding delegates to the macro
// assembler's membar(StoreLoad); the format text shows it as a LOCK ADDL on
// the top of stack, and the flags register is declared killed.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  // Template-style format: the text is emitted via the $$emit$$ directive.
  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}
 6529 
 6530 instruct unnecessary_membar_volatile() %{
 6531   match(MemBarVolatile);
 6532   predicate(Matcher::post_store_load_barrier(n));
 6533   ins_cost(0);
 6534 
 6535   size(0);
 6536   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6537   ins_encode( );
 6538   ins_pipe(empty);
 6539 %}
 6540 
 6541 instruct membar_storestore() %{
 6542   match(MemBarStoreStore);
 6543   match(StoreStoreFence);
 6544   ins_cost(0);
 6545 
 6546   size(0);
 6547   format %{ "MEMBAR-storestore (empty encoding)" %}
 6548   ins_encode( );
 6549   ins_pipe(empty);
 6550 %}
 6551 
 6552 //----------Move Instructions--------------------------------------------------
 6553 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6554   match(Set dst (CastX2P src));
 6555   format %{ "# X2P  $dst, $src" %}
 6556   ins_encode( /*empty encoding*/ );
 6557   ins_cost(0);
 6558   ins_pipe(empty);
 6559 %}
 6560 
 6561 instruct castP2X(rRegI dst, eRegP src ) %{
 6562   match(Set dst (CastP2X src));
 6563   ins_cost(50);
 6564   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6565   ins_encode( enc_Copy( dst, src) );
 6566   ins_pipe( ialu_reg_reg );
 6567 %}
 6568 
 6569 //----------Conditional Move---------------------------------------------------
 6570 // Conditional move
 6571 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6572   predicate(!VM_Version::supports_cmov() );
 6573   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6574   ins_cost(200);
 6575   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6576             "MOV    $dst,$src\n"
 6577       "skip:" %}
 6578   ins_encode %{
 6579     Label Lskip;
 6580     // Invert sense of branch from sense of CMOV
 6581     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6582     __ movl($dst$$Register, $src$$Register);
 6583     __ bind(Lskip);
 6584   %}
 6585   ins_pipe( pipe_cmov_reg );
 6586 %}
 6587 
 6588 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6589   predicate(!VM_Version::supports_cmov() );
 6590   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6591   ins_cost(200);
 6592   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6593             "MOV    $dst,$src\n"
 6594       "skip:" %}
 6595   ins_encode %{
 6596     Label Lskip;
 6597     // Invert sense of branch from sense of CMOV
 6598     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6599     __ movl($dst$$Register, $src$$Register);
 6600     __ bind(Lskip);
 6601   %}
 6602   ins_pipe( pipe_cmov_reg );
 6603 %}
 6604 
 6605 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6606   predicate(VM_Version::supports_cmov() );
 6607   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6608   ins_cost(200);
 6609   format %{ "CMOV$cop $dst,$src" %}
 6610   opcode(0x0F,0x40);
 6611   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6612   ins_pipe( pipe_cmov_reg );
 6613 %}
 6614 
 6615 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6616   predicate(VM_Version::supports_cmov() );
 6617   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6618   ins_cost(200);
 6619   format %{ "CMOV$cop $dst,$src" %}
 6620   opcode(0x0F,0x40);
 6621   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6622   ins_pipe( pipe_cmov_reg );
 6623 %}
 6624 
 6625 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6626   predicate(VM_Version::supports_cmov() );
 6627   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6628   ins_cost(200);
 6629   expand %{
 6630     cmovI_regU(cop, cr, dst, src);
 6631   %}
 6632 %}
 6633 
 6634 // Conditional move
 6635 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6636   predicate(VM_Version::supports_cmov() );
 6637   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6638   ins_cost(250);
 6639   format %{ "CMOV$cop $dst,$src" %}
 6640   opcode(0x0F,0x40);
 6641   ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
 6642   ins_pipe( pipe_cmov_mem );
 6643 %}
 6644 
 6645 // Conditional move
 6646 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6647   predicate(VM_Version::supports_cmov() );
 6648   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6649   ins_cost(250);
 6650   format %{ "CMOV$cop $dst,$src" %}
 6651   opcode(0x0F,0x40);
 6652   ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark );
 6653   ins_pipe( pipe_cmov_mem );
 6654 %}
 6655 
 6656 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6657   predicate(VM_Version::supports_cmov() );
 6658   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6659   ins_cost(250);
 6660   expand %{
 6661     cmovI_memU(cop, cr, dst, src);
 6662   %}
 6663 %}
 6664 
 6665 // Conditional move
 6666 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6667   predicate(VM_Version::supports_cmov() );
 6668   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6669   ins_cost(200);
 6670   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6671   opcode(0x0F,0x40);
 6672   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6673   ins_pipe( pipe_cmov_reg );
 6674 %}
 6675 
// Conditional move (non-P6 version)
// Note:  a CMoveP is generated for  stubs and native wrappers
//        regardless of whether we are on a P6, so we
//        emulate a cmov here
// This rule has no predicate, so it is always available; its higher cost
// (300 versus 200 for cmovP_reg) makes the real CMOV form cheaper whenever
// that rule's predicate holds.
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop   skip\n\t"
          "MOV    $dst,$src\t# pointer\n"
      "skip:" %}
  opcode(0x8b);
  // NOTE(review): the 0x2 passed to enc_cmov_branch is presumably the byte
  // length of the MOV being skipped -- confirm against the enc_class.
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}
 6690 
 6691 // Conditional move
 6692 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6693   predicate(VM_Version::supports_cmov() );
 6694   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6695   ins_cost(200);
 6696   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6697   opcode(0x0F,0x40);
 6698   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6699   ins_pipe( pipe_cmov_reg );
 6700 %}
 6701 
 6702 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6703   predicate(VM_Version::supports_cmov() );
 6704   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6705   ins_cost(200);
 6706   expand %{
 6707     cmovP_regU(cop, cr, dst, src);
 6708   %}
 6709 %}
 6710 
 6711 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6712 // correctly meets the two pointer arguments; one is an incoming
 6713 // register but the other is a memory operand.  ALSO appears to
 6714 // be buggy with implicit null checks.
 6715 //
 6716 //// Conditional move
 6717 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6718 //  predicate(VM_Version::supports_cmov() );
 6719 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6720 //  ins_cost(250);
 6721 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6722 //  opcode(0x0F,0x40);
 6723 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6724 //  ins_pipe( pipe_cmov_mem );
 6725 //%}
 6726 //
 6727 //// Conditional move
 6728 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6729 //  predicate(VM_Version::supports_cmov() );
 6730 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6731 //  ins_cost(250);
 6732 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6733 //  opcode(0x0F,0x40);
 6734 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6735 //  ins_pipe( pipe_cmov_mem );
 6736 //%}
 6737 
 6738 // Conditional move
 6739 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6740   predicate(UseSSE<=1);
 6741   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6742   ins_cost(200);
 6743   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6744   opcode(0xDA);
 6745   ins_encode( enc_cmov_dpr(cop,src) );
 6746   ins_pipe( pipe_cmovDPR_reg );
 6747 %}
 6748 
 6749 // Conditional move
 6750 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6751   predicate(UseSSE==0);
 6752   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6753   ins_cost(200);
 6754   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6755   opcode(0xDA);
 6756   ins_encode( enc_cmov_dpr(cop,src) );
 6757   ins_pipe( pipe_cmovDPR_reg );
 6758 %}
 6759 
// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// For signed flags with UseSSE <= 1 the double conditional move is therefore
// emulated: a conditional branch (enc_cmov_branch) skips a push of $src
// followed by an opcode/RegOpc sequence targeting $dst.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOV    $dst,$src\t# double\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  // NOTE(review): the 0x4 passed to enc_cmov_branch is presumably the byte
  // length of the skipped push + store sequence -- confirm against the
  // enc_class before changing the encoding.
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}
 6772 
// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// For signed flags with UseSSE == 0 the float conditional move is therefore
// emulated: a conditional branch (enc_cmov_branch) skips a push of $src
// followed by an opcode/RegOpc sequence targeting $dst.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop    skip\n\t"
            "MOV    $dst,$src\t# float\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  // NOTE(review): the 0x4 passed to enc_cmov_branch is presumably the byte
  // length of the skipped push + store sequence -- confirm against the
  // enc_class before changing the encoding.
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}
 6785 
 6786 // No CMOVE with SSE/SSE2
 6787 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6788   predicate (UseSSE>=1);
 6789   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6790   ins_cost(200);
 6791   format %{ "Jn$cop   skip\n\t"
 6792             "MOVSS  $dst,$src\t# float\n"
 6793       "skip:" %}
 6794   ins_encode %{
 6795     Label skip;
 6796     // Invert sense of branch from sense of CMOV
 6797     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6798     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6799     __ bind(skip);
 6800   %}
 6801   ins_pipe( pipe_slow );
 6802 %}
 6803 
 6804 // No CMOVE with SSE/SSE2
 6805 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6806   predicate (UseSSE>=2);
 6807   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6808   ins_cost(200);
 6809   format %{ "Jn$cop   skip\n\t"
 6810             "MOVSD  $dst,$src\t# float\n"
 6811       "skip:" %}
 6812   ins_encode %{
 6813     Label skip;
 6814     // Invert sense of branch from sense of CMOV
 6815     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6816     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6817     __ bind(skip);
 6818   %}
 6819   ins_pipe( pipe_slow );
 6820 %}
 6821 
 6822 // unsigned version
 6823 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6824   predicate (UseSSE>=1);
 6825   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6826   ins_cost(200);
 6827   format %{ "Jn$cop   skip\n\t"
 6828             "MOVSS  $dst,$src\t# float\n"
 6829       "skip:" %}
 6830   ins_encode %{
 6831     Label skip;
 6832     // Invert sense of branch from sense of CMOV
 6833     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6834     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6835     __ bind(skip);
 6836   %}
 6837   ins_pipe( pipe_slow );
 6838 %}
 6839 
 6840 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6841   predicate (UseSSE>=1);
 6842   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6843   ins_cost(200);
 6844   expand %{
 6845     fcmovF_regU(cop, cr, dst, src);
 6846   %}
 6847 %}
 6848 
 6849 // unsigned version
 6850 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6851   predicate (UseSSE>=2);
 6852   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6853   ins_cost(200);
 6854   format %{ "Jn$cop   skip\n\t"
 6855             "MOVSD  $dst,$src\t# float\n"
 6856       "skip:" %}
 6857   ins_encode %{
 6858     Label skip;
 6859     // Invert sense of branch from sense of CMOV
 6860     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6861     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6862     __ bind(skip);
 6863   %}
 6864   ins_pipe( pipe_slow );
 6865 %}
 6866 
 6867 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 6868   predicate (UseSSE>=2);
 6869   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6870   ins_cost(200);
 6871   expand %{
 6872     fcmovD_regU(cop, cr, dst, src);
 6873   %}
 6874 %}
 6875 
 6876 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 6877   predicate(VM_Version::supports_cmov() );
 6878   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6879   ins_cost(200);
 6880   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6881             "CMOV$cop $dst.hi,$src.hi" %}
 6882   opcode(0x0F,0x40);
 6883   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 6884   ins_pipe( pipe_cmov_reg_long );
 6885 %}
 6886 
 6887 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 6888   predicate(VM_Version::supports_cmov() );
 6889   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6890   ins_cost(200);
 6891   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6892             "CMOV$cop $dst.hi,$src.hi" %}
 6893   opcode(0x0F,0x40);
 6894   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 6895   ins_pipe( pipe_cmov_reg_long );
 6896 %}
 6897 
 6898 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 6899   predicate(VM_Version::supports_cmov() );
 6900   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6901   ins_cost(200);
 6902   expand %{
 6903     cmovL_regU(cop, cr, dst, src);
 6904   %}
 6905 %}
 6906 
 6907 //----------Arithmetic Instructions--------------------------------------------
 6908 //----------Addition Instructions----------------------------------------------
 6909 
 6910 // Integer Addition Instructions
 6911 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 6912   match(Set dst (AddI dst src));
 6913   effect(KILL cr);
 6914 
 6915   size(2);
 6916   format %{ "ADD    $dst,$src" %}
 6917   opcode(0x03);
 6918   ins_encode( OpcP, RegReg( dst, src) );
 6919   ins_pipe( ialu_reg_reg );
 6920 %}
 6921 
 6922 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
        // Integer add of an immediate constant: dst = dst + src. Flags are clobbered.
 6923   match(Set dst (AddI dst src));
 6924   effect(KILL cr);
 6925 
        // No size() is declared. Judging by the encoding class names (OpcSErm,
        // Con8or32), the constant is presumably emitted as 8 or 32 bits depending
        // on its value -- confirm against the enc_class definitions.
 6926   format %{ "ADD    $dst,$src" %}
 6927   opcode(0x81, 0x00); /* /0 id */
 6928   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 6929   ins_pipe( ialu_reg );
 6930 %}
 6931 
 6932 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
        // Add of the constant 1 (immI_1) emitted as INC. Only used when the
        // UseIncDec flag is set; otherwise the generic immediate add applies.
 6933   predicate(UseIncDec);
 6934   match(Set dst (AddI dst src));
 6935   effect(KILL cr);
 6936 
        // Single-byte encoding: primary opcode 0x40 combined with the register
        // number of dst (Opc_plus).
 6937   size(1);
 6938   format %{ "INC    $dst" %}
 6939   opcode(0x40); /*  */
 6940   ins_encode( Opc_plus( primary, dst ) );
 6941   ins_pipe( ialu_reg );
 6942 %}
 6943 
 6944 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
        // Three-address integer add of register + constant using LEA: the result
        // goes to a dst that may differ from src0. No flags operand or KILL
        // effect is declared for this form.
 6945   match(Set dst (AddI src0 src1));
 6946   ins_cost(110);
 6947 
 6948   format %{ "LEA    $dst,[$src0 + $src1]" %}
 6949   opcode(0x8D); /* 0x8D /r */
        // The encoding is bracketed by SetInstMark / ClearInstMark.
 6950   ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
 6951   ins_pipe( ialu_reg_reg );
 6952 %}
 6953 
 6954 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
        // Pointer counterpart of leaI_eReg_immI: dst = src0 + constant (AddP) via
        // LEA, with no flags effect declared.
 6955   match(Set dst (AddP src0 src1));
 6956   ins_cost(110);
 6957 
 6958   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 6959   opcode(0x8D); /* 0x8D /r */
 6960   ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark );
 6961   ins_pipe( ialu_reg_reg );
 6962 %}
 6963 
 6964 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
        // Add of the constant -1 (immI_M1) emitted as DEC. Only used when the
        // UseIncDec flag is set.
 6965   predicate(UseIncDec);
 6966   match(Set dst (AddI dst src));
 6967   effect(KILL cr);
 6968 
        // Single-byte encoding: primary opcode 0x48 combined with the register
        // number of dst (Opc_plus).
 6969   size(1);
 6970   format %{ "DEC    $dst" %}
 6971   opcode(0x48); /*  */
 6972   ins_encode( Opc_plus( primary, dst ) );
 6973   ins_pipe( ialu_reg );
 6974 %}
 6975 
 6976 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
        // Pointer plus integer register: dst = dst + src (AddP). Uses the same
        // 2-byte ADD encoding (opcode 0x03) as addI_eReg; flags are clobbered.
 6977   match(Set dst (AddP dst src));
 6978   effect(KILL cr);
 6979 
 6980   size(2);
 6981   format %{ "ADD    $dst,$src" %}
 6982   opcode(0x03);
 6983   ins_encode( OpcP, RegReg( dst, src) );
 6984   ins_pipe( ialu_reg_reg );
 6985 %}
 6986 
 6987 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
        // Pointer plus immediate constant: dst = dst + src (AddP). Flags are
        // clobbered. The encoding is the same as addI_eReg_imm.
 6988   match(Set dst (AddP dst src));
 6989   effect(KILL cr);
 6990 
 6991   format %{ "ADD    $dst,$src" %}
 6992   opcode(0x81,0x00); /* Opcode 81 /0 id */
        // The commented-out line below is an older encoding kept for reference;
        // the active one is OpcSErm + Con8or32.
 6993   // ins_encode( RegImm( dst, src) );
 6994   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 6995   ins_pipe( ialu_reg );
 6996 %}
 6997 
 6998 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
        // Integer add with the second operand loaded from memory: the LoadI is
        // folded into the ADD, so dst = dst + [src]. Flags are clobbered.
 6999   match(Set dst (AddI dst (LoadI src)));
 7000   effect(KILL cr);
 7001 
        // Costed higher (150) than the register/register form.
 7002   ins_cost(150);
 7003   format %{ "ADD    $dst,$src" %}
 7004   opcode(0x03);
 7005   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 7006   ins_pipe( ialu_reg_mem );
 7007 %}
 7008 
 7009 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
        // Read-modify-write add on memory: matches load, add and store to the
        // same address dst and emits one ADD [dst],src. Flags are clobbered.
 7010   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7011   effect(KILL cr);
 7012 
 7013   ins_cost(150);
 7014   format %{ "ADD    $dst,$src" %}
 7015   opcode(0x01);  /* Opcode 01 /r */
        // Note the operand order: the register (src) is passed first, the memory operand second.
 7016   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 7017   ins_pipe( ialu_mem_reg );
 7018 %}
 7019 
 7020 // Add Memory with Immediate
 7021 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
        // Read-modify-write add of a constant to a memory location: matches
        // load, add and store to the same address dst. Flags are clobbered.
 7022   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7023   effect(KILL cr);
 7024 
 7025   ins_cost(125);
 7026   format %{ "ADD    $dst,$src" %}
 7027   opcode(0x81);               /* Opcode 81 /0 id */
        // RMopc_Mem(0x00,dst) supplies the /0 opcode extension with the memory
        // operand; Con8or32 emits the constant.
 7028   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark );
 7029   ins_pipe( ialu_mem_imm );
 7030 %}
 7031 
 7032 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
        // Read-modify-write increment of a memory location by 1 (immI_1),
        // emitted as INC [dst]. Flags are clobbered.
        // NOTE(review): unlike incI_eReg, there is no UseIncDec predicate here
        // -- confirm this is intentional.
 7033   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7034   effect(KILL cr);
 7035 
 7036   ins_cost(125);
 7037   format %{ "INC    $dst" %}
 7038   opcode(0xFF);               /* Opcode FF /0 */
 7039   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark);
 7040   ins_pipe( ialu_mem_imm );
 7041 %}
 7042 
 7043 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
        // Read-modify-write add of -1 (immI_M1) to a memory location, emitted as
        // DEC [dst]. Flags are clobbered.
        // NOTE(review): unlike decI_eReg, there is no UseIncDec predicate here
        // -- confirm this is intentional.
 7044   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7045   effect(KILL cr);
 7046 
 7047   ins_cost(125);
 7048   format %{ "DEC    $dst" %}
 7049   opcode(0xFF);               /* Opcode FF /1 */
        // Same opcode byte as incI_mem; the /1 opcode extension selects DEC.
 7050   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark);
 7051   ins_pipe( ialu_mem_imm );
 7052 %}
 7053 
 7054 
 7055 instruct checkCastPP( eRegP dst ) %{
        // CheckCastPP on a pointer register: matched in place (dst is both input
        // and result) and emits no machine code.
 7056   match(Set dst (CheckCastPP dst));
 7057 
        // Zero bytes: the encoding below is intentionally empty.
 7058   size(0);
 7059   format %{ "#checkcastPP of $dst" %}
 7060   ins_encode( /*empty encoding*/ );
 7061   ins_pipe( empty );
 7062 %}
 7063 
 7064 instruct castPP( eRegP dst ) %{
        // CastPP on a pointer register: matched in place, with an empty encoding
        // (no machine code is emitted).
 7065   match(Set dst (CastPP dst));
 7066   format %{ "#castPP of $dst" %}
 7067   ins_encode( /*empty encoding*/ );
 7068   ins_pipe( empty );
 7069 %}
 7070 
 7071 instruct castII( rRegI dst ) %{
        // CastII on an int register: matched in place, with an empty encoding and
        // an explicit cost of zero.
 7072   match(Set dst (CastII dst));
 7073   format %{ "#castII of $dst" %}
 7074   ins_encode( /*empty encoding*/ );
 7075   ins_cost(0);
 7076   ins_pipe( empty );
 7077 %}
 7078 
 7079 instruct castLL( eRegL dst ) %{
        // CastLL on a long register: matched in place, with an empty encoding and
        // an explicit cost of zero.
 7080   match(Set dst (CastLL dst));
 7081   format %{ "#castLL of $dst" %}
 7082   ins_encode( /*empty encoding*/ );
 7083   ins_cost(0);
 7084   ins_pipe( empty );
 7085 %}
 7086 
 7087 instruct castFF( regF dst ) %{
        // CastFF on a regF operand: empty encoding, zero cost. Selected when
        // UseSSE >= 1; castFF_PR covers the UseSSE < 1 case.
 7088   predicate(UseSSE >= 1);
 7089   match(Set dst (CastFF dst));
 7090   format %{ "#castFF of $dst" %}
 7091   ins_encode( /*empty encoding*/ );
 7092   ins_cost(0);
 7093   ins_pipe( empty );
 7094 %}
 7095 
 7096 instruct castDD( regD dst ) %{
        // CastDD on a regD operand: empty encoding, zero cost. Selected when
        // UseSSE >= 2; castDD_PR covers the UseSSE < 2 case.
 7097   predicate(UseSSE >= 2);
 7098   match(Set dst (CastDD dst));
 7099   format %{ "#castDD of $dst" %}
 7100   ins_encode( /*empty encoding*/ );
 7101   ins_cost(0);
 7102   ins_pipe( empty );
 7103 %}
 7104 
 7105 instruct castFF_PR( regFPR dst ) %{
        // CastFF on a regFPR operand: empty encoding, zero cost. Selected when
        // UseSSE < 1 (the complement of castFF's predicate).
 7106   predicate(UseSSE < 1);
 7107   match(Set dst (CastFF dst));
 7108   format %{ "#castFF of $dst" %}
 7109   ins_encode( /*empty encoding*/ );
 7110   ins_cost(0);
 7111   ins_pipe( empty );
 7112 %}
 7113 
 7114 instruct castDD_PR( regDPR dst ) %{
        // CastDD on a regDPR operand: empty encoding, zero cost. Selected when
        // UseSSE < 2 (the complement of castDD's predicate).
 7115   predicate(UseSSE < 2);
 7116   match(Set dst (CastDD dst));
 7117   format %{ "#castDD of $dst" %}
 7118   ins_encode( /*empty encoding*/ );
 7119   ins_cost(0);
 7120   ins_pipe( empty );
 7121 %}
 7122 
 7123 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7124 
 7125 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
        // 64-bit compare-and-swap producing a boolean in res. Handles both the
        // strong and the weak ideal node with the same code. Per the format, the
        // expected value is compared as EDX:EAX against [mem_ptr] by CMPXCHG8.
        // Operands are pinned to fixed register classes (eSIRegP, eADXRegL, eBCXRegL).
 7126   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7127   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
        // Besides the flags, oldval is declared killed by this instruction.
 7128   effect(KILL cr, KILL oldval);
 7129   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7130             "MOV    $res,0\n\t"
 7131             "JNE,s  fail\n\t"
 7132             "MOV    $res,1\n"
 7133           "fail:" %}
        // First the compare-exchange, then the flags are turned into 0/1 in res
        // (res = 1 on success, per the format above).
 7134   ins_encode( enc_cmpxchg8(mem_ptr),
 7135               enc_flags_ne_to_boolean(res) );
 7136   ins_pipe( pipe_cmpxchg );
 7137 %}
 7138 
 7139 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
        // Pointer compare-and-swap producing a boolean in res. Handles both the
        // strong and the weak ideal node. Per the format, the expected value is
        // compared as EAX against [mem_ptr].
 7140   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7141   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
        // Flags and oldval are both declared killed.
 7142   effect(KILL cr, KILL oldval);
 7143   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7144             "MOV    $res,0\n\t"
 7145             "JNE,s  fail\n\t"
 7146             "MOV    $res,1\n"
 7147           "fail:" %}
        // Compare-exchange followed by conversion of the flags to 0/1 in res.
 7148   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7149   ins_pipe( pipe_cmpxchg );
 7150 %}
 7151 
 7152 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
        // Byte compare-and-swap producing a boolean in res. Handles both the
        // strong and the weak ideal node. Uses the byte-sized encoding enc_cmpxchgb.
 7153   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7154   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
        // Flags and oldval are both declared killed.
 7155   effect(KILL cr, KILL oldval);
 7156   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7157             "MOV    $res,0\n\t"
 7158             "JNE,s  fail\n\t"
 7159             "MOV    $res,1\n"
 7160           "fail:" %}
        // Compare-exchange followed by conversion of the flags to 0/1 in res.
 7161   ins_encode( enc_cmpxchgb(mem_ptr),
 7162               enc_flags_ne_to_boolean(res) );
 7163   ins_pipe( pipe_cmpxchg );
 7164 %}
 7165 
 7166 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
        // Short (16-bit) compare-and-swap producing a boolean in res. Handles
        // both the strong and the weak ideal node. Uses the word-sized encoding
        // enc_cmpxchgw.
 7167   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7168   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
        // Flags and oldval are both declared killed.
 7169   effect(KILL cr, KILL oldval);
 7170   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7171             "MOV    $res,0\n\t"
 7172             "JNE,s  fail\n\t"
 7173             "MOV    $res,1\n"
 7174           "fail:" %}
        // Compare-exchange followed by conversion of the flags to 0/1 in res.
 7175   ins_encode( enc_cmpxchgw(mem_ptr),
 7176               enc_flags_ne_to_boolean(res) );
 7177   ins_pipe( pipe_cmpxchg );
 7178 %}
 7179 
 7180 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Int compare-and-swap producing a boolean in res. Handles both the
        // strong and the weak ideal node. Uses the same enc_cmpxchg encoding as
        // compareAndSwapP.
 7181   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7182   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
        // Flags and oldval are both declared killed.
 7183   effect(KILL cr, KILL oldval);
 7184   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7185             "MOV    $res,0\n\t"
 7186             "JNE,s  fail\n\t"
 7187             "MOV    $res,1\n"
 7188           "fail:" %}
        // Compare-exchange followed by conversion of the flags to 0/1 in res.
 7189   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7190   ins_pipe( pipe_cmpxchg );
 7191 %}
 7192 
 7193 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
        // 64-bit compare-and-exchange. Unlike the compareAndSwap forms, the
        // node's result is defined in the oldval register pair itself (Set oldval),
        // and no boolean is materialized. Flags are clobbered.
 7194   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7195   effect(KILL cr);
 7196   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7197   ins_encode( enc_cmpxchg8(mem_ptr) );
 7198   ins_pipe( pipe_cmpxchg );
 7199 %}
 7200 
 7201 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
        // Pointer compare-and-exchange: the node's result is defined in the
        // oldval register (Set oldval); no boolean is produced. Flags are clobbered.
 7202   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7203   effect(KILL cr);
 7204   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7205   ins_encode( enc_cmpxchg(mem_ptr) );
 7206   ins_pipe( pipe_cmpxchg );
 7207 %}
 7208 
 7209 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Byte compare-and-exchange: the node's result is defined in the oldval
        // register (Set oldval); uses the byte-sized encoding. Flags are clobbered.
 7210   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7211   effect(KILL cr);
 7212   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7213   ins_encode( enc_cmpxchgb(mem_ptr) );
 7214   ins_pipe( pipe_cmpxchg );
 7215 %}
 7216 
 7217 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Short (16-bit) compare-and-exchange: the node's result is defined in
        // the oldval register (Set oldval); uses the word-sized encoding. Flags
        // are clobbered.
 7218   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7219   effect(KILL cr);
 7220   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7221   ins_encode( enc_cmpxchgw(mem_ptr) );
 7222   ins_pipe( pipe_cmpxchg );
 7223 %}
 7224 
 7225 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
        // Int compare-and-exchange: the node's result is defined in the oldval
        // register (Set oldval); no boolean is produced. Flags are clobbered.
 7226   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7227   effect(KILL cr);
 7228   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7229   ins_encode( enc_cmpxchg(mem_ptr) );
 7230   ins_pipe( pipe_cmpxchg );
 7231 %}
 7232 
 7233 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // Atomic byte get-and-add whose fetched value is not used: a plain locked
        // ADD of a constant is emitted instead of XADD. The result is bound to a
        // dummy Universe operand.
 7234   predicate(n->as_LoadStore()->result_not_used());
 7235   match(Set dummy (GetAndAddB mem add));
 7236   effect(KILL cr);
 7237   format %{ "ADDB  [$mem],$add" %}
 7238   ins_encode %{
          // The LOCK prefix is emitted first and applies to the addb that follows.
 7239     __ lock();
 7240     __ addb($mem$$Address, $add$$constant);
 7241   %}
 7242   ins_pipe( pipe_cmpxchg );
 7243 %}
 7244 
 7245 // Important to match to xRegI: only 8-bit regs.
 7246 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
        // Atomic byte get-and-add: newval is both the addend going in and the
        // node's result coming out (Set newval). Flags are clobbered.
 7247   match(Set newval (GetAndAddB mem newval));
 7248   effect(KILL cr);
 7249   format %{ "XADDB  [$mem],$newval" %}
 7250   ins_encode %{
          // LOCK prefix followed by the byte-sized exchange-and-add.
 7251     __ lock();
 7252     __ xaddb($mem$$Address, $newval$$Register);
 7253   %}
 7254   ins_pipe( pipe_cmpxchg );
 7255 %}
 7256 
 7257 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // Atomic short get-and-add whose fetched value is not used: a locked ADD
        // of a constant is emitted instead of XADD.
        // Note: the format text says "ADDS", while the encoder emits addw.
 7258   predicate(n->as_LoadStore()->result_not_used());
 7259   match(Set dummy (GetAndAddS mem add));
 7260   effect(KILL cr);
 7261   format %{ "ADDS  [$mem],$add" %}
 7262   ins_encode %{
          // The LOCK prefix is emitted first and applies to the addw that follows.
 7263     __ lock();
 7264     __ addw($mem$$Address, $add$$constant);
 7265   %}
 7266   ins_pipe( pipe_cmpxchg );
 7267 %}
 7268 
 7269 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
        // Atomic short get-and-add: newval is both the addend going in and the
        // node's result coming out (Set newval). Flags are clobbered.
 7270   match(Set newval (GetAndAddS mem newval));
 7271   effect(KILL cr);
 7272   format %{ "XADDS  [$mem],$newval" %}
 7273   ins_encode %{
          // LOCK prefix followed by the word-sized exchange-and-add.
 7274     __ lock();
 7275     __ xaddw($mem$$Address, $newval$$Register);
 7276   %}
 7277   ins_pipe( pipe_cmpxchg );
 7278 %}
 7279 
 7280 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
        // Atomic int get-and-add whose fetched value is not used: a locked ADD of
        // a constant is emitted instead of XADD.
 7281   predicate(n->as_LoadStore()->result_not_used());
 7282   match(Set dummy (GetAndAddI mem add));
 7283   effect(KILL cr);
 7284   format %{ "ADDL  [$mem],$add" %}
 7285   ins_encode %{
          // The LOCK prefix is emitted first and applies to the addl that follows.
 7286     __ lock();
 7287     __ addl($mem$$Address, $add$$constant);
 7288   %}
 7289   ins_pipe( pipe_cmpxchg );
 7290 %}
 7291 
 7292 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
        // Atomic int get-and-add: newval is both the addend going in and the
        // node's result coming out (Set newval). Flags are clobbered.
 7293   match(Set newval (GetAndAddI mem newval));
 7294   effect(KILL cr);
 7295   format %{ "XADDL  [$mem],$newval" %}
 7296   ins_encode %{
          // LOCK prefix followed by the 32-bit exchange-and-add.
 7297     __ lock();
 7298     __ xaddl($mem$$Address, $newval$$Register);
 7299   %}
 7300   ins_pipe( pipe_cmpxchg );
 7301 %}
 7302 
 7303 // Important to match to xRegI: only 8-bit regs.
 7304 instruct xchgB( memory mem, xRegI newval) %{
        // Atomic byte get-and-set: newval carries the value to store and receives
        // the node's result (Set newval). No flags operand is declared, and no
        // explicit lock() is emitted here (unlike the xadd forms).
 7305   match(Set newval (GetAndSetB mem newval));
 7306   format %{ "XCHGB  $newval,[$mem]" %}
 7307   ins_encode %{
 7308     __ xchgb($newval$$Register, $mem$$Address);
 7309   %}
 7310   ins_pipe( pipe_cmpxchg );
 7311 %}
 7312 
 7313 instruct xchgS( memory mem, rRegI newval) %{
        // Atomic short get-and-set: newval carries the value to store and
        // receives the node's result (Set newval). No flags operand is declared,
        // and no explicit lock() is emitted.
 7314   match(Set newval (GetAndSetS mem newval));
 7315   format %{ "XCHGW  $newval,[$mem]" %}
 7316   ins_encode %{
 7317     __ xchgw($newval$$Register, $mem$$Address);
 7318   %}
 7319   ins_pipe( pipe_cmpxchg );
 7320 %}
 7321 
 7322 instruct xchgI( memory mem, rRegI newval) %{
        // Atomic int get-and-set: newval carries the value to store and receives
        // the node's result (Set newval). No flags operand is declared, and no
        // explicit lock() is emitted.
 7323   match(Set newval (GetAndSetI mem newval));
 7324   format %{ "XCHGL  $newval,[$mem]" %}
 7325   ins_encode %{
 7326     __ xchgl($newval$$Register, $mem$$Address);
 7327   %}
 7328   ins_pipe( pipe_cmpxchg );
 7329 %}
 7330 
 7331 instruct xchgP( memory mem, pRegP newval) %{
        // Atomic pointer get-and-set: newval carries the pointer to store and
        // receives the node's result (Set newval). The pointer is exchanged with
        // the same xchgl used by xchgI.
 7332   match(Set newval (GetAndSetP mem newval));
 7333   format %{ "XCHGL  $newval,[$mem]" %}
 7334   ins_encode %{
 7335     __ xchgl($newval$$Register, $mem$$Address);
 7336   %}
 7337   ins_pipe( pipe_cmpxchg );
 7338 %}
 7339 
 7340 //----------Subtraction Instructions-------------------------------------------
 7341 
 7342 // Integer Subtraction Instructions
 7343 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Integer subtract, register form: dst = dst - src (two-address). Flags
        // are clobbered.
 7344   match(Set dst (SubI dst src));
 7345   effect(KILL cr);
 7346 
        // Fixed 2-byte encoding: opcode 0x2B followed by the register/register byte.
 7347   size(2);
 7348   format %{ "SUB    $dst,$src" %}
 7349   opcode(0x2B);
 7350   ins_encode( OpcP, RegReg( dst, src) );
 7351   ins_pipe( ialu_reg_reg );
 7352 %}
 7353 
 7354 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
        // Integer subtract of an immediate constant: dst = dst - src. Flags are
        // clobbered.
 7355   match(Set dst (SubI dst src));
 7356   effect(KILL cr);
 7357 
 7358   format %{ "SUB    $dst,$src" %}
 7359   opcode(0x81,0x05);  /* Opcode 81 /5 */
        // The commented-out line below is an older encoding kept for reference;
        // the active one is OpcSErm + Con8or32.
 7360   // ins_encode( RegImm( dst, src) );
 7361   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7362   ins_pipe( ialu_reg );
 7363 %}
 7364 
 7365 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
        // Integer subtract with the subtrahend loaded from memory: the LoadI is
        // folded into the SUB, so dst = dst - [src]. Flags are clobbered.
 7366   match(Set dst (SubI dst (LoadI src)));
 7367   effect(KILL cr);
 7368 
 7369   ins_cost(150);
 7370   format %{ "SUB    $dst,$src" %}
 7371   opcode(0x2B);
 7372   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 7373   ins_pipe( ialu_reg_mem );
 7374 %}
 7375 
 7376 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
        // Read-modify-write subtract on memory: matches load, subtract and store
        // to the same address dst and emits one SUB [dst],src. Flags are clobbered.
 7377   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7378   effect(KILL cr);
 7379 
 7380   ins_cost(150);
 7381   format %{ "SUB    $dst,$src" %}
 7382   opcode(0x29);  /* Opcode 29 /r */
        // Note the operand order: the register (src) is passed first, the memory operand second.
 7383   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 7384   ins_pipe( ialu_mem_reg );
 7385 %}
 7386 
 7387 // Subtract from a pointer
 7388 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
        // Matches "pointer + (0 - src)" and emits a single SUB dst,src instead of
        // a negate followed by an add. Flags are clobbered.
 7389   match(Set dst (AddP dst (SubI zero src)));
 7390   effect(KILL cr);
 7391 
        // Same 2-byte encoding (opcode 0x2B) as subI_eReg.
 7392   size(2);
 7393   format %{ "SUB    $dst,$src" %}
 7394   opcode(0x2B);
 7395   ins_encode( OpcP, RegReg( dst, src) );
 7396   ins_pipe( ialu_reg_reg );
 7397 %}
 7398 
 7399 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
        // Integer negate: matches "0 - dst" and emits NEG dst in place. Flags are
        // clobbered.
 7400   match(Set dst (SubI zero dst));
 7401   effect(KILL cr);
 7402 
        // Fixed 2-byte encoding: opcode 0xF7 plus a byte carrying the /3 opcode
        // extension and dst.
 7403   size(2);
 7404   format %{ "NEG    $dst" %}
 7405   opcode(0xF7,0x03);  // Opcode F7 /3
 7406   ins_encode( OpcP, RegOpc( dst ) );
 7407   ins_pipe( ialu_reg );
 7408 %}
 7409 
 7410 //----------Multiplication/Division Instructions-------------------------------
 7411 // Integer Multiplication Instructions
 7412 // Multiply Register
 7413 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
        // Integer multiply, register form: dst = dst * src (two-address). Flags
        // are clobbered.
 7414   match(Set dst (MulI dst src));
 7415   effect(KILL cr);
 7416 
        // Fixed 3-byte encoding: two opcode bytes plus the register/register byte.
 7417   size(3);
 7418   ins_cost(300);
 7419   format %{ "IMUL   $dst,$src" %}
        // Primary opcode is 0xAF, secondary is 0x0F; the encoding lists OpcS
        // before OpcP, so presumably 0x0F is emitted first (0F AF /r).
 7420   opcode(0xAF, 0x0F);
 7421   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7422   ins_pipe( ialu_reg_reg_alu0 );
 7423 %}
 7424 
 7425 // Multiply 32-bit Immediate
 7426 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
        // Three-operand integer multiply by a constant: dst = src * imm, where
        // dst may be a different register from src. Flags are clobbered.
 7427   match(Set dst (MulI src imm));
 7428   effect(KILL cr);
 7429 
 7430   ins_cost(300);
 7431   format %{ "IMUL   $dst,$src,$imm" %}
 7432   opcode(0x69);  /* 69 /r id */
 7433   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7434   ins_pipe( ialu_reg_reg_alu0 );
 7435 %}
 7436 
 7437 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
        // Loads only the low word of a long constant (immL32) into an operand of
        // class eADXRegL_low_only; the format shows the target as EAX. It feeds
        // the mulI_imm_high / mulI_imm_RShift_high patterns, whose cost comments
        // refer back to this instruction.
 7438   match(Set dst src);
 7439   effect(KILL cr);
 7440 
 7441   // Note that this is artificially increased to make it more expensive than loadConL
 7442   ins_cost(250);
 7443   format %{ "MOV    EAX,$src\t// low word only" %}
 7444   opcode(0xB8);
 7445   ins_encode( LdImmL_Lo(dst, src) );
 7446   ins_pipe( ialu_reg_fat );
 7447 %}
 7448 
 7449 // Multiply by 32-bit Immediate, taking the shifted high order results
 7450 //  (special case for shift by 32)
 7451 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
        // Matches (int)(((long)src1 * src2) >> 32): the high 32 bits of a widening
        // multiply. The result operand is of class eDXRegI and no separate shift
        // is listed in the format.
 7452   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
        // Following the match tree (ConvL2I -> RShiftL -> MulL), the checked node
        // is the second input of the MulL: it must be a long constant whose value
        // fits in a jint.
 7453   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7454              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7455              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7456   effect(USE src1, KILL cr);
 7457 
 7458   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7459   ins_cost(0*100 + 1*400 - 150);
 7460   format %{ "IMUL   EDX:EAX,$src1" %}
 7461   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7462   ins_pipe( pipe_slow );
 7463 %}
 7464 
 7465 // Multiply by 32-bit Immediate, taking the shifted high order results
 7466 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
        // General form of mulI_imm_high: the shift count is an immI_32_63 operand.
        // Per the format, the high word in EDX is additionally shifted
        // arithmetically by cnt-32.
 7467   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
        // Same constant check as mulI_imm_high: the MulL's second input must be a
        // long constant within the jint range.
 7468   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7469              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7470              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7471   effect(USE src1, KILL cr);
 7472 
 7473   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7474   ins_cost(1*100 + 1*400 - 150);
 7475   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7476             "SAR    EDX,$cnt-32" %}
        // Shares the multiply_con_and_shift_high encoding with mulI_imm_high.
 7477   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7478   ins_pipe( pipe_slow );
 7479 %}
 7480 
 7481 // Multiply Memory 32-bit Immediate
 7482 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
        // Three-operand multiply with the multiplicand loaded from memory:
        // dst = [src] * imm, with the LoadI folded into the IMUL. Flags are clobbered.
 7483   match(Set dst (MulI (LoadI src) imm));
 7484   effect(KILL cr);
 7485 
 7486   ins_cost(300);
 7487   format %{ "IMUL   $dst,$src,$imm" %}
 7488   opcode(0x69);  /* 69 /r id */
 7489   ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark );
 7490   ins_pipe( ialu_reg_mem_alu0 );
 7491 %}
 7492 
 7493 // Multiply Memory
 7494 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
        // Two-address integer multiply with the second operand loaded from
        // memory: dst = dst * [src]. Flags are clobbered.
 7495   match(Set dst (MulI dst (LoadI src)));
 7496   effect(KILL cr);
 7497 
        // Costed higher (350) than the register form mulI_eReg (300).
 7498   ins_cost(350);
 7499   format %{ "IMUL   $dst,$src" %}
 7500   opcode(0xAF, 0x0F);
 7501   ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark );
 7502   ins_pipe( ialu_reg_mem_alu0 );
 7503 %}
 7504 
 7505 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7506 %{
        // MulAddS2I: computes dst = dst*src1 + src2*src3 by expanding into two
        // register multiplies and one add (see the expand block below).
 7507   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
        // src2 is overwritten with the intermediate product src2*src3, hence the KILL.
 7508   effect(KILL cr, KILL src2);
 7509 
 7510   expand %{ mulI_eReg(dst, src1, cr);
 7511            mulI_eReg(src2, src3, cr);
 7512            addI_eReg(dst, src2, cr); %}
 7513 %}
 7514 
 7515 // Multiply Register Int to Long
 7516 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7517   // Basic Idea: long = (long)int * (long)int
        // Both multiplicands are ints converted to long (ConvI2L), so one widening
        // multiply yields the full long product. src is constrained to the
        // eAXRegI class and the result to eADXRegL; src1 comes from nadxRegI.
 7518   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7519   effect(DEF dst, USE src, USE src1, KILL flags);
 7520 
 7521   ins_cost(300);
 7522   format %{ "IMUL   $dst,$src1" %}
 7523 
 7524   ins_encode( long_int_multiply( dst, src1 ) );
 7525   ins_pipe( ialu_reg_reg_alu0 );
 7526 %}
 7527 
 7528 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7529   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
        // Counterpart of mulI2L for ints that are zero-extended through the mask:
        // the format shows MUL rather than IMUL, and the encoding is
        // long_uint_multiply.
 7530   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7531   effect(KILL flags);
 7532 
 7533   ins_cost(300);
 7534   format %{ "MUL    $dst,$src1" %}
 7535 
 7536   ins_encode( long_uint_multiply(dst, src1) );
 7537   ins_pipe( ialu_reg_reg_alu0 );
 7538 %}
 7539 
 7540 // Multiply Register Long
 7541 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // General long multiply: dst = dst * src, with dst in the eADXRegL class
        // (EDX:EAX in the format). tmp is a scratch register that accumulates the
        // two cross products.
 7542   match(Set dst (MulL dst src));
 7543   effect(KILL cr, TEMP tmp);
        // Cost expression corresponds to four cheap instructions plus three multiplies.
 7544   ins_cost(4*100+3*400);
 7545 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7546 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7547   format %{ "MOV    $tmp,$src.lo\n\t"
 7548             "IMUL   $tmp,EDX\n\t"
 7549             "MOV    EDX,$src.hi\n\t"
 7550             "IMUL   EDX,EAX\n\t"
 7551             "ADD    $tmp,EDX\n\t"
 7552             "MUL    EDX:EAX,$src.lo\n\t"
 7553             "ADD    EDX,$tmp" %}
 7554   ins_encode( long_multiply( dst, src, tmp ) );
 7555   ins_pipe( pipe_slow );
 7556 %}
 7557 
 7558 // Multiply Register Long where the left operand's high 32 bits are zero
 7559 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // Cheaper long multiply, selected when is_operand_hi32_zero() holds for
        // the first input of the MulL node, so one cross product can be dropped.
 7560   predicate(is_operand_hi32_zero(n->in(1)));
 7561   match(Set dst (MulL dst src));
 7562   effect(KILL cr, TEMP tmp);
 7563   ins_cost(2*100+2*400);
 7564 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7565 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7566   format %{ "MOV    $tmp,$src.hi\n\t"
 7567             "IMUL   $tmp,EAX\n\t"
 7568             "MUL    EDX:EAX,$src.lo\n\t"
 7569             "ADD    EDX,$tmp" %}
 7570   ins_encode %{
          // tmp = src.hi * EAX  (the remaining cross product, x_lo * y_hi)
 7571     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7572     __ imull($tmp$$Register, rax);
          // EDX:EAX = EAX * src.lo  (widening multiply of the low halves)
 7573     __ mull($src$$Register);
          // Fold the cross product into the high half of the result.
 7574     __ addl(rdx, $tmp$$Register);
 7575   %}
 7576   ins_pipe( pipe_slow );
 7577 %}
 7578 
 7579 // Multiply Register Long where the right operand's high 32 bits are zero
 7580 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
        // Cheaper long multiply, selected when is_operand_hi32_zero() holds for
        // the second input of the MulL node, so one cross product can be dropped.
 7581   predicate(is_operand_hi32_zero(n->in(2)));
 7582   match(Set dst (MulL dst src));
 7583   effect(KILL cr, TEMP tmp);
 7584   ins_cost(2*100+2*400);
 7585 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7586 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7587   format %{ "MOV    $tmp,$src.lo\n\t"
 7588             "IMUL   $tmp,EDX\n\t"
 7589             "MUL    EDX:EAX,$src.lo\n\t"
 7590             "ADD    EDX,$tmp" %}
 7591   ins_encode %{
          // tmp = src.lo * EDX  (the remaining cross product, x_hi * y_lo)
 7592     __ movl($tmp$$Register, $src$$Register);
 7593     __ imull($tmp$$Register, rdx);
          // EDX:EAX = EAX * src.lo  (widening multiply of the low halves)
 7594     __ mull($src$$Register);
          // Fold the cross product into the high half of the result.
 7595     __ addl(rdx, $tmp$$Register);
 7596   %}
 7597   ins_pipe( pipe_slow );
 7598 %}
 7599 
 7600 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7601 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
        // Cheapest long multiply, selected when is_operand_hi32_zero() holds for
        // both inputs of the MulL node. Both cross products vanish, so no
        // temporary is needed.
 7602   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7603   match(Set dst (MulL dst src));
 7604   effect(KILL cr);
 7605   ins_cost(1*400);
 7606 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7607 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7608   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7609   ins_encode %{
          // A single widening multiply of the low halves produces the whole result.
 7610     __ mull($src$$Register);
 7611   %}
 7612   ins_pipe( pipe_slow );
 7613 %}
 7614 
 7615 // Multiply Register Long by small constant
 7616 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
        // Long multiply by a constant of operand class immL_127. tmp holds the
        // product of the constant with the high half (EDX) while EDX is reused
        // for the widening multiply.
 7617   match(Set dst (MulL dst src));
 7618   effect(KILL cr, TEMP tmp);
 7619   ins_cost(2*100+2*400);
        // The four-instruction sequence is declared as exactly 12 bytes.
 7620   size(12);
 7621 // Basic idea: lo(result) = lo(src * EAX)
 7622 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7623   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7624             "MOV    EDX,$src\n\t"
 7625             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7626             "ADD    EDX,$tmp" %}
 7627   ins_encode( long_multiply_con( dst, src, tmp ) );
 7628   ins_pipe( pipe_slow );
 7629 %}
 7630 
 7631 // Integer DIV with Register
 7632 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
        // Signed integer divide: rax = rax / div. Operands are pinned to the
        // eAXRegI / eDXRegI / eCXRegI classes; rdx and the flags are clobbered.
 7633   match(Set rax (DivI rax div));
 7634   effect(KILL rdx, KILL cr);
        // The full sequence shown in the format is declared as 26 bytes.
 7635   size(26);
 7636   ins_cost(30*100+10*100);
        // The format shows a guard in front of IDIV: when EAX is 0x80000000 and
        // ECX is -1, EDX is zeroed and the IDIV is skipped, leaving EAX unchanged.
 7637   format %{ "CMP    EAX,0x80000000\n\t"
 7638             "JNE,s  normal\n\t"
 7639             "XOR    EDX,EDX\n\t"
 7640             "CMP    ECX,-1\n\t"
 7641             "JE,s   done\n"
 7642     "normal: CDQ\n\t"
 7643             "IDIV   $div\n\t"
 7644     "done:"        %}
 7645   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
        // cdq_enc presumably emits the guard and CDQ (enc_class defined
        // elsewhere); OpcP + RegOpc(div) then emit the IDIV itself.
 7646   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7647   ins_pipe( ialu_reg_reg_alu0 );
 7648 %}
 7649 
 7650 // Divide Register Long
 7651 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
        // General long divide, implemented as a runtime call rather than inline
        // code. Per the format, both operands are pushed on the stack,
        // SharedRuntime::ldiv is called, and the 16 bytes of arguments are popped.
 7652   match(Set dst (DivL src1 src2));
        // Declared as a CALL, with a very high cost so cheaper patterns win when
        // they apply.
 7653   effect(CALL);
 7654   ins_cost(10000);
 7655   format %{ "PUSH   $src1.hi\n\t"
 7656             "PUSH   $src1.lo\n\t"
 7657             "PUSH   $src2.hi\n\t"
 7658             "PUSH   $src2.lo\n\t"
 7659             "CALL   SharedRuntime::ldiv\n\t"
 7660             "ADD    ESP,16" %}
 7661   ins_encode( long_div(src1,src2) );
 7662   ins_pipe( pipe_slow );
 7663 %}
 7664 
 7665 // Integer DIVMOD with Register, both quotient and mod results
 7666 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
        // Combined divide/remainder node: matched without a "Set" because it has
        // two results. Only the flags are declared killed; rax and rdx are
        // presumably the quotient and remainder outputs -- confirm against the
        // DivModI projection handling.
 7667   match(DivModI rax div);
 7668   effect(KILL cr);
 7669   size(26);
 7670   ins_cost(30*100+10*100);
        // Same guarded sequence, size and encoding as divI_eReg.
 7671   format %{ "CMP    EAX,0x80000000\n\t"
 7672             "JNE,s  normal\n\t"
 7673             "XOR    EDX,EDX\n\t"
 7674             "CMP    ECX,-1\n\t"
 7675             "JE,s   done\n"
 7676     "normal: CDQ\n\t"
 7677             "IDIV   $div\n\t"
 7678     "done:"        %}
 7679   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7680   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7681   ins_pipe( pipe_slow );
 7682 %}
 7683 
 7684 // Integer MOD with Register
 7685 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
        // Signed integer remainder: rdx = rax % div. rax and the flags are
        // clobbered.
 7686   match(Set rdx (ModI rax div));
 7687   effect(KILL rax, KILL cr);
 7688 
        // NOTE(review): size(26) and the encoding below are the same as divI_eReg,
        // so the emitted code presumably includes the same guard sequence even
        // though the format lists only CDQ/IDIV -- confirm against cdq_enc.
 7689   size(26);
 7690   ins_cost(300);
 7691   format %{ "CDQ\n\t"
 7692             "IDIV   $div" %}
 7693   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7694   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7695   ins_pipe( ialu_reg_reg_alu0 );
 7696 %}
 7697 
 7698 // Remainder Register Long
 7699 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
        // General long remainder, implemented as a runtime call. Per the format,
        // both operands are pushed on the stack, SharedRuntime::lrem is called,
        // and the 16 bytes of arguments are popped.
 7700   match(Set dst (ModL src1 src2));
        // Declared as a CALL, with a very high cost so cheaper patterns win when
        // they apply.
 7701   effect(CALL);
 7702   ins_cost(10000);
 7703   format %{ "PUSH   $src1.hi\n\t"
 7704             "PUSH   $src1.lo\n\t"
 7705             "PUSH   $src2.hi\n\t"
 7706             "PUSH   $src2.lo\n\t"
 7707             "CALL   SharedRuntime::lrem\n\t"
 7708             "ADD    ESP,16" %}
 7709   ins_encode( long_mod(src1,src2) );
 7710   ins_pipe( pipe_slow );
 7711 %}
 7712 
 7713 // Divide Register Long (no special case since divisor != -1)
 7714 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7715   match(Set dst (DivL dst imm));
 7716   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7717   ins_cost(1000);
 7718   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7719             "XOR    $tmp2,$tmp2\n\t"
 7720             "CMP    $tmp,EDX\n\t"
 7721             "JA,s   fast\n\t"
 7722             "MOV    $tmp2,EAX\n\t"
 7723             "MOV    EAX,EDX\n\t"
 7724             "MOV    EDX,0\n\t"
 7725             "JLE,s  pos\n\t"
 7726             "LNEG   EAX : $tmp2\n\t"
 7727             "DIV    $tmp # unsigned division\n\t"
 7728             "XCHG   EAX,$tmp2\n\t"
 7729             "DIV    $tmp\n\t"
 7730             "LNEG   $tmp2 : EAX\n\t"
 7731             "JMP,s  done\n"
 7732     "pos:\n\t"
 7733             "DIV    $tmp\n\t"
 7734             "XCHG   EAX,$tmp2\n"
 7735     "fast:\n\t"
 7736             "DIV    $tmp\n"
 7737     "done:\n\t"
 7738             "MOV    EDX,$tmp2\n\t"
 7739             "NEG    EDX:EAX # if $imm < 0" %}
 7740   ins_encode %{
 7741     int con = (int)$imm$$constant;
 7742     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7743     int pcon = (con > 0) ? con : -con;
 7744     Label Lfast, Lpos, Ldone;
 7745 
 7746     __ movl($tmp$$Register, pcon);
 7747     __ xorl($tmp2$$Register,$tmp2$$Register);
 7748     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7749     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7750 
 7751     __ movl($tmp2$$Register, $dst$$Register); // save
 7752     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7753     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7754     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7755 
 7756     // Negative dividend.
 7757     // convert value to positive to use unsigned division
 7758     __ lneg($dst$$Register, $tmp2$$Register);
 7759     __ divl($tmp$$Register);
 7760     __ xchgl($dst$$Register, $tmp2$$Register);
 7761     __ divl($tmp$$Register);
 7762     // revert result back to negative
 7763     __ lneg($tmp2$$Register, $dst$$Register);
 7764     __ jmpb(Ldone);
 7765 
 7766     __ bind(Lpos);
 7767     __ divl($tmp$$Register); // Use unsigned division
 7768     __ xchgl($dst$$Register, $tmp2$$Register);
 7769     // Fall through for final divide, tmp2 has 32 bit hi result
 7770 
 7771     __ bind(Lfast);
 7772     // fast path: src is positive
 7773     __ divl($tmp$$Register); // Use unsigned division
 7774 
 7775     __ bind(Ldone);
 7776     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7777     if (con < 0) {
 7778       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7779     }
 7780   %}
 7781   ins_pipe( pipe_slow );
 7782 %}
 7783 
 7784 // Remainder Register Long (remainder fit into 32 bits)
      // Matches dst = dst % imm for a long dst in the EDX:EAX pair (eADXRegL) and a
      // constant divisor that fits in 32 bits (immL32). The remainder is computed with
      // unsigned 32-bit DIVs on abs(imm) and then given the sign of the dividend.
      // tmp holds abs(imm); tmp2 saves the low dividend word on the slow paths.
 7785 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7786   match(Set dst (ModL dst imm));
 7787   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7788   ins_cost(1000);
 7789   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7790             "CMP    $tmp,EDX\n\t"
 7791             "JA,s   fast\n\t"
 7792             "MOV    $tmp2,EAX\n\t"
 7793             "MOV    EAX,EDX\n\t"
 7794             "MOV    EDX,0\n\t"
 7795             "JLE,s  pos\n\t"
 7796             "LNEG   EAX : $tmp2\n\t"
 7797             "DIV    $tmp # unsigned division\n\t"
 7798             "MOV    EAX,$tmp2\n\t"
 7799             "DIV    $tmp\n\t"
 7800             "NEG    EDX\n\t"
 7801             "JMP,s  done\n"
 7802     "pos:\n\t"
 7803             "DIV    $tmp\n\t"
 7804             "MOV    EAX,$tmp2\n"
 7805     "fast:\n\t"
 7806             "DIV    $tmp\n"
 7807     "done:\n\t"
 7808             "MOV    EAX,EDX\n\t"
 7809             "SAR    EDX,31\n\t" %}
 7810   ins_encode %{
 7811     int con = (int)$imm$$constant;
          // 0, -1 and min_jint are excluded by the assert; with min_jint ruled out,
          // the negation used to form pcon below cannot overflow.
 7812     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7813     int pcon = (con > 0) ? con : -con;
 7814     Label  Lfast, Lpos, Ldone;
 7815 
          // Compare abs(divisor) with the high dividend word. The flags set here are
          // consumed twice: by the unsigned 'above' test and by the signed
          // 'lessEqual' test further down.
 7816     __ movl($tmp$$Register, pcon);
 7817     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7818     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7819 
          // Slow path: divide the high word first. Only MOVs are used between the
          // compare and the branch so the condition codes stay intact.
 7820     __ movl($tmp2$$Register, $dst$$Register); // save
 7821     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7822     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7823     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7824 
 7825     // Negative dividend.
 7826     // convert value to positive to use unsigned division
          // At this point the low register of dst holds the original high word and
          // tmp2 the original low word, so this negates the whole 64-bit dividend.
 7827     __ lneg($dst$$Register, $tmp2$$Register);
 7828     __ divl($tmp$$Register);
 7829     __ movl($dst$$Register, $tmp2$$Register);
 7830     __ divl($tmp$$Register);
 7831     // revert remainder back to negative
 7832     __ negl(HIGH_FROM_LOW($dst$$Register));
 7833     __ jmpb(Ldone);
 7834 
          // Non-negative dividend: divide the high word, reload the saved low word,
          // then fall through into the final divide at Lfast.
 7835     __ bind(Lpos);
 7836     __ divl($tmp$$Register);
 7837     __ movl($dst$$Register, $tmp2$$Register);
 7838 
 7839     __ bind(Lfast);
 7840     // fast path: src is positive
 7841     __ divl($tmp$$Register);
 7842 
          // Move the 32-bit remainder into the low word and sign-extend it into the
          // high word to form the long result.
 7843     __ bind(Ldone);
 7844     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7845     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 7846 
 7847   %}
 7848   ins_pipe( pipe_slow );
 7849 %}
 7850 
 7851 // Integer Shift Instructions
 7852 // Shift Left by one
      // Matches an int left shift of dst by the constant 1 (immI_1) and uses the
      // D1 /4 form, which carries no immediate byte (declared size 2).
      // The condition codes are clobbered (KILL cr).
 7853 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7854   match(Set dst (LShiftI dst shift));
 7855   effect(KILL cr);
 7856 
 7857   size(2);
 7858   format %{ "SHL    $dst,$shift" %}
 7859   opcode(0xD1, 0x4);  /* D1 /4 */
 7860   ins_encode( OpcP, RegOpc( dst ) );
 7861   ins_pipe( ialu_reg );
 7862 %}
 7863 
 7864 // Shift Left by 8-bit immediate
      // Matches an int left shift of dst by an 8-bit constant (immI8), encoded as
      // C1 /4 ib (declared size 3). The condition codes are clobbered (KILL cr).
 7865 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 7866   match(Set dst (LShiftI dst shift));
 7867   effect(KILL cr);
 7868 
 7869   size(3);
 7870   format %{ "SHL    $dst,$shift" %}
 7871   opcode(0xC1, 0x4);  /* C1 /4 ib */
 7872   ins_encode( RegOpcImm( dst, shift) );
 7873   ins_pipe( ialu_reg );
 7874 %}
 7875 
 7876 // Shift Left by variable
      // Matches an int left shift of dst by a run-time count. The count operand is
      // constrained to the eCXRegI class and the D3 /4 form is used, so the
      // encoding names only dst (declared size 2). Flags are clobbered (KILL cr).
 7877 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 7878   match(Set dst (LShiftI dst shift));
 7879   effect(KILL cr);
 7880 
 7881   size(2);
 7882   format %{ "SHL    $dst,$shift" %}
 7883   opcode(0xD3, 0x4);  /* D3 /4 */
 7884   ins_encode( OpcP, RegOpc( dst ) );
 7885   ins_pipe( ialu_reg_reg );
 7886 %}
 7887 
 7888 // Arithmetic shift right by one
      // Register form: matches a signed int right shift (RShiftI) of dst by the
      // constant 1 (immI_1), encoded as D1 /7 (declared size 2).
      // The condition codes are clobbered (KILL cr).
 7889 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7890   match(Set dst (RShiftI dst shift));
 7891   effect(KILL cr);
 7892 
 7893   size(2);
 7894   format %{ "SAR    $dst,$shift" %}
 7895   opcode(0xD1, 0x7);  /* D1 /7 */
 7896   ins_encode( OpcP, RegOpc( dst ) );
 7897   ins_pipe( ialu_reg );
 7898 %}
 7899 
 7900 // Arithmetic shift right by one
      // Memory form: matches load / signed shift right by 1 / store back to the same
      // address, and performs it as a single D1 /7 instruction on the memory operand.
      // No size() is declared for this form. Flags are clobbered (KILL cr).
 7901 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 7902   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 7903   effect(KILL cr);
 7904   format %{ "SAR    $dst,$shift" %}
 7905   opcode(0xD1, 0x7);  /* D1 /7 */
        // SetInstMark/ClearInstMark bracket the memory-operand encoding; their
        // definitions are outside this section (presumably relocation bookkeeping).
 7906   ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark );
 7907   ins_pipe( ialu_mem_imm );
 7908 %}
 7909 
 7910 // Arithmetic Shift Right by 8-bit immediate
      // Register form: matches a signed int right shift (RShiftI) of dst by an 8-bit
      // constant (immI8), encoded as C1 /7 ib (declared size 3).
      // The condition codes are clobbered (KILL cr).
 7911 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 7912   match(Set dst (RShiftI dst shift));
 7913   effect(KILL cr);
 7914 
 7915   size(3);
 7916   format %{ "SAR    $dst,$shift" %}
 7917   opcode(0xC1, 0x7);  /* C1 /7 ib */
 7918   ins_encode( RegOpcImm( dst, shift ) );
        // Register-only operation: use the register pipeline class, as the sibling
        // salI_eReg_imm and shrI_eReg_imm do, rather than the memory-operand class.
 7919   ins_pipe( ialu_reg );
 7920 %}
 7921 
 7922 // Arithmetic Shift Right by 8-bit immediate
      // Memory form: matches load / signed shift right by an 8-bit constant / store
      // back to the same address, performed as one C1 /7 ib instruction on memory.
      // Flags are clobbered (KILL cr).
 7923 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 7924   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 7925   effect(KILL cr);
 7926 
 7927   format %{ "SAR    $dst,$shift" %}
 7928   opcode(0xC1, 0x7);  /* C1 /7 ib */
        // Opcode, ModRM for the memory operand with the /7 extension, then the count.
 7929   ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark );
 7930   ins_pipe( ialu_mem_imm );
 7931 %}
 7932 
 7933 // Arithmetic Shift Right by variable
      // Matches a signed int right shift of dst by a run-time count. The count
      // operand is constrained to the eCXRegI class and the D3 /7 form is used, so
      // the encoding names only dst (declared size 2). Flags are clobbered.
 7934 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 7935   match(Set dst (RShiftI dst shift));
 7936   effect(KILL cr);
 7937 
 7938   size(2);
 7939   format %{ "SAR    $dst,$shift" %}
 7940   opcode(0xD3, 0x7);  /* D3 /7 */
 7941   ins_encode( OpcP, RegOpc( dst ) );
 7942   ins_pipe( ialu_reg_reg );
 7943 %}
 7944 
 7945 // Logical shift right by one
      // Matches an unsigned int right shift (URShiftI) of dst by the constant 1
      // (immI_1), encoded as D1 /5 (declared size 2). Flags are clobbered (KILL cr).
 7946 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7947   match(Set dst (URShiftI dst shift));
 7948   effect(KILL cr);
 7949 
 7950   size(2);
 7951   format %{ "SHR    $dst,$shift" %}
 7952   opcode(0xD1, 0x5);  /* D1 /5 */
 7953   ins_encode( OpcP, RegOpc( dst ) );
 7954   ins_pipe( ialu_reg );
 7955 %}
 7956 
 7957 // Logical Shift Right by 8-bit immediate
      // Matches an unsigned int right shift (URShiftI) of dst by an 8-bit constant
      // (immI8), encoded as C1 /5 ib (declared size 3). Flags are clobbered.
 7958 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 7959   match(Set dst (URShiftI dst shift));
 7960   effect(KILL cr);
 7961 
 7962   size(3);
 7963   format %{ "SHR    $dst,$shift" %}
 7964   opcode(0xC1, 0x5);  /* C1 /5 ib */
 7965   ins_encode( RegOpcImm( dst, shift) );
 7966   ins_pipe( ialu_reg );
 7967 %}
 7968 
 7969 
 7970 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
 7971 // This idiom is used by the compiler for the i2b bytecode.
      // The shift pair is replaced by one sign-extending byte move. src is
      // restricted to the xRegI operand class.
 7972 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 7973   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 7974 
 7975   size(3);
 7976   format %{ "MOVSX  $dst,$src :8" %}
 7977   ins_encode %{
          Register Rdst = $dst$$Register;
          Register Rsrc = $src$$Register;
          // Sign-extend the low byte of src into dst.
 7978     __ movsbl(Rdst, Rsrc);
 7979   %}
 7980   ins_pipe(ialu_reg_reg);
 7981 %}
 7982 
 7983 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
 7984 // This idiom is used by the compiler for the i2s bytecode.
      // The shift pair is replaced by one sign-extending 16-bit move.
 7985 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 7986   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 7987 
 7988   size(3);
 7989   format %{ "MOVSX  $dst,$src :16" %}
 7990   ins_encode %{
          Register Rdst = $dst$$Register;
          Register Rsrc = $src$$Register;
          // Sign-extend the low 16 bits of src into dst.
 7991     __ movswl(Rdst, Rsrc);
 7992   %}
 7993   ins_pipe(ialu_reg_reg);
 7994 %}
 7995 
 7996 
 7997 // Logical Shift Right by variable
      // Matches an unsigned int right shift of dst by a run-time count. The count
      // operand is constrained to the eCXRegI class and the D3 /5 form is used, so
      // the encoding names only dst (declared size 2). Flags are clobbered.
 7998 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 7999   match(Set dst (URShiftI dst shift));
 8000   effect(KILL cr);
 8001 
 8002   size(2);
 8003   format %{ "SHR    $dst,$shift" %}
 8004   opcode(0xD3, 0x5);  /* D3 /5 */
 8005   ins_encode( OpcP, RegOpc( dst ) );
 8006   ins_pipe( ialu_reg_reg );
 8007 %}
 8008 
 8009 
 8010 //----------Logical Instructions-----------------------------------------------
 8011 //----------Integer Logical Instructions---------------------------------------
 8012 // And Instructions
 8013 // And Register with Register
      // Two-operand form: dst = dst & src, using opcode 0x23 with a register-register
      // operand encoding (declared size 2). Flags are clobbered (KILL cr).
 8014 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8015   match(Set dst (AndI dst src));
 8016   effect(KILL cr);
 8017 
 8018   size(2);
 8019   format %{ "AND    $dst,$src" %}
 8020   opcode(0x23);
 8021   ins_encode( OpcP, RegReg( dst, src) );
 8022   ins_pipe( ialu_reg_reg );
 8023 %}
 8024 
 8025 // And Register with Immediate
      // dst = dst & constant, opcode group 81 /4. No size() is declared; the
      // Con8or32 enc_class name suggests the immediate is emitted as either 8 or
      // 32 bits (enc_class bodies are defined outside this section).
 8026 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8027   match(Set dst (AndI dst src));
 8028   effect(KILL cr);
 8029 
 8030   format %{ "AND    $dst,$src" %}
 8031   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8032   // ins_encode( RegImm( dst, src) );
 8033   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8034   ins_pipe( ialu_reg );
 8035 %}
 8036 
 8037 // And Register with Memory
      // Folds the int load into the AND: dst = dst & [src], opcode 0x23 with a
      // register/memory operand. Costed at 150. Flags are clobbered (KILL cr).
 8038 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8039   match(Set dst (AndI dst (LoadI src)));
 8040   effect(KILL cr);
 8041 
 8042   ins_cost(150);
 8043   format %{ "AND    $dst,$src" %}
 8044   opcode(0x23);
 8045   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 8046   ins_pipe( ialu_reg_mem );
 8047 %}
 8048 
 8049 // And Memory with Register
      // Read-modify-write form: matches load / AND with src / store back to the same
      // address and performs it as one instruction (opcode 21 /r). Flags are
      // clobbered (KILL cr).
 8050 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8051   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8052   effect(KILL cr);
 8053 
 8054   ins_cost(150);
 8055   format %{ "AND    $dst,$src" %}
 8056   opcode(0x21);  /* Opcode 21 /r */
        // The register operand is passed first to RegMem, the memory operand second.
 8057   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 8058   ins_pipe( ialu_mem_reg );
 8059 %}
 8060 
 8061 // And Memory with Immediate
      // Read-modify-write form: matches load / AND with a constant / store back to
      // the same address, as one instruction from opcode group 81 /4.
      // Costed at 125. Flags are clobbered (KILL cr).
 8062 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8063   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8064   effect(KILL cr);
 8065 
 8066   ins_cost(125);
 8067   format %{ "AND    $dst,$src" %}
 8068   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8069   // ins_encode( MemImm( dst, src) );
 8070   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
 8071   ins_pipe( ialu_mem_imm );
 8072 %}
 8073 
 8074 // BMI1 instructions
 8075 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
        // And-not, register form: matches (src1 ^ -1) & src2, i.e. ~src1 & src2.
        // Only selected when UseBMI1Instructions is set. Flags are clobbered.
 8076   match(Set dst (AndI (XorI src1 minus_1) src2));
 8077   predicate(UseBMI1Instructions);
 8078   effect(KILL cr);
 8079 
 8080   format %{ "ANDNL  $dst, $src1, $src2" %}
 8081 
 8082   ins_encode %{
          Register Rdst  = $dst$$Register;
          Register Rsrc1 = $src1$$Register;
          Register Rsrc2 = $src2$$Register;
 8083     __ andnl(Rdst, Rsrc1, Rsrc2);
 8084   %}
 8085   ins_pipe(ialu_reg);
 8086 %}
 8087 
 8088 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
        // And-not, memory form: matches (src1 ^ -1) & [src2] with the int load folded
        // into the instruction. Only selected when UseBMI1Instructions is set.
 8089   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8090   predicate(UseBMI1Instructions);
 8091   effect(KILL cr);
 8092 
 8093   ins_cost(125);
 8094   format %{ "ANDNL  $dst, $src1, $src2" %}
 8095 
 8096   ins_encode %{
          Register Rdst  = $dst$$Register;
          Register Rsrc1 = $src1$$Register;
 8097     __ andnl(Rdst, Rsrc1, $src2$$Address);
 8098   %}
 8099   ins_pipe(ialu_reg_mem);
 8100 %}
 8101 
 8102 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
        // Register form: matches (0 - src) & src and emits a single BLSIL.
        // Only selected when UseBMI1Instructions is set. Flags are clobbered.
 8103   match(Set dst (AndI (SubI imm_zero src) src));
 8104   predicate(UseBMI1Instructions);
 8105   effect(KILL cr);
 8106 
 8107   format %{ "BLSIL  $dst, $src" %}
 8108 
 8109   ins_encode %{
          Register Rdst = $dst$$Register;
          Register Rsrc = $src$$Register;
 8110     __ blsil(Rdst, Rsrc);
 8111   %}
 8112   ins_pipe(ialu_reg);
 8113 %}
 8114 
 8115 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
        // Memory form: matches (0 - [src]) & [src], where both loads use the same
        // memory operand, and emits a single BLSIL with a memory source.
 8116   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8117   predicate(UseBMI1Instructions);
 8118   effect(KILL cr);
 8119 
 8120   ins_cost(125);
 8121   format %{ "BLSIL  $dst, $src" %}
 8122 
 8123   ins_encode %{
          Register Rdst = $dst$$Register;
 8124     __ blsil(Rdst, $src$$Address);
 8125   %}
 8126   ins_pipe(ialu_reg_mem);
 8127 %}
 8128 
 8129 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8130 %{
        // Register form: matches (src + -1) ^ src and emits a single BLSMSKL.
        // Only selected when UseBMI1Instructions is set. Flags are clobbered.
 8131   match(Set dst (XorI (AddI src minus_1) src));
 8132   predicate(UseBMI1Instructions);
 8133   effect(KILL cr);
 8134 
 8135   format %{ "BLSMSKL $dst, $src" %}
 8136 
 8137   ins_encode %{
          Register Rdst = $dst$$Register;
          Register Rsrc = $src$$Register;
 8138     __ blsmskl(Rdst, Rsrc);
 8139   %}
 8140 
 8141   ins_pipe(ialu_reg);
 8142 %}
 8143 
 8144 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8145 %{
        // Memory form: matches ([src] + -1) ^ [src], where both loads use the same
        // memory operand, and emits a single BLSMSKL with a memory source.
 8146   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8147   predicate(UseBMI1Instructions);
 8148   effect(KILL cr);
 8149 
 8150   ins_cost(125);
 8151   format %{ "BLSMSKL $dst, $src" %}
 8152 
 8153   ins_encode %{
          Register Rdst = $dst$$Register;
 8154     __ blsmskl(Rdst, $src$$Address);
 8155   %}
 8156 
 8157   ins_pipe(ialu_reg_mem);
 8158 %}
 8159 
 8160 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8161 %{
        // Register form: matches (src + -1) & src and emits a single BLSRL.
        // Only selected when UseBMI1Instructions is set. Flags are clobbered.
 8162   match(Set dst (AndI (AddI src minus_1) src) );
 8163   predicate(UseBMI1Instructions);
 8164   effect(KILL cr);
 8165 
 8166   format %{ "BLSRL  $dst, $src" %}
 8167 
 8168   ins_encode %{
          Register Rdst = $dst$$Register;
          Register Rsrc = $src$$Register;
 8169     __ blsrl(Rdst, Rsrc);
 8170   %}
 8171 
 8172   ins_pipe(ialu_reg);
 8173 %}
 8174 
 8175 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8176 %{
        // Memory form: matches ([src] + -1) & [src], where both loads use the same
        // memory operand, and emits a single BLSRL with a memory source.
 8177   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8178   predicate(UseBMI1Instructions);
 8179   effect(KILL cr);
 8180 
 8181   ins_cost(125);
 8182   format %{ "BLSRL  $dst, $src" %}
 8183 
 8184   ins_encode %{
          Register Rdst = $dst$$Register;
 8185     __ blsrl(Rdst, $src$$Address);
 8186   %}
 8187 
 8188   ins_pipe(ialu_reg_mem);
 8189 %}
 8190 
 8191 // Or Instructions
 8192 // Or Register with Register
      // Two-operand form: dst = dst | src, using opcode 0x0B with a register-register
      // operand encoding (declared size 2). Flags are clobbered (KILL cr).
 8193 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8194   match(Set dst (OrI dst src));
 8195   effect(KILL cr);
 8196 
 8197   size(2);
 8198   format %{ "OR     $dst,$src" %}
 8199   opcode(0x0B);
 8200   ins_encode( OpcP, RegReg( dst, src) );
 8201   ins_pipe( ialu_reg_reg );
 8202 %}
 8203 
 8204 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
        // Or Register with a pointer register: matches dst | CastP2X(src) so that the
        // pointer-to-integer cast is absorbed and the pointer register is used
        // directly as the source. Same opcode (0x0B) and encoding as orI_eReg.
 8205   match(Set dst (OrI dst (CastP2X src)));
 8206   effect(KILL cr);
 8207 
 8208   size(2);
 8209   format %{ "OR     $dst,$src" %}
 8210   opcode(0x0B);
 8211   ins_encode( OpcP, RegReg( dst, src) );
 8212   ins_pipe( ialu_reg_reg );
 8213 %}
 8214 
 8215 
 8216 // Or Register with Immediate
      // dst = dst | constant, opcode group 81 /1. No size() is declared; the
      // Con8or32 enc_class name suggests the immediate is emitted as either 8 or
      // 32 bits (enc_class bodies are defined outside this section).
 8217 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8218   match(Set dst (OrI dst src));
 8219   effect(KILL cr);
 8220 
 8221   format %{ "OR     $dst,$src" %}
 8222   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8223   // ins_encode( RegImm( dst, src) );
 8224   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8225   ins_pipe( ialu_reg );
 8226 %}
 8227 
 8228 // Or Register with Memory
      // Folds the int load into the OR: dst = dst | [src], opcode 0x0B with a
      // register/memory operand. Costed at 150. Flags are clobbered (KILL cr).
 8229 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8230   match(Set dst (OrI dst (LoadI src)));
 8231   effect(KILL cr);
 8232 
 8233   ins_cost(150);
 8234   format %{ "OR     $dst,$src" %}
 8235   opcode(0x0B);
 8236   ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark );
 8237   ins_pipe( ialu_reg_mem );
 8238 %}
 8239 
 8240 // Or Memory with Register
      // Read-modify-write form: matches load / OR with src / store back to the same
      // address and performs it as one instruction (opcode 09 /r). Flags are
      // clobbered (KILL cr).
 8241 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8242   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8243   effect(KILL cr);
 8244 
 8245   ins_cost(150);
 8246   format %{ "OR     $dst,$src" %}
 8247   opcode(0x09);  /* Opcode 09 /r */
        // The register operand is passed first to RegMem, the memory operand second.
 8248   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 8249   ins_pipe( ialu_mem_reg );
 8250 %}
 8251 
 8252 // Or Memory with Immediate
      // Read-modify-write form: matches load / OR with a constant / store back to
      // the same address, as one instruction from opcode group 81 /1.
      // Costed at 125. Flags are clobbered (KILL cr).
 8253 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8254   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8255   effect(KILL cr);
 8256 
 8257   ins_cost(125);
 8258   format %{ "OR     $dst,$src" %}
 8259   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8260   // ins_encode( MemImm( dst, src) );
 8261   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
 8262   ins_pipe( ialu_mem_imm );
 8263 %}
 8264 
 8265 // ROL/ROR
 8266 // ROL expand
 8267 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
        // Rotate left by one (D1 /0). This instruct has no match rule; it is only
        // instantiated through the expand rule of rolI_eReg_i1.
        // dst is both read and written (USE_DEF); flags are clobbered.
 8268   effect(USE_DEF dst, USE shift, KILL cr);
 8269 
 8270   format %{ "ROL    $dst, $shift" %}
 8271   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8272   ins_encode( OpcP, RegOpc( dst ));
 8273   ins_pipe( ialu_reg );
 8274 %}
 8275 
 8276 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
        // Rotate left by an 8-bit constant (C1 /0). This instruct has no match rule;
        // it is only instantiated through the expand rule of rolI_eReg_i8.
        // dst is both read and written (USE_DEF); flags are clobbered.
 8277   effect(USE_DEF dst, USE shift, KILL cr);
 8278 
 8279   format %{ "ROL    $dst, $shift" %}
 8280   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
 8281   ins_encode( RegOpcImm(dst, shift) );
 8282   ins_pipe(ialu_reg);
 8283 %}
 8284 
 8285 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
        // Rotate left by a run-time count (D3 /0). This instruct has no match rule;
        // it is only instantiated through the expand rules of rolI_eReg_Var_C0 and
        // rolI_eReg_Var_C32. The count uses the eCXRegI class, and dst uses the
        // ncxRegI class so that it is kept apart from the count register.
 8286   effect(USE_DEF dst, USE shift, KILL cr);
 8287 
 8288   format %{ "ROL    $dst, $shift" %}
 8289   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8290   ins_encode(OpcP, RegOpc(dst));
 8291   ins_pipe( ialu_reg_reg );
 8292 %}
 8293 // end of ROL expand
 8294 
 8295 // ROL 32bit by one once
      // Recognizes the rotate idiom (dst << 1) | (dst >>> -1) and expands it into
      // rolI_eReg_imm1. The right-shift count is the constant -1 (immI_M1);
      // presumably it acts as 31 because int shifts use only the low five bits of
      // the count (compare the "& 0x1f" predicate on rolI_eReg_i8).
 8296 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8297   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8298 
 8299   expand %{
 8300     rolI_eReg_imm1(dst, lshift, cr);
 8301   %}
 8302 %}
 8303 
 8304 // ROL 32bit var by imm8 once
      // Recognizes (dst << lshift) | (dst >>> rshift) with constant counts and
      // expands it into rolI_eReg_imm8 using the left-shift count.
 8305 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
        // Only a rotate when the two constant counts sum to a multiple of 32.
 8306   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8307   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8308 
 8309   expand %{
 8310     rolI_eReg_imm8(dst, lshift, cr);
 8311   %}
 8312 %}
 8313 
 8314 // ROL 32bit var by var once
      // Recognizes (dst << shift) | (dst >>> (0 - shift)) with a run-time count and
      // expands it into rolI_eReg_CL.
 8315 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8316   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8317 
 8318   expand %{
 8319     rolI_eReg_CL(dst, shift, cr);
 8320   %}
 8321 %}
 8322 
 8323 // ROL 32bit var by var once
      // Recognizes (dst << shift) | (dst >>> (32 - shift)) with a run-time count and
      // expands it into rolI_eReg_CL.
 8324 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8325   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8326 
 8327   expand %{
 8328     rolI_eReg_CL(dst, shift, cr);
 8329   %}
 8330 %}
 8331 
 8332 // ROR expand
 8333 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
        // Rotate right by one (D1 /1). This instruct has no match rule; it is only
        // instantiated through the expand rule of rorI_eReg_i1.
        // dst is both read and written (USE_DEF); flags are clobbered.
 8334   effect(USE_DEF dst, USE shift, KILL cr);
 8335 
 8336   format %{ "ROR    $dst, $shift" %}
 8337   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8338   ins_encode( OpcP, RegOpc( dst ) );
 8339   ins_pipe( ialu_reg );
 8340 %}
 8341 
 8342 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
        // Rotate right by an 8-bit constant (C1 /1 ib). This instruct has no match
        // rule; it is only instantiated through the expand rule of rorI_eReg_i8.
        // dst is both read and written (USE_DEF); flags are clobbered.
 8343   effect (USE_DEF dst, USE shift, KILL cr);
 8344 
 8345   format %{ "ROR    $dst, $shift" %}
 8346   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
 8347   ins_encode( RegOpcImm(dst, shift) );
 8348   ins_pipe( ialu_reg );
 8349 %}
 8350 
 8351 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
        // Rotate right by a run-time count (D3 /1). This instruct has no match rule;
        // it is only instantiated through the expand rules of rorI_eReg_Var_C0 and
        // rorI_eReg_Var_C32. The count uses the eCXRegI class, and dst uses the
        // ncxRegI class so that it is kept apart from the count register.
 8352   effect(USE_DEF dst, USE shift, KILL cr);
 8353 
 8354   format %{ "ROR    $dst, $shift" %}
 8355   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8356   ins_encode(OpcP, RegOpc(dst));
 8357   ins_pipe( ialu_reg_reg );
 8358 %}
 8359 // end of ROR expand
 8360 
 8361 // ROR 32bit by one once
      // Recognizes the rotate idiom (dst >>> 1) | (dst << -1) and expands it into
      // rorI_eReg_imm1. The left-shift count is the constant -1 (immI_M1);
      // presumably it acts as 31 because int shifts use only the low five bits of
      // the count (compare the "& 0x1f" predicate on rorI_eReg_i8).
 8362 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8363   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8364 
 8365   expand %{
 8366     rorI_eReg_imm1(dst, rshift, cr);
 8367   %}
 8368 %}
 8369 
 8370 // ROR 32bit by immI8 once
      // Recognizes (dst >>> rshift) | (dst << lshift) with constant counts and
      // expands it into rorI_eReg_imm8 using the right-shift count.
 8371 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
        // Only a rotate when the two constant counts sum to a multiple of 32.
 8372   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8373   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8374 
 8375   expand %{
 8376     rorI_eReg_imm8(dst, rshift, cr);
 8377   %}
 8378 %}
 8379 
 8380 // ROR 32bit var by var once
      // Recognizes (dst >>> shift) | (dst << (0 - shift)) with a run-time count and
      // expands it into rorI_eReg_CL.
 8381 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8382   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8383 
 8384   expand %{
 8385     rorI_eReg_CL(dst, shift, cr);
 8386   %}
 8387 %}
 8388 
 8389 // ROR 32bit var by var once
      // Recognizes (dst >>> shift) | (dst << (32 - shift)) with a run-time count and
      // expands it into rorI_eReg_CL.
 8390 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8391   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8392 
 8393   expand %{
 8394     rorI_eReg_CL(dst, shift, cr);
 8395   %}
 8396 %}
 8397 
 8398 // Xor Instructions
 8399 // Xor Register with Register
      // Two-operand form: dst = dst ^ src, using opcode 0x33 with a register-register
      // operand encoding (declared size 2). Flags are clobbered (KILL cr).
 8400 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8401   match(Set dst (XorI dst src));
 8402   effect(KILL cr);
 8403 
 8404   size(2);
 8405   format %{ "XOR    $dst,$src" %}
 8406   opcode(0x33);
 8407   ins_encode( OpcP, RegReg( dst, src) );
 8408   ins_pipe( ialu_reg_reg );
 8409 %}
 8410 
 8411 // Xor Register with Immediate -1
      // dst ^ -1 is a bitwise complement, so it is emitted as NOT. Unlike the other
      // xor forms in this section, this one declares no flags operand or KILL effect.
 8412 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8413   match(Set dst (XorI dst imm));
 8414 
 8415   size(2);
 8416   format %{ "NOT    $dst" %}
 8417   ins_encode %{
          Register Rdst = $dst$$Register;
 8418     __ notl(Rdst);
 8419   %}
 8420   ins_pipe( ialu_reg );
 8421 %}
 8422 
 8423 // Xor Register with Immediate
      // dst = dst ^ constant, opcode group 81 /6. No size() is declared; the
      // Con8or32 enc_class name suggests the immediate is emitted as either 8 or
      // 32 bits (enc_class bodies are defined outside this section).
 8424 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8425   match(Set dst (XorI dst src));
 8426   effect(KILL cr);
 8427 
 8428   format %{ "XOR    $dst,$src" %}
 8429   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8430   // ins_encode( RegImm( dst, src) );
 8431   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8432   ins_pipe( ialu_reg );
 8433 %}
 8434 
 8435 // Xor Register with Memory
      // Folds the int load into the XOR: dst = dst ^ [src], opcode 0x33 with a
      // register/memory operand. Costed at 150. Flags are clobbered (KILL cr).
 8436 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8437   match(Set dst (XorI dst (LoadI src)));
 8438   effect(KILL cr);
 8439 
 8440   ins_cost(150);
 8441   format %{ "XOR    $dst,$src" %}
 8442   opcode(0x33);
 8443   ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark );
 8444   ins_pipe( ialu_reg_mem );
 8445 %}
 8446 
 8447 // Xor Memory with Register
      // Read-modify-write form: matches load / XOR with src / store back to the same
      // address and performs it as one instruction (opcode 31 /r). Flags are
      // clobbered (KILL cr).
 8448 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8449   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8450   effect(KILL cr);
 8451 
 8452   ins_cost(150);
 8453   format %{ "XOR    $dst,$src" %}
 8454   opcode(0x31);  /* Opcode 31 /r */
        // The register operand is passed first to RegMem, the memory operand second.
 8455   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark );
 8456   ins_pipe( ialu_mem_reg );
 8457 %}
 8458 
 8459 // Xor Memory with Immediate
      // Read-modify-write form: matches load / XOR with a constant / store back to
      // the same address, as one instruction from opcode group 81 /6.
      // Costed at 125. Flags are clobbered (KILL cr).
 8460 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8461   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8462   effect(KILL cr);
 8463 
 8464   ins_cost(125);
 8465   format %{ "XOR    $dst,$src" %}
 8466   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8467   ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark );
 8468   ins_pipe( ialu_mem_imm );
 8469 %}
 8470 
 8471 //----------Convert Int to Boolean---------------------------------------------
 8472 
 8473 instruct movI_nocopy(rRegI dst, rRegI src) %{
        // Plain int register move with no match rule; it is the first step of the
        // convI2B expand sequence. dst is defined, src is only read.
 8474   effect( DEF dst, USE src );
 8475   format %{ "MOV    $dst,$src" %}
 8476   ins_encode( enc_Copy( dst, src) );
 8477   ins_pipe( ialu_reg_reg );
 8478 %}
 8479 
 8480 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
        // Second step of the convI2B expand sequence (no match rule of its own).
        // With dst holding a copy of src, NEG produces -src and sets carry when the
        // value is non-zero; ADC then adds src plus carry, leaving 0 for a zero
        // input and 1 otherwise. Flags are clobbered (KILL cr).
 8481   effect( USE_DEF dst, USE src, KILL cr );
 8482 
 8483   size(4);
 8484   format %{ "NEG    $dst\n\t"
 8485             "ADC    $dst,$src" %}
 8486   ins_encode( neg_reg(dst),
 8487               OpcRegReg(0x13,dst,src) );
 8488   ins_pipe( ialu_reg_reg_long );
 8489 %}
 8490 
 8491 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
        // Int to boolean conversion (Conv2B of an int register). Expanded into a
        // register copy followed by the NEG/ADC pair in ci2b.
 8492   match(Set dst (Conv2B src));
 8493 
 8494   expand %{
 8495     movI_nocopy(dst,src);
 8496     ci2b(dst,src,cr);
 8497   %}
 8498 %}
 8499 
 8500 instruct movP_nocopy(rRegI dst, eRegP src) %{
        // Move from a pointer register into an int register, with no match rule; it
        // is the first step of the convP2B expand sequence.
 8501   effect( DEF dst, USE src );
 8502   format %{ "MOV    $dst,$src" %}
 8503   ins_encode( enc_Copy( dst, src) );
 8504   ins_pipe( ialu_reg_reg );
 8505 %}
 8506 
 8507 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
        // Second step of the convP2B expand sequence (no match rule of its own).
        // Same NEG/ADC pair as ci2b, with a pointer register as the ADC source:
        // the result is 0 for a null (zero) input and 1 otherwise.
        // NOTE(review): unlike ci2b, no size() is declared here.
 8508   effect( USE_DEF dst, USE src, KILL cr );
 8509   format %{ "NEG    $dst\n\t"
 8510             "ADC    $dst,$src" %}
 8511   ins_encode( neg_reg(dst),
 8512               OpcRegReg(0x13,dst,src) );
 8513   ins_pipe( ialu_reg_reg_long );
 8514 %}
 8515 
 8516 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
        // Pointer to boolean conversion (Conv2B of a pointer register). Expanded into
        // a register copy followed by the NEG/ADC pair in cp2b.
 8517   match(Set dst (Conv2B src));
 8518 
 8519   expand %{
 8520     movP_nocopy(dst,src);
 8521     cp2b(dst,src,cr);
 8522   %}
 8523 %}
 8524 
 8525 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
        // Less-than mask: dst = (p < q) ? -1 : 0, using a signed compare.
        // dst is zeroed before the compare, so p and q use the ncxRegI class to keep
        // them out of the dst register. Flags are clobbered (KILL cr).
 8526   match(Set dst (CmpLTMask p q));
 8527   effect(KILL cr);
 8528   ins_cost(400);
 8529 
 8530   // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
 8531   format %{ "XOR    $dst,$dst\n\t"
 8532             "CMP    $p,$q\n\t"
 8533             "SETlt  $dst\n\t"
 8534             "NEG    $dst" %}
 8535   ins_encode %{
 8536     Register Rp = $p$$Register;
 8537     Register Rq = $q$$Register;
 8538     Register Rd = $dst$$Register;
          // Straight-line sequence; no label is needed.
 8540     __ xorl(Rd, Rd);                // clear dst before the flags are set
 8541     __ cmpl(Rp, Rq);
 8542     __ setb(Assembler::less, Rd);   // dst = 1 when p < q, otherwise stays 0
 8543     __ negl(Rd);                    // 1 -> -1 (all ones), 0 -> 0
 8544   %}
 8545 
 8546   ins_pipe(pipe_slow);
 8547 %}
 8548 
 8549 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
        // Less-than mask against zero: an arithmetic shift right by 31 fills dst with
        // its sign bit, giving -1 for negative values and 0 otherwise.
 8550   match(Set dst (CmpLTMask dst zero));
 8551   effect(DEF dst, KILL cr);
 8552   ins_cost(100);
 8553 
 8554   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8555   ins_encode %{
          Register Rdst = $dst$$Register;
 8556     __ sarl(Rdst, 31);
 8557   %}
 8558   ins_pipe(ialu_reg);
 8559 %}
 8560 
 8561 /* better to save a register than avoid a branch */
      // Conditional add: matches p = ((p < q) ? y : 0) + (p - q).
      // Emitted as a subtract followed by a branch that skips the add of y when the
      // signed comparison says p >= q. Flags are clobbered (KILL cr).
 8562 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8563   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8564   effect(KILL cr);
 8565   ins_cost(400);
 8566   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8567             "JGE    done\n\t"
 8568             "ADD    $p,$y\n"
 8569             "done:  " %}
 8570   ins_encode %{
 8574     Label Lskip_add;
          // p -= q; the flags from the subtract drive the branch.
 8575     __ subl($p$$Register, $q$$Register);
 8576     __ jccb(Assembler::greaterEqual, Lskip_add);
          // Reached only when p < q: add y.
 8577     __ addl($p$$Register, $y$$Register);
 8578     __ bind(Lskip_add);
 8579   %}
 8580 
 8581   ins_pipe(pipe_cmplt);
 8582 %}
 8583 
 8584 /* better to save a register than avoid a branch */
      // Masked select: matches y = (p < q) ? y : 0.
      // Emitted as a compare followed by a branch that skips clearing y when the
      // signed comparison says p < q. Flags are clobbered (KILL cr).
 8585 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8586   match(Set y (AndI (CmpLTMask p q) y));
 8587   effect(KILL cr);
 8588 
 8589   ins_cost(300);
 8590 
 8591   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8592             "JLT      done\n\t"
 8593             "XORL     $y, $y\n"
 8594             "done:  " %}
 8595   ins_encode %{
 8599     Label Lkeep;
 8600     __ cmpl($p$$Register, $q$$Register);
 8601     __ jccb(Assembler::less, Lkeep);
          // Reached only when p >= q: the mask is zero, so clear y.
 8602     __ xorl($y$$Register, $y$$Register);
 8603     __ bind(Lkeep);
 8604   %}
 8605 
 8606   ins_pipe(pipe_cmplt);
 8607 %}
 8608 
 8609 /* If I enable this, I encourage spilling in the inner loop of compress.
 8610 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8611   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8612 */
 8613 //----------Overflow Math Instructions-----------------------------------------
 8614 
 8615 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8616 %{
        // Int add overflow check, register form. Only the flags are the result; the
        // ADD overwrites op1, hence USE_KILL on op1.
 8617   match(Set cr (OverflowAddI op1 op2));
 8618   effect(DEF cr, USE_KILL op1, USE op2);
 8619 
 8620   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8621 
 8622   ins_encode %{
          Register Rop1 = $op1$$Register;
          Register Rop2 = $op2$$Register;
 8623     __ addl(Rop1, Rop2);
 8624   %}
 8625   ins_pipe(ialu_reg_reg);
 8626 %}
 8627 
 8628 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8629 %{
        // Int add overflow check, immediate form. Only the flags are the result; the
        // ADD overwrites op1, hence USE_KILL on op1.
 8630   match(Set cr (OverflowAddI op1 op2));
 8631   effect(DEF cr, USE_KILL op1, USE op2);
 8632 
 8633   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8634 
 8635   ins_encode %{
          Register Rop1 = $op1$$Register;
 8636     __ addl(Rop1, $op2$$constant);
 8637   %}
 8638   ins_pipe(ialu_reg_reg);
 8639 %}
 8640 
 8641 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8642 %{
        // Int subtract overflow check, register form. A compare is emitted, so only
        // the flags are produced and no effect clause is declared for the operands.
 8643   match(Set cr (OverflowSubI op1 op2));
 8644 
 8645   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8646   ins_encode %{
          Register Rop1 = $op1$$Register;
          Register Rop2 = $op2$$Register;
 8647     __ cmpl(Rop1, Rop2);
 8648   %}
 8649   ins_pipe(ialu_reg_reg);
 8650 %}
 8651 
 8652 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8653 %{
        // Int subtract overflow check, immediate form. A compare is emitted, so only
        // the flags are produced and no effect clause is declared for the operands.
 8654   match(Set cr (OverflowSubI op1 op2));
 8655 
 8656   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8657   ins_encode %{
          Register Rop1 = $op1$$Register;
 8658     __ cmpl(Rop1, $op2$$constant);
 8659   %}
 8660   ins_pipe(ialu_reg_reg);
 8661 %}
 8662 
 8663 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8664 %{
        // Int negate overflow check: matches the overflow test of (0 - op2) and emits
        // NEG. The NEG overwrites op2, hence USE_KILL on op2.
 8665   match(Set cr (OverflowSubI zero op2));
 8666   effect(DEF cr, USE_KILL op2);
 8667 
 8668   format %{ "NEG    $op2\t# overflow check int" %}
 8669   ins_encode %{
          Register Rop2 = $op2$$Register;
 8670     __ negl(Rop2);
 8671   %}
 8672   ins_pipe(ialu_reg_reg);
 8673 %}
 8674 
 8675 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8676 %{
        // Int multiply overflow check, register form. Only the flags are the result;
        // the IMUL overwrites op1, hence USE_KILL on op1.
 8677   match(Set cr (OverflowMulI op1 op2));
 8678   effect(DEF cr, USE_KILL op1, USE op2);
 8679 
 8680   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8681   ins_encode %{
          Register Rop1 = $op1$$Register;
          Register Rop2 = $op2$$Register;
 8682     __ imull(Rop1, Rop2);
 8683   %}
 8684   ins_pipe(ialu_reg_reg_alu0);
 8685 %}
 8686 
 8687 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8688 %{
        // Int multiply overflow check, immediate form. The three-operand IMUL writes
        // the product into a TEMP register, so op1 is only read (USE).
 8689   match(Set cr (OverflowMulI op1 op2));
 8690   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8691 
 8692   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8693   ins_encode %{
          Register Rtmp = $tmp$$Register;
          Register Rop1 = $op1$$Register;
 8694     __ imull(Rtmp, Rop1, $op2$$constant);
 8695   %}
 8696   ins_pipe(ialu_reg_reg_alu0);
 8697 %}
 8698 
 8699 // Integer Absolute Instructions
      // Branch-free absolute value: with mask = src >> 31 (all ones for a negative
      // src, zero otherwise), dst = (src ^ mask) - mask.
      // dst and tmp are both TEMPs; flags are clobbered (KILL cr).
 8700 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8701 %{
 8702   match(Set dst (AbsI src));
 8703   effect(TEMP dst, TEMP tmp, KILL cr);
 8704   format %{ "movl $tmp, $src\n\t"
 8705             "sarl $tmp, 31\n\t"
 8706             "movl $dst, $src\n\t"
 8707             "xorl $dst, $tmp\n\t"
 8708             "subl $dst, $tmp\n"
 8709           %}
 8710   ins_encode %{
          Register Rdst  = $dst$$Register;
          Register Rsrc  = $src$$Register;
          Register Rmask = $tmp$$Register;
          // Build the sign mask from src.
 8711     __ movl(Rmask, Rsrc);
 8712     __ sarl(Rmask, 31);
          // Conditionally complement, then subtract the mask to finish the negation.
 8713     __ movl(Rdst, Rsrc);
 8714     __ xorl(Rdst, Rmask);
 8715     __ subl(Rdst, Rmask);
 8716   %}
 8717 
 8718   ins_pipe(ialu_reg_reg);
 8719 %}
 8720 
 8721 //----------Long Instructions------------------------------------------------
 8722 // Add Long Register with Register
      // 64-bit add on register pairs: ADD on the low words (opcode 0x03) followed by
      // ADC on the high words (opcode 0x13). Flags are clobbered (KILL cr).
 8723 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8724   match(Set dst (AddL dst src));
 8725   effect(KILL cr);
 8726   ins_cost(200);
 8727   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8728             "ADC    $dst.hi,$src.hi" %}
 8729   opcode(0x03, 0x13);
 8730   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8731   ins_pipe( ialu_reg_reg_long );
 8732 %}
 8733 
 8734 // Add Long Register with Immediate
      // 64-bit add of a long constant: ADD of the constant's low word into dst.lo
      // (81 /0) followed by ADC of its high word into dst.hi (81 /2).
      // Flags are clobbered (KILL cr).
 8735 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8736   match(Set dst (AddL dst src));
 8737   effect(KILL cr);
 8738   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8739             "ADC    $dst.hi,$src.hi" %}
 8740   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8741   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8742   ins_pipe( ialu_reg_long );
 8743 %}
 8744 
 8745 // Add Long Register with Memory
      // Folds the LoadL into the add: the low word is read from mem and the
      // high word from mem+4 (see the format), using ADD then ADC.
 8746 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8747   match(Set dst (AddL dst (LoadL mem)));
 8748   effect(KILL cr);
 8749   ins_cost(125);
 8750   format %{ "ADD    $dst.lo,$mem\n\t"
 8751             "ADC    $dst.hi,$mem+4" %}
 8752   opcode(0x03, 0x13);  /* primary = ADD r32,r/m32; secondary = ADC r32,r/m32 */
 8753   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 8754   ins_pipe( ialu_reg_long_mem );
 8755 %}
 8756 
 8757 // Subtract Long Register with Register.
      // SUB on the low halves followed by SBB on the high halves so the borrow
      // out of the low word is propagated. dst is overwritten; flags are killed.
 8758 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8759   match(Set dst (SubL dst src));
 8760   effect(KILL cr);
 8761   ins_cost(200);
 8762   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8763             "SBB    $dst.hi,$src.hi" %}
 8764   opcode(0x2B, 0x1B);  /* 2B /r = SUB r32,r/m32; 1B /r = SBB r32,r/m32 */
 8765   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8766   ins_pipe( ialu_reg_reg_long );
 8767 %}
 8768 
 8769 // Subtract Long Register with Immediate
      // SUB/SBB borrow chain with the low and high 32-bit halves of the long
      // constant as immediates (group-1 opcode 81 with /5 = SUB, /3 = SBB).
 8770 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8771   match(Set dst (SubL dst src));
 8772   effect(KILL cr);
 8773   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8774             "SBB    $dst.hi,$src.hi" %}
 8775   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8776   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8777   ins_pipe( ialu_reg_long );
 8778 %}
 8779 
 8780 // Subtract Long Register with Memory
      // Folds the LoadL into the subtract: SUB against the word at mem, then
      // SBB against the word at mem+4 (see the format).
 8781 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8782   match(Set dst (SubL dst (LoadL mem)));
 8783   effect(KILL cr);
 8784   ins_cost(125);
 8785   format %{ "SUB    $dst.lo,$mem\n\t"
 8786             "SBB    $dst.hi,$mem+4" %}
 8787   opcode(0x2B, 0x1B);  /* primary = SUB r32,r/m32; secondary = SBB r32,r/m32 */
 8788   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 8789   ins_pipe( ialu_reg_long_mem );
 8790 %}
 8791 
 8792 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
        // Negate Long Register: matches (0 - dst) and negates in place. Per the
        // format, both halves are negated; NEG on the low word sets CF when that
        // word is non-zero, and the trailing SBB subtracts that borrow from the
        // already negated high word.
 8793   match(Set dst (SubL zero dst));
 8794   effect(KILL cr);
 8795   ins_cost(300);
 8796   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8797   ins_encode( neg_long(dst) );
 8798   ins_pipe( ialu_reg_reg_long );
 8799 %}
 8800 
 8801 // And Long Register with Register
      // Bitwise ops have no carry between halves, so the same AND opcode is
      // applied independently to the low and the high word. Flags are killed.
 8802 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8803   match(Set dst (AndL dst src));
 8804   effect(KILL cr);
 8805   format %{ "AND    $dst.lo,$src.lo\n\t"
 8806             "AND    $dst.hi,$src.hi" %}
 8807   opcode(0x23,0x23);  /* 23 /r = AND r32,r/m32, used for both halves */
 8808   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8809   ins_pipe( ialu_reg_reg_long );
 8810 %}
 8811 
 8812 // And Long Register with Immediate
      // AND each half of dst with the matching 32-bit half of the long constant
      // (group-1 opcode 81 /4 for both words).
 8813 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8814   match(Set dst (AndL dst src));
 8815   effect(KILL cr);
 8816   format %{ "AND    $dst.lo,$src.lo\n\t"
 8817             "AND    $dst.hi,$src.hi" %}
 8818   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8819   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8820   ins_pipe( ialu_reg_long );
 8821 %}
 8822 
 8823 // And Long Register with Memory
      // Folds the LoadL into the AND: low word against mem, high word against
      // mem+4 (see the format).
 8824 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8825   match(Set dst (AndL dst (LoadL mem)));
 8826   effect(KILL cr);
 8827   ins_cost(125);
 8828   format %{ "AND    $dst.lo,$mem\n\t"
 8829             "AND    $dst.hi,$mem+4" %}
 8830   opcode(0x23, 0x23);  /* AND r32,r/m32 for both halves */
 8831   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 8832   ins_pipe( ialu_reg_long_mem );
 8833 %}
 8834 
 8835 // BMI1 instructions
      // And-not on a long register pair: matches (src1 ^ -1) & src2, i.e.
      // ~src1 & src2, and emits one 32-bit ANDN per half. Only selected when
      // UseBMI1Instructions is set. dst is declared TEMP and the flags are killed.
 8836 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8837   match(Set dst (AndL (XorL src1 minus_1) src2));
 8838   predicate(UseBMI1Instructions);
 8839   effect(KILL cr, TEMP dst);
 8840 
 8841   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8842             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 8843          %}
 8844 
 8845   ins_encode %{
 8846     Register Rdst = $dst$$Register;
 8847     Register Rsrc1 = $src1$$Register;
 8848     Register Rsrc2 = $src2$$Register;
 8849     __ andnl(Rdst, Rsrc1, Rsrc2);  // low halves
 8850     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));  // high halves
 8851   %}
 8852   ins_pipe(ialu_reg_reg_long);
 8853 %}
 8854 
 8855 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
        // Memory form of the long and-not: ~src1 & (long loaded from src2).
        // The high word is addressed by rebuilding the operand's address with
        // its displacement advanced by 4 bytes.
 8856   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 8857   predicate(UseBMI1Instructions);
 8858   effect(KILL cr, TEMP dst);
 8859 
 8860   ins_cost(125);
 8861   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 8862             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 8863          %}
 8864 
 8865   ins_encode %{
 8866     Register Rdst = $dst$$Register;
 8867     Register Rsrc1 = $src1$$Register;
 8868     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 8869 
 8870     __ andnl(Rdst, Rsrc1, $src2$$Address);
 8871     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 8872   %}
 8873   ins_pipe(ialu_reg_mem);
 8874 %}
 8875 
 8876 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
        // Isolate lowest set bit of a long: matches (0 - src) & src.
        // Built from two 32-bit BLSI operations: the high result is preset to 0
        // and BLSI is applied to the low word. If that result is non-zero the
        // lowest set bit lives in the low word and we are done; otherwise the
        // bit (if any) is in the high word and BLSI is applied there.
 8877   match(Set dst (AndL (SubL imm_zero src) src));
 8878   predicate(UseBMI1Instructions);
 8879   effect(KILL cr, TEMP dst);
 8880 
 8881   format %{ "MOVL   $dst.hi, 0\n\t"
 8882             "BLSIL  $dst.lo, $src.lo\n\t"
 8883             "JNZ    done\n\t"
 8884             "BLSIL  $dst.hi, $src.hi\n"
 8885             "done:"
 8886          %}
 8887 
 8888   ins_encode %{
 8889     Label done;
 8890     Register Rdst = $dst$$Register;
 8891     Register Rsrc = $src$$Register;
 8892     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8893     __ blsil(Rdst, Rsrc);
 8894     __ jccb(Assembler::notZero, done);  // low word had a set bit: high stays 0
 8895     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 8896     __ bind(done);
 8897   %}
 8898   ins_pipe(ialu_reg);
 8899 %}
 8900 
 8901 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
        // Memory form of the long lowest-set-bit isolate. The match requires
        // both LoadL inputs to use the same memory operand. Same two-step scheme
        // as the register form, with the high word read from displacement + 4.
 8902   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 8903   predicate(UseBMI1Instructions);
 8904   effect(KILL cr, TEMP dst);
 8905 
 8906   ins_cost(125);
 8907   format %{ "MOVL   $dst.hi, 0\n\t"
 8908             "BLSIL  $dst.lo, $src\n\t"
 8909             "JNZ    done\n\t"
 8910             "BLSIL  $dst.hi, $src+4\n"
 8911             "done:"
 8912          %}
 8913 
 8914   ins_encode %{
 8915     Label done;
 8916     Register Rdst = $dst$$Register;
 8917     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 8918 
 8919     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8920     __ blsil(Rdst, $src$$Address);
 8921     __ jccb(Assembler::notZero, done);  // low word had a set bit: high stays 0
 8922     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 8923     __ bind(done);
 8924   %}
 8925   ins_pipe(ialu_reg_mem);
 8926 %}
 8927 
 8928 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 8929 %{
        // Mask up to and including the lowest set bit of a long: matches
        // (src + -1) ^ src. Built from two 32-bit BLSMSK operations. BLSMSK sets
        // CF when its source is zero, so carry-clear after the low word means
        // the lowest set bit was in the low word and the high result stays 0;
        // otherwise the low result is all ones and the high word is processed.
 8930   match(Set dst (XorL (AddL src minus_1) src));
 8931   predicate(UseBMI1Instructions);
 8932   effect(KILL cr, TEMP dst);
 8933 
 8934   format %{ "MOVL    $dst.hi, 0\n\t"
 8935             "BLSMSKL $dst.lo, $src.lo\n\t"
 8936             "JNC     done\n\t"
 8937             "BLSMSKL $dst.hi, $src.hi\n"
 8938             "done:"
 8939          %}
 8940 
 8941   ins_encode %{
 8942     Label done;
 8943     Register Rdst = $dst$$Register;
 8944     Register Rsrc = $src$$Register;
 8945     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8946     __ blsmskl(Rdst, Rsrc);
 8947     __ jccb(Assembler::carryClear, done);  // low source word was non-zero
 8948     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 8949     __ bind(done);
 8950   %}
 8951 
 8952   ins_pipe(ialu_reg);
 8953 %}
 8954 
 8955 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 8956 %{
        // Memory form of the long lowest-set-bit mask, (x + -1) ^ x with x loaded
        // from src (both LoadL inputs must use the same operand). Same carry
        // test as the register form; the high word is read from displacement + 4.
 8957   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 8958   predicate(UseBMI1Instructions);
 8959   effect(KILL cr, TEMP dst);
 8960 
 8961   ins_cost(125);
 8962   format %{ "MOVL    $dst.hi, 0\n\t"
 8963             "BLSMSKL $dst.lo, $src\n\t"
 8964             "JNC     done\n\t"
 8965             "BLSMSKL $dst.hi, $src+4\n"
 8966             "done:"
 8967          %}
 8968 
 8969   ins_encode %{
 8970     Label done;
 8971     Register Rdst = $dst$$Register;
 8972     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 8973 
 8974     __ movl(HIGH_FROM_LOW(Rdst), 0);
 8975     __ blsmskl(Rdst, $src$$Address);
 8976     __ jccb(Assembler::carryClear, done);  // low source word was non-zero
 8977     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 8978     __ bind(done);
 8979   %}
 8980 
 8981   ins_pipe(ialu_reg_mem);
 8982 %}
 8983 
 8984 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 8985 %{
        // Reset lowest set bit of a long: matches (src + -1) & src.
        // The high result starts as a copy of the high source word. BLSR sets CF
        // when its source is zero, so carry-clear after the low word means the
        // bit was cleared there and the high word is left untouched; otherwise
        // BLSR is applied to the high word as well.
 8986   match(Set dst (AndL (AddL src minus_1) src) );
 8987   predicate(UseBMI1Instructions);
 8988   effect(KILL cr, TEMP dst);
 8989 
 8990   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 8991             "BLSRL  $dst.lo, $src.lo\n\t"
 8992             "JNC    done\n\t"
 8993             "BLSRL  $dst.hi, $src.hi\n"
 8994             "done:"
 8995   %}
 8996 
 8997   ins_encode %{
 8998     Label done;
 8999     Register Rdst = $dst$$Register;
 9000     Register Rsrc = $src$$Register;
 9001     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9002     __ blsrl(Rdst, Rsrc);
 9003     __ jccb(Assembler::carryClear, done);  // low source word was non-zero
 9004     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9005     __ bind(done);
 9006   %}
 9007 
 9008   ins_pipe(ialu_reg);
 9009 %}
 9010 
 9011 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9012 %{
        // Memory form of the long reset-lowest-set-bit, (x + -1) & x with x
        // loaded from src (both LoadL inputs must use the same operand). The
        // high word is first loaded from displacement + 4 and only run through
        // BLSR when the low source word was zero (carry set).
 9013   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9014   predicate(UseBMI1Instructions);
 9015   effect(KILL cr, TEMP dst);
 9016 
 9017   ins_cost(125);
 9018   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9019             "BLSRL  $dst.lo, $src\n\t"
 9020             "JNC    done\n\t"
 9021             "BLSRL  $dst.hi, $src+4\n"
 9022             "done:"
 9023   %}
 9024 
 9025   ins_encode %{
 9026     Label done;
 9027     Register Rdst = $dst$$Register;
 9028     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9029     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9030     __ blsrl(Rdst, $src$$Address);
 9031     __ jccb(Assembler::carryClear, done);  // low source word was non-zero
 9032     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9033     __ bind(done);
 9034   %}
 9035 
 9036   ins_pipe(ialu_reg_mem);
 9037 %}
 9038 
 9039 // Or Long Register with Register
      // The same OR opcode is applied independently to the low and the high
      // word of the register pair. dst is overwritten; flags are killed.
 9040 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9041   match(Set dst (OrL dst src));
 9042   effect(KILL cr);
 9043   format %{ "OR     $dst.lo,$src.lo\n\t"
 9044             "OR     $dst.hi,$src.hi" %}
 9045   opcode(0x0B,0x0B);  /* 0B /r = OR r32,r/m32, used for both halves */
 9046   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9047   ins_pipe( ialu_reg_reg_long );
 9048 %}
 9049 
 9050 // Or Long Register with Immediate
      // OR each half of dst with the matching 32-bit half of the long constant
      // (group-1 opcode 81 /1 for both words).
 9051 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9052   match(Set dst (OrL dst src));
 9053   effect(KILL cr);
 9054   format %{ "OR     $dst.lo,$src.lo\n\t"
 9055             "OR     $dst.hi,$src.hi" %}
 9056   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9057   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9058   ins_pipe( ialu_reg_long );
 9059 %}
 9060 
 9061 // Or Long Register with Memory
      // Folds the LoadL into the OR: low word against mem, high word against
      // mem+4 (see the format).
 9062 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9063   match(Set dst (OrL dst (LoadL mem)));
 9064   effect(KILL cr);
 9065   ins_cost(125);
 9066   format %{ "OR     $dst.lo,$mem\n\t"
 9067             "OR     $dst.hi,$mem+4" %}
 9068   opcode(0x0B,0x0B);  /* OR r32,r/m32 for both halves */
 9069   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 9070   ins_pipe( ialu_reg_long_mem );
 9071 %}
 9072 
 9073 // Xor Long Register with Register
      // The same XOR opcode is applied independently to the low and the high
      // word of the register pair. dst is overwritten; flags are killed.
 9074 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9075   match(Set dst (XorL dst src));
 9076   effect(KILL cr);
 9077   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9078             "XOR    $dst.hi,$src.hi" %}
 9079   opcode(0x33,0x33);  /* 33 /r = XOR r32,r/m32, used for both halves */
 9080   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9081   ins_pipe( ialu_reg_reg_long );
 9082 %}
 9083 
 9084 // Xor Long Register with Immediate -1
      // XOR with all ones is a bitwise complement, so each half is simply
      // NOT-ed. Unlike the other long logic rules no flags operand is declared
      // here: the x86 NOT instruction leaves EFLAGS unchanged.
 9085 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9086   match(Set dst (XorL dst imm));
 9087   format %{ "NOT    $dst.lo\n\t"
 9088             "NOT    $dst.hi" %}
 9089   ins_encode %{
 9090      __ notl($dst$$Register);
 9091      __ notl(HIGH_FROM_LOW($dst$$Register));
 9092   %}
 9093   ins_pipe( ialu_reg_long );
 9094 %}
 9095 
 9096 // Xor Long Register with Immediate
      // XOR each half of dst with the matching 32-bit half of the long constant
      // (group-1 opcode 81 /6 for both words).
 9097 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9098   match(Set dst (XorL dst src));
 9099   effect(KILL cr);
 9100   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9101             "XOR    $dst.hi,$src.hi" %}
 9102   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9103   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9104   ins_pipe( ialu_reg_long );
 9105 %}
 9106 
 9107 // Xor Long Register with Memory
      // Folds the LoadL into the XOR: low word against mem, high word against
      // mem+4 (see the format).
 9108 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9109   match(Set dst (XorL dst (LoadL mem)));
 9110   effect(KILL cr);
 9111   ins_cost(125);
 9112   format %{ "XOR    $dst.lo,$mem\n\t"
 9113             "XOR    $dst.hi,$mem+4" %}
 9114   opcode(0x33,0x33);  /* XOR r32,r/m32 for both halves */
 9115   ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark );
 9116   ins_pipe( ialu_reg_long_mem );
 9117 %}
 9118 
 9119 // Shift Left Long by 1
      // Doubling form of the shift, selected only when UseNewLongLShift is set:
      // ADD lo,lo shifts the low word left by one and leaves the shifted-out bit
      // in CF; ADC hi,hi shifts the high word and brings that bit in.
 9120 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9121   predicate(UseNewLongLShift);
 9122   match(Set dst (LShiftL dst cnt));
 9123   effect(KILL cr);
 9124   ins_cost(100);
 9125   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9126             "ADC    $dst.hi,$dst.hi" %}
 9127   ins_encode %{
 9128     __ addl($dst$$Register,$dst$$Register);
 9129     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9130   %}
 9131   ins_pipe( ialu_reg_long );
 9132 %}
 9133 
 9134 // Shift Left Long by 2
      // Two rounds of the ADD lo,lo / ADC hi,hi doubling step (one per bit),
      // selected only when UseNewLongLShift is set.
 9135 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9136   predicate(UseNewLongLShift);
 9137   match(Set dst (LShiftL dst cnt));
 9138   effect(KILL cr);
 9139   ins_cost(100);
 9140   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9141             "ADC    $dst.hi,$dst.hi\n\t"
 9142             "ADD    $dst.lo,$dst.lo\n\t"
 9143             "ADC    $dst.hi,$dst.hi" %}
 9144   ins_encode %{
 9145     __ addl($dst$$Register,$dst$$Register);
 9146     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9147     __ addl($dst$$Register,$dst$$Register);
 9148     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9149   %}
 9150   ins_pipe( ialu_reg_long );
 9151 %}
 9152 
 9153 // Shift Left Long by 3
      // Three rounds of the ADD lo,lo / ADC hi,hi doubling step (one per bit),
      // selected only when UseNewLongLShift is set.
 9154 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9155   predicate(UseNewLongLShift);
 9156   match(Set dst (LShiftL dst cnt));
 9157   effect(KILL cr);
 9158   ins_cost(100);
 9159   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9160             "ADC    $dst.hi,$dst.hi\n\t"
 9161             "ADD    $dst.lo,$dst.lo\n\t"
 9162             "ADC    $dst.hi,$dst.hi\n\t"
 9163             "ADD    $dst.lo,$dst.lo\n\t"
 9164             "ADC    $dst.hi,$dst.hi" %}
 9165   ins_encode %{
 9166     __ addl($dst$$Register,$dst$$Register);
 9167     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9168     __ addl($dst$$Register,$dst$$Register);
 9169     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9170     __ addl($dst$$Register,$dst$$Register);
 9171     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9172   %}
 9173   ins_pipe( ialu_reg_long );
 9174 %}
 9175 
 9176 // Shift Left Long by 1-31
      // Constant count below 32: SHLD shifts the high word left while pulling
      // the vacated bits in from the top of the low word, then SHL shifts the
      // low word by the same count.
 9177 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9178   match(Set dst (LShiftL dst cnt));
 9179   effect(KILL cr);
 9180   ins_cost(200);
 9181   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9182             "SHL    $dst.lo,$cnt" %}
 9183   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9184   ins_encode( move_long_small_shift(dst,cnt) );
 9185   ins_pipe( ialu_reg_long );
 9186 %}
 9187 
 9188 // Shift Left Long by 32-63
      // Constant count of 32 or more: every result bit of the high word comes
      // from the low word, so the low word is moved up, shifted by the
      // remaining cnt-32 bits, and the low word is cleared.
 9189 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9190   match(Set dst (LShiftL dst cnt));
 9191   effect(KILL cr);
 9192   ins_cost(300);
 9193   format %{ "MOV    $dst.hi,$dst.lo\n"
 9194           "\tSHL    $dst.hi,$cnt-32\n"
 9195           "\tXOR    $dst.lo,$dst.lo" %}
 9196   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9197   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9198   ins_pipe( ialu_reg_long );
 9199 %}
 9200 
 9201 // Shift Left Long by variable
      // The count must be in ECX (eCXRegI). Per the format: bit 5 of the count
      // is tested first; when it is set (count >= 32) the low word is moved into
      // the high word and the low word cleared, then SHLD/SHL by CL finish the
      // shift using the low five bits of the count. The encoding is declared to
      // be exactly 17 bytes (size(17)).
 9202 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9203   match(Set dst (LShiftL dst shift));
 9204   effect(KILL cr);
 9205   ins_cost(500+200);
 9206   size(17);
 9207   format %{ "TEST   $shift,32\n\t"
 9208             "JEQ,s  small\n\t"
 9209             "MOV    $dst.hi,$dst.lo\n\t"
 9210             "XOR    $dst.lo,$dst.lo\n"
 9211     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9212             "SHL    $dst.lo,$shift" %}
 9213   ins_encode( shift_left_long( dst, shift ) );
 9214   ins_pipe( pipe_slow );
 9215 %}
 9216 
 9217 // Shift Right logical Long by 1-31
      // Unsigned shift (URShiftL) with a constant count below 32: SHRD shifts
      // the low word right while pulling bits in from the high word, then SHR
      // zero-fills the high word.
 9218 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9219   match(Set dst (URShiftL dst cnt));
 9220   effect(KILL cr);
 9221   ins_cost(200);
 9222   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9223             "SHR    $dst.hi,$cnt" %}
 9224   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9225   ins_encode( move_long_small_shift(dst,cnt) );
 9226   ins_pipe( ialu_reg_long );
 9227 %}
 9228 
 9229 // Shift Right logical Long by 32-63
      // Unsigned shift with a constant count of 32 or more: the high word is
      // moved into the low word, shifted by the remaining cnt-32 bits, and the
      // high word is cleared.
 9230 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9231   match(Set dst (URShiftL dst cnt));
 9232   effect(KILL cr);
 9233   ins_cost(300);
 9234   format %{ "MOV    $dst.lo,$dst.hi\n"
 9235           "\tSHR    $dst.lo,$cnt-32\n"
 9236           "\tXOR    $dst.hi,$dst.hi" %}
 9237   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9238   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9239   ins_pipe( ialu_reg_long );
 9240 %}
 9241 
 9242 // Shift Right logical Long by variable
      // The count must be in ECX (eCXRegI). Per the format: when bit 5 of the
      // count is set the high word is moved into the low word and the high word
      // cleared, then SHRD/SHR by CL finish the shift. The encoding is declared
      // to be exactly 17 bytes (size(17)).
 9243 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9244   match(Set dst (URShiftL dst shift));
 9245   effect(KILL cr);
 9246   ins_cost(600);
 9247   size(17);
 9248   format %{ "TEST   $shift,32\n\t"
 9249             "JEQ,s  small\n\t"
 9250             "MOV    $dst.lo,$dst.hi\n\t"
 9251             "XOR    $dst.hi,$dst.hi\n"
 9252     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9253             "SHR    $dst.hi,$shift" %}
 9254   ins_encode( shift_right_long( dst, shift ) );
 9255   ins_pipe( pipe_slow );
 9256 %}
 9257 
 9258 // Shift Right arithmetic Long by 1-31
      // Signed shift (RShiftL) with a constant count below 32: SHRD shifts the
      // low word right while pulling bits in from the high word, then SAR
      // shifts the high word, replicating the sign bit.
 9259 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9260   match(Set dst (RShiftL dst cnt));
 9261   effect(KILL cr);
 9262   ins_cost(200);
 9263   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9264             "SAR    $dst.hi,$cnt" %}
 9265   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9266   ins_encode( move_long_small_shift(dst,cnt) );
 9267   ins_pipe( ialu_reg_long );
 9268 %}
 9269 
 9270 // Shift Right arithmetic Long by 32-63
      // Signed shift with a constant count of 32 or more: the high word is moved
      // into the low word and shifted by the remaining cnt-32 bits; the high
      // word is then filled with copies of the sign bit (SAR by 31).
 9271 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9272   match(Set dst (RShiftL dst cnt));
 9273   effect(KILL cr);
 9274   ins_cost(300);
 9275   format %{ "MOV    $dst.lo,$dst.hi\n"
 9276           "\tSAR    $dst.lo,$cnt-32\n"
 9277           "\tSAR    $dst.hi,31" %}
 9278   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9279   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9280   ins_pipe( ialu_reg_long );
 9281 %}
 9282 
 9283 // Shift Right arithmetic Long by variable
      // The count must be in ECX (eCXRegI). Per the format: when bit 5 of the
      // count is set the high word is moved into the low word and the high word
      // is replaced by its sign fill (SAR by 31), then SHRD/SAR by CL finish
      // the shift. Declared size is 18 bytes, one more than the logical
      // variants (SAR with an immediate instead of a register XOR).
 9284 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9285   match(Set dst (RShiftL dst shift));
 9286   effect(KILL cr);
 9287   ins_cost(600);
 9288   size(18);
 9289   format %{ "TEST   $shift,32\n\t"
 9290             "JEQ,s  small\n\t"
 9291             "MOV    $dst.lo,$dst.hi\n\t"
 9292             "SAR    $dst.hi,31\n"
 9293     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9294             "SAR    $dst.hi,$shift" %}
 9295   ins_encode( shift_right_arith_long( dst, shift ) );
 9296   ins_pipe( pipe_slow );
 9297 %}
 9298 
 9299 
 9300 //----------Double Instructions------------------------------------------------
 9301 // Double Math
 9302 
 9303 // Compare & branch
 9304 
 9305 // P6 version of float compare, sets condition codes in EFLAGS
      // x87 double compare for UseSSE <= 1 on CPUs with CMOV support. src1 is
      // pushed and FUCOMIP compares it with src2, writing EFLAGS directly and
      // popping. An unordered result (PF set) is then patched via AH/SAHF so
      // that a NaN shows up with CF set; that fixup is why EAX is killed.
 9306 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9307   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9308   match(Set cr (CmpD src1 src2));
 9309   effect(KILL rax);
 9310   ins_cost(150);
 9311   format %{ "FLD    $src1\n\t"
 9312             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9313             "JNP    exit\n\t"
 9314             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9315             "SAHF\n"
 9316      "exit:\tNOP               // avoid branch to branch" %}
 9317   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9318   ins_encode( Push_Reg_DPR(src1),
 9319               OpcP, RegOpc(src2),
 9320               cmpF_P6_fixup );
 9321   ins_pipe( pipe_slow );
 9322 %}
 9323 
 9324 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
        // Variant of the P6 x87 double compare that produces an eFlagsRegUCF
        // result: the raw FUCOMIP flags are used as-is, so there is no NaN
        // fixup sequence and EAX is not killed.
 9325   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9326   match(Set cr (CmpD src1 src2));
 9327   ins_cost(150);
 9328   format %{ "FLD    $src1\n\t"
 9329             "FUCOMIP ST,$src2  // P6 instruction" %}
 9330   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9331   ins_encode( Push_Reg_DPR(src1),
 9332               OpcP, RegOpc(src2));
 9333   ins_pipe( pipe_slow );
 9334 %}
 9335 
 9336 // Compare & branch
      // Generic x87 double compare for UseSSE <= 1 (higher cost than the P6
      // rule, so that one wins when its predicate holds). FCOMP leaves the
      // result in the FPU status word, which is copied to AX and moved into
      // EFLAGS with SAHF. If the C2 bit (0x400) reports an unordered compare,
      // AH is forced to 1 first so a NaN is treated as "less than".
 9337 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9338   predicate(UseSSE<=1);
 9339   match(Set cr (CmpD src1 src2));
 9340   effect(KILL rax);
 9341   ins_cost(200);
 9342   format %{ "FLD    $src1\n\t"
 9343             "FCOMp  $src2\n\t"
 9344             "FNSTSW AX\n\t"
 9345             "TEST   AX,0x400\n\t"
 9346             "JZ,s   flags\n\t"
 9347             "MOV    AH,1\t# unordered treat as LT\n"
 9348     "flags:\tSAHF" %}
 9349   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9350   ins_encode( Push_Reg_DPR(src1),
 9351               OpcP, RegOpc(src2),
 9352               fpu_flags);
 9353   ins_pipe( pipe_slow );
 9354 %}
 9355 
 9356 // Compare vs zero into -1,0,1
      // Three-way x87 double compare against the constant 0.0 (CmpD3): src1 is
      // pushed, tested with FTST (D9 E4, emitted as secondary then primary
      // opcode byte) and popped; CmpF_Result then materialises the integer
      // result in dst. EAX and the flags are killed.
 9357 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9358   predicate(UseSSE<=1);
 9359   match(Set dst (CmpD3 src1 zero));
 9360   effect(KILL cr, KILL rax);
 9361   ins_cost(280);
 9362   format %{ "FTSTD  $dst,$src1" %}
 9363   opcode(0xE4, 0xD9);
 9364   ins_encode( Push_Reg_DPR(src1),
 9365               OpcS, OpcP, PopFPU,
 9366               CmpF_Result(dst));
 9367   ins_pipe( pipe_slow );
 9368 %}
 9369 
 9370 // Compare into -1,0,1
      // Three-way x87 double compare of two registers (CmpD3): src1 is pushed
      // and compared with src2 using the popping FCOMP, then CmpF_Result
      // materialises the integer result in dst. EAX and the flags are killed.
 9371 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9372   predicate(UseSSE<=1);
 9373   match(Set dst (CmpD3 src1 src2));
 9374   effect(KILL cr, KILL rax);
 9375   ins_cost(300);
 9376   format %{ "FCMPD  $dst,$src1,$src2" %}
 9377   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9378   ins_encode( Push_Reg_DPR(src1),
 9379               OpcP, RegOpc(src2),
 9380               CmpF_Result(dst));
 9381   ins_pipe( pipe_slow );
 9382 %}
 9383 
 9384 // Double compare and set condition codes in EFLAGS by XMM regs
      // SSE2 path (UseSSE >= 2): UCOMISD writes EFLAGS directly. The fixup
      // emitted by emit_cmpfp_fixup (see the format) only runs for an unordered
      // result (PF set) and rewrites the saved flags so a NaN ends up with CF
      // set.
 9385 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9386   predicate(UseSSE>=2);
 9387   match(Set cr (CmpD src1 src2));
 9388   ins_cost(145);
 9389   format %{ "UCOMISD $src1,$src2\n\t"
 9390             "JNP,s   exit\n\t"
 9391             "PUSHF\t# saw NaN, set CF\n\t"
 9392             "AND     [rsp], #0xffffff2b\n\t"
 9393             "POPF\n"
 9394     "exit:" %}
 9395   ins_encode %{
 9396     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9397     emit_cmpfp_fixup(masm);
 9398   %}
 9399   ins_pipe( pipe_slow );
 9400 %}
 9401 
 9402 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
        // SSE2 double compare producing an eFlagsRegUCF result: a bare UCOMISD
        // with no NaN fixup, hence the lower cost than the eFlagsRegU variant.
 9403   predicate(UseSSE>=2);
 9404   match(Set cr (CmpD src1 src2));
 9405   ins_cost(100);
 9406   format %{ "UCOMISD $src1,$src2" %}
 9407   ins_encode %{
 9408     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9409   %}
 9410   ins_pipe( pipe_slow );
 9411 %}
 9412 
 9413 // Double compare and set condition codes in EFLAGS by XMM regs
      // Memory form: the LoadD of the second operand is folded into UCOMISD.
      // The same NaN fixup as in the register form follows the compare.
 9414 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9415   predicate(UseSSE>=2);
 9416   match(Set cr (CmpD src1 (LoadD src2)));
 9417   ins_cost(145);
 9418   format %{ "UCOMISD $src1,$src2\n\t"
 9419             "JNP,s   exit\n\t"
 9420             "PUSHF\t# saw NaN, set CF\n\t"
 9421             "AND     [rsp], #0xffffff2b\n\t"
 9422             "POPF\n"
 9423     "exit:" %}
 9424   ins_encode %{
 9425     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9426     emit_cmpfp_fixup(masm);
 9427   %}
 9428   ins_pipe( pipe_slow );
 9429 %}
 9430 
 9431 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
        // Memory form of the eFlagsRegUCF double compare: a bare UCOMISD against
        // the folded LoadD operand, with no NaN fixup.
 9432   predicate(UseSSE>=2);
 9433   match(Set cr (CmpD src1 (LoadD src2)));
 9434   ins_cost(100);
 9435   format %{ "UCOMISD $src1,$src2" %}
 9436   ins_encode %{
 9437     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9438   %}
 9439   ins_pipe( pipe_slow );
 9440 %}
 9441 
 9442 // Compare into -1,0,1 in XMM
      // Three-way double compare (CmpD3) on the SSE2 path. Per the format, dst
      // is preset to -1 and kept for an unordered (JP) or below (JB) result;
      // otherwise SETNE/MOVZB produce 0 for equal and 1 for greater.
 9443 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9444   predicate(UseSSE>=2);
 9445   match(Set dst (CmpD3 src1 src2));
 9446   effect(KILL cr);
 9447   ins_cost(255);
 9448   format %{ "UCOMISD $src1, $src2\n\t"
 9449             "MOV     $dst, #-1\n\t"
 9450             "JP,s    done\n\t"
 9451             "JB,s    done\n\t"
 9452             "SETNE   $dst\n\t"
 9453             "MOVZB   $dst, $dst\n"
 9454     "done:" %}
 9455   ins_encode %{
 9456     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9457     emit_cmpfp3(masm, $dst$$Register);
 9458   %}
 9459   ins_pipe( pipe_slow );
 9460 %}
 9461 
 9462 // Compare into -1,0,1 in XMM and memory
      // Memory form of the three-way double compare: the LoadD of the second
      // operand is folded into UCOMISD; the -1/0/1 materialisation is the same
      // as in the register form.
 9463 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9464   predicate(UseSSE>=2);
 9465   match(Set dst (CmpD3 src1 (LoadD src2)));
 9466   effect(KILL cr);
 9467   ins_cost(275);
 9468   format %{ "UCOMISD $src1, $src2\n\t"
 9469             "MOV     $dst, #-1\n\t"
 9470             "JP,s    done\n\t"
 9471             "JB,s    done\n\t"
 9472             "SETNE   $dst\n\t"
 9473             "MOVZB   $dst, $dst\n"
 9474     "done:" %}
 9475   ins_encode %{
 9476     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9477     emit_cmpfp3(masm, $dst$$Register);
 9478   %}
 9479   ins_pipe( pipe_slow );
 9480 %}
 9481 
 9482 
 9483 instruct subDPR_reg(regDPR dst, regDPR src) %{
        // x87 double subtract, dst = dst - src, for UseSSE <= 1. src is pushed
        // onto the FPU stack and the popping subtract (DE E8+i, FSUBP) stores
        // ST(i) - ST(0) into dst while removing the pushed copy again.
 9484   predicate (UseSSE <=1);
 9485   match(Set dst (SubD dst src));
 9486 
 9487   format %{ "FLD    $src\n\t"
 9488             "DSUBp  $dst,ST" %}
 9489   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9490   ins_cost(150);
 9491   ins_encode( Push_Reg_DPR(src),
 9492               OpcP, RegOpc(dst) );
 9493   ins_pipe( fpu_reg_reg );
 9494 %}
 9495 
 9496 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
        // x87 double subtract whose result is rounded to double by storing it
        // to a stack slot (RoundDouble). src2 is pushed, then D8 /5 - the
        // reversed subtract ST(0) = ST(i) - ST(0) - yields src1 - src2 on top
        // of the stack, which FSTP_D stores and pops.
 9497   predicate (UseSSE <=1);
 9498   match(Set dst (RoundDouble (SubD src1 src2)));
 9499   ins_cost(250);
 9500 
 9501   format %{ "FLD    $src2\n\t"
 9502             "DSUB   ST,$src1\n\t"
 9503             "FSTP_D $dst\t# D-round" %}
 9504   opcode(0xD8, 0x5);  /* D8 E8+i or D8 /5 */
 9505   ins_encode( Push_Reg_DPR(src2),
 9506               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9507   ins_pipe( fpu_mem_reg_reg );
 9508 %}
 9509 
 9510 
 9511 instruct subDPR_reg_mem(regDPR dst, memory src) %{
        // x87 double subtract with a folded memory load, dst = dst - [src].
        // The tertiary opcode DD /0 loads the double from memory onto the FPU
        // stack; the popping subtract (DE /5) then updates dst.
 9512   predicate (UseSSE <=1);
 9513   match(Set dst (SubD dst (LoadD src)));
 9514   ins_cost(150);
 9515 
 9516   format %{ "FLD    $src\n\t"
 9517             "DSUBp  $dst,ST" %}
 9518   opcode(0xDE, 0x5, 0xDD); /* DE E8+i */  /* LoadD  DD /0 */
 9519   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
 9520               OpcP, RegOpc(dst), ClearInstMark );
 9521   ins_pipe( fpu_reg_mem );
 9522 %}
 9523 
 9524 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
        // x87 double absolute value for UseSSE <= 1. Both operands use the
        // regDPR1 class, so the single FABS (D9 E1, emitted as secondary then
        // primary opcode byte) works in place.
        // NOTE(review): regDPR1 is presumably the top-of-stack register class -
        // confirm against the operand definitions.
 9525   predicate (UseSSE<=1);
 9526   match(Set dst (AbsD src));
 9527   ins_cost(100);
 9528   format %{ "FABS" %}
 9529   opcode(0xE1, 0xD9);
 9530   ins_encode( OpcS, OpcP );
 9531   ins_pipe( fpu_reg_reg );
 9532 %}
 9533 
 9534 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
        // x87 double negate for UseSSE <= 1. Both operands use the regDPR1
        // class, so the single FCHS (D9 E0, emitted as secondary then primary
        // opcode byte) works in place.
 9535   predicate(UseSSE<=1);
 9536   match(Set dst (NegD src));
 9537   ins_cost(100);
 9538   format %{ "FCHS" %}
 9539   opcode(0xE0, 0xD9);
 9540   ins_encode( OpcS, OpcP );
 9541   ins_pipe( fpu_reg_reg );
 9542 %}
 9543 
 9544 instruct addDPR_reg(regDPR dst, regDPR src) %{
        // x87 double add, dst = dst + src, for UseSSE <= 1. src is pushed onto
        // the FPU stack and the popping add (DE C0+i, FADDP) accumulates it
        // into dst while removing the pushed copy again. The format prints the
        // popping mnemonic to match the emitted opcode and the sibling
        // DSUBp/DMULp/DADDp rules. Declared size is 4 bytes.
 9545   predicate(UseSSE<=1);
 9546   match(Set dst (AddD dst src));
 9547   format %{ "FLD    $src\n\t"
 9548             "DADDp  $dst,ST" %}
 9549   size(4);
 9550   ins_cost(150);
 9551   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9552   ins_encode( Push_Reg_DPR(src),
 9553               OpcP, RegOpc(dst) );
 9554   ins_pipe( fpu_reg_reg );
 9555 %}
 9556 
 9557 
 9558 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
        // x87 double add whose result is rounded to double by storing it to a
        // stack slot (RoundDouble). src2 is pushed, src1 is added into the top
        // of stack (D8 C0+i, non-popping FADD), and FSTP_D stores and pops the
        // sum.
 9559   predicate(UseSSE<=1);
 9560   match(Set dst (RoundDouble (AddD src1 src2)));
 9561   ins_cost(250);
 9562 
 9563   format %{ "FLD    $src2\n\t"
 9564             "DADD   ST,$src1\n\t"
 9565             "FSTP_D $dst\t# D-round" %}
 9566   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9567   ins_encode( Push_Reg_DPR(src2),
 9568               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9569   ins_pipe( fpu_mem_reg_reg );
 9570 %}
 9571 
 9572 
 9573 instruct addDPR_reg_mem(regDPR dst, memory src) %{
        // x87 double add with a folded memory load, dst = dst + [src]. The
        // tertiary opcode DD /0 loads the double from memory onto the FPU
        // stack; the popping add (DE C0+i) then accumulates it into dst.
 9574   predicate(UseSSE<=1);
 9575   match(Set dst (AddD dst (LoadD src)));
 9576   ins_cost(150);
 9577 
 9578   format %{ "FLD    $src\n\t"
 9579             "DADDp  $dst,ST" %}
 9580   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9581   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
 9582               OpcP, RegOpc(dst), ClearInstMark );
 9583   ins_pipe( fpu_reg_mem );
 9584 %}
 9585 
 9586 // add-to-memory
      // Read-modify-write double add on a memory location for UseSSE <= 1:
      // load [dst] onto the FPU stack (DD /0), add src into the top of stack
      // (D8 C0+i), then store the sum back to [dst] with the popping store
      // (DD /3, FSTP m64) so the stack is balanced again. The format prints
      // FSTP_D to match that emitted opcode.
 9587 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9588   predicate(UseSSE<=1);
 9589   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9590   ins_cost(150);
 9591 
 9592   format %{ "FLD_D  $dst\n\t"
 9593             "DADD   ST,$src\n\t"
 9594             "FSTP_D $dst" %}
 9595   opcode(0xDD, 0x0);
 9596   ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst),
 9597               Opcode(0xD8), RegOpc(src), ClearInstMark,
 9598               SetInstMark,
 9599               Opcode(0xDD), RMopc_Mem(0x03,dst),
 9600               ClearInstMark);
 9601   ins_pipe( fpu_reg_mem );
 9602 %}
 9603 
 9604 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
        // x87 double add of the constant 1.0: FLD1 pushes the constant without
        // a memory access and the popping add accumulates it into dst. Cheaper
        // (cost 125) than the general constant-table rule.
 9605   predicate(UseSSE<=1);
 9606   match(Set dst (AddD dst con));
 9607   ins_cost(125);
 9608   format %{ "FLD1\n\t"
 9609             "DADDp  $dst,ST" %}
 9610   ins_encode %{
 9611     __ fld1();
 9612     __ faddp($dst$$reg);
 9613   %}
 9614   ins_pipe(fpu_reg);
 9615 %}
 9616 
 9617 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
        // x87 double add of a general constant loaded from the constant table.
        // The predicate inspects the constant operand (second child of the AddD
        // match node) and rejects 0.0 and 1.0, so those values are not handled
        // by this rule.
 9618   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9619   match(Set dst (AddD dst con));
 9620   ins_cost(200);
 9621   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9622             "DADDp  $dst,ST" %}
 9623   ins_encode %{
 9624     __ fld_d($constantaddress($con));
 9625     __ faddp($dst$$reg);
 9626   %}
 9627   ins_pipe(fpu_reg_mem);
 9628 %}
 9629 
// Rounding variant of the constant double add, selected when UseSSE<=1:
//   dst = RoundDouble(src + con)
// The result is stored (and popped) to the stack slot $dst, addressed off rsp.
// The constant sits one level deeper in the match tree here
// (_kids[0]->_kids[1]); the predicate rejects the values 0.0 and 1.0.
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD   ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
 9644 
// Register-register double multiply, selected when UseSSE<=1:
//   dst = dst * src
// Per the format string: push $src, then multiply-and-pop into $dst.
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD    $src\n\t"
            "DMULp  $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
 9656 
// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
// Operand classes: dst is constrained to regDPR1 and src to regnotDPR1.
// The predicate additionally requires that the current compilation has a
// method (Compile::current()->has_method()).
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (MulD dst src));
  ins_cost(1);   // Select this instruction for all FP double multiplies

  format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
 9683 
// Double multiply by a constant, selected when UseSSE<=1:
//   dst = dst * con
// The predicate rejects constants equal to 0.0 or 1.0.  The constant is
// loaded from the constant table and multiplied-and-popped into $dst.
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp  $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}
 9696 
 9697 
// Double multiply with a memory operand, selected when UseSSE<=1:
//   dst = dst * LoadD(src)
// Per the format string: load the double from memory onto the FPU stack,
// then multiply-and-pop into $dst.
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D  $src\n\t"
            "DMULp  $dst,ST" %}
  // tertiary (0xDD) is the opcode byte emitted for the memory load.
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}
 9709 
 9710 //
 9711 // Cisc-alternate to reg-reg multiply
 9712 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9713   predicate( UseSSE<=1 );
 9714   match(Set dst (MulD src (LoadD mem)));
 9715   ins_cost(250);
 9716   format %{ "FLD_D  $mem\n\t"
 9717             "DMUL   ST,$src\n\t"
 9718             "FSTP_D $dst" %}
 9719   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9720   ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem),
 9721               OpcReg_FPR(src),
 9722               Pop_Reg_DPR(dst), ClearInstMark );
 9723   ins_pipe( fpu_reg_reg_mem );
 9724 %}
 9725 
 9726 
// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2.  This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
//
// Fused pattern, selected when UseSSE<=1:
//   src2 = (src0 * src1) + src2
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DADDp  $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}
 9744 
 9745 
// MACRO3 -- subDPR a mulDPR
// Fused pattern, selected when UseSSE<=1; the result is written back to src2:
//   src2 = (src0 * src1) - src2
// Unlike the add form, no opcode() clause is declared: the final
// subtract-and-pop is emitted from literal bytes (0xDE, then 0xE0 combined
// with src2 via Opc_plus).
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}
 9759 
 9760 
// Register-register double divide, selected when UseSSE<=1:
//   dst = dst / src
// Per the format string: push $src, then divide-and-pop into $dst.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD    $src\n\t"
            "FDIVp  $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
 9773 
 9774 // Strict FP instruction biases argument before division then
 9775 // biases result, to avoid double rounding of subnormals.
 9776 //
 9777 // scale dividend by multiplying dividend by 2^(-15360)
 9778 // load divisor
 9779 // divide scaled dividend by divisor
 9780 // rescale quotient by 2^(15360)
 9781 //
 9782 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9783   predicate (UseSSE<=1);
 9784   match(Set dst (DivD dst src));
 9785   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9786   ins_cost(01);
 9787 
 9788   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9789             "DMULp  $dst,ST\n\t"
 9790             "FLD    $src\n\t"
 9791             "FDIVp  $dst,ST\n\t"
 9792             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9793             "DMULp  $dst,ST\n\t" %}
 9794   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9795   ins_encode( strictfp_bias1(dst),
 9796               Push_Reg_DPR(src),
 9797               OpcP, RegOpc(dst),
 9798               strictfp_bias2(dst) );
 9799   ins_pipe( fpu_reg_reg );
 9800 %}
 9801 
// Double remainder on the x87 stack, selected when UseSSE<=1:
//   dst = dst % src
// rax and cr appear only as KILL effects: they are clobbered, not inputs.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}
 9815 
// Double remainder for XMM operands, selected when UseSSE>=2:
//   dst = src0 % src1
// Per the format string, both operands are spilled through an 8-byte stack
// temporary onto the x87 stack, FPREM is looped on the parity flag, and the
// result is moved back to $dst through the same temporary before the FPU
// stack is restored.  rax and cr are clobbered (KILL effects).
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB    ESP,8\t # DMOD\n"
          "\tMOVSD  [ESP+0],$src1\n"
          "\tFLD_D  [ESP+0]\n"
          "\tMOVSD  [ESP+0],$src0\n"
          "\tFLD_D  [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD  $dst,[ESP+0]\n"
          "\tADD    ESP,8\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}
 9840 
// Two-operand arctangent on the x87 stack, selected when UseSSE<=1:
//   dst = AtanD(dst, src)
// The opcode pair D9 F3 is the x87 FPATAN instruction.
// NOTE(review): the mnemonic "DATA" in the format string looks like a
// truncated "DATAN" -- confirm before changing the printed text.
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA   $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}
 9850 
// Two-operand arctangent for XMM operands, selected when UseSSE>=2:
//   dst = AtanD(dst, src)
// The opcode pair D9 F3 is the x87 FPATAN instruction; the flags register is
// killed because the push/result helpers adjust ESP with SUB/ADD.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA   $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}
 9861 
// Double square root on the x87 stack, selected when UseSSE<=1:
//   dst = SqrtD(src)
// The opcode bytes are declared in swapped order (primary=0xFA,
// secondary=0xD9) and emitted as OpcS then OpcP, i.e. secondary first.
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT  $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}
 9871 
 9872 //-------------Float Instructions-------------------------------
 9873 // Float Math
 9874 
 9875 // Code for float compare:
 9876 //     fcompp();
 9877 //     fwait(); fnstsw_ax();
 9878 //     sahf();
 9879 //     movl(dst, unordered_result);
 9880 //     jcc(Assembler::parity, exit);
 9881 //     movl(dst, less_result);
 9882 //     jcc(Assembler::below, exit);
 9883 //     movl(dst, equal_result);
 9884 //     jcc(Assembler::equal, exit);
 9885 //     movl(dst, greater_result);
 9886 //   exit:
 9887 
// P6 version of float compare, sets condition codes in EFLAGS
// Selected when the CPU supports cmov and UseSSE == 0.  Per the format
// string, a NaN (parity set) is fixed up to look like "less than" by setting
// CF through AH/SAHF, which is why EAX is killed.
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction\n\t"
            "JNP    exit\n\t"
            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}
 9906 
// P6 float compare producing an eFlagsRegUCF result.  Same compare encoding
// as cmpFPR_cc_P6 but without the NaN fixup step, so EAX is not killed.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}
 9918 
 9919 
// Compare & branch
// Float compare into EFLAGS when UseSSE == 0, with no cmov requirement.
// Per the format string, the FPU status word is read into AX and transferred
// to the flags with SAHF; an unordered result is treated as "less than".
// EAX is killed.
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD    $src1\n\t"
            "FCOMp  $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST   AX,0x400\n\t"
            "JZ,s   flags\n\t"
            "MOV    AH,1\t# unordered treat as LT\n"
    "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}
 9939 
// Compare vs zero into -1,0,1
// Three-way float compare against an immFPR0 constant when UseSSE == 0.
// The opcode bytes are declared swapped (primary=0xE4, secondary=0xD9) and
// emitted as OpcS then OpcP; the pushed operand is popped afterwards.
// EAX and the flags are killed.
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF  $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
 9953 
// Compare into -1,0,1
// Three-way float compare of two x87 registers when UseSSE == 0; the integer
// result is produced in $dst.  EAX and the flags are killed.
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF  $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
 9967 
// float compare and set condition codes in EFLAGS by XMM regs
// Selected when UseSSE>=1.  After UCOMISS, emit_cmpfp_fixup() emits the NaN
// fixup sequence shown in the format string.
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s   exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND     [rsp], #0xffffff2b\n\t"
            "POPF\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}
 9985 
// XMM float compare producing an eFlagsRegUCF result (UseSSE>=1).
// Emits only the UCOMISS; there is no NaN fixup sequence.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
 9996 
// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form (UseSSE>=1): the second operand is a LoadF folded into
// the UCOMISS.  emit_cmpfp_fixup() then emits the NaN fixup sequence shown in
// the format string.
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s   exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND     [rsp], #0xffffff2b\n\t"
            "POPF\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(masm);
  %}
  ins_pipe( pipe_slow );
%}
10014 
// Memory-operand XMM float compare producing an eFlagsRegUCF result
// (UseSSE>=1).  Emits only the UCOMISS; there is no NaN fixup sequence.
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}
10025 
// Compare into -1,0,1 in XMM
// Three-way float compare of two XMM registers (UseSSE>=1).  After UCOMISS,
// emit_cmpfp3() materializes the integer result in $dst; the format string
// shows the sequence (unordered and below both yield -1).  Flags are killed.
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV     $dst, #-1\n\t"
            "JP,s    done\n\t"
            "JB,s    done\n\t"
            "SETNE   $dst\n\t"
            "MOVZB   $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10045 
// Compare into -1,0,1 in XMM and memory
// Three-way float compare of an XMM register against a LoadF memory operand
// (UseSSE>=1).  After UCOMISS, emit_cmpfp3() materializes the integer result
// in $dst.  Flags are killed.
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV     $dst, #-1\n\t"
            "JP,s    done\n\t"
            "JB,s    done\n\t"
            "SETNE   $dst\n\t"
            "MOVZB   $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}
10065 
// Spill to obtain 24-bit precision
// Float subtract when UseSSE==0 and the compilation selects 24-bit
// instructions:
//   dst = src1 - src2
// The result is popped to a float stack slot rather than kept in a register.
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB   $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Two-address float subtract when UseSSE==0 and 24-bit instructions are not
// selected:
//   dst = dst - src
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB   $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10090 
// Spill to obtain 24-bit precision
// Float add when UseSSE==0 and the compilation selects 24-bit instructions:
//   dst = src1 + src2
// src2 is pushed first and src1 is applied to it; the result is popped to a
// float stack slot.
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Two-address float add when UseSSE==0 and 24-bit instructions are not
// selected:
//   dst = dst + src
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD    $src\n\t"
            "FADDp  $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10116 
// Float absolute value when UseSSE==0.  Both operands are constrained to
// regFPR1, and the encoding is just the two opcode bytes, emitted secondary
// first (D9 E1, x87 FABS) with no register operand.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
10126 
// Float negate when UseSSE==0.  Both operands are constrained to regFPR1,
// and the encoding is just the two opcode bytes, emitted secondary first
// (D9 E0, x87 FCHS) with no register operand.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
10136 
// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
//   dst = src1 + LoadF(src2)
// The float is loaded from memory (tertiary opcode 0xD9), src1 is added to
// it, and the sum is popped to a float stack slot.
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD    $src2\n\t"
            "FADD   ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
//   dst = dst + LoadF(src)
// The float is loaded from memory (tertiary opcode 0xD9) and the add is
// applied to $dst using the primary/secondary opcode.
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD   $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem );
%}
10165 
// Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
//   dst = src1 + src2, where src1 is a memory operand matched directly
//   (there is no explicit LoadF in the match rule) and src2 is a register.
// The memory operand is loaded first, src2 is added to it, and the sum is
// popped to a float stack slot.
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}
10179 
// Cisc-spill variant
// Spill to obtain 24-bit precision
//   dst = src1 + LoadF(src2), with both inputs in memory.
// src2 is loaded first (tertiary opcode 0xD9); src1 is then applied as a
// memory operand of the primary opcode with the secondary value in the
// ModRM reg field; the sum is popped to a float stack slot.
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD   $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark);
  ins_pipe( fpu_mem_mem_mem );
%}
10194 
// Spill to obtain 24-bit precision
//   dst = src1 + src2, with both inputs matched directly as memory operands.
// The encoding is the same as addFPR24_mem_cisc; only the match rule differs
// (no explicit LoadF around src2).
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark);
  ins_pipe( fpu_mem_mem_mem );
%}
10208 
10209 
// Spill to obtain 24-bit precision
//   dst = src + con
// The register is pushed, the float constant is added straight from the
// constant table, and the sum is stored (and popped) as a single-precision
// value to the stack slot $dst, addressed off rsp.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
//   dst = src + con
// The register is pushed, the float constant is added straight from the
// constant table, and the sum is popped into the register $dst.
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}
10239 
// Spill to obtain 24-bit precision
// Float multiply when UseSSE==0 and the compilation selects 24-bit
// instructions:
//   dst = src1 * src2
// The product is popped to a float stack slot.
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD    $src1\n\t"
            "FMUL   $src2\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Three-operand float multiply when UseSSE==0 and 24-bit instructions are
// not selected:
//   dst = src1 * src2
// NOTE(review): the format string lists $src1 as the pushed operand and
// prints FSTP_S, while the encoding pushes src2, applies src1, and pops into
// a register (Pop_Reg_FPR).  The product is the same either way, but the
// printed text does not mirror the emitted sequence.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD    $src1\n\t"
            "FMUL   $src2\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}
10269 
10270 
// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
//   dst = src1 * LoadF(src2)
// The float is loaded from memory (tertiary opcode 0xD9), multiplied by
// src1, and the product is popped to a float stack slot.
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S  $src2\n\t"
            "FMUL   $src1\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst), ClearInstMark );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
//   dst = src1 * LoadF(src2)
// The float is loaded from memory (tertiary opcode 0xD9), multiplied by
// src1, and the product is popped into the register $dst.
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL   $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}
10300 
// Spill to obtain 24-bit precision
//   dst = src1 * src2, with both inputs matched directly as memory operands.
// src2 is loaded first (tertiary opcode 0xD9); src1 is then applied as a
// memory operand of the primary opcode with the secondary value in the
// ModRM reg field; the product is popped to a float stack slot.
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL   $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst),
              ClearInstMark );
  ins_pipe( fpu_mem_mem_mem );
%}
10314 
// Spill to obtain 24-bit precision
//   dst = src * con
// The register is pushed, multiplied by the float constant straight from the
// constant table, and the product is stored (and popped) as a
// single-precision value to the stack slot $dst, addressed off rsp.
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
//   dst = src * con
// The register is pushed, multiplied by the float constant straight from the
// constant table, and the product is popped into the register $dst.
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}
10346 
10347 
//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
//   dst = LoadF(mem1) * src
// The load is the left operand of the multiply in the match rule.  It is
// emitted first (tertiary opcode 0xD9), then multiplied by src, and the
// product is popped into the register $dst.
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD    $mem1    ===MACRO1===\n\t"
            "FMUL   ST,$src\n\t"
            "FSTP   $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
  ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
//   dst = (LoadF(mem1) * src1) + src2
// Here the primary opcode (0xD9) is the load itself: it is emitted with OpcP
// followed by the memory ModRM, then the multiply, the add, and the pop into
// the register $dst.
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD    $mem1     ===MACRO2===\n\t"
            "FMUL   ST,$src1  subsume mulFPR left load\n\t"
            "FADD   ST,$src2\n\t"
            "FSTP   $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst), ClearInstMark );
  ins_pipe( fpu_reg_mem_reg_reg );
%}
10383 
// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits.  It is a '2-address'
// instruction in that the result goes back to src2.  This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
//
//   src2 = (src0 * src1) + src2
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD    $src0     ===MACRO3===\n\t"
            "FMUL   ST,$src1\n\t"
            "FADDP  $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}
10402 
// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
//   dst = (src2 - src1) / src3
// Note the operand order in the match rule: src2 is the minuend and is the
// value pushed first; the subtract and divide are emitted together by
// subFPR_divFPR_encode(src1,src3).
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD    $src2   ===MACRO4===\n\t"
            "FSUB   ST,$src1\n\t"
            "FDIV   ST,$src3\n\t"
            "FSTP  $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}
10419 
// Spill to obtain 24-bit precision
// Float divide when UseSSE==0 and the compilation selects 24-bit
// instructions:
//   dst = src1 / src2
// The quotient is popped to a float stack slot.
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Two-address float divide when UseSSE==0 and 24-bit instructions are not
// selected:
//   dst = dst / src
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV   $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10444 
10445 
// Spill to obtain 24-bit precision
// Float remainder when UseSSE==0 and the compilation selects 24-bit
// instructions:
//   dst = src1 % src2
// This reuses the double-precision mod encodings (the *_DPR helpers and
// emitModDPR) and pops the result to a float stack slot.  EAX and the flags
// are killed.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD   $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
10459 // In-place float remainder in FPU registers: dst = dst mod src
10460 // This instruction does not round to 24-bits
10461 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10462   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); // UseSSE is 0 and 24-bit instruction selection is off
10463   match(Set dst (ModF dst src)); // dst is both the dividend and the result
10464   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10465 
10466   format %{ "FMOD   $dst,$src" %}
10467   ins_encode(Push_Reg_Mod_DPR(dst, src),
10468               emitModDPR(),
10469               Push_Result_Mod_DPR(src),
10470               Pop_Reg_FPR(dst)); // reuses the double-precision (DPR) mod encodings, then pops the result into dst
10471   ins_pipe( pipe_slow );
10472 %}
10473 
10474 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ // float remainder for XMM operands, routed through the x87 FPREM loop shown in the format
10475   predicate(UseSSE>=1); // XMM float registers are in use
10476   match(Set dst (ModF src0 src1)); // dst = src0 mod src1
10477   effect(KILL rax, KILL cr); // the format's FNSTSW AX / SAHF sequence overwrites EAX and EFLAGS
10478   format %{ "SUB    ESP,4\t # FMOD\n"
10479           "\tMOVSS  [ESP+0],$src1\n"
10480           "\tFLD_S  [ESP+0]\n"
10481           "\tMOVSS  [ESP+0],$src0\n"
10482           "\tFLD_S  [ESP+0]\n"
10483      "loop:\tFPREM\n"
10484           "\tFWAIT\n"
10485           "\tFNSTSW AX\n"
10486           "\tSAHF\n"
10487           "\tJP     loop\n"
10488           "\tFSTP_S [ESP+0]\n"
10489           "\tMOVSS  $dst,[ESP+0]\n"
10490           "\tADD    ESP,4\n"
10491           "\tFSTP   ST0\t # Restore FPU Stack"
10492     %}
10493   ins_cost(250);
10494   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); // load both operands on the FPU stack, run the mod loop, move the result to dst (0x4 matches the 4-byte scratch slot), pop the FPU stack
10495   ins_pipe( pipe_slow );
10496 %}
10497 
10498 
10499 //----------Arithmetic Conversion Instructions---------------------------------
10500 // The conversion operations are all alpha-sorted.  Please keep it that way!
10501 
10502 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ // RoundFloat: store an FPU float register to a float stack slot
10503   predicate(UseSSE==0); // only when UseSSE is 0
10504   match(Set dst (RoundFloat src));
10505   ins_cost(125);
10506   format %{ "FST_S  $dst,$src\t# F-round" %}
10507   ins_encode( Pop_Mem_Reg_FPR(dst, src) ); // the single-precision store named in the format is what does the rounding
10508   ins_pipe( fpu_mem_reg );
10509 %}
10510 
10511 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ // RoundDouble: store an FPU double register to a double stack slot
10512   predicate(UseSSE<=1); // doubles are held in FPU registers when UseSSE is 0 or 1
10513   match(Set dst (RoundDouble src));
10514   ins_cost(125);
10515   format %{ "FST_D  $dst,$src\t# D-round" %}
10516   ins_encode( Pop_Mem_Reg_DPR(dst, src) ); // the double-precision store named in the format is what does the rounding
10517   ins_pipe( fpu_mem_reg );
10518 %}
10519 
10520 // Force rounding to single precision (24-bit significand, 8-bit exponent)
10521 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10522   predicate(UseSSE==0); // only when UseSSE is 0
10523   match(Set dst (ConvD2F src)); // double -> float, result delivered in a float stack slot
10524   format %{ "FST_S  $dst,$src\t# F-round" %}
10525   expand %{
10526     roundFloat_mem_reg(dst,src);
10527   %} // implemented entirely by expanding to roundFloat_mem_reg
10528 %}
10529 
10530 // Force rounding to single precision (24-bit significand, 8-bit exponent); FPU double in, XMM float out
10531 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10532   predicate(UseSSE==1); // floats live in XMM registers while doubles are still regDPR
10533   match(Set dst (ConvD2F src));
10534   effect( KILL cr ); // EFLAGS declared killed; the encoding adjusts ESP with SUB/ADD
10535   format %{ "SUB    ESP,4\n\t"
10536             "FST_S  [ESP],$src\t# F-round\n\t"
10537             "MOVSS  $dst,[ESP]\n\t"
10538             "ADD ESP,4" %}
10539   ins_encode %{
10540     __ subptr(rsp, 4); // reserve a 4-byte scratch slot on the stack
10541     if ($src$$reg != FPR1L_enc) { // src is not the register encoded as FPR1L_enc (presumably the FPU stack top -- confirm)
10542       __ fld_s($src$$reg-1); // push a copy of src ...
10543       __ fstp_s(Address(rsp, 0)); // ... and store-and-pop it as a single-precision value
10544     } else {
10545       __ fst_s(Address(rsp, 0)); // store as single precision without popping
10546     }
10547     __ movflt($dst$$XMMRegister, Address(rsp, 0)); // reload the rounded float into the XMM destination
10548     __ addptr(rsp, 4); // release the scratch slot
10549   %}
10550   ins_pipe( pipe_slow );
10551 %}
10552 
10553 // Force rounding double precision to single precision, XMM register to XMM register
10554 instruct convD2F_reg(regF dst, regD src) %{
10555   predicate(UseSSE>=2); // both float and double are XMM operands
10556   match(Set dst (ConvD2F src));
10557   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10558   ins_encode %{
10559     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); // single instruction; no stack traffic
10560   %}
10561   ins_pipe( pipe_slow );
10562 %}
10563 
10564 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ // ConvF2D between FPU registers
10565   predicate(UseSSE==0); // only when UseSSE is 0
10566   match(Set dst (ConvF2D src)); // float -> double
10567   format %{ "FST_S  $dst,$src\t# D-round" %}
10568   ins_encode( Pop_Reg_Reg_DPR(dst, src)); // NOTE(review): the encoding is register-to-register although the format prints FST_S -- confirm the format text is intended
10569   ins_pipe( fpu_reg_reg );
10570 %}
10571 
10572 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ // ConvF2D from an FPU float register into a double stack slot
10573   predicate(UseSSE==1);
10574   match(Set dst (ConvF2D src)); // float -> double
10575   format %{ "FST_D  $dst,$src\t# D-round" %}
10576   expand %{
10577     roundDouble_mem_reg(dst,src);
10578   %} // implemented entirely by expanding to roundDouble_mem_reg
10579 %}
10580 
10581 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ // ConvF2D from an XMM float into an FPU double register, via a stack scratch slot
10582   predicate(UseSSE==1); // floats live in XMM registers while doubles are still regDPR
10583   match(Set dst (ConvF2D src)); // float -> double
10584   effect( KILL cr ); // EFLAGS declared killed; the encoding adjusts ESP with SUB/ADD
10585   format %{ "SUB    ESP,4\n\t"
10586             "MOVSS  [ESP],$src\n\t"
10587             "FLD_S  [ESP]\n\t"
10588             "ADD    ESP,4\n\t"
10589             "FSTP   $dst\t# D-round" %}
10590   ins_encode %{
10591     __ subptr(rsp, 4); // reserve a 4-byte scratch slot on the stack
10592     __ movflt(Address(rsp, 0), $src$$XMMRegister); // spill the XMM float
10593     __ fld_s(Address(rsp, 0)); // load it onto the FPU stack
10594     __ addptr(rsp, 4); // release the scratch slot
10595     __ fstp_d($dst$$reg); // pop into the destination FPU register
10596   %}
10597   ins_pipe( pipe_slow );
10598 %}
10599 
10600 instruct convF2D_reg(regD dst, regF src) %{ // ConvF2D, XMM register to XMM register
10601   predicate(UseSSE>=2); // both float and double are XMM operands
10602   match(Set dst (ConvF2D src)); // float -> double
10603   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10604   ins_encode %{
10605     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); // single instruction; no stack traffic
10606   %}
10607   ins_pipe( pipe_slow );
10608 %}
10609 
10610 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10611 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10612   predicate(UseSSE<=1); // the double source is an FPU register
10613   match(Set dst (ConvD2I src)); // result is fixed to EAX by the eAXRegI operand class
10614   effect( KILL tmp, KILL cr ); // EDX and EFLAGS are declared killed
10615   format %{ "FLD    $src\t# Convert double to int \n\t"
10616             "FLDCW  trunc mode\n\t"
10617             "SUB    ESP,4\n\t"
10618             "FISTp  [ESP + #0]\n\t"
10619             "FLDCW  std/24-bit mode\n\t"
10620             "POP    EAX\n\t"
10621             "CMP    EAX,0x80000000\n\t"
10622             "JNE,s  fast\n\t"
10623             "FLD_D  $src\n\t"
10624             "CALL   d2i_wrapper\n"
10625       "fast:" %}
10626   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); // push src, then the shared fast-path/slow-path sequence listed in the format
10627   ins_pipe( pipe_slow );
10628 %}
10629 
10630 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10631 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10632   predicate(UseSSE>=2); // the double source is an XMM register
10633   match(Set dst (ConvD2I src)); // result is fixed to EAX by the eAXRegI operand class
10634   effect( KILL tmp, KILL cr ); // EDX and EFLAGS are declared killed
10635   format %{ "CVTTSD2SI $dst, $src\n\t"
10636             "CMP    $dst,0x80000000\n\t"
10637             "JNE,s  fast\n\t"
10638             "SUB    ESP, 8\n\t"
10639             "MOVSD  [ESP], $src\n\t"
10640             "FLD_D  [ESP]\n\t"
10641             "ADD    ESP, 8\n\t"
10642             "CALL   d2i_wrapper\n"
10643       "fast:" %}
10644   ins_encode %{
10645     Label fast;
10646     __ cvttsd2sil($dst$$Register, $src$$XMMRegister); // fast path: truncating convert
10647     __ cmpl($dst$$Register, 0x80000000); // 0x80000000 is the value that sends us to the slow path
10648     __ jccb(Assembler::notEqual, fast); // any other value is the final result
10649     __ subptr(rsp, 8); // slow path: copy src through an 8-byte scratch slot ...
10650     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10651     __ fld_d(Address(rsp, 0)); // ... onto the FPU stack, where the wrapper is handed its argument
10652     __ addptr(rsp, 8); // release the scratch slot before the call
10653     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10654     __ post_call_nop();
10655     __ bind(fast);
10656   %}
10657   ins_pipe( pipe_slow );
10658 %}
10659 
10660 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ // ConvD2L from an FPU double register; the long result lands in the EDX:EAX pair
10661   predicate(UseSSE<=1); // the double source is an FPU register
10662   match(Set dst (ConvD2L src));
10663   effect( KILL cr ); // EFLAGS is declared killed
10664   format %{ "FLD    $src\t# Convert double to long\n\t"
10665             "FLDCW  trunc mode\n\t"
10666             "SUB    ESP,8\n\t"
10667             "FISTp  [ESP + #0]\n\t"
10668             "FLDCW  std/24-bit mode\n\t"
10669             "POP    EAX\n\t"
10670             "POP    EDX\n\t"
10671             "CMP    EDX,0x80000000\n\t"
10672             "JNE,s  fast\n\t"
10673             "TEST   EAX,EAX\n\t"
10674             "JNE,s  fast\n\t"
10675             "FLD    $src\n\t"
10676             "CALL   d2l_wrapper\n"
10677       "fast:" %}
10678   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) ); // push src, then the shared fast-path/slow-path sequence listed in the format
10679   ins_pipe( pipe_slow );
10680 %}
10681 
10682 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10683 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10684   predicate (UseSSE>=2); // the double source is an XMM register
10685   match(Set dst (ConvD2L src)); // the long result lands in the EDX:EAX pair
10686   effect( KILL cr ); // EFLAGS is declared killed
10687   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10688             "MOVSD  [ESP],$src\n\t"
10689             "FLD_D  [ESP]\n\t"
10690             "FLDCW  trunc mode\n\t"
10691             "FISTp  [ESP + #0]\n\t"
10692             "FLDCW  std/24-bit mode\n\t"
10693             "POP    EAX\n\t"
10694             "POP    EDX\n\t"
10695             "CMP    EDX,0x80000000\n\t"
10696             "JNE,s  fast\n\t"
10697             "TEST   EAX,EAX\n\t"
10698             "JNE,s  fast\n\t"
10699             "SUB    ESP,8\n\t"
10700             "MOVSD  [ESP],$src\n\t"
10701             "FLD_D  [ESP]\n\t"
10702             "ADD    ESP,8\n\t"
10703             "CALL   d2l_wrapper\n"
10704       "fast:" %}
10705   ins_encode %{
10706     Label fast;
10707     __ subptr(rsp, 8); // 8-byte scratch slot: holds the double first, then the converted long
10708     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10709     __ fld_d(Address(rsp, 0)); // move src from XMM onto the FPU stack
10710     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); // switch the FPU control word to truncation
10711     __ fistp_d(Address(rsp, 0)); // store as a 64-bit integer over the scratch slot and pop
10712     // Restore the rounding mode, mask the exception
10713     if (Compile::current()->in_24_bit_fp_mode()) {
10714       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10715     } else {
10716       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10717     }
10718     // Load the converted long, adjust CPU stack
10719     __ pop(rax); // low 32 bits
10720     __ pop(rdx); // high 32 bits
10721     __ cmpl(rdx, 0x80000000); // slow path only when the result is exactly 0x80000000:00000000
10722     __ jccb(Assembler::notEqual, fast);
10723     __ testl(rax, rax);
10724     __ jccb(Assembler::notEqual, fast);
10725     __ subptr(rsp, 8); // slow path: reload src onto the FPU stack for the wrapper
10726     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10727     __ fld_d(Address(rsp, 0));
10728     __ addptr(rsp, 8); // release the scratch slot before the call
10729     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10730     __ post_call_nop();
10731     __ bind(fast);
10732   %}
10733   ins_pipe( pipe_slow );
10734 %}
10735 
10736 // Convert a float to an int.  Java semantics require we do complex
10737 // manglations in the corner cases.  So we set the rounding mode to
10738 // 'zero', store the darned float down as an int, and reset the
10739 // rounding mode to 'nearest'.  The hardware stores a flag value down
10740 // if we would overflow or converted a NAN; we check for this
10741 // and go the slow path if needed.
10742 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10743   predicate(UseSSE==0); // the float source is an FPU register
10744   match(Set dst (ConvF2I src)); // result is fixed to EAX by the eAXRegI operand class
10745   effect( KILL tmp, KILL cr ); // EDX and EFLAGS are declared killed
10746   format %{ "FLD    $src\t# Convert float to int \n\t"
10747             "FLDCW  trunc mode\n\t"
10748             "SUB    ESP,4\n\t"
10749             "FISTp  [ESP + #0]\n\t"
10750             "FLDCW  std/24-bit mode\n\t"
10751             "POP    EAX\n\t"
10752             "CMP    EAX,0x80000000\n\t"
10753             "JNE,s  fast\n\t"
10754             "FLD    $src\n\t"
10755             "CALL   d2i_wrapper\n"
10756       "fast:" %}
10757   // DPR2I_encoding works for FPR2I
10758   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); // push src, then the same sequence the double-to-int rule uses
10759   ins_pipe( pipe_slow );
10760 %}
10761 
10762 // Convert a float in xmm to an int reg.
10763 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10764   predicate(UseSSE>=1); // the float source is an XMM register
10765   match(Set dst (ConvF2I src)); // result is fixed to EAX by the eAXRegI operand class
10766   effect( KILL tmp, KILL cr ); // EDX and EFLAGS are declared killed
10767   format %{ "CVTTSS2SI $dst, $src\n\t"
10768             "CMP    $dst,0x80000000\n\t"
10769             "JNE,s  fast\n\t"
10770             "SUB    ESP, 4\n\t"
10771             "MOVSS  [ESP], $src\n\t"
10772             "FLD    [ESP]\n\t"
10773             "ADD    ESP, 4\n\t"
10774             "CALL   d2i_wrapper\n"
10775       "fast:" %}
10776   ins_encode %{
10777     Label fast;
10778     __ cvttss2sil($dst$$Register, $src$$XMMRegister); // fast path: truncating convert
10779     __ cmpl($dst$$Register, 0x80000000); // 0x80000000 is the value that sends us to the slow path
10780     __ jccb(Assembler::notEqual, fast); // any other value is the final result
10781     __ subptr(rsp, 4); // slow path: copy src through a 4-byte scratch slot ...
10782     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10783     __ fld_s(Address(rsp, 0)); // ... onto the FPU stack, where the wrapper is handed its argument
10784     __ addptr(rsp, 4); // release the scratch slot before the call
10785     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper()))); // the d2i wrapper is shared with the double-to-int rules
10786     __ post_call_nop();
10787     __ bind(fast);
10788   %}
10789   ins_pipe( pipe_slow );
10790 %}
10791 
10792 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ // ConvF2L from an FPU float register; the long result lands in the EDX:EAX pair
10793   predicate(UseSSE==0); // the float source is an FPU register
10794   match(Set dst (ConvF2L src));
10795   effect( KILL cr ); // EFLAGS is declared killed
10796   format %{ "FLD    $src\t# Convert float to long\n\t"
10797             "FLDCW  trunc mode\n\t"
10798             "SUB    ESP,8\n\t"
10799             "FISTp  [ESP + #0]\n\t"
10800             "FLDCW  std/24-bit mode\n\t"
10801             "POP    EAX\n\t"
10802             "POP    EDX\n\t"
10803             "CMP    EDX,0x80000000\n\t"
10804             "JNE,s  fast\n\t"
10805             "TEST   EAX,EAX\n\t"
10806             "JNE,s  fast\n\t"
10807             "FLD    $src\n\t"
10808             "CALL   d2l_wrapper\n"
10809       "fast:" %}
10810   // DPR2L_encoding works for FPR2L
10811   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); // push src, then the same sequence the double-to-long rule uses
10812   ins_pipe( pipe_slow );
10813 %}
10814 
10815 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10816 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10817   predicate (UseSSE>=1); // the float source is an XMM register
10818   match(Set dst (ConvF2L src)); // the long result lands in the EDX:EAX pair
10819   effect( KILL cr ); // EFLAGS is declared killed
10820   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10821             "MOVSS  [ESP],$src\n\t"
10822             "FLD_S  [ESP]\n\t"
10823             "FLDCW  trunc mode\n\t"
10824             "FISTp  [ESP + #0]\n\t"
10825             "FLDCW  std/24-bit mode\n\t"
10826             "POP    EAX\n\t"
10827             "POP    EDX\n\t"
10828             "CMP    EDX,0x80000000\n\t"
10829             "JNE,s  fast\n\t"
10830             "TEST   EAX,EAX\n\t"
10831             "JNE,s  fast\n\t"
10832             "SUB    ESP,4\t# Convert float to long\n\t"
10833             "MOVSS  [ESP],$src\n\t"
10834             "FLD_S  [ESP]\n\t"
10835             "ADD    ESP,4\n\t"
10836             "CALL   d2l_wrapper\n"
10837       "fast:" %}
10838   ins_encode %{
10839     Label fast;
10840     __ subptr(rsp, 8); // 8 bytes although the float needs 4: the slot is reused for the 64-bit integer result
10841     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10842     __ fld_s(Address(rsp, 0)); // move src from XMM onto the FPU stack
10843     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); // switch the FPU control word to truncation
10844     __ fistp_d(Address(rsp, 0)); // store as a 64-bit integer over the scratch slot and pop
10845     // Restore the rounding mode, mask the exception
10846     if (Compile::current()->in_24_bit_fp_mode()) {
10847       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10848     } else {
10849       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10850     }
10851     // Load the converted long, adjust CPU stack
10852     __ pop(rax); // low 32 bits
10853     __ pop(rdx); // high 32 bits
10854     __ cmpl(rdx, 0x80000000); // slow path only when the result is exactly 0x80000000:00000000
10855     __ jccb(Assembler::notEqual, fast);
10856     __ testl(rax, rax);
10857     __ jccb(Assembler::notEqual, fast);
10858     __ subptr(rsp, 4); // slow path: reload src onto the FPU stack for the wrapper
10859     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10860     __ fld_s(Address(rsp, 0));
10861     __ addptr(rsp, 4); // release the scratch slot before the call
10862     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); // the d2l wrapper is shared with the double-to-long rules
10863     __ post_call_nop();
10864     __ bind(fast);
10865   %}
10866   ins_pipe( pipe_slow );
10867 %}
10868 
10869 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ // ConvI2D from an int stack slot into an FPU double register
10870   predicate( UseSSE<=1 ); // doubles are FPU registers when UseSSE is 0 or 1
10871   match(Set dst (ConvI2D src));
10872   format %{ "FILD   $src\n\t"
10873             "FSTP   $dst" %}
10874   opcode(0xDB, 0x0);  /* DB /0 */
10875   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); // integer load from memory onto the FPU stack, then pop into dst
10876   ins_pipe( fpu_reg_mem );
10877 %}
10878 
10879 instruct convI2D_reg(regD dst, rRegI src) %{ // ConvI2D from a general register into an XMM double
10880   predicate( UseSSE>=2 && !UseXmmI2D ); // the UseXmmI2D case is handled by convXI2D_reg instead
10881   match(Set dst (ConvI2D src));
10882   format %{ "CVTSI2SD $dst,$src" %}
10883   ins_encode %{
10884     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10885   %}
10886   ins_pipe( pipe_slow );
10887 %}
10888 
10889 instruct convI2D_mem(regD dst, memory mem) %{ // ConvI2D with the int load folded into the conversion
10890   predicate( UseSSE>=2 );
10891   match(Set dst (ConvI2D (LoadI mem))); // consumes the LoadI so no separate load is emitted
10892   format %{ "CVTSI2SD $dst,$mem" %}
10893   ins_encode %{
10894     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); // memory-operand form of the conversion
10895   %}
10896   ins_pipe( pipe_slow );
10897 %}
10898 
10899 instruct convXI2D_reg(regD dst, rRegI src)
10900 %{ // ConvI2D done inside the XMM unit: MOVD then CVTDQ2PD
10901   predicate( UseSSE>=2 && UseXmmI2D ); // chosen instead of convI2D_reg when UseXmmI2D is set
10902   match(Set dst (ConvI2D src));
10903 
10904   format %{ "MOVD  $dst,$src\n\t"
10905             "CVTDQ2PD $dst,$dst\t# i2d" %}
10906   ins_encode %{
10907     __ movdl($dst$$XMMRegister, $src$$Register); // move the 32-bit int into the XMM register
10908     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); // convert in place
10909   %}
10910   ins_pipe(pipe_slow); // XXX
10911 %}
10912 
10913 instruct convI2DPR_mem(regDPR dst, memory mem) %{ // ConvI2D with the int load folded in; result in an FPU double register
10914   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); // FPU doubles, and 24-bit instruction selection off
10915   match(Set dst (ConvI2D (LoadI mem))); // consumes the LoadI so no separate load is emitted
10916   format %{ "FILD   $mem\n\t"
10917             "FSTP   $dst" %}
10918   opcode(0xDB);      /* DB /0 */
10919   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
10920               Pop_Reg_DPR(dst), ClearInstMark); // primary opcode plus /0 ModRM for mem, then pop into dst; bracketed by the instruction mark
10921   ins_pipe( fpu_reg_mem );
10922 %}
10923 
10924 // Convert a byte to a float; no rounding step needed.
10925 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
10926   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); // input must be an AndI with the constant 255, i.e. a value already masked to 8 bits
10927   match(Set dst (ConvI2F src));
10928   format %{ "FILD   $src\n\t"
10929             "FSTP   $dst" %}
10930 
10931   opcode(0xDB, 0x0);  /* DB /0 */
10932   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); // integer load from memory onto the FPU stack, then pop into dst
10933   ins_pipe( fpu_reg_mem );
10934 %}
10935 
10936 // In 24-bit mode, force exponent rounding by storing back out
10937 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
10938   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); // UseSSE is 0 and 24-bit instruction selection is on
10939   match(Set dst (ConvI2F src)); // int stack slot -> float stack slot
10940   ins_cost(200);
10941   format %{ "FILD   $src\n\t"
10942             "FSTP_S $dst" %}
10943   opcode(0xDB, 0x0);  /* DB /0 */
10944   ins_encode( Push_Mem_I(src),
10945               Pop_Mem_FPR(dst)); // integer load onto the FPU stack, then pop to memory as a float
10946   ins_pipe( fpu_mem_mem );
10947 %}
10948 
10949 // In 24-bit mode, force exponent rounding by storing back out (variant with the int load folded in)
10950 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
10951   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); // UseSSE is 0 and 24-bit instruction selection is on
10952   match(Set dst (ConvI2F (LoadI mem))); // consumes the LoadI so no separate load is emitted
10953   ins_cost(200);
10954   format %{ "FILD   $mem\n\t"
10955             "FSTP_S $dst" %}
10956   opcode(0xDB);  /* DB /0 */
10957   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
10958               Pop_Mem_FPR(dst), ClearInstMark); // primary opcode plus /0 ModRM for mem, then pop to memory as a float
10959   ins_pipe( fpu_mem_mem );
10960 %}
10961 
10962 // This instruction does not round to 24-bits; int stack slot -> FPU float register
10963 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
10964   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); // UseSSE is 0 and 24-bit instruction selection is off
10965   match(Set dst (ConvI2F src));
10966   format %{ "FILD   $src\n\t"
10967             "FSTP   $dst" %}
10968   opcode(0xDB, 0x0);  /* DB /0 */
10969   ins_encode( Push_Mem_I(src),
10970               Pop_Reg_FPR(dst)); // integer load onto the FPU stack, then pop into dst
10971   ins_pipe( fpu_reg_mem );
10972 %}
10973 
10974 // This instruction does not round to 24-bits; variant with the int load folded in
10975 instruct convI2FPR_mem(regFPR dst, memory mem) %{
10976   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); // UseSSE is 0 and 24-bit instruction selection is off
10977   match(Set dst (ConvI2F (LoadI mem))); // consumes the LoadI so no separate load is emitted
10978   format %{ "FILD   $mem\n\t"
10979             "FSTP   $dst" %}
10980   opcode(0xDB);      /* DB /0 */
10981   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem),
10982               Pop_Reg_FPR(dst), ClearInstMark); // primary opcode plus /0 ModRM for mem, then pop into dst
10983   ins_pipe( fpu_reg_mem );
10984 %}
10985 
10986 // Convert an int to a float in xmm; no rounding step needed.
10987 instruct convI2F_reg(regF dst, rRegI src) %{
10988   predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F )); // always at SSE level 1; at level 2+ only when UseXmmI2F is off (convXI2F_reg covers the other case)
10989   match(Set dst (ConvI2F src));
10990   format %{ "CVTSI2SS $dst, $src" %}
10991   ins_encode %{
10992     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
10993   %}
10994   ins_pipe( pipe_slow );
10995 %}
10996 
10997  instruct convXI2F_reg(regF dst, rRegI src)
10998 %{ // ConvI2F done inside the XMM unit: MOVD then CVTDQ2PS
10999   predicate( UseSSE>=2 && UseXmmI2F ); // chosen instead of convI2F_reg when UseXmmI2F is set
11000   match(Set dst (ConvI2F src));
11001 
11002   format %{ "MOVD  $dst,$src\n\t"
11003             "CVTDQ2PS $dst,$dst\t# i2f" %}
11004   ins_encode %{
11005     __ movdl($dst$$XMMRegister, $src$$Register); // move the 32-bit int into the XMM register
11006     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); // convert in place
11007   %}
11008   ins_pipe(pipe_slow); // XXX
11009 %}
11010 
11011 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{ // sign-extending int -> long into a register pair
11012   match(Set dst (ConvI2L src));
11013   effect(KILL cr); // EFLAGS is declared killed (the format's SAR modifies it)
11014   ins_cost(375);
11015   format %{ "MOV    $dst.lo,$src\n\t"
11016             "MOV    $dst.hi,$src\n\t"
11017             "SAR    $dst.hi,31" %}
11018   ins_encode(convert_int_long(dst,src)); // per the format: copy src to both halves, then shift the high half right arithmetically by 31
11019   ins_pipe( ialu_reg_reg_long );
11020 %}
11021 
11022 // Zero-extend convert int to long: matches ConvI2L immediately masked with the immL_32bits constant
11023 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11024   match(Set dst (AndL (ConvI2L src) mask) ); // the AndL and the ConvI2L collapse into one rule
11025   effect( KILL flags ); // EFLAGS is declared killed (XOR modifies it)
11026   ins_cost(250);
11027   format %{ "MOV    $dst.lo,$src\n\t"
11028             "XOR    $dst.hi,$dst.hi" %}
11029   opcode(0x33); // XOR
11030   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); // copy src into the low half, then XOR the high half with itself to clear it
11031   ins_pipe( ialu_reg_reg_long );
11032 %}
11033 
11034 // Zero-extend long: keep the low 32 bits of src and clear the high half
11035 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11036   match(Set dst (AndL src mask) ); // AndL with the immL_32bits constant
11037   effect( KILL flags ); // EFLAGS is declared killed (XOR modifies it)
11038   ins_cost(250);
11039   format %{ "MOV    $dst.lo,$src.lo\n\t"
11040             "XOR    $dst.hi,$dst.hi\n\t" %}
11041   opcode(0x33); // XOR
11042   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); // copy the low half, then XOR the high half with itself to clear it
11043   ins_pipe( ialu_reg_reg_long );
11044 %}
11045 
11046 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ // ConvL2D through the FPU; result popped to a double stack slot
11047   predicate (UseSSE<=1); // doubles are not XMM operands at these SSE levels
11048   match(Set dst (ConvL2D src));
11049   effect( KILL cr ); // EFLAGS is declared killed (the format's ADD ESP modifies it)
11050   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11051             "PUSH   $src.lo\n\t"
11052             "FILD   ST,[ESP + #0]\n\t"
11053             "ADD    ESP,8\n\t"
11054             "FSTP_D $dst\t# D-round" %}
11055   opcode(0xDF, 0x5);  /* DF /5 */
11056   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); // push the long and load it as a 64-bit integer, then pop to dst
11057   ins_pipe( pipe_slow );
11058 %}
11059 
11060 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ // ConvL2D through the FPU, with the result moved into an XMM double
11061   predicate (UseSSE>=2); // the double destination is an XMM register
11062   match(Set dst (ConvL2D src));
11063   effect( KILL cr ); // EFLAGS is declared killed (the format's ADD ESP modifies it)
11064   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11065             "PUSH   $src.lo\n\t"
11066             "FILD_D [ESP]\n\t"
11067             "FSTP_D [ESP]\n\t"
11068             "MOVSD  $dst,[ESP]\n\t"
11069             "ADD    ESP,8" %}
11070   opcode(0xDF, 0x5);  /* DF /5 */
11071   ins_encode(convert_long_double2(src), Push_ResultD(dst)); // the pushed long's stack space doubles as the slot the result is stored to and reloaded from
11072   ins_pipe( pipe_slow );
11073 %}
11074 
11075 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ // ConvL2F through the FPU, with the result moved into an XMM float
11076   predicate (UseSSE>=1); // the float destination is an XMM register
11077   match(Set dst (ConvL2F src));
11078   effect( KILL cr ); // EFLAGS is declared killed (the format's ADD ESP modifies it)
11079   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11080             "PUSH   $src.lo\n\t"
11081             "FILD_D [ESP]\n\t"
11082             "FSTP_S [ESP]\n\t"
11083             "MOVSS  $dst,[ESP]\n\t"
11084             "ADD    ESP,8" %}
11085   opcode(0xDF, 0x5);  /* DF /5 */
11086   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); // 0x8 matches the 8 bytes pushed for the long, which the format releases with ADD ESP,8
11087   ins_pipe( pipe_slow );
11088 %}
11089 
11090 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ // ConvL2F through the FPU; result popped to a float stack slot. NOTE(review): unlike its siblings this rule has no UseSSE predicate -- confirm that is intended
11091   match(Set dst (ConvL2F src));
11092   effect( KILL cr ); // EFLAGS is declared killed (the format's ADD ESP modifies it)
11093   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11094             "PUSH   $src.lo\n\t"
11095             "FILD   ST,[ESP + #0]\n\t"
11096             "ADD    ESP,8\n\t"
11097             "FSTP_S $dst\t# F-round" %}
11098   opcode(0xDF, 0x5);  /* DF /5 */
11099   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); // push the long and load it as a 64-bit integer, then pop to dst as a float
11100   ins_pipe( pipe_slow );
11101 %}
11102 
11103 instruct convL2I_reg( rRegI dst, eRegL src ) %{ // long -> int: take the low 32 bits of the register pair
11104   match(Set dst (ConvL2I src));
11105   effect( DEF dst, USE src );
11106   format %{ "MOV    $dst,$src.lo" %}
11107   ins_encode(enc_CopyL_Lo(dst,src)); // copies only the low half, per the format
11108   ins_pipe( ialu_reg_reg );
11109 %}
11110 
11111 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ // raw-bits move: float stack slot -> general register
11112   match(Set dst (MoveF2I src));
11113   effect( DEF dst, USE src );
11114   ins_cost(100);
11115   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11116   ins_encode %{
11117     __ movl($dst$$Register, Address(rsp, $src$$disp)); // plain 32-bit load from the slot; no numeric conversion
11118   %}
11119   ins_pipe( ialu_reg_mem );
11120 %}
11121 
11122 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{ // raw-bits move: FPU float register -> int stack slot
11123   predicate(UseSSE==0); // the float source is an FPU register
11124   match(Set dst (MoveF2I src));
11125   effect( DEF dst, USE src );
11126 
11127   ins_cost(125);
11128   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11129   ins_encode( Pop_Mem_Reg_FPR(dst, src) ); // same encoding class that roundFloat_mem_reg uses
11130   ins_pipe( fpu_mem_reg );
11131 %}
11132 
11133 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ // raw-bits move: XMM float -> int stack slot
11134   predicate(UseSSE>=1); // the float source is an XMM register
11135   match(Set dst (MoveF2I src));
11136   effect( DEF dst, USE src );
11137 
11138   ins_cost(95);
11139   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11140   ins_encode %{
11141     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); // store the float to the slot
11142   %}
11143   ins_pipe( pipe_slow );
11144 %}
11145 
11146 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{ // raw-bits move: XMM float -> general register, no memory round trip
11147   predicate(UseSSE>=2);
11148   match(Set dst (MoveF2I src));
11149   effect( DEF dst, USE src );
11150   ins_cost(85); // cheapest of the MoveF2I rules in this group
11151   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11152   ins_encode %{
11153     __ movdl($dst$$Register, $src$$XMMRegister);
11154   %}
11155   ins_pipe( pipe_slow );
11156 %}
11157 
11158 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ // raw-bits move: general register -> float stack slot
11159   match(Set dst (MoveI2F src));
11160   effect( DEF dst, USE src );
11161 
11162   ins_cost(100);
11163   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11164   ins_encode %{
11165     __ movl(Address(rsp, $dst$$disp), $src$$Register); // plain 32-bit store to the slot; no numeric conversion
11166   %}
11167   ins_pipe( ialu_mem_reg );
11168 %}
11169 
11170 
11171 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ // raw-bits move: int stack slot -> FPU float register
11172   predicate(UseSSE==0); // the float destination is an FPU register
11173   match(Set dst (MoveI2F src));
11174   effect(DEF dst, USE src);
11175 
11176   ins_cost(125);
11177   format %{ "FLD_S  $src\n\t"
11178             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11179   opcode(0xD9);               /* D9 /0, FLD m32real */
11180   ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
11181               Pop_Reg_FPR(dst), ClearInstMark ); // load the slot's 32 bits as a float, then pop into dst
11182   ins_pipe( fpu_reg_mem );
11183 %}
11184 
11185 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ // raw-bits move: int stack slot -> XMM float
11186   predicate(UseSSE>=1); // the float destination is an XMM register
11187   match(Set dst (MoveI2F src));
11188   effect( DEF dst, USE src );
11189 
11190   ins_cost(95);
11191   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11192   ins_encode %{
11193     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); // load the slot as a float
11194   %}
11195   ins_pipe( pipe_slow );
11196 %}
11197 
11198 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ // raw-bits move: general register -> XMM float, no memory round trip
11199   predicate(UseSSE>=2);
11200   match(Set dst (MoveI2F src));
11201   effect( DEF dst, USE src );
11202 
11203   ins_cost(85); // cheapest of the MoveI2F rules in this group
11204   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11205   ins_encode %{
11206     __ movdl($dst$$XMMRegister, $src$$Register);
11207   %}
11208   ins_pipe( pipe_slow );
11209 %}
11210 
11211 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ // raw-bits move: double stack slot -> long register pair
11212   match(Set dst (MoveD2L src));
11213   effect(DEF dst, USE src);
11214 
11215   ins_cost(250);
11216   format %{ "MOV    $dst.lo,$src\n\t"
11217             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11218   opcode(0x8B, 0x8B); // primary and secondary opcode are the same value, one per 32-bit load
11219   ins_encode( SetInstMark, OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src), ClearInstMark); // low half from src, high half from src+4
11220   ins_pipe( ialu_mem_long_reg );
11221 %}
11222 
11223 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ // raw-bits move: FPU double register -> long stack slot
11224   predicate(UseSSE<=1); // the double source is an FPU register
11225   match(Set dst (MoveD2L src));
11226   effect(DEF dst, USE src);
11227 
11228   ins_cost(125);
11229   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11230   ins_encode( Pop_Mem_Reg_DPR(dst, src) ); // same encoding class that roundDouble_mem_reg uses
11231   ins_pipe( fpu_mem_reg );
11232 %}
11233 
11234 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ // raw-bits move: XMM double -> long stack slot
11235   predicate(UseSSE>=2); // the double source is an XMM register
11236   match(Set dst (MoveD2L src));
11237   effect(DEF dst, USE src);
11238   ins_cost(95);
11239   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11240   ins_encode %{
11241     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); // store the double to the slot
11242   %}
11243   ins_pipe( pipe_slow );
11244 %}
11245 
11246 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ // raw-bits move: XMM double -> long register pair, using a scratch XMM register
11247   predicate(UseSSE>=2);
11248   match(Set dst (MoveD2L src));
11249   effect(DEF dst, USE src, TEMP tmp); // tmp is a scratch XMM register for the high half
11250   ins_cost(85);
11251   format %{ "MOVD   $dst.lo,$src\n\t"
11252             "PSHUFLW $tmp,$src,0x4E\n\t"
11253             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11254   ins_encode %{
11255     __ movdl($dst$$Register, $src$$XMMRegister); // low 32 bits straight from src
11256     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); // 0x4E swaps the two 32-bit halves of the low quadword, so tmp's low dword holds src's upper half
11257     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); // high 32 bits from the shuffled copy
11258   %}
11259   ins_pipe( pipe_slow );
11260 %}
11261 
11262 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ // raw-bits move: long register pair -> double stack slot
11263   match(Set dst (MoveL2D src));
11264   effect(DEF dst, USE src);
11265 
11266   ins_cost(200);
11267   format %{ "MOV    $dst,$src.lo\n\t"
11268             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11269   opcode(0x89, 0x89); // primary and secondary opcode are the same value, one per 32-bit store
11270   ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark ); // low half to dst, high half to dst+4
11271   ins_pipe( ialu_mem_long_reg );
11272 %}
11273 
11274 
11275 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ // raw-bits move: long stack slot -> FPU double register
11276   predicate(UseSSE<=1); // the double destination is an FPU register
11277   match(Set dst (MoveL2D src));
11278   effect(DEF dst, USE src);
11279   ins_cost(125);
11280 
11281   format %{ "FLD_D  $src\n\t"
11282             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11283   opcode(0xDD);               /* DD /0, FLD m64real */
11284   ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src),
11285               Pop_Reg_DPR(dst), ClearInstMark ); // load the slot's 64 bits as a double, then pop into dst
11286   ins_pipe( fpu_reg_mem );
11287 %}
11288 
11289 
11290 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ // raw-bits move: long stack slot -> XMM double (UseXmmLoadAndClearUpper variant)
11291   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); // paired with MoveL2D_stack_reg_sse_partial, which takes the opposite flag value
11292   match(Set dst (MoveL2D src));
11293   effect(DEF dst, USE src);
11294 
11295   ins_cost(95);
11296   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11297   ins_encode %{
11298     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); // same movdbl call as the partial variant; only the predicate and format text differ
11299   %}
11300   ins_pipe( pipe_slow );
11301 %}
11302 
11303 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ // raw-bits move: long stack slot -> XMM double (variant for !UseXmmLoadAndClearUpper)
11304   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); // paired with MoveL2D_stack_reg_sse, which takes the opposite flag value
11305   match(Set dst (MoveL2D src));
11306   effect(DEF dst, USE src);
11307 
11308   ins_cost(95);
11309   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11310   ins_encode %{
11311     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); // presumably movdbl itself picks MOVLPD under this flag, as the format suggests -- confirm in the macro assembler
11312   %}
11313   ins_pipe( pipe_slow );
11314 %}
11315 
// MoveL2D from a long register pair into an XMM register (UseSSE>=2).
// The two 32-bit halves of $src are moved into $dst and $tmp separately and
// then interleaved so that $dst holds hi:lo in its low 64 bits.
11316 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11317   predicate(UseSSE>=2);
11318   match(Set dst (MoveL2D src));
11319   effect(TEMP dst, USE src, TEMP tmp);
11320   ins_cost(85);
11321   format %{ "MOVD   $dst,$src.lo\n\t"
11322             "MOVD   $tmp,$src.hi\n\t"
11323             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11324   ins_encode %{
          // dst[31:0] = src.lo
11325     __ movdl($dst$$XMMRegister, $src$$Register);
          // tmp[31:0] = src.hi
11326     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
          // Interleave the low dwords: dst[31:0] stays src.lo, dst[63:32] = src.hi.
11327     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11328   %}
11329   ins_pipe( pipe_slow );
11330 %}
11331 
11332 //----------------------------- CompressBits/ExpandBits ------------------------
11333 
// CompressBits for long values (predicate: node type is long), built from two
// 32-bit PEXT operations on the halves of the register pairs followed by a
// merge step. $rtmp (GPR) and $xtmp (XMM) are temporaries; flags are killed.
11334 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11335   predicate(n->bottom_type()->isa_long());
11336   match(Set dst (CompressBits src mask));
11337   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11338   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11339   ins_encode %{
11340     Label exit, partail_result;
11341     // Extract the upper and lower 32 bits of the source independently into the destination
11342     // register pair, then merge the two partial results so that the upper destination
11343     // result is laid out contiguously after the lower destination result.
11344     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11345     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
          // rtmp = number of set bits in the lower mask = number of valid bits in dst.lo.
11346     __ popcntl($rtmp$$Register, $mask$$Register);
11347     // Skip merging if bit count of lower mask register is equal to 32 (register size).
11348     __ cmpl($rtmp$$Register, 32);
11349     __ jccb(Assembler::equal, exit);
11350     // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
11351     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11352     // Shift left the contents of upper destination register by true bit count of lower mask register
11353     // and merge with lower destination register.
          // The count is in 0..31 here because the 32 case has already branched to exit.
11354     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11355     __ orl($dst$$Register, $rtmp$$Register);
          // Restore the saved bit count of the lower mask.
11356     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11357     // Zero out upper destination register if true bit count of lower 32 bit mask is zero
11358     // since contents of upper destination have already been copied to lower destination
11359     // register.
11360     __ cmpl($rtmp$$Register, 0);
11361     __ jccb(Assembler::greater, partail_result);
11362     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11363     __ jmp(exit);
11364     __ bind(partail_result);
11365     // Perform right shift over upper destination register to move out bits already copied
11366     // to lower destination register.
          // subl + negl computes rtmp = 32 - popcount(mask.lo), the number of bits moved down.
11367     __ subl($rtmp$$Register, 32);
11368     __ negl($rtmp$$Register);
11369     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11370     __ bind(exit);
11371   %}
11372   ins_pipe( pipe_slow );
11373 %}
11374 
// ExpandBits for long values (predicate: node type is long), built from 32-bit
// PDEP operations on the halves of the register pairs. $rtmp (GPR) and $xtmp
// (XMM) are temporaries; flags are killed. The low half of $mask is used as
// scratch inside the sequence and restored from $xtmp before the exit label.
11375 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11376   predicate(n->bottom_type()->isa_long());
11377   match(Set dst (ExpandBits src mask));
11378   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11379   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11380   ins_encode %{
11381     // The expand (deposit) operation sequentially reads the bits from source register starting
11382     // from LSB and lays them out into destination register at bit locations corresponding to
11383     // true bits in mask register. Thus number of source bits read are equal to combined true
11384     // bit count of mask register pair.
11385     Label exit, mask_clipping;
11386     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
          // Provisional upper result; it is final only if the lower mask consumes all 32 lower source bits.
11387     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11388     __ popcntl($rtmp$$Register, $mask$$Register);
11389     // If true bit count of lower mask register is 32 then none of bit of lower source register
11390     // will feed to upper destination register.
11391     __ cmpl($rtmp$$Register, 32);
11392     __ jccb(Assembler::equal, exit);
11393     // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
11394     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11395     // Shift right the contents of lower source register to remove already consumed bits.
11396     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11397     // Deposit the remaining bits of lower source register, starting from LSB, under the
11398     // influence of upper mask register.
11399     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
          // Restore popcount(mask.lo) and turn it into rtmp = 32 - popcount(mask.lo), i.e. the
          // number of lower-source bits that were left over for the upper mask.
11400     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11401     __ subl($rtmp$$Register, 32);
11402     __ negl($rtmp$$Register);
          // Save mask.lo in xtmp and reuse the mask.lo register as a working copy of mask.hi.
11403     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11404     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
11405     // Clear the set bits in upper mask register which have been used to extract the contents
11406     // from lower source register.
          // Each iteration clears the lowest set bit; the loop runs rtmp times (rtmp >= 1 here).
11407     __ bind(mask_clipping);
11408     __ blsrl($mask$$Register, $mask$$Register);
11409     __ decrementl($rtmp$$Register, 1);
11410     __ jccb(Assembler::greater, mask_clipping);
11411     // Starting from LSB deposit the bits from upper source register under the influence of
11412     // remaining set bits in upper mask register.
11413     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11414     // Merge the partial results extracted from lower and upper source register bits.
11415     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
          // Restore the original lower mask register.
11416     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11417     __ bind(exit);
11418   %}
11419   ins_pipe( pipe_slow );
11420 %}
11421 
11422 // =======================================================================
11423 // Fast clearing of an array
11424 // Small non-constant length ClearArray for non-AVX512 targets.
// Matches ClearArray when the node is not marked large and UseAVX <= 2.
// $cnt and $base are inputs that are also clobbered (USE_KILL); $zero, $tmp
// and the flags are clobbered as well. The code is produced by clear_mem()
// with its fifth argument false and no opmask register (knoreg); the format
// template only mirrors, as text, the variant chosen by UseFastStosb /
// UseXMMForObjInit.
// NOTE(review): the listing text mixes 64-bit register names (rcx, RDI, RAX)
// with the 32-bit operand classes used here -- presumably copied from the
// 64-bit description; confirm whether it should be adjusted.
11425 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11426   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11427   match(Set dummy (ClearArray cnt base));
11428   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11429 
11430   format %{ $$template
11431     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11432     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11433     $$emit$$"JG     LARGE\n\t"
11434     $$emit$$"SHL    ECX, 1\n\t"
11435     $$emit$$"DEC    ECX\n\t"
11436     $$emit$$"JS     DONE\t# Zero length\n\t"
11437     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11438     $$emit$$"DEC    ECX\n\t"
11439     $$emit$$"JGE    LOOP\n\t"
11440     $$emit$$"JMP    DONE\n\t"
11441     $$emit$$"# LARGE:\n\t"
11442     if (UseFastStosb) {
11443        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11444        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11445     } else if (UseXMMForObjInit) {
11446        $$emit$$"MOV     RDI,RAX\n\t"
11447        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11448        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11449        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11450        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11451        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11452        $$emit$$"ADD     0x40,RAX\n\t"
11453        $$emit$$"# L_zero_64_bytes:\n\t"
11454        $$emit$$"SUB     0x8,RCX\n\t"
11455        $$emit$$"JGE     L_loop\n\t"
11456        $$emit$$"ADD     0x4,RCX\n\t"
11457        $$emit$$"JL      L_tail\n\t"
11458        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11459        $$emit$$"ADD     0x20,RAX\n\t"
11460        $$emit$$"SUB     0x4,RCX\n\t"
11461        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11462        $$emit$$"ADD     0x4,RCX\n\t"
11463        $$emit$$"JLE     L_end\n\t"
11464        $$emit$$"DEC     RCX\n\t"
11465        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11466        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11467        $$emit$$"ADD     0x8,RAX\n\t"
11468        $$emit$$"DEC     RCX\n\t"
11469        $$emit$$"JGE     L_sloop\n\t"
11470        $$emit$$"# L_end:\n\t"
11471     } else {
11472        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11473        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11474     }
11475     $$emit$$"# DONE"
11476   %}
11477   ins_encode %{
11478     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11479                  $tmp$$XMMRegister, false, knoreg);
11480   %}
11481   ins_pipe( pipe_slow );
11482 %}
11483 
11484 // Small non-constant length ClearArray for AVX512 targets.
// Matches ClearArray when the node is not marked large and UseAVX > 2.
// Same shape as the non-AVX512 rule, but $tmp comes from the legRegD class and
// an opmask TEMP ($ktmp) is handed to clear_mem() instead of knoreg. $cnt and
// $base are USE_KILL; $zero and the flags are killed.
// NOTE(review): the listing text mixes 64-bit register names (rcx, RDI, RAX)
// with the 32-bit operand classes used here -- confirm whether it should be
// adjusted.
11485 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11486   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11487   match(Set dummy (ClearArray cnt base));
11488   ins_cost(125);
11489   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11490 
11491   format %{ $$template
11492     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11493     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11494     $$emit$$"JG     LARGE\n\t"
11495     $$emit$$"SHL    ECX, 1\n\t"
11496     $$emit$$"DEC    ECX\n\t"
11497     $$emit$$"JS     DONE\t# Zero length\n\t"
11498     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11499     $$emit$$"DEC    ECX\n\t"
11500     $$emit$$"JGE    LOOP\n\t"
11501     $$emit$$"JMP    DONE\n\t"
11502     $$emit$$"# LARGE:\n\t"
11503     if (UseFastStosb) {
11504        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11505        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11506     } else if (UseXMMForObjInit) {
11507        $$emit$$"MOV     RDI,RAX\n\t"
11508        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11509        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11510        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11511        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11512        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11513        $$emit$$"ADD     0x40,RAX\n\t"
11514        $$emit$$"# L_zero_64_bytes:\n\t"
11515        $$emit$$"SUB     0x8,RCX\n\t"
11516        $$emit$$"JGE     L_loop\n\t"
11517        $$emit$$"ADD     0x4,RCX\n\t"
11518        $$emit$$"JL      L_tail\n\t"
11519        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11520        $$emit$$"ADD     0x20,RAX\n\t"
11521        $$emit$$"SUB     0x4,RCX\n\t"
11522        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11523        $$emit$$"ADD     0x4,RCX\n\t"
11524        $$emit$$"JLE     L_end\n\t"
11525        $$emit$$"DEC     RCX\n\t"
11526        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11527        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11528        $$emit$$"ADD     0x8,RAX\n\t"
11529        $$emit$$"DEC     RCX\n\t"
11530        $$emit$$"JGE     L_sloop\n\t"
11531        $$emit$$"# L_end:\n\t"
11532     } else {
11533        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11534        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11535     }
11536     $$emit$$"# DONE"
11537   %}
11538   ins_encode %{
11539     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11540                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11541   %}
11542   ins_pipe( pipe_slow );
11543 %}
11544 
11545 // Large non-constant length ClearArray for non-AVX512 targets.
// Matches ClearArray when the node is marked large and UseAVX <= 2.
// Unlike the small-array rule, the listing has no InitArrayShortSize
// short-loop prologue. clear_mem() is called with its fifth argument true and
// no opmask register (knoreg). $cnt and $base are USE_KILL; $zero, $tmp and
// the flags are clobbered.
// NOTE(review): the UseXMMForObjInit listing text uses 64-bit register names
// (RDI, RAX, RCX) although the operands are 32-bit classes -- confirm.
11546 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11547   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11548   match(Set dummy (ClearArray cnt base));
11549   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11550   format %{ $$template
11551     if (UseFastStosb) {
11552        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11553        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11554        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11555     } else if (UseXMMForObjInit) {
11556        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11557        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11558        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11559        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11560        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11561        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11562        $$emit$$"ADD     0x40,RAX\n\t"
11563        $$emit$$"# L_zero_64_bytes:\n\t"
11564        $$emit$$"SUB     0x8,RCX\n\t"
11565        $$emit$$"JGE     L_loop\n\t"
11566        $$emit$$"ADD     0x4,RCX\n\t"
11567        $$emit$$"JL      L_tail\n\t"
11568        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11569        $$emit$$"ADD     0x20,RAX\n\t"
11570        $$emit$$"SUB     0x4,RCX\n\t"
11571        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11572        $$emit$$"ADD     0x4,RCX\n\t"
11573        $$emit$$"JLE     L_end\n\t"
11574        $$emit$$"DEC     RCX\n\t"
11575        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11576        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11577        $$emit$$"ADD     0x8,RAX\n\t"
11578        $$emit$$"DEC     RCX\n\t"
11579        $$emit$$"JGE     L_sloop\n\t"
11580        $$emit$$"# L_end:\n\t"
11581     } else {
11582        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11583        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11584        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11585     }
11586     $$emit$$"# DONE"
11587   %}
11588   ins_encode %{
11589     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11590                  $tmp$$XMMRegister, true, knoreg);
11591   %}
11592   ins_pipe( pipe_slow );
11593 %}
11594 
11595 // Large non-constant length ClearArray for AVX512 targets.
// Matches ClearArray when the node is marked large and UseAVX > 2.
// clear_mem() is called with its fifth argument true and the opmask TEMP
// $ktmp; $tmp comes from the legRegD class. $cnt and $base are USE_KILL;
// $zero and the flags are killed.
// NOTE(review): the UseXMMForObjInit listing text uses 64-bit register names
// (RDI, RAX, RCX) although the operands are 32-bit classes -- confirm.
11596 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11597   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11598   match(Set dummy (ClearArray cnt base));
11599   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11600   format %{ $$template
11601     if (UseFastStosb) {
11602        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11603        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11604        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11605     } else if (UseXMMForObjInit) {
11606        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11607        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11608        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11609        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11610        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11611        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11612        $$emit$$"ADD     0x40,RAX\n\t"
11613        $$emit$$"# L_zero_64_bytes:\n\t"
11614        $$emit$$"SUB     0x8,RCX\n\t"
11615        $$emit$$"JGE     L_loop\n\t"
11616        $$emit$$"ADD     0x4,RCX\n\t"
11617        $$emit$$"JL      L_tail\n\t"
11618        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11619        $$emit$$"ADD     0x20,RAX\n\t"
11620        $$emit$$"SUB     0x4,RCX\n\t"
11621        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11622        $$emit$$"ADD     0x4,RCX\n\t"
11623        $$emit$$"JLE     L_end\n\t"
11624        $$emit$$"DEC     RCX\n\t"
11625        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11626        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11627        $$emit$$"ADD     0x8,RAX\n\t"
11628        $$emit$$"DEC     RCX\n\t"
11629        $$emit$$"JGE     L_sloop\n\t"
11630        $$emit$$"# L_end:\n\t"
11631     } else {
11632        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11633        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11634        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11635     }
11636     $$emit$$"# DONE"
11637   %}
11638   ins_encode %{
11639     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11640                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11641   %}
11642   ins_pipe( pipe_slow );
11643 %}
11644 
11645 // Small constant length ClearArray for AVX512 targets.
// Matches ClearArray whose count is an immediate ($cnt is immI) when the node
// is not marked large, MaxVectorSize >= 32 and AVX512VL is supported.
// $base is a plain input here (no USE_KILL effect is declared); $tmp, $zero
// and $ktmp are temporaries and the flags are killed. The count is passed to
// clear_mem() as a compile-time constant.
11646 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11647 %{
11648   predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl());
11649   match(Set dummy (ClearArray cnt base));
11650   ins_cost(100);
11651   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11652   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11653   ins_encode %{
11654    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11655   %}
11656   ins_pipe(pipe_slow);
11657 %}
11658 
// StrComp for the LL encoding when AVX512VL+BW is not available
// (predicate: !supports_avx512vlbw()). All four inputs are pinned to specific
// register classes and are clobbered (USE_KILL); $tmp1 is an XMM temporary and
// the flags are killed. No opmask register is passed (knoreg).
11659 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11660                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11661   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11662   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11663   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11664 
11665   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11666   ins_encode %{
11667     __ string_compare($str1$$Register, $str2$$Register,
11668                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11669                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11670   %}
11671   ins_pipe( pipe_slow );
11672 %}
11673 
// StrComp for the LL encoding when AVX512VL+BW is available
// (predicate: supports_avx512vlbw()). Same operands as the non-EVEX rule plus
// an opmask temporary $ktmp that is passed to string_compare().
11674 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11675                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11676   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11677   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11678   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11679 
11680   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11681   ins_encode %{
11682     __ string_compare($str1$$Register, $str2$$Register,
11683                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11684                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11685   %}
11686   ins_pipe( pipe_slow );
11687 %}
11688 
// StrComp for the UU encoding when AVX512VL+BW is not available
// (predicate: !supports_avx512vlbw()). All four inputs are clobbered
// (USE_KILL); $tmp1 is an XMM temporary and the flags are killed. No opmask
// register is passed (knoreg).
11689 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11690                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11691   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11692   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11693   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11694 
11695   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11696   ins_encode %{
11697     __ string_compare($str1$$Register, $str2$$Register,
11698                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11699                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11700   %}
11701   ins_pipe( pipe_slow );
11702 %}
11703 
// StrComp for the UU encoding when AVX512VL+BW is available
// (predicate: supports_avx512vlbw()). Same operands as the non-EVEX rule plus
// an opmask temporary $ktmp that is passed to string_compare().
11704 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11705                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11706   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11707   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11708   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11709 
11710   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11711   ins_encode %{
11712     __ string_compare($str1$$Register, $str2$$Register,
11713                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11714                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11715   %}
11716   ins_pipe( pipe_slow );
11717 %}
11718 
// StrComp for the LU (mixed) encoding when AVX512VL+BW is not available
// (predicate: !supports_avx512vlbw()). All four inputs are clobbered
// (USE_KILL); $tmp1 is an XMM temporary and the flags are killed. No opmask
// register is passed (knoreg).
// NOTE(review): the listing text says "byte[]" although this rule handles the
// mixed LU encoding -- confirm whether the wording is intended.
11719 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11720                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11721   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11722   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11723   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11724 
11725   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11726   ins_encode %{
11727     __ string_compare($str1$$Register, $str2$$Register,
11728                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11729                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11730   %}
11731   ins_pipe( pipe_slow );
11732 %}
11733 
// StrComp for the LU (mixed) encoding when AVX512VL+BW is available
// (predicate: supports_avx512vlbw()). Same operands as the non-EVEX rule plus
// an opmask temporary $ktmp that is passed to string_compare().
// NOTE(review): the listing text says "byte[]" although this rule handles the
// mixed LU encoding -- confirm whether the wording is intended.
11734 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11735                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11736   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11737   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11738   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11739 
11740   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11741   ins_encode %{
11742     __ string_compare($str1$$Register, $str2$$Register,
11743                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11744                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11745   %}
11746   ins_pipe( pipe_slow );
11747 %}
11748 
// StrComp for the UL (mixed) encoding when AVX512VL+BW is not available
// (predicate: !supports_avx512vlbw()).
// Compared with the other string_compare* rules, the operand register classes
// of str1/str2 and cnt1/cnt2 are swapped here, and the call below also passes
// (str2, str1, cnt2, cnt1). The net effect is that string_compare() receives
// its arguments in the same register classes (EDI, ESI, ECX, EDX order) as in
// the other variants.
// NOTE(review): the listing text says "byte[]" although this rule handles the
// mixed UL encoding -- confirm whether the wording is intended.
11749 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11750                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11751   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11752   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11753   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11754 
11755   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11756   ins_encode %{
11757     __ string_compare($str2$$Register, $str1$$Register,
11758                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11759                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11760   %}
11761   ins_pipe( pipe_slow );
11762 %}
11763 
// StrComp for the UL (mixed) encoding when AVX512VL+BW is available
// (predicate: supports_avx512vlbw()); an opmask temporary $ktmp is passed to
// string_compare().
// The operand register classes of str1/str2 and cnt1/cnt2 are swapped relative
// to the LL/UU/LU rules, and the call passes (str2, str1, cnt2, cnt1), so
// string_compare() still receives its arguments in the EDI, ESI, ECX, EDX
// register classes.
// NOTE(review): the listing text says "byte[]" although this rule handles the
// mixed UL encoding -- confirm whether the wording is intended.
11764 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11765                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11766   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11767   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11768   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11769 
11770   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11771   ins_encode %{
11772     __ string_compare($str2$$Register, $str1$$Register,
11773                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11774                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11775   %}
11776   ins_pipe( pipe_slow );
11777 %}
11778 
11779 // fast string equals
// StrEquals when AVX512VL+BW is not available (predicate:
// !supports_avx512vlbw()). $str1, $str2 and $cnt are inputs that are clobbered
// (USE_KILL); $tmp1/$tmp2 are XMM temporaries, $tmp3 and the flags are killed.
// Delegates to arrays_equals() with no opmask register (knoreg).
11780 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11781                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11782   predicate(!VM_Version::supports_avx512vlbw());
11783   match(Set result (StrEquals (Binary str1 str2) cnt));
11784   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11785 
11786   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11787   ins_encode %{
          // First argument false: presumably selects the string (not array) flavour of
          // arrays_equals -- confirm against the MacroAssembler declaration.
11788     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11789                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11790                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11791   %}
11792 
11793   ins_pipe( pipe_slow );
11794 %}
11795 
// StrEquals when AVX512VL+BW is available (predicate: supports_avx512vlbw()).
// Same operands as the non-EVEX rule plus an opmask temporary $ktmp that is
// passed to arrays_equals(). $str1, $str2 and $cnt are clobbered (USE_KILL);
// $tmp3 and the flags are killed.
11796 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11797                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11798   predicate(VM_Version::supports_avx512vlbw());
11799   match(Set result (StrEquals (Binary str1 str2) cnt));
11800   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11801 
11802   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11803   ins_encode %{
          // First argument false: presumably selects the string (not array) flavour of
          // arrays_equals -- confirm against the MacroAssembler declaration.
11804     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11805                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11806                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11807   %}
11808 
11809   ins_pipe( pipe_slow );
11810 %}
11811 
11812 
11813 // fast search of substring with known size.
// StrIndexOf for the LL encoding where the substring length is a compile-time
// constant ($int_cnt2); requires UseSSE42Intrinsics.
// $cnt2 and $tmp are not inputs of the match rule; they are only clobbered
// (KILL) and handed to the stub as scratch registers. $str1, $str2 and $cnt1
// are inputs that are clobbered (USE_KILL).
11814 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11815                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11816   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11817   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11818   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11819 
11820   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11821   ins_encode %{
          // The constant length decides at code-emission time which stub is generated.
11822     int icnt2 = (int)$int_cnt2$$constant;
11823     if (icnt2 >= 16) {
11824       // IndexOf for constant substrings with size >= 16 elements
11825       // which don't need to be loaded through stack.
11826       __ string_indexofC8($str1$$Register, $str2$$Register,
11827                           $cnt1$$Register, $cnt2$$Register,
11828                           icnt2, $result$$Register,
11829                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11830     } else {
11831       // Small strings are loaded through stack if they cross page boundary.
11832       __ string_indexof($str1$$Register, $str2$$Register,
11833                         $cnt1$$Register, $cnt2$$Register,
11834                         icnt2, $result$$Register,
11835                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11836     }
11837   %}
11838   ins_pipe( pipe_slow );
11839 %}
11840 
11841 // fast search of substring with known size.
// StrIndexOf for the UU encoding where the substring length is a compile-time
// constant ($int_cnt2); requires UseSSE42Intrinsics.
// $cnt2 and $tmp are not inputs of the match rule; they are only clobbered
// (KILL) and handed to the stub as scratch registers. The stub selection
// threshold is 8 elements here (16 in the LL rule).
11842 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11843                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11844   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11845   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11846   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11847 
11848   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11849   ins_encode %{
          // The constant length decides at code-emission time which stub is generated.
11850     int icnt2 = (int)$int_cnt2$$constant;
11851     if (icnt2 >= 8) {
11852       // IndexOf for constant substrings with size >= 8 elements
11853       // which don't need to be loaded through stack.
11854       __ string_indexofC8($str1$$Register, $str2$$Register,
11855                           $cnt1$$Register, $cnt2$$Register,
11856                           icnt2, $result$$Register,
11857                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11858     } else {
11859       // Small strings are loaded through stack if they cross page boundary.
11860       __ string_indexof($str1$$Register, $str2$$Register,
11861                         $cnt1$$Register, $cnt2$$Register,
11862                         icnt2, $result$$Register,
11863                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11864     }
11865   %}
11866   ins_pipe( pipe_slow );
11867 %}
11868 
11869 // fast search of substring with known size.
// StrIndexOf for the UL (mixed) encoding where the substring length is a
// compile-time constant ($int_cnt2); requires UseSSE42Intrinsics.
// $cnt2 and $tmp are not inputs of the match rule; they are only clobbered
// (KILL) and handed to the stub as scratch registers. The stub selection
// threshold is 8 elements, as in the UU rule.
11870 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11871                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11872   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11873   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11874   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11875 
11876   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11877   ins_encode %{
          // The constant length decides at code-emission time which stub is generated.
11878     int icnt2 = (int)$int_cnt2$$constant;
11879     if (icnt2 >= 8) {
11880       // IndexOf for constant substrings with size >= 8 elements
11881       // which don't need to be loaded through stack.
11882       __ string_indexofC8($str1$$Register, $str2$$Register,
11883                           $cnt1$$Register, $cnt2$$Register,
11884                           icnt2, $result$$Register,
11885                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11886     } else {
11887       // Small strings are loaded through stack if they cross page boundary.
11888       __ string_indexof($str1$$Register, $str2$$Register,
11889                         $cnt1$$Register, $cnt2$$Register,
11890                         icnt2, $result$$Register,
11891                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11892     }
11893   %}
11894   ins_pipe( pipe_slow );
11895 %}
11896 
// StrIndexOf for the LL encoding where the substring length is a register
// operand ($cnt2); requires UseSSE42Intrinsics. All four inputs are clobbered
// (USE_KILL); $vec1 is an XMM temporary, $tmp and the flags are killed.
11897 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11898                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11899   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11900   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11901   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11902 
11903   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11904   ins_encode %{
          // (-1) is passed in the slot where the constant-length rules pass icnt2;
          // presumably it marks the substring length as not known at compile time -- confirm.
11905     __ string_indexof($str1$$Register, $str2$$Register,
11906                       $cnt1$$Register, $cnt2$$Register,
11907                       (-1), $result$$Register,
11908                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11909   %}
11910   ins_pipe( pipe_slow );
11911 %}
11912 
// StrIndexOf for the UU encoding where the substring length is a register
// operand ($cnt2); requires UseSSE42Intrinsics. All four inputs are clobbered
// (USE_KILL); $vec1 is an XMM temporary, $tmp and the flags are killed.
11913 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11914                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11915   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11916   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11917   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11918 
11919   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11920   ins_encode %{
          // (-1) is passed in the slot where the constant-length rules pass icnt2;
          // presumably it marks the substring length as not known at compile time -- confirm.
11921     __ string_indexof($str1$$Register, $str2$$Register,
11922                       $cnt1$$Register, $cnt2$$Register,
11923                       (-1), $result$$Register,
11924                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11925   %}
11926   ins_pipe( pipe_slow );
11927 %}
11928 
11929 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11930                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
        // StrIndexOf rule for the mixed UL encoding. Requires UseSSE42Intrinsics.
        // All four inputs are USE_KILL, tmp and the flags are killed, vec1 is scratch.
        // NOTE(review): the format string says char[] for both operands, same as the
        // UU rule; for UL the two strings presumably have different element widths.
11931   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11932   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11933   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11934 
11935   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11936   ins_encode %{
          // (-1) is passed instead of a constant substring count.
          // NOTE(review): exact meaning is defined by string_indexof - confirm there.
11937     __ string_indexof($str1$$Register, $str2$$Register,
11938                       $cnt1$$Register, $cnt2$$Register,
11939                       (-1), $result$$Register,
11940                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11941   %}
11942   ins_pipe( pipe_slow );
11943 %}
11944 
11945 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11946                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
        // StrIndexOfChar rule for the U encoding (format string: StringUTF16).
        // Requires UseSSE42Intrinsics. str1, cnt1 and ch are consumed and destroyed;
        // three XMM registers and tmp are scratch; the flags are killed.
11947   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
11948   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11949   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11950   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11951   ins_encode %{
11952     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11953                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11954   %}
11955   ins_pipe( pipe_slow );
11956 %}
11957 
11958 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11959                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
        // StrIndexOfChar rule for the L encoding (format string: StringLatin1).
        // Identical operands and effects to string_indexof_char; it dispatches to
        // the stringL_indexof_char assembler routine instead.
11960   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
11961   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11962   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11963   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11964   ins_encode %{
11965     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11966                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11967   %}
11968   ins_pipe( pipe_slow );
11969 %}
11970 
11971 
11972 // Fast array equals: AryEq for the LL encoding (byte[]), used when supports_avx512vlbw() is false.
11973 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11974                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11975 %{
        // Both array pointers are consumed and destroyed (USE_KILL); tmp3/tmp4 and the
        // flags are killed. No opmask register is available on this path, so knoreg
        // is passed in the last argument.
11976   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11977   match(Set result (AryEq ary1 ary2));
11978   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11979   //ins_cost(300);
11980 
11981   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11982   ins_encode %{
11983     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11984                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11985                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11986   %}
11987   ins_pipe( pipe_slow );
11988 %}
11989 
11990 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11991                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11992 %{
        // AryEq for the LL encoding (byte[]) when supports_avx512vlbw() is true.
        // Differs from array_equalsB only by the extra opmask temp ktmp, which is
        // handed to arrays_equals in place of knoreg.
        // NOTE(review): the format string does not list $ktmp among the killed registers.
11993   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11994   match(Set result (AryEq ary1 ary2));
11995   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11996   //ins_cost(300);
11997 
11998   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11999   ins_encode %{
12000     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12001                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12002                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12003   %}
12004   ins_pipe( pipe_slow );
12005 %}
12006 
12007 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12008                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12009 %{
        // AryEq for the UU encoding (char[]) when supports_avx512vlbw() is false.
        // Same shape as array_equalsB, but the "char" argument of arrays_equals is true.
12010   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12011   match(Set result (AryEq ary1 ary2));
12012   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12013   //ins_cost(300);
12014 
12015   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12016   ins_encode %{
12017     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12018                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12019                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12020   %}
12021   ins_pipe( pipe_slow );
12022 %}
12023 
12024 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12025                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12026 %{
        // AryEq for the UU encoding (char[]) when supports_avx512vlbw() is true.
        // Adds the opmask temp ktmp, passed to arrays_equals in place of knoreg.
        // NOTE(review): the format string does not list $ktmp among the killed registers.
12027   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12028   match(Set result (AryEq ary1 ary2));
12029   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12030   //ins_cost(300);
12031 
12032   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12033   ins_encode %{
12034     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12035                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12036                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12037   %}
12038   ins_pipe( pipe_slow );
12039 %}
12040 
12041 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12042                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12043 %{
        // CountPositives over a byte[] when either AVX512VL/BW or BMI2 is missing
        // (the exact complement of the count_positives_evex predicate).
        // ary1 and len are consumed and destroyed; tmp3 and the flags are killed.
        // Both opmask arguments of the assembler routine are knoreg on this path.
12044   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12045   match(Set result (CountPositives ary1 len));
12046   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12047 
12048   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12049   ins_encode %{
12050     __ count_positives($ary1$$Register, $len$$Register,
12051                        $result$$Register, $tmp3$$Register,
12052                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12053   %}
12054   ins_pipe( pipe_slow );
12055 %}
12056 
12057 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12058                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12059 %{
        // CountPositives over a byte[] when both AVX512VL/BW and BMI2 are available.
        // Same register contract as count_positives plus two opmask temps, which are
        // passed to the assembler routine where the other rule passes knoreg.
12060   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12061   match(Set result (CountPositives ary1 len));
12062   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12063 
12064   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12065   ins_encode %{
12066     __ count_positives($ary1$$Register, $len$$Register,
12067                        $result$$Register, $tmp3$$Register,
12068                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12069   %}
12070   ins_pipe( pipe_slow );
12071 %}
12072 
12073 
12074 // Fast char[] to byte[] compression (StrCompressedCopy), path without AVX512VL/BW + BMI2.
12075 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12076                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // src, dst and len are consumed and destroyed; tmp5 and the flags are killed;
        // four XMM registers are scratch. Both opmask arguments are knoreg here.
        // NOTE(review): the format string names RAX/RCX/RDX although the operands are
        // the 32-bit eAX/eCX/eDX classes, and eAX is the result rather than a kill.
12077   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12078   match(Set result (StrCompressedCopy src (Binary dst len)));
12079   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12080 
12081   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12082   ins_encode %{
12083     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12084                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12085                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12086                            knoreg, knoreg);
12087   %}
12088   ins_pipe( pipe_slow );
12089 %}
12090 
12091 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12092                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // StrCompressedCopy when both AVX512VL/BW and BMI2 are available.
        // Same contract as string_compress plus two opmask temps that replace the
        // knoreg arguments of char_array_compress.
12093   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12094   match(Set result (StrCompressedCopy src (Binary dst len)));
12095   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12096 
12097   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12098   ins_encode %{
12099     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12100                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12101                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12102                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12103   %}
12104   ins_pipe( pipe_slow );
12105 %}
12106 
12107 // Fast byte[] to char[] inflation (StrInflatedCopy), path without AVX512VL/BW + BMI2.
12108 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12109                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
        // The node produces no register value: the match rule sets the Universe
        // operand "dummy". src, dst and len are consumed and destroyed; tmp1/tmp2
        // are scratch; the flags are killed. knoreg is passed as the opmask argument.
12110   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12111   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12112   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12113 
12114   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12115   ins_encode %{
12116     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12117                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12118   %}
12119   ins_pipe( pipe_slow );
12120 %}
12121 
12122 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12123                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
        // StrInflatedCopy when both AVX512VL/BW and BMI2 are available.
        // Same contract as string_inflate plus the opmask temp ktmp, which replaces
        // the knoreg argument of byte_array_inflate.
12124   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12125   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12126   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12127 
12128   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12129   ins_encode %{
12130     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12131                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12132   %}
12133   ins_pipe( pipe_slow );
12134 %}
12135 
12136 // Encode char[] to byte[] in ISO_8859_1 (EncodeISOArray nodes whose is_ascii() is false).
12137 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12138                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12139                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // src, dst and len are consumed and destroyed; tmp5 and the flags are killed;
        // four XMM registers are scratch. The trailing "false" argument selects the
        // non-ASCII flavour of the shared encode_iso_array assembler routine.
12140   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12141   match(Set result (EncodeISOArray src (Binary dst len)));
12142   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12143 
12144   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12145   ins_encode %{
12146     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12147                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12148                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12149   %}
12150   ins_pipe( pipe_slow );
12151 %}
12152 
12153 // Encode char[] to byte[] in ASCII (EncodeISOArray nodes whose is_ascii() is true).
12154 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12155                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12156                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
        // Same operands and effects as encode_iso_array; the only difference is the
        // trailing "true" argument handed to the shared encode_iso_array routine.
12157   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12158   match(Set result (EncodeISOArray src (Binary dst len)));
12159   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12160 
12161   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12162   ins_encode %{
12163     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12164                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12165                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12166   %}
12167   ins_pipe( pipe_slow );
12168 %}
12169 
12170 //----------Control Flow Instructions------------------------------------------
12171 // Signed compare Instructions
12172 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
        // Signed int compare of two registers; defines the flags operand cr and
        // only reads op1/op2. Encoded as opcode 0x3B followed by a reg,reg ModRM byte.
12173   match(Set cr (CmpI op1 op2));
12174   effect( DEF cr, USE op1, USE op2 );
12175   format %{ "CMP    $op1,$op2" %}
12176   opcode(0x3B);  /* Opcode 3B /r */
12177   ins_encode( OpcP, RegReg( op1, op2) );
12178   ins_pipe( ialu_cr_reg_reg );
12179 %}
12180 
12181 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
        // Signed int compare of a register against an immediate (opcode 0x81 /7).
        // NOTE(review): OpcSErm and Con8or32 are enc_classes defined elsewhere; by
        // name they presumably choose between an 8-bit and a 32-bit immediate - confirm.
12182   match(Set cr (CmpI op1 op2));
12183   effect( DEF cr, USE op1 );
12184   format %{ "CMP    $op1,$op2" %}
12185   opcode(0x81,0x07);  /* Opcode 81 /7 */
12186   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12187   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12188   ins_pipe( ialu_cr_reg_imm );
12189 %}
12190 
12191 // Cisc-spilled version of compI_eReg: the second operand is an int loaded from memory.
12192 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
        // Same opcode (0x3B /r) as the register form, but with a reg,mem ModRM.
        // The encoding is bracketed by SetInstMark/ClearInstMark like the other
        // memory-operand rules in this section.
12193   match(Set cr (CmpI op1 (LoadI op2)));
12194 
12195   format %{ "CMP    $op1,$op2" %}
12196   ins_cost(500);
12197   opcode(0x3B);  /* Opcode 3B /r */
12198   ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12199   ins_pipe( ialu_cr_reg_mem );
12200 %}
12201 
12202 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
        // Compare of an int register against the constant zero, emitted as
        // TEST src,src (opcode 0x85) instead of a CMP with an immediate.
12203   match(Set cr (CmpI src zero));
12204   effect( DEF cr, USE src );
12205 
12206   format %{ "TEST   $src,$src" %}
12207   opcode(0x85);
12208   ins_encode( OpcP, RegReg( src, src ) );
12209   ins_pipe( ialu_cr_reg_imm );
12210 %}
12211 
12212 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
        // Folds (src & con) == 0 into a single TEST src,con (opcode 0xF7 /0) so the
        // AND result never needs a register. The constant is always emitted as a
        // 32-bit immediate (Con32).
12213   match(Set cr (CmpI (AndI src con) zero));
12214 
12215   format %{ "TEST   $src,$con" %}
12216   opcode(0xF7,0x00);
12217   ins_encode( OpcP, RegOpc(src), Con32(con) );
12218   ins_pipe( ialu_cr_reg_imm );
12219 %}
12220 
12221 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
        // Folds (src & [mem]) == 0 into a single TEST src,[mem] (opcode 0x85 /r).
        // The memory operand must sit under a LoadI, exactly as in the sibling
        // memory-operand rules compI_eReg_mem and compU_eReg_mem: a bare address
        // expression is never an input of AndI, so without the LoadI this rule
        // could not match any ideal subtree.
12222   match(Set cr (CmpI (AndI src (LoadI mem)) zero));
12223 
12224   format %{ "TEST   $src,$mem" %}
12225   opcode(0x85);
12226   ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark );
12227   ins_pipe( ialu_cr_reg_mem );
12228 %}
12229 
12230 // Unsigned compare Instructions; really, same as signed except they
12231 // produce an eFlagsRegU instead of eFlagsReg.
12232 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
        // Unsigned int compare of two registers. Same encoding as compI_eReg
        // (opcode 0x3B /r); only the flags operand class differs.
12233   match(Set cr (CmpU op1 op2));
12234 
12235   format %{ "CMPu   $op1,$op2" %}
12236   opcode(0x3B);  /* Opcode 3B /r */
12237   ins_encode( OpcP, RegReg( op1, op2) );
12238   ins_pipe( ialu_cr_reg_reg );
12239 %}
12240 
12241 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
        // Unsigned int compare of a register against an immediate (opcode 0x81 /7);
        // same encoding classes as compI_eReg_imm, producing eFlagsRegU.
12242   match(Set cr (CmpU op1 op2));
12243 
12244   format %{ "CMPu   $op1,$op2" %}
12245   opcode(0x81,0x07);  /* Opcode 81 /7 */
12246   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12247   ins_pipe( ialu_cr_reg_imm );
12248 %}
12249 
12250 // Cisc-spilled version of compU_eReg: the second operand is an int loaded from memory.
12251 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
        // Opcode 0x3B /r with a reg,mem ModRM; produces unsigned flags (eFlagsRegU).
12252   match(Set cr (CmpU op1 (LoadI op2)));
12253 
12254   format %{ "CMPu   $op1,$op2" %}
12255   ins_cost(500);
12256   opcode(0x3B);  /* Opcode 3B /r */
12257   ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12258   ins_pipe( ialu_cr_reg_mem );
12259 %}
12260 
12261 // // Cisc-spilled version of cmpU_eReg
12262 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12263 //  match(Set cr (CmpU (LoadI op1) op2));
12264 //
12265 //  format %{ "CMPu   $op1,$op2" %}
12266 //  ins_cost(500);
12267 //  opcode(0x39);  /* Opcode 39 /r */
12268 //  ins_encode( OpcP, RegMem( op1, op2) );
12269 //%}
12270 
12271 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
        // Unsigned compare of an int register against zero, emitted as
        // TEST src,src (opcode 0x85); produces eFlagsRegU.
12272   match(Set cr (CmpU src zero));
12273 
12274   format %{ "TESTu  $src,$src" %}
12275   opcode(0x85);
12276   ins_encode( OpcP, RegReg( src, src ) );
12277   ins_pipe( ialu_cr_reg_imm );
12278 %}
12279 
12280 // Unsigned pointer compare Instructions
12281 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
        // Pointer compare of two registers; the result is an unsigned flags operand.
        // Encoded as opcode 0x3B /r with a reg,reg ModRM, like the int compares.
12282   match(Set cr (CmpP op1 op2));
12283 
12284   format %{ "CMPu   $op1,$op2" %}
12285   opcode(0x3B);  /* Opcode 3B /r */
12286   ins_encode( OpcP, RegReg( op1, op2) );
12287   ins_pipe( ialu_cr_reg_reg );
12288 %}
12289 
12290 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
        // Pointer compare of a register against a pointer constant (opcode 0x81 /7).
        // Unlike the int immediate compares, the encoding is wrapped in
        // SetInstMark/ClearInstMark.
        // NOTE(review): presumably because a pointer constant may need a relocation
        // recorded against the instruction - confirm in the enc_class definitions.
12291   match(Set cr (CmpP op1 op2));
12292 
12293   format %{ "CMPu   $op1,$op2" %}
12294   opcode(0x81,0x07);  /* Opcode 81 /7 */
12295   ins_encode( SetInstMark, OpcSErm( op1, op2 ), Con8or32( op2 ), ClearInstMark );
12296   ins_pipe( ialu_cr_reg_imm );
12297 %}
12298 
12299 // Cisc-spilled version of compP_eReg: the second operand is a pointer loaded from memory.
12300 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
        // Opcode 0x3B /r with a reg,mem ModRM. compP_mem_eReg matches the same
        // subtree without the explicit ins_cost(500) but adds a predicate.
12301   match(Set cr (CmpP op1 (LoadP op2)));
12302 
12303   format %{ "CMPu   $op1,$op2" %}
12304   ins_cost(500);
12305   opcode(0x3B);  /* Opcode 3B /r */
12306   ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12307   ins_pipe( ialu_cr_reg_mem );
12308 %}
12309 
12310 // // Cisc-spilled version of cmpP_eReg
12311 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12312 //  match(Set cr (CmpP (LoadP op1) op2));
12313 //
12314 //  format %{ "CMPu   $op1,$op2" %}
12315 //  ins_cost(500);
12316 //  opcode(0x39);  /* Opcode 39 /r */
12317 //  ins_encode( OpcP, RegMem( op1, op2) );
12318 //%}
12319 
12320 // Compare raw pointer (used in out-of-heap check).
12321 // Only works because non-oop pointers must be raw pointers
12322 // and raw pointers have no anti-dependencies.
12323 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
        // Matches the same subtree as compP_eReg_mem but without the explicit
        // ins_cost(500), and only when the predicate holds. n is the CmpP node and
        // n->in(2) its LoadP input.
        // NOTE(review): n->in(2)->in(2) is presumably the load's address input; the
        // rule requires its type to carry no relocation - confirm.
12324   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12325   match(Set cr (CmpP op1 (LoadP op2)));
12326 
12327   format %{ "CMPu   $op1,$op2" %}
12328   opcode(0x3B);  /* Opcode 3B /r */
12329   ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark );
12330   ins_pipe( ialu_cr_reg_mem );
12331 %}
12332 
12333 //
12334 // This will generate a signed flags result. This should be ok
12335 // since any compare to a zero should be eq/neq.
12336 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
        // Null check of a pointer register: CmpP against the null constant (immP0)
        // is emitted as TEST src,src (opcode 0x85). Note the result is a signed
        // flags operand (eFlagsReg), unlike the other pointer compares.
12337   match(Set cr (CmpP src zero));
12338 
12339   format %{ "TEST   $src,$src" %}
12340   opcode(0x85);
12341   ins_encode( OpcP, RegReg( src, src ) );
12342   ins_pipe( ialu_cr_reg_imm );
12343 %}
12344 
12345 // Cisc-spilled version of testP_reg
12346 // This will generate a signed flags result. This should be ok
12347 // since any compare to a zero should be eq/neq.
12348 instruct testP_Reg_mem( eFlagsReg cr, memory op, immP0 zero ) %{
        // Null check of a pointer held in memory: TEST [op],0xFFFFFFFF (opcode F7 /0
        // with a 32-bit all-ones immediate) sets ZF exactly when the loaded pointer
        // is zero. The zero operand is the null pointer constant (immP0), matching
        // testP_reg: the second input of a CmpP is a pointer, so an int-zero operand
        // could never match and the rule could not pair with testP_reg.
12349   match(Set cr (CmpP (LoadP op) zero));
12350 
12351   format %{ "TEST   $op,0xFFFFFFFF" %}
12352   ins_cost(500);
12353   opcode(0xF7);               /* Opcode F7 /0 */
12354   ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark );
12355   ins_pipe( ialu_cr_reg_imm );
12356 %}
12357 
12358 // Yanked all unsigned pointer compare operations.
12359 // Pointer compares are done with CmpP which is already unsigned.
12360 
12361 //----------Max and Min--------------------------------------------------------
12362 // Min Instructions
12363 ////
12364 //   *** Min and Max using the conditional move are slower than the
12365 //   *** branch version on a Pentium III.
12366 // // Conditional move for min
12367 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12368 //  effect( USE_DEF op2, USE op1, USE cr );
12369 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12370 //  opcode(0x4C,0x0F);
12371 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12372 //  ins_pipe( pipe_cmov_reg );
12373 //%}
12374 //
12375 //// Min Register with Register (P6 version)
12376 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12377 //  predicate(VM_Version::supports_cmov() );
12378 //  match(Set op2 (MinI op1 op2));
12379 //  ins_cost(200);
12380 //  expand %{
12381 //    eFlagsReg cr;
12382 //    compI_eReg(cr,op1,op2);
12383 //    cmovI_reg_lt(op2,op1,cr);
12384 //  %}
12385 //%}
12386 
12387 // Min Register with Register (generic version)
12388 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
        // dst = min(dst, src); dst is both input and output and the flags are killed.
        // The code is produced by the min_enc enc_class, defined elsewhere in the file.
        // NOTE(review): opcode(0xCC) is not referenced by the visible ins_encode;
        // presumably a placeholder - confirm against min_enc.
12389   match(Set dst (MinI dst src));
12390   effect(KILL flags);
12391   ins_cost(300);
12392 
12393   format %{ "MIN    $dst,$src" %}
12394   opcode(0xCC);
12395   ins_encode( min_enc(dst,src) );
12396   ins_pipe( pipe_slow );
12397 %}
12398 
12399 // Max Register with Register
12400 //   *** Min and Max using the conditional move are slower than the
12401 //   *** branch version on a Pentium III.
12402 // // Conditional move for max
12403 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12404 //  effect( USE_DEF op2, USE op1, USE cr );
12405 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12406 //  opcode(0x4F,0x0F);
12407 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12408 //  ins_pipe( pipe_cmov_reg );
12409 //%}
12410 //
12411 // // Max Register with Register (P6 version)
12412 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12413 //  predicate(VM_Version::supports_cmov() );
12414 //  match(Set op2 (MaxI op1 op2));
12415 //  ins_cost(200);
12416 //  expand %{
12417 //    eFlagsReg cr;
12418 //    compI_eReg(cr,op1,op2);
12419 //    cmovI_reg_gt(op2,op1,cr);
12420 //  %}
12421 //%}
12422 
12423 // Max Register with Register (generic version)
12424 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
        // dst = max(dst, src); dst is both input and output and the flags are killed.
        // The code is produced by the max_enc enc_class, defined elsewhere in the file.
        // NOTE(review): opcode(0xCC) is not referenced by the visible ins_encode;
        // presumably a placeholder - confirm against max_enc.
12425   match(Set dst (MaxI dst src));
12426   effect(KILL flags);
12427   ins_cost(300);
12428 
12429   format %{ "MAX    $dst,$src" %}
12430   opcode(0xCC);
12431   ins_encode( max_enc(dst,src) );
12432   ins_pipe( pipe_slow );
12433 %}
12434 
12435 // ============================================================================
12436 // Counted Loop limit node which represents exact final iterator value.
12437 // Note: the resulting value should fit into integer range since
12438 // counted loops have limit check on overflow.
12439 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
        // Computes the exact final iterator value of a counted loop, as given by
        // the format string below:
        //   limit = init + stride * ((limit - init + stride - 1) / stride)
        // (with stride + 1 and a negated dividend when the stride is negative).
        // limit lives in EAX and is both input and result; limit_hi (EDX) holds the
        // upper half of the 64-bit intermediate; tmp is scratch; flags are killed.
        // init and tmp use the nadxRegI class, which by its name excludes EAX/EDX.
12440   match(Set limit (LoopLimit (Binary init limit) stride));
12441   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12442   ins_cost(300);
12443 
12444   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12445   ins_encode %{
12446     int strd = (int)$stride$$constant;
          // Strides of +1/-1 are not expected to reach this rule.
12447     assert(strd != 1 && strd != -1, "sanity");
12449     // Convert limit to long (EAX:EDX)
12450     __ cdql();
12451     // Convert init to long (init:tmp)
12452     __ movl($tmp$$Register, $init$$Register);
12453     __ sarl($tmp$$Register, 31);
12454     // $limit - $init
12455     __ subl($limit$$Register, $init$$Register);
12456     __ sbbl($limit_hi$$Register, $tmp$$Register);
12457     // + ($stride - 1)
12458     if (strd > 0) {
12459       __ addl($limit$$Register, (strd - 1));
12460       __ adcl($limit_hi$$Register, 0);
12461       __ movl($tmp$$Register, strd);
12462     } else {
            // Negative stride: add (stride + 1) with sign extension, then negate the
            // 64-bit value so the division below works on a positive divisor.
12463       __ addl($limit$$Register, (strd + 1));
12464       __ adcl($limit_hi$$Register, -1);
12465       __ lneg($limit_hi$$Register, $limit$$Register);
12466       __ movl($tmp$$Register, -strd);
12467     }
12468     // signed division: (EAX:EDX) / pos_stride
12469     __ idivl($tmp$$Register);
12470     if (strd < 0) {
12471       // restore sign
12472       __ negl($tmp$$Register);
12473     }
12474     // (EAX) * stride
12475     __ mull($tmp$$Register);
12476     // + init (ignore upper bits)
12477     __ addl($limit$$Register, $init$$Register);
12478   %}
12479   ins_pipe( pipe_slow );
12480 %}
12481 
12482 // ============================================================================
12483 // Branch Instructions
12484 // Jump Table
12485 instruct jumpXtnd(rRegI switch_val) %{
        // Jump-table dispatch for the ideal Jump node: an indirect JMP through the
        // table at $constantaddress, indexed by switch_val with scale factor 1.
12486   match(Jump switch_val);
12487   ins_cost(350);
12488   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12489   ins_encode %{
12490     // Jump to Address(table_base + switch_reg)
12491     Address index(noreg, $switch_val$$Register, Address::times_1);
12492     __ jump(ArrayAddress($constantaddress, index), noreg);
12493   %}
12494   ins_pipe(pipe_jmp);
12495 %}
12496 
12497 // Jump Direct - Label defines a relative address from JMP+1
12498 instruct jmpDir(label labl) %{
        // Unconditional branch for the ideal Goto node. Always emitted in the long
        // form, with a declared fixed size of 5 bytes.
12499   match(Goto);
12500   effect(USE labl);
12501 
12502   ins_cost(300);
12503   format %{ "JMP    $labl" %}
12504   size(5);
12505   ins_encode %{
12506     Label* L = $labl$$label;
12507     __ jmp(*L, false); // Always long jump
12508   %}
12509   ins_pipe( pipe_jmp );
12510 %}
12511 
12512 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12513 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
        // Conditional branch for the ideal If node on signed flags. The condition
        // code comes from the cmpOp operand; always the long form, declared 6 bytes.
12514   match(If cop cr);
12515   effect(USE labl);
12516 
12517   ins_cost(300);
12518   format %{ "J$cop    $labl" %}
12519   size(6);
12520   ins_encode %{
12521     Label* L = $labl$$label;
12522     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12523   %}
12524   ins_pipe( pipe_jcc );
12525 %}
12526 
12527 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12528 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
        // Conditional back-branch for the ideal CountedLoopEnd node. Same encoding
        // as jmpCon (long Jcc, declared 6 bytes); only the matched node differs.
12529   match(CountedLoopEnd cop cr);
12530   effect(USE labl);
12531 
12532   ins_cost(300);
12533   format %{ "J$cop    $labl\t# Loop end" %}
12534   size(6);
12535   ins_encode %{
12536     Label* L = $labl$$label;
12537     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12538   %}
12539   ins_pipe( pipe_jcc );
12540 %}
12541 
12542 // Jump Direct Conditional - using unsigned comparison
12543 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
        // Conditional branch for the ideal If node on unsigned flags (eFlagsRegU with
        // a cmpOpU condition). Long Jcc, declared 6 bytes.
12544   match(If cop cmp);
12545   effect(USE labl);
12546 
12547   ins_cost(300);
12548   format %{ "J$cop,u  $labl" %}
12549   size(6);
12550   ins_encode %{
12551     Label* L = $labl$$label;
12552     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12553   %}
12554   ins_pipe(pipe_jcc);
12555 %}
12556 
12557 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // Conditional branch for the ideal If node on eFlagsRegUCF flags with a
        // cmpOpUCF condition. Emits one long Jcc (declared 6 bytes), at a lower
        // cost (200) than jmpConU (300).
12558   match(If cop cmp);
12559   effect(USE labl);
12560 
12561   ins_cost(200);
12562   format %{ "J$cop,u  $labl" %}
12563   size(6);
12564   ins_encode %{
12565     Label* L = $labl$$label;
12566     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12567   %}
12568   ins_pipe(pipe_jcc);
12569 %}
12570 
12571 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
        // Conditional branch on eFlagsRegUCF flags that also has to consult the
        // parity flag, so it emits two jumps. Only equal and notEqual are handled;
        // any other condition code hits ShouldNotReachHere().
        // NOTE(review): presumably parity marks an unordered floating-point compare,
        // which must behave as "not equal" - confirm against the producers of
        // eFlagsRegUCF.
12572   match(If cop cmp);
12573   effect(USE labl);
12574 
12575   ins_cost(200);
12576   format %{ $$template
12577     if ($cop$$cmpcode == Assembler::notEqual) {
12578       $$emit$$"JP,u   $labl\n\t"
12579       $$emit$$"J$cop,u   $labl"
12580     } else {
12581       $$emit$$"JP,u   done\n\t"
12582       $$emit$$"J$cop,u   $labl\n\t"
12583       $$emit$$"done:"
12584     }
12585   %}
12586   ins_encode %{
12587     Label* l = $labl$$label;
12588     if ($cop$$cmpcode == Assembler::notEqual) {
            // notEqual: branch when parity is set OR the values differ.
12589       __ jcc(Assembler::parity, *l, false);
12590       __ jcc(Assembler::notEqual, *l, false);
12591     } else if ($cop$$cmpcode == Assembler::equal) {
            // equal: skip the branch when parity is set, otherwise branch on equal.
12592       Label done;
12593       __ jccb(Assembler::parity, done);
12594       __ jcc(Assembler::equal, *l, false);
12595       __ bind(done);
12596     } else {
12597        ShouldNotReachHere();
12598     }
12599   %}
12600   ins_pipe(pipe_jcc);
12601 %}
12602 
12603 // ============================================================================
12604 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12605 // array for an instance of the superklass.  Set a hidden internal cache on a
12606 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12607 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12608 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
        // Slow half of a subtype check that materialises its answer in a register:
        // result (EDI) is zero on a hit and non-zero on a miss. ECX and the flags are
        // killed. The code comes from enc_PartialSubtypeCheck, which is shared with
        // partialSubtypeCheck_vs_Zero; opcode(0x1) asks it to XOR EDI on a hit.
12609   match(Set result (PartialSubtypeCheck sub super));
12610   effect( KILL rcx, KILL cr );
12611 
12612   ins_cost(1100);  // slightly larger than the next version
12613   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12614             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12615             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12616             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12617             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12618             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12619             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12620      "miss:\t" %}
12621 
12622   opcode(0x1); // Force a XOR of EDI
12623   ins_encode( enc_PartialSubtypeCheck() );
12624   ins_pipe( pipe_slow );
12625 %}
12626 
12627 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
        // Fused form of partialSubtypeCheck followed by a compare against null: the
        // answer is delivered in the flags (Z on a hit, NZ on a miss), so EDI is only
        // a killed scratch register here. Uses the same enc_PartialSubtypeCheck
        // encoding with opcode(0x0), and is slightly cheaper (1000 vs 1100).
12628   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12629   effect( KILL rcx, KILL result );
12630 
12631   ins_cost(1000);
12632   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12633             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12634             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12635             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12636             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12637             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12638      "miss:\t" %}
12639 
12640   opcode(0x0);  // No need to XOR EDI
12641   ins_encode( enc_PartialSubtypeCheck() );
12642   ins_pipe( pipe_slow );
12643 %}
12644 
12645 // ============================================================================
12646 // Branch Instructions -- short offset versions
12647 //
12648 // These instructions are used to replace jumps of a long offset (the default
12649 // match) with jumps of a shorter offset.  These instructions are all tagged
12650 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12651 // match rules in general matching.  Instead, the ADLC generates a conversion
12652 // method in the MachNode which can be used to do in-place replacement of the
12653 // long variant with the shorter variant.  The compiler will determine if a
12654 // branch can be taken by the is_short_branch_offset() predicate in the machine
12655 // specific code section of the file.
12656 
// Short-form unconditional jump (Goto).  Declared as a 2-byte instruction and
// tagged ins_short_branch so it can replace the long JMP in place.
// The label operand defines a relative address from JMP+1.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_short_branch(1);
  size(2);
  ins_cost(300);
  format %{ "JMP,s  $labl" %}
  ins_encode %{
    Label& target = *($labl$$label);
    __ jmpb(target);
  %}
  ins_pipe( pipe_jmp );
%}
12672 
// Short-form conditional jump on the regular flags register (If).
// Declared as a 2-byte instruction; the label operand defines a relative
// address from Jcc+1.
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_short_branch(1);
  size(2);
  ins_cost(300);
  format %{ "J$cop,s  $labl" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label& target = *($labl$$label);
    __ jccb(cc, target);
  %}
  ins_pipe( pipe_jcc );
%}
12688 
// Short-form conditional jump closing a counted loop (CountedLoopEnd).
// Same encoding as the short conditional jump; the label operand defines a
// relative address from Jcc+1.
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_short_branch(1);
  size(2);
  ins_cost(300);
  format %{ "J$cop,s  $labl\t# Loop end" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label& target = *($labl$$label);
    __ jccb(cc, target);
  %}
  ins_pipe( pipe_jcc );
%}
12704 
// Short-form conditional jump using an unsigned comparison: takes a cmpOpU
// condition and the eFlagsRegU flags class.  Declared as a 2-byte instruction.
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_short_branch(1);
  size(2);
  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label& target = *($labl$$label);
    __ jccb(cc, target);
  %}
  ins_pipe( pipe_jcc );
%}
12720 
// Short-form conditional jump for the cmpOpUCF / eFlagsRegUCF flavor.
// Emits a single 2-byte Jcc with the condition code taken from the operand.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_short_branch(1);
  size(2);
  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  ins_encode %{
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    Label& target = *($labl$$label);
    __ jccb(cc, target);
  %}
  ins_pipe( pipe_jcc );
%}
12735 
// Short-form conditional jump for the cmpOpUCF2 flavor, which also consults
// the parity flag (presumably the unordered floating-point case; confirm
// against the cmpOpUCF2 operand definition).  Two short jumps are emitted,
// hence size(4):
//   notEqual: JP target; JNE target   (either condition takes the branch)
//   equal:    JP done;   JE  target   (parity set skips the branch)
// Any other condition code is a programming error.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_short_branch(1);
  size(4);
  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s   $labl\n\t"
      $$emit$$"J$cop,u,s   $labl"
    } else {
      $$emit$$"JP,u,s   done\n\t"
      $$emit$$"J$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* target = $labl$$label;
    switch ($cop$$cmpcode) {
      case Assembler::notEqual:
        __ jccb(Assembler::parity, *target);
        __ jccb(Assembler::notEqual, *target);
        break;
      case Assembler::equal: {
        Label done;
        __ jccb(Assembler::parity, done);
        __ jccb(Assembler::equal, *target);
        __ bind(done);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}
12769 
12770 // ============================================================================
12771 // Long Compare
12772 //
12773 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12774 // is tricky.  The flavor of compare used depends on whether we are testing
12775 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12776 // The GE test is the negated LT test.  The LE test can be had by commuting
12777 // the operands (yielding a GE test) and then negating; negate again for the
12778 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12779 // NE test is negated from that.
12780 
12781 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12782 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12783 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12784 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12785 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12786 // foo match ends up with the wrong leaf.  One fix is to not match both
12787 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12788 // both forms beat the trinary form of long-compare and both are very useful
12789 // on Intel which has so few registers.
12790 
// Three-way long compare (CmpL3) materialized in an integer register.
// The result register is zeroed first; the high halves are compared signed,
// and only when they are equal are the low halves compared unsigned.  The
// result is then decremented (src1 < src2), incremented (src1 > src2) or left
// at zero (equal).  Expensive: this is the form to avoid.  Flags are killed.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  ins_pipe( pipe_slow );
  format %{ "XOR    $dst,$dst\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
            "JLT,s  m_one\n\t"
            "JGT,s  p_one\n\t"
            "CMP    $src1.lo,$src2.lo\n\t"
            "JB,s   m_one\n\t"
            "JEQ,s  done\n"
    "p_one:\tINC    $dst\n\t"
            "JMP,s  done\n"
    "m_one:\tDEC    $dst\n"
     "done:" %}
  ins_encode %{
    const Register res    = $dst$$Register;
    const Register lhs_lo = $src1$$Register;
    const Register rhs_lo = $src2$$Register;
    Label p_one, m_one, done;

    // Start from zero, the "equal" answer.
    __ xorptr(res, res);

    // High halves decide with a signed compare.
    __ cmpl(HIGH_FROM_LOW(lhs_lo), HIGH_FROM_LOW(rhs_lo));
    __ jccb(Assembler::less,    m_one);
    __ jccb(Assembler::greater, p_one);

    // High halves equal: low halves decide with an unsigned compare.
    __ cmpl(lhs_lo, rhs_lo);
    __ jccb(Assembler::below,   m_one);
    __ jccb(Assembler::equal,   done);

    __ bind(p_one);
    __ incrementl(res);
    __ jmpb(done);

    __ bind(m_one);
    __ decrementl(res);

    __ bind(done);
  %}
%}
12826 
12827 //======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Compare-against-zero form: only the high half is tested (TEST hi,hi),
// because the sign bit alone decides LT/GE against zero.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST   $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}
12839 
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Register-register form.  Per the format text: CMP on the low halves, then
// the high half of src1 is copied into the TEMP register and SBB'ed with the
// high half of src2, so neither source register is written.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV    $tmp,$src1.hi\n\t"
            "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
12853 
// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test: the rule only
// applies when the Bool node's test is lt or ge, and it expands to jmpCon.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}
12864 
12865 //======
// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Compare-against-zero form: uses the same TEST hi,hi encoding as
// cmpL_zero_flags_LTGE but defines the unsigned flags class
// flagsReg_ulong_LTGE.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST   $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}
12877 
// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Register-register form.  Uses the same long_cmp_flags2 encoding as the
// signed variant (CMP low halves, then SBB of the high halves through the
// TEMP register, per the format text) but defines flagsReg_ulong_LTGE.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV    $tmp,$src1.hi\n\t"
            "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
12891 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test: the rule only
// applies when the Bool node's test is lt or ge, and it expands to jmpCon
// with the unsigned condition operand (cmpOpU).
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}
12902 
// Compare 2 longs and CMOVE longs.
// Register-register form for LT/GE long-compare flags.  Emits one CMOVcc per
// 32-bit half (low, then high).  Only selected when CMOV is supported and the
// Bool test is lt or ge.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
12914 
// Compare 2 longs and CMOVE a long loaded from memory (LT/GE flags).
// The LoadL is folded into the instruction: one CMOVcc per 32-bit half, the
// second using the high-half memory encoding (RegMem_Hi).  Only selected
// when CMOV is supported and the Bool test is lt or ge.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}
12925 
// Unsigned-compare counterpart of cmovLL_reg_LTGE: takes a cmpOpU condition
// and flagsReg_ulong_LTGE flags, with the same predicate, and simply expands
// to cmovLL_reg_LTGE.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}
12934 
// Unsigned-compare counterpart of cmovLL_mem_LTGE: takes a cmpOpU condition
// and flagsReg_ulong_LTGE flags, with the same predicate, and simply expands
// to cmovLL_mem_LTGE.
instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}
12943 
// Compare 2 longs and CMOVE ints.
// Register-register form for LT/GE long-compare flags: a single CMOVcc.
// Only selected when CMOV is supported and the Bool test is lt or ge.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
12954 
// Compare 2 longs and CMOVE an int loaded from memory (LT/GE flags).
// The LoadI is folded into a single CMOVcc with a memory source.  Only
// selected when CMOV is supported and the Bool test is lt or ge.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}
12964 
// Unsigned-compare counterpart of cmovII_reg_LTGE: takes a cmpOpU condition
// and flagsReg_ulong_LTGE flags, with the same predicate, and simply expands
// to cmovII_reg_LTGE.
instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}
12973 
// Unsigned-compare counterpart of cmovII_mem_LTGE: takes a cmpOpU condition
// and flagsReg_ulong_LTGE flags, with the same predicate, and simply expands
// to cmovII_mem_LTGE.
instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}
12982 
// Compare 2 longs and CMOVE ptrs.
// Register-register form for LT/GE long-compare flags: a single CMOVcc.
// Only selected when CMOV is supported and the Bool test is lt or ge.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
12993 
// Compare 2 unsigned longs and CMOVE ptrs.
// Takes a cmpOpU condition and flagsReg_ulong_LTGE flags, with the same
// predicate as the signed rule, and simply expands to cmovPP_reg_LTGE.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}
13003 
// Compare 2 longs and CMOVE doubles
// regDPR operand variant, selected when UseSSE<=1 and the Bool test is lt or
// ge.  Expands to fcmovDPR_regS.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13013 
// Compare 2 longs and CMOVE doubles
// regD operand variant, selected when UseSSE>=2 and the Bool test is lt or
// ge.  Expands to fcmovD_regS.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13023 
// Compare 2 longs and CMOVE floats.
// regFPR operand variant, selected when UseSSE==0 and the Bool test is lt or
// ge.  Expands to fcmovFPR_regS.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13032 
// Compare 2 longs and CMOVE floats.
// regF operand variant, selected when UseSSE>=1 and the Bool test is lt or
// ge.  Expands to fcmovF_regS.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13041 
13042 //======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Compare-against-zero form.  Per the format text, the low half is copied to
// the TEMP register and OR'ed with the high half, so the zero flag reflects
// whether the whole 64-bit value is zero; src itself is not written.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV    $tmp,$src.lo\n\t"
            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}
13053 
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Register-register form.  Per the format text, the low halves are compared
// first; the high halves are compared only if the low halves were equal, so
// the final flags read "equal" only when both halves match.  No temp needed.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s  skip\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
     "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13065 
// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test: the rule only
// applies when the Bool node's test is eq or ne, and it expands to jmpCon.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}
13076 
13077 //======
// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
// Compare-against-zero form.  Uses the same long_cmp_flags0 encoding as the
// signed variant (low half copied to the TEMP register, then OR'ed with the
// high half, per the format text) but defines flagsReg_ulong_EQNE.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV    $tmp,$src.lo\n\t"
            "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
13088 
// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
// Register-register form.  Uses the same long_cmp_flags1 encoding as the
// signed variant (compare low halves; compare high halves only if the low
// halves were equal, per the format text) but defines flagsReg_ulong_EQNE.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s  skip\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
     "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}
13100 
// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test: the rule only
// applies when the Bool node's test is eq or ne, and it expands to jmpCon
// with the unsigned condition operand (cmpOpU).
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}
13111 
// Compare 2 longs and CMOVE longs.
// Register-register form for EQ/NE long-compare flags.  Emits one CMOVcc per
// 32-bit half (low, then high).  Only selected when CMOV is supported and the
// Bool test is eq or ne.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13123 
// Compare 2 longs and CMOVE a long loaded from memory (EQ/NE flags).
// The LoadL is folded into the instruction: one CMOVcc per 32-bit half, the
// second using the high-half memory encoding (RegMem_Hi).  Only selected
// when CMOV is supported and the Bool test is eq or ne.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}
13134 
// Compare 2 longs and CMOVE ints.
// Register-register form for EQ/NE long-compare flags: a single CMOVcc.
// Only selected when CMOV is supported and the Bool test is eq or ne.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13145 
// Compare 2 longs and CMOVE an int loaded from memory (EQ/NE flags).
// The LoadI is folded into a single CMOVcc with a memory source.  Only
// selected when CMOV is supported and the Bool test is eq or ne.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark );
  ins_pipe( pipe_cmov_mem );
%}
13155 
// Unsigned-compare counterpart of cmovII_reg_EQNE: takes a cmpOpU condition
// and flagsReg_ulong_EQNE flags, with the same predicate, and simply expands
// to cmovII_reg_EQNE.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}
13164 
// Unsigned-compare counterpart of cmovII_mem_EQNE: takes a cmpOpU condition
// and flagsReg_ulong_EQNE flags, with the same predicate, and simply expands
// to cmovII_mem_EQNE.
instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}
13173 
// Compare 2 longs and CMOVE ptrs.
// Register-register form for EQ/NE long-compare flags: a single CMOVcc.
// Only selected when CMOV is supported and the Bool test is eq or ne.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13184 
// Compare 2 unsigned longs and CMOVE ptrs.
// Takes a cmpOpU condition and flagsReg_ulong_EQNE flags, with the same
// predicate as the signed rule, and simply expands to cmovPP_reg_EQNE.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}
13194 
// Compare 2 longs and CMOVE doubles
// regDPR operand variant, selected when UseSSE<=1 and the Bool test is eq or
// ne.  Expands to fcmovDPR_regS.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13204 
// Compare 2 longs and CMOVE doubles
// regD operand variant, selected when UseSSE>=2 and the Bool test is eq or
// ne.  Expands to fcmovD_regS.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13214 
// Compare 2 longs and CMOVE floats.
// regFPR operand variant, selected when UseSSE==0 and the Bool test is eq or
// ne.  Expands to fcmovFPR_regS.
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13223 
// Compare 2 longs and CMOVE floats.
// regF operand variant, selected when UseSSE>=1 and the Bool test is eq or
// ne.  Expands to fcmovF_regS.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13232 
13233 //======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
// Per the format text, the TEMP register is zeroed and src is subtracted
// from it (CMP on the low half, SBB on the high half), i.e. the operands are
// swapped relative to (src cmp 0).  The consumer must therefore use a
// commuted test.
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}
13246 
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
// Note that the encoding is invoked as long_cmp_flags2(src2, src1, tmp): the
// swap is done by argument order, not by a separate encoding class.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13260 
// Long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test: the rule only
// applies when the Bool node's test is gt or le.  The condition operand is
// cmpOp_commute because the LEGT flag-producing rules compare with swapped
// operands.
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}
13272 
13273 //======
// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
// Uses the same long_cmp_flags3 encoding as the signed variant (TEMP register
// zeroed, then src subtracted from it, per the format text) but defines
// flagsReg_ulong_LEGT.  The consumer must use a commuted test.
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
13286 
// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
// Note that the encoding is invoked as long_cmp_flags2(src2, src1, tmp): the
// swap is done by argument order, not by a separate encoding class.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
13300 
// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test: the rule only
// applies when the Bool node's test is gt or le.  The condition operand is
// cmpOpU_commute because the LEGT flag-producing rules compare with swapped
// operands.
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}
13312 
// Compare 2 longs and CMOVE longs.
// Register-register form for LE/GT long-compare flags; the condition operand
// is cmpOp_commute.  Emits one CMOVcc per 32-bit half (low, then high).
// Only selected when CMOV is supported and the Bool test is le or gt.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13324 
// Compare 2 longs and CMOVE a long loaded from memory (LE/GT flags); the
// condition operand is cmpOp_commute.  The LoadL is folded into the
// instruction: one CMOVcc per 32-bit half, the second using the high-half
// memory encoding (RegMem_Hi).  Only selected when CMOV is supported and the
// Bool test is le or gt.
// NOTE(review): the format text for the high half carries a "+4" suffix that
// the cmovLL_mem_LTGE and cmovLL_mem_EQNE rules do not print; this only
// affects the printed assembly, not the encoding.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark );
  ins_pipe( pipe_cmov_reg_long );
%}
13335 
// Unsigned-compare variant of the register/register long CMOVE: same match
// rule and predicate, but typed on cmpOpU_commute / flagsReg_ulong_LEGT.
// It emits no encoding of its own and simply expands to cmovLL_reg_LEGT.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}
13344 
// Unsigned-compare variant of the memory-source long CMOVE: same match rule
// and predicate, but typed on cmpOpU_commute / flagsReg_ulong_LEGT.
// It emits no encoding of its own and simply expands to cmovLL_mem_LEGT.
instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}
13353 
// Compare 2 longs and CMOVE ints.
// Register/register form: a single CMOVcc driven by the long-compare flags.
// Accepted only for le/gt tests and only when the CPU supports CMOV.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  format %{ "CMOV$cmp $dst,$src" %}
  ins_cost(200);
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
13364 
// Long compare, CMOVE of an int whose source is loaded from memory (LoadI).
// Accepted only for le/gt tests and only when the CPU supports CMOV.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  format %{ "CMOV$cmp $dst,$src" %}
  ins_cost(250);
  opcode(0x0F, 0x40);
  // SetInstMark/ClearInstMark bracket the memory-operand CMOV.
  ins_encode(SetInstMark, enc_cmov(cmp), RegMem(dst, src), ClearInstMark);
  ins_pipe(pipe_cmov_mem);
%}
13374 
// Unsigned-long-compare variant of the register/register int CMOVE.
// Typed on cmpOpU_commute / flagsReg_ulong_LEGT; expands to cmovII_reg_LEGT.
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}
13383 
// Unsigned-long-compare variant of the memory-source int CMOVE.
// Typed on cmpOpU_commute / flagsReg_ulong_LEGT; expands to cmovII_mem_LEGT.
instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}
13392 
// Compare 2 longs and CMOVE ptrs.
// Register/register form: a single CMOVcc driven by the long-compare flags.
// Accepted only for le/gt tests and only when the CPU supports CMOV.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  format %{ "CMOV$cmp $dst,$src" %}
  ins_cost(200);
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
13403 
// Compare 2 unsigned longs and CMOVE ptrs.
// Typed on cmpOpU_commute / flagsReg_ulong_LEGT; expands to cmovPP_reg_LEGT.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp, flags, dst, src);
  %}
%}
13413 
// Compare 2 longs and CMOVE doubles (regDPR operands).
// Selected when UseSSE <= 1 and the test is le/gt; expands to fcmovDPR_regS.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE <= 1 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp, flags, dst, src);
  %}
%}
13423 
// Compare 2 longs and CMOVE doubles (regD operands).
// Selected when UseSSE >= 2 and the test is le/gt; expands to fcmovD_regS.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE >= 2 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp, flags, dst, src);
  %}
%}
13433 
// Compare 2 longs and CMOVE floats (regFPR operands).
// Selected when UseSSE == 0 and the test is le/gt; expands to fcmovFPR_regS.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE == 0 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp, flags, dst, src);
  %}
%}
13442 
13443 
// Compare 2 longs and CMOVE floats (regF operands).
// Selected when UseSSE >= 1 and the test is le/gt; expands to fcmovF_regS.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  predicate(UseSSE >= 1 &&
            (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp, flags, dst, src);
  %}
%}
13452 
13453 
13454 // ============================================================================
13455 // Procedure Call/Return Instructions
// Call Java Static Instruction
// Matches CallStaticJava and emits a direct CALL (opcode E8), 4-aligned.
// Note: ret_addr_offset() and compute_padding() depend on what is emitted
//       here; if this code changes, those functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  format %{ "CALL,static " %}
  ins_cost(300);
  ins_alignment(4);
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_Static_Call(meth), call_epilog, post_call_FPU);
  ins_pipe(pipe_slow);
%}
13473 
// Call Java Dynamic Instruction
// Matches CallDynamicJava.  Per the format, EAX is loaded with (oop)-1
// before the CALL (opcode E8); the instruction is 4-aligned.
// Note: ret_addr_offset() and compute_padding() depend on what is emitted
//       here; if this code changes, those functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  format %{ "MOV    EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  ins_cost(300);
  ins_alignment(4);
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_Dynamic_Call(meth), call_epilog, post_call_FPU);
  ins_pipe(pipe_slow);
%}
13492 
// Call Runtime Instruction
// Matches CallRuntime and emits a CALL (opcode E8) via Java_To_Runtime.
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
  effect(USE meth);

  format %{ "CALL,runtime " %}
  ins_cost(300);
  opcode(0xE8); /* E8 cd */
  // FFREEs clear the entries in the float stack before the call is made.
  ins_encode(pre_call_resets, FFree_Float_Stack_All, Java_To_Runtime(meth), post_call_FPU);
  ins_pipe(pipe_slow);
%}
13508 
// Call runtime without safepoint
// Matches CallLeaf.  Same encoding sequence as the plain runtime call, with
// an extra Verify_FPU_For_Leaf step after the call itself.
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  format %{ "CALL_LEAF,runtime " %}
  ins_cost(300);
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets,
             FFree_Float_Stack_All,
             Java_To_Runtime(meth),
             Verify_FPU_For_Leaf,
             post_call_FPU);
  ins_pipe(pipe_slow);
%}
13523 
// Leaf runtime call, no-FP flavour: matches CallLeafNoFP.
// The encoding has only pre_call_resets and the call itself; none of the
// float-stack steps of the other runtime call rules are included.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  format %{ "CALL_LEAF_NOFP,runtime " %}
  ins_cost(300);
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets,
             Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}
13534 
13535 
// Return Instruction
// Pops the return address and jumps to it: a single opcode byte (0xC3).
instruct Ret() %{
  match(Return);
  opcode(0xC3);
  format %{ "RET" %}
  ins_encode(OpcP);
  ins_pipe(pipe_jmp);
%}
13545 
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// The jump target will eventually return to the caller, so the return
// address is left in place here (TailJump below is the one that removes it).
// 'jump_target' must not be ebp: a MachEpilogNode has already been emitted
// just above the TailCall and has reset ebp to the caller state.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  format %{ "JMP    $jump_target \t# EBX holds method" %}
  ins_cost(300);
  opcode(0xFF, 0x4);  /* Opcode FF /4 */
  ins_encode(OpcP, RegOpc(jump_target));
  ins_pipe(pipe_jmp);
%}
13560 
13561 
// Tail Jump; remove the return address; jump to target.
// The return address is popped into EDX as a dummy before the indirect JMP
// (TailCall above, by contrast, leaves the return address around).
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match(TailJump jump_target ex_oop);
  format %{ "POP    EDX\t# pop return address into dummy\n\t"
            "JMP    $jump_target " %}
  ins_cost(300);
  opcode(0xFF, 0x4);  /* Opcode FF /4 */
  ins_encode(enc_pop_rdx, OpcP, RegOpc(jump_target));
  ins_pipe(pipe_jmp);
%}
13574 
// Forward exception.
// Jumps to the entry returned by StubRoutines::forward_exception_entry().
instruct ForwardExceptionjmp()
%{
  match(ForwardException);

  format %{ "JMP    forward_exception_stub" %}
  ins_encode %{
    address stub_entry = StubRoutines::forward_exception_entry();
    __ jump(RuntimeAddress(stub_entry), noreg);
  %}
  ins_pipe(pipe_jmp);
%}
13586 
// Create exception oop: created by stack-crawling runtime code.
// By the time this handler is reached the exception has already been set up
// and is available in EAX, so the rule is a pure placeholder: its encoding
// is empty and its size is declared as zero.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  format %{ "# exception oop is in EAX; no code emitted" %}
  size(0);
  ins_encode();
  ins_pipe(empty);
%}
13600 
13601 
// Rethrow exception:
// The exception oop will come in the first argument position.
// Control is transferred with a JUMP (not a call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  format %{ "JMP    rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe(pipe_jmp);
%}
13614 
13615 // inlined locking and unlocking
13616 
// Inlined monitor enter for every locking mode except LM_LIGHTWEIGHT.
// Produces its result in the flags register.  box is used and killed;
// tmp, scr and thread are temporaries (thread is filled by get_thread()).
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  match(Set cr (FastLock object box));
  predicate(LockingMode != LM_LIGHTWEIGHT);
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_cost(300);
  ins_encode %{
    Register thread_reg = $thread$$Register;
    // Load the current thread first; fast_lock() takes it as an argument.
    __ get_thread(thread_reg);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, thread_reg, nullptr);
  %}
  ins_pipe(pipe_slow);
%}
13630 
// Inlined monitor exit for every locking mode except LM_LIGHTWEIGHT.
// Produces its result in the flags register.  box is used and killed;
// tmp is a temporary.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  predicate(LockingMode != LM_LIGHTWEIGHT);
  effect(TEMP tmp, USE_KILL box);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_cost(300);
  ins_encode %{
    Register obj_reg = $object$$Register;
    Register box_reg = $box$$Register;
    Register tmp_reg = $tmp$$Register;
    __ fast_unlock(obj_reg, box_reg, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}
13642 
// Inlined monitor enter when LockingMode == LM_LIGHTWEIGHT.
// Produces its result in the flags register.  box is used and killed;
// eax_reg, tmp and thread are temporaries (thread is filled by get_thread()).
instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
  match(Set cr (FastLock object box));
  predicate(LockingMode == LM_LIGHTWEIGHT);
  effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
  format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
  ins_cost(300);
  ins_encode %{
    Register thread_reg = $thread$$Register;
    // Load the current thread first; the lock helper takes it as an argument.
    __ get_thread(thread_reg);
    __ fast_lock_lightweight($object$$Register, $box$$Register,
                             $eax_reg$$Register, $tmp$$Register, thread_reg);
  %}
  ins_pipe(pipe_slow);
%}
13655 
// Inlined monitor exit when LockingMode == LM_LIGHTWEIGHT.
// Produces its result in the flags register.  eax_reg is used and killed;
// tmp and thread are temporaries (thread is filled by get_thread()).
instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
  match(Set cr (FastUnlock object eax_reg));
  predicate(LockingMode == LM_LIGHTWEIGHT);
  effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
  format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
  ins_cost(300);
  ins_encode %{
    Register thread_reg = $thread$$Register;
    // Load the current thread first; the unlock helper takes it as an argument.
    __ get_thread(thread_reg);
    __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register,
                               $tmp$$Register, thread_reg);
  %}
  ins_pipe(pipe_slow);
%}
13668 
// MaskAll from a long source into a k-register, for vector lengths up to
// and including 32.  NOTE: despite the _LT32 suffix in the rule name, the
// predicate is "<= 32" (the format string says LE32).
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  match(Set dst (MaskAll src));
  predicate(Matcher::vector_length(n) <= 32);
  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, lane_count);
  %}
  ins_pipe(pipe_slow);
%}
13679 
// MaskAll from a long source into a k-register, for vector lengths above 32.
// Needs an extra k-register temporary and uses vector_maskall_operation32().
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  match(Set dst (MaskAll src));
  predicate(Matcher::vector_length(n) > 32);
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register,
                                  $ktmp$$KRegister, lane_count);
  %}
  ins_pipe(pipe_slow);
%}
13691 
// MaskAll from an int source into a k-register, for vector lengths above 32.
// Needs an extra k-register temporary and uses vector_maskall_operation32().
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  match(Set dst (MaskAll src));
  predicate(Matcher::vector_length(n) > 32);
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    const int lane_count = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register,
                                  $ktmp$$KRegister, lane_count);
  %}
  ins_pipe(pipe_slow);
%}
13703 
13704 // ============================================================================
13705 // Safepoint Instruction
// Safepoint poll through a pointer held in a register.
// Emits "TEST EAX,[poll]" preceded by a poll_type relocation.  TEST only
// sets flags, which is why the flags register is declared KILLed.  The poll
// operand excludes EBP; per the size note below, an EBP base would need a
// 3-byte encoding instead of the 2 bytes declared here.
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    // Record the poll relocation before the TEST itself is emitted.
    __ set_inst_mark();
    __ relocate(relocInfo::poll_type);
    __ clear_inst_mark();
    // Remember where the TEST starts so its first byte can be checked.
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
13725 
13726 
13727 // ============================================================================
// Load the current thread pointer (ThreadLocal) into dst via get_thread().
// The flags register is declared KILLed.
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type for this rule.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV    $dst, Thread::current()" %}
  ins_encode %{
    __ get_thread($dst$$Register);
  %}
  ins_pipe(ialu_reg_fat);
%}
13742 
13743 
13744 
13745 //----------PEEPHOLE RULES-----------------------------------------------------
13746 // These must follow all instruction definitions as they use the names
13747 // defined in the instructions definitions.
13748 //
13749 // peepmatch ( root_instr_name [preceding_instruction]* );
13750 //
// peepconstraint ( instruction_number.operand_name relational_op
//                  instruction_number.operand_name [, ...] );
13754 // // instruction numbers are zero-based using left to right order in peepmatch
13755 //
13756 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13757 // // provide an instruction_number.operand_name for each operand that appears
13758 // // in the replacement instruction's match rule
13759 //
13760 // ---------VM FLAGS---------------------------------------------------------
13761 //
13762 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13763 //
13764 // Each peephole rule is given an identifying number starting with zero and
13765 // increasing by one in the order seen by the parser.  An individual peephole
13766 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13767 // on the command-line.
13768 //
13769 // ---------CURRENT LIMITATIONS----------------------------------------------
13770 //
13771 // Only match adjacent instructions in same basic block
13772 // Only equality constraints
13773 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13774 // Only one replacement instruction
13775 //
13776 // ---------EXAMPLE----------------------------------------------------------
13777 //
13778 // // pertinent parts of existing instructions in architecture description
13779 // instruct movI(rRegI dst, rRegI src) %{
13780 //   match(Set dst (CopyI src));
13781 // %}
13782 //
13783 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13784 //   match(Set dst (AddI dst src));
13785 //   effect(KILL cr);
13786 // %}
13787 //
13788 // // Change (inc mov) to lea
13789 // peephole %{
13790 //   // increment preceded by register-register move
13791 //   peepmatch ( incI_eReg movI );
13792 //   // require that the destination register of the increment
13793 //   // match the destination register of the move
13794 //   peepconstraint ( 0.dst == 1.dst );
13795 //   // construct a replacement instruction that sets
13796 //   // the destination to ( move's source register + one )
13797 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13798 // %}
13799 //
13800 // Implementation no longer uses movX instructions since
13801 // machine-independent system no longer uses CopyX nodes.
13802 //
13803 // peephole %{
13804 //   peepmatch ( incI_eReg movI );
13805 //   peepconstraint ( 0.dst == 1.dst );
13806 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13807 // %}
13808 //
13809 // peephole %{
13810 //   peepmatch ( decI_eReg movI );
13811 //   peepconstraint ( 0.dst == 1.dst );
13812 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13813 // %}
13814 //
13815 // peephole %{
13816 //   peepmatch ( addI_eReg_imm movI );
13817 //   peepconstraint ( 0.dst == 1.dst );
13818 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13819 // %}
13820 //
13821 // peephole %{
13822 //   peepmatch ( addP_eReg_imm movP );
13823 //   peepconstraint ( 0.dst == 1.dst );
13824 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13825 // %}
13826 
13827 // // Change load of spilled value to only a spill
13828 // instruct storeI(memory mem, rRegI src) %{
13829 //   match(Set mem (StoreI mem src));
13830 // %}
13831 //
13832 // instruct loadI(rRegI dst, memory mem) %{
13833 //   match(Set dst (LoadI mem));
13834 // %}
13835 //
// Peephole rule: a loadI (instruction 0, the root) immediately preceded by a
// storeI (instruction 1).  When the load's destination register is the
// register that was just stored and both instructions use the same memory
// operand, the pair is replaced by a single storeI built from the store's
// own operands.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}
13841 
13842 //----------SMARTSPILL RULES---------------------------------------------------
13843 // These must follow all instruction definitions as they use the names
13844 // defined in the instructions definitions.