1 //
    2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
   61 
   62 // General Registers
   63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
   64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
   77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   81 // Ok so here's the trick FPR1 is really st(0) except in the midst
   82 // of emission of assembly for a machnode. During the emission the fpu stack
   83 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
   84 // the stack will not have this element so FPR1 == st(0) from the
   85 // oopMap viewpoint. This same weirdness with numbering causes
   86 // instruction encoding to have to play games with the register
   87 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
   88 // where it does flt->flt moves to see an example
   89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
  143 // Dynamic register class that selects at runtime between register classes
  144 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
  145 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
  208 // Class of integer register pairs (excluding EBP and EDI);
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Class of integer register pairs that aligns with calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 
  217 // Not AX or DX, used in divides
  218 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  219 // Not AX or DX (and neither EBP), used in divides
  220 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  221 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  222 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  223 
  224 // Floating point registers.  Notice FPR0 is not a choice.
  225 // FPR0 is not ever allocated; we use clever encodings to fake
  226 // a 2-address instructions out of Intels FP stack.
  227 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  228 
  229 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  230                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  231                       FPR7L,FPR7H );
  232 
  233 reg_class fp_flt_reg0( FPR1L );
  234 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  235 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  236 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  237                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  238 
  239 %}
  240 
  241 
  242 //----------SOURCE BLOCK-------------------------------------------------------
  243 // This is a block of C++ code which provides values, functions, and
  244 // definitions necessary in the rest of the architecture description
  245 source_hpp %{
  246 // Must be visible to the DFA in dfa_x86_32.cpp
  247 extern bool is_operand_hi32_zero(Node* n);
  248 %}
  249 
  250 source %{
  251 #define   RELOC_IMM32    Assembler::imm_operand
  252 #define   RELOC_DISP32   Assembler::disp32_operand
  253 
  254 #define __ _masm.
  255 
  256 // How to find the high register of a Long pair, given the low register
  257 #define   HIGH_FROM_LOW(x) ((x)+2)
  258 
  259 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  260 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  261 // fast versions of NegF/NegD and AbsF/AbsD.
  262 
  263 void reg_mask_init() {
  264   if (Matcher::has_predicated_vectors()) {
  265     // Post-loop multi-versioning expects mask to be present in K1 register, till the time
  266     // its fixed, RA should not be allocting K1 register, this shall prevent any accidental
  267     // curruption of value held in K1 register.
  268     if (PostLoopMultiversioning) {
  269       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
  270       const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
  271     }
  272   }
  273 }
  274 
  275 // Note: 'double' and 'long long' have 32-bits alignment on x86.
  276 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  277   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  278   // of 128-bits operands for SSE instructions.
  279   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  280   // Store the value to a 128-bits operand.
  281   operand[0] = lo;
  282   operand[1] = hi;
  283   return operand;
  284 }
  285 
  286 // Buffer for 128-bits masks used by SSE instructions.
  287 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  288 
  289 // Static initialization during VM startup.
  290 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  291 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  292 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  293 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
  294 
  295 // Offset hacking within calls.
  296 static int pre_call_resets_size() {
  297   int size = 0;
  298   Compile* C = Compile::current();
  299   if (C->in_24_bit_fp_mode()) {
  300     size += 6; // fldcw
  301   }
  302   if (VM_Version::supports_vzeroupper()) {
  303     size += 3; // vzeroupper
  304   }
  305   return size;
  306 }
  307 
  308 // !!!!! Special hack to get all type of calls to specify the byte offset
  309 //       from the start of the call to the point where the return address
  310 //       will point.
  311 int MachCallStaticJavaNode::ret_addr_offset() {
  312   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  313 }
  314 
  315 int MachCallDynamicJavaNode::ret_addr_offset() {
  316   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  317 }
  318 
  319 static int sizeof_FFree_Float_Stack_All = -1;
  320 
  321 int MachCallRuntimeNode::ret_addr_offset() {
  322   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  323   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  324 }
  325 
  326 int MachCallNativeNode::ret_addr_offset() {
  327   ShouldNotCallThis();
  328   return -1;
  329 }
  330 
  331 //
  332 // Compute padding required for nodes which need alignment
  333 //
  334 
  335 // The address of the call instruction needs to be 4-byte aligned to
  336 // ensure that it does not span a cache line so that it can be patched.
  337 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  338   current_offset += pre_call_resets_size();  // skip fldcw, if any
  339   current_offset += 1;      // skip call opcode byte
  340   return align_up(current_offset, alignment_required()) - current_offset;
  341 }
  342 
  343 // The address of the call instruction needs to be 4-byte aligned to
  344 // ensure that it does not span a cache line so that it can be patched.
  345 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  346   current_offset += pre_call_resets_size();  // skip fldcw, if any
  347   current_offset += 5;      // skip MOV instruction
  348   current_offset += 1;      // skip call opcode byte
  349   return align_up(current_offset, alignment_required()) - current_offset;
  350 }
  351 
  352 // EMIT_RM()
  353 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  354   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  355   cbuf.insts()->emit_int8(c);
  356 }
  357 
  358 // EMIT_CC()
  359 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  360   unsigned char c = (unsigned char)( f1 | f2 );
  361   cbuf.insts()->emit_int8(c);
  362 }
  363 
  364 // EMIT_OPCODE()
  365 void emit_opcode(CodeBuffer &cbuf, int code) {
  366   cbuf.insts()->emit_int8((unsigned char) code);
  367 }
  368 
  369 // EMIT_OPCODE() w/ relocation information
  370 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  371   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  372   emit_opcode(cbuf, code);
  373 }
  374 
  375 // EMIT_D8()
  376 void emit_d8(CodeBuffer &cbuf, int d8) {
  377   cbuf.insts()->emit_int8((unsigned char) d8);
  378 }
  379 
  380 // EMIT_D16()
  381 void emit_d16(CodeBuffer &cbuf, int d16) {
  382   cbuf.insts()->emit_int16(d16);
  383 }
  384 
  385 // EMIT_D32()
  386 void emit_d32(CodeBuffer &cbuf, int d32) {
  387   cbuf.insts()->emit_int32(d32);
  388 }
  389 
  390 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  391 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  392         int format) {
  393   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  394   cbuf.insts()->emit_int32(d32);
  395 }
  396 
  397 // emit 32 bit value and construct relocation entry from RelocationHolder
  398 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  399         int format) {
  400 #ifdef ASSERT
  401   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  402     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  403   }
  404 #endif
  405   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  406   cbuf.insts()->emit_int32(d32);
  407 }
  408 
  409 // Access stack slot for load or store
  410 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  411   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  412   if( -128 <= disp && disp <= 127 ) {
  413     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  414     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  415     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  416   } else {
  417     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  418     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  419     emit_d32(cbuf, disp);     // Displacement  // R/M byte
  420   }
  421 }
  422 
  423    // rRegI ereg, memory mem) %{    // emit_reg_mem
  424 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  425   // There is no index & no scale, use form without SIB byte
  426   if ((index == 0x4) &&
  427       (scale == 0) && (base != ESP_enc)) {
  428     // If no displacement, mode is 0x0; unless base is [EBP]
  429     if ( (displace == 0) && (base != EBP_enc) ) {
  430       emit_rm(cbuf, 0x0, reg_encoding, base);
  431     }
  432     else {                    // If 8-bit displacement, mode 0x1
  433       if ((displace >= -128) && (displace <= 127)
  434           && (disp_reloc == relocInfo::none) ) {
  435         emit_rm(cbuf, 0x1, reg_encoding, base);
  436         emit_d8(cbuf, displace);
  437       }
  438       else {                  // If 32-bit displacement
  439         if (base == -1) { // Special flag for absolute address
  440           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  441           // (manual lies; no SIB needed here)
  442           if ( disp_reloc != relocInfo::none ) {
  443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  444           } else {
  445             emit_d32      (cbuf, displace);
  446           }
  447         }
  448         else {                // Normal base + offset
  449           emit_rm(cbuf, 0x2, reg_encoding, base);
  450           if ( disp_reloc != relocInfo::none ) {
  451             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  452           } else {
  453             emit_d32      (cbuf, displace);
  454           }
  455         }
  456       }
  457     }
  458   }
  459   else {                      // Else, encode with the SIB byte
  460     // If no displacement, mode is 0x0; unless base is [EBP]
  461     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  462       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  463       emit_rm(cbuf, scale, index, base);
  464     }
  465     else {                    // If 8-bit displacement, mode 0x1
  466       if ((displace >= -128) && (displace <= 127)
  467           && (disp_reloc == relocInfo::none) ) {
  468         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  469         emit_rm(cbuf, scale, index, base);
  470         emit_d8(cbuf, displace);
  471       }
  472       else {                  // If 32-bit displacement
  473         if (base == 0x04 ) {
  474           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  475           emit_rm(cbuf, scale, index, 0x04);
  476         } else {
  477           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  478           emit_rm(cbuf, scale, index, base);
  479         }
  480         if ( disp_reloc != relocInfo::none ) {
  481           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  482         } else {
  483           emit_d32      (cbuf, displace);
  484         }
  485       }
  486     }
  487   }
  488 }
  489 
  490 
  491 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  492   if( dst_encoding == src_encoding ) {
  493     // reg-reg copy, use an empty encoding
  494   } else {
  495     emit_opcode( cbuf, 0x8B );
  496     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  497   }
  498 }
  499 
  500 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  501   Label exit;
  502   __ jccb(Assembler::noParity, exit);
  503   __ pushf();
  504   //
  505   // comiss/ucomiss instructions set ZF,PF,CF flags and
  506   // zero OF,AF,SF for NaN values.
  507   // Fixup flags by zeroing ZF,PF so that compare of NaN
  508   // values returns 'less than' result (CF is set).
  509   // Leave the rest of flags unchanged.
  510   //
  511   //    7 6 5 4 3 2 1 0
  512   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  513   //    0 0 1 0 1 0 1 1   (0x2B)
  514   //
  515   __ andl(Address(rsp, 0), 0xffffff2b);
  516   __ popf();
  517   __ bind(exit);
  518 }
  519 
  520 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  521   Label done;
  522   __ movl(dst, -1);
  523   __ jcc(Assembler::parity, done);
  524   __ jcc(Assembler::below, done);
  525   __ setb(Assembler::notEqual, dst);
  526   __ movzbl(dst, dst);
  527   __ bind(done);
  528 }
  529 
  530 
  531 //=============================================================================
  532 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  533 
  534 int ConstantTable::calculate_table_base_offset() const {
  535   return 0;  // absolute addressing, no offset
  536 }
  537 
  538 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  539 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  540   ShouldNotReachHere();
  541 }
  542 
  543 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  544   // Empty encoding
  545 }
  546 
  547 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  548   return 0;
  549 }
  550 
  551 #ifndef PRODUCT
  552 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  553   st->print("# MachConstantBaseNode (empty encoding)");
  554 }
  555 #endif
  556 
  557 
  558 //=============================================================================
  559 #ifndef PRODUCT
  560 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  561   Compile* C = ra_->C;
  562 
  563   int framesize = C->output()->frame_size_in_bytes();
  564   int bangsize = C->output()->bang_size_in_bytes();
  565   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  566   // Remove wordSize for return addr which is already pushed.
  567   framesize -= wordSize;
  568 
  569   if (C->output()->need_stack_bang(bangsize)) {
  570     framesize -= wordSize;
  571     st->print("# stack bang (%d bytes)", bangsize);
  572     st->print("\n\t");
  573     st->print("PUSH   EBP\t# Save EBP");
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577     }
  578     if (framesize) {
  579       st->print("\n\t");
  580       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  581     }
  582   } else {
  583     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  584     st->print("\n\t");
  585     framesize -= wordSize;
  586     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  587     if (PreserveFramePointer) {
  588       st->print("\n\t");
  589       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  590       if (framesize > 0) {
  591         st->print("\n\t");
  592         st->print("ADD    EBP, #%d", framesize);
  593       }
  594     }
  595   }
  596 
  597   if (VerifyStackAtCalls) {
  598     st->print("\n\t");
  599     framesize -= wordSize;
  600     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  601   }
  602 
  603   if( C->in_24_bit_fp_mode() ) {
  604     st->print("\n\t");
  605     st->print("FLDCW  \t# load 24 bit fpu control word");
  606   }
  607   if (UseSSE >= 2 && VerifyFPU) {
  608     st->print("\n\t");
  609     st->print("# verify FPU stack (must be clean on entry)");
  610   }
  611 
  612 #ifdef ASSERT
  613   if (VerifyStackAtCalls) {
  614     st->print("\n\t");
  615     st->print("# stack alignment check");
  616   }
  617 #endif
  618   st->cr();
  619 }
  620 #endif
  621 
  622 
  623 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  624   Compile* C = ra_->C;
  625   MacroAssembler _masm(&cbuf);
  626 
  627   int framesize = C->output()->frame_size_in_bytes();
  628   int bangsize = C->output()->bang_size_in_bytes();
  629 
  630   int max_monitors = C->method() != NULL ? C->max_monitors() : 0;
  631   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL, max_monitors);
  632 
  633   C->output()->set_frame_complete(cbuf.insts_size());
  634 
  635   if (C->has_mach_constant_base_node()) {
  636     // NOTE: We set the table base offset here because users might be
  637     // emitted before MachConstantBaseNode.
  638     ConstantTable& constant_table = C->output()->constant_table();
  639     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  640   }
  641 }
  642 
  643 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  644   return MachNode::size(ra_); // too many variables; just compute it the hard way
  645 }
  646 
  647 int MachPrologNode::reloc() const {
  648   return 0; // a large enough number
  649 }
  650 
  651 //=============================================================================
  652 #ifndef PRODUCT
  653 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  654   Compile *C = ra_->C;
  655   int framesize = C->output()->frame_size_in_bytes();
  656   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  657   // Remove two words for return addr and rbp,
  658   framesize -= 2*wordSize;
  659 
  660   if (C->max_vector_size() > 16) {
  661     st->print("VZEROUPPER");
  662     st->cr(); st->print("\t");
  663   }
  664   if (C->in_24_bit_fp_mode()) {
  665     st->print("FLDCW  standard control word");
  666     st->cr(); st->print("\t");
  667   }
  668   if (framesize) {
  669     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  670     st->cr(); st->print("\t");
  671   }
  672   st->print_cr("POPL   EBP"); st->print("\t");
  673   if (do_polling() && C->is_method_compilation()) {
  674     st->print("CMPL     rsp, poll_offset[thread]  \n\t"
  675               "JA       #safepoint_stub\t"
  676               "# Safepoint: poll for GC");
  677   }
  678 }
  679 #endif
  680 
  681 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  682   Compile *C = ra_->C;
  683   MacroAssembler _masm(&cbuf);
  684 
  685   if (C->max_vector_size() > 16) {
  686     // Clear upper bits of YMM registers when current compiled code uses
  687     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  688     _masm.vzeroupper();
  689   }
  690   // If method set FPU control word, restore to standard control word
  691   if (C->in_24_bit_fp_mode()) {
  692     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  693   }
  694 
  695   int framesize = C->output()->frame_size_in_bytes();
  696   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  697   // Remove two words for return addr and rbp,
  698   framesize -= 2*wordSize;
  699 
  700   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  701 
  702   if (framesize >= 128) {
  703     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  704     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  705     emit_d32(cbuf, framesize);
  706   } else if (framesize) {
  707     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  708     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  709     emit_d8(cbuf, framesize);
  710   }
  711 
  712   emit_opcode(cbuf, 0x58 | EBP_enc);
  713 
  714   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  715     __ reserved_stack_check();
  716   }
  717 
  718   if (do_polling() && C->is_method_compilation()) {
  719     Register thread = as_Register(EBX_enc);
  720     MacroAssembler masm(&cbuf);
  721     __ get_thread(thread);
  722     Label dummy_label;
  723     Label* code_stub = &dummy_label;
  724     if (!C->output()->in_scratch_emit_size()) {
  725       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  726       C->output()->add_stub(stub);
  727       code_stub = &stub->entry();
  728     }
  729     __ relocate(relocInfo::poll_return_type);
  730     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  731   }
  732 }
  733 
  734 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  735   return MachNode::size(ra_); // too many variables; just compute it
  736                               // the hard way
  737 }
  738 
  739 int MachEpilogNode::reloc() const {
  740   return 0; // a large enough number
  741 }
  742 
  743 const Pipeline * MachEpilogNode::pipeline() const {
  744   return MachNode::pipeline_class();
  745 }
  746 
  747 //=============================================================================
  748 
  749 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  750 static enum RC rc_class( OptoReg::Name reg ) {
  751 
  752   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  753   if (OptoReg::is_stack(reg)) return rc_stack;
  754 
  755   VMReg r = OptoReg::as_VMReg(reg);
  756   if (r->is_Register()) return rc_int;
  757   if (r->is_FloatRegister()) {
  758     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  759     return rc_float;
  760   }
  761   if (r->is_KRegister()) return rc_kreg;
  762   assert(r->is_XMMRegister(), "must be");
  763   return rc_xmm;
  764 }
  765 
  766 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  767                         int opcode, const char *op_str, int size, outputStream* st ) {
  768   if( cbuf ) {
  769     emit_opcode  (*cbuf, opcode );
  770     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  771 #ifndef PRODUCT
  772   } else if( !do_size ) {
  773     if( size != 0 ) st->print("\n\t");
  774     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  775       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  776       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  777     } else { // FLD, FST, PUSH, POP
  778       st->print("%s [ESP + #%d]",op_str,offset);
  779     }
  780 #endif
  781   }
  782   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  783   return size+3+offset_size;
  784 }
  785 
  786 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  787 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  788                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  789   int in_size_in_bits = Assembler::EVEX_32bit;
  790   int evex_encoding = 0;
  791   if (reg_lo+1 == reg_hi) {
  792     in_size_in_bits = Assembler::EVEX_64bit;
  793     evex_encoding = Assembler::VEX_W;
  794   }
  795   if (cbuf) {
  796     MacroAssembler _masm(cbuf);
  797     // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
  798     //                          it maps more cases to single byte displacement
  799     _masm.set_managed();
  800     if (reg_lo+1 == reg_hi) { // double move?
  801       if (is_load) {
  802         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  803       } else {
  804         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  805       }
  806     } else {
  807       if (is_load) {
  808         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  809       } else {
  810         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  811       }
  812     }
  813 #ifndef PRODUCT
  814   } else if (!do_size) {
  815     if (size != 0) st->print("\n\t");
  816     if (reg_lo+1 == reg_hi) { // double move?
  817       if (is_load) st->print("%s %s,[ESP + #%d]",
  818                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  819                               Matcher::regName[reg_lo], offset);
  820       else         st->print("MOVSD  [ESP + #%d],%s",
  821                               offset, Matcher::regName[reg_lo]);
  822     } else {
  823       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  824                               Matcher::regName[reg_lo], offset);
  825       else         st->print("MOVSS  [ESP + #%d],%s",
  826                               offset, Matcher::regName[reg_lo]);
  827     }
  828 #endif
  829   }
  830   bool is_single_byte = false;
  831   if ((UseAVX > 2) && (offset != 0)) {
  832     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  833   }
  834   int offset_size = 0;
  835   if (UseAVX > 2 ) {
  836     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  837   } else {
  838     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  839   }
  840   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  841   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  842   return size+5+offset_size;
  843 }
  844 
  845 
  846 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  847                             int src_hi, int dst_hi, int size, outputStream* st ) {
  848   if (cbuf) {
  849     MacroAssembler _masm(cbuf);
  850     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  851     _masm.set_managed();
  852     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  853       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  854                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  855     } else {
  856       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  857                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  858     }
  859 #ifndef PRODUCT
  860   } else if (!do_size) {
  861     if (size != 0) st->print("\n\t");
  862     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
  863       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  864         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  865       } else {
  866         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  867       }
  868     } else {
  869       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  870         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  871       } else {
  872         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  873       }
  874     }
  875 #endif
  876   }
  877   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  878   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  879   int sz = (UseAVX > 2) ? 6 : 4;
  880   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  881       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  882   return size + sz;
  883 }
  884 
  885 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  886                             int src_hi, int dst_hi, int size, outputStream* st ) {
  887   // 32-bit
  888   if (cbuf) {
  889     MacroAssembler _masm(cbuf);
  890     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  891     _masm.set_managed();
  892     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  893              as_Register(Matcher::_regEncode[src_lo]));
  894 #ifndef PRODUCT
  895   } else if (!do_size) {
  896     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  897 #endif
  898   }
  899   return (UseAVX> 2) ? 6 : 4;
  900 }
  901 
  902 
  903 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  904                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  905   // 32-bit
  906   if (cbuf) {
  907     MacroAssembler _masm(cbuf);
  908     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
  909     _masm.set_managed();
  910     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  911              as_XMMRegister(Matcher::_regEncode[src_lo]));
  912 #ifndef PRODUCT
  913   } else if (!do_size) {
  914     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  915 #endif
  916   }
  917   return (UseAVX> 2) ? 6 : 4;
  918 }
  919 
  920 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  921   if( cbuf ) {
  922     emit_opcode(*cbuf, 0x8B );
  923     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  924 #ifndef PRODUCT
  925   } else if( !do_size ) {
  926     if( size != 0 ) st->print("\n\t");
  927     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  928 #endif
  929   }
  930   return size+2;
  931 }
  932 
  933 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  934                                  int offset, int size, outputStream* st ) {
  935   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  936     if( cbuf ) {
  937       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  938       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  939 #ifndef PRODUCT
  940     } else if( !do_size ) {
  941       if( size != 0 ) st->print("\n\t");
  942       st->print("FLD    %s",Matcher::regName[src_lo]);
  943 #endif
  944     }
  945     size += 2;
  946   }
  947 
  948   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  949   const char *op_str;
  950   int op;
  951   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  952     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  953     op = 0xDD;
  954   } else {                   // 32-bit store
  955     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  956     op = 0xD9;
  957     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  958   }
  959 
  960   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  961 }
  962 
  963 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
  964 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  965                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  966 
  967 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  968                             int stack_offset, int reg, uint ireg, outputStream* st);
  969 
  970 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  971                                      int dst_offset, uint ireg, outputStream* st) {
  972   if (cbuf) {
  973     MacroAssembler _masm(cbuf);
  974     switch (ireg) {
  975     case Op_VecS:
  976       __ pushl(Address(rsp, src_offset));
  977       __ popl (Address(rsp, dst_offset));
  978       break;
  979     case Op_VecD:
  980       __ pushl(Address(rsp, src_offset));
  981       __ popl (Address(rsp, dst_offset));
  982       __ pushl(Address(rsp, src_offset+4));
  983       __ popl (Address(rsp, dst_offset+4));
  984       break;
  985     case Op_VecX:
  986       __ movdqu(Address(rsp, -16), xmm0);
  987       __ movdqu(xmm0, Address(rsp, src_offset));
  988       __ movdqu(Address(rsp, dst_offset), xmm0);
  989       __ movdqu(xmm0, Address(rsp, -16));
  990       break;
  991     case Op_VecY:
  992       __ vmovdqu(Address(rsp, -32), xmm0);
  993       __ vmovdqu(xmm0, Address(rsp, src_offset));
  994       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  995       __ vmovdqu(xmm0, Address(rsp, -32));
  996       break;
  997     case Op_VecZ:
  998       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  999       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
 1000       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
 1001       __ evmovdquq(xmm0, Address(rsp, -64), 2);
 1002       break;
 1003     default:
 1004       ShouldNotReachHere();
 1005     }
 1006 #ifndef PRODUCT
 1007   } else {
 1008     switch (ireg) {
 1009     case Op_VecS:
 1010       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
 1011                 "popl    [rsp + #%d]",
 1012                 src_offset, dst_offset);
 1013       break;
 1014     case Op_VecD:
 1015       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 1016                 "popq    [rsp + #%d]\n\t"
 1017                 "pushl   [rsp + #%d]\n\t"
 1018                 "popq    [rsp + #%d]",
 1019                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1020       break;
 1021      case Op_VecX:
 1022       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1023                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1024                 "movdqu  [rsp + #%d], xmm0\n\t"
 1025                 "movdqu  xmm0, [rsp - #16]",
 1026                 src_offset, dst_offset);
 1027       break;
 1028     case Op_VecY:
 1029       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1030                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1031                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1032                 "vmovdqu xmm0, [rsp - #32]",
 1033                 src_offset, dst_offset);
 1034       break;
 1035     case Op_VecZ:
 1036       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1037                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1038                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1039                 "vmovdqu xmm0, [rsp - #64]",
 1040                 src_offset, dst_offset);
 1041       break;
 1042     default:
 1043       ShouldNotReachHere();
 1044     }
 1045 #endif
 1046   }
 1047 }
 1048 
 1049 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1050   // Get registers to move
 1051   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1052   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1053   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1054   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1055 
 1056   enum RC src_second_rc = rc_class(src_second);
 1057   enum RC src_first_rc = rc_class(src_first);
 1058   enum RC dst_second_rc = rc_class(dst_second);
 1059   enum RC dst_first_rc = rc_class(dst_first);
 1060 
 1061   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1062 
 1063   // Generate spill code!
 1064   int size = 0;
 1065 
 1066   if( src_first == dst_first && src_second == dst_second )
 1067     return size;            // Self copy, no move
 1068 
 1069   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1070     uint ireg = ideal_reg();
 1071     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1072     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1073     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1074     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1075       // mem -> mem
 1076       int src_offset = ra_->reg2offset(src_first);
 1077       int dst_offset = ra_->reg2offset(dst_first);
 1078       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1079     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1080       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1081     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1082       int stack_offset = ra_->reg2offset(dst_first);
 1083       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1084     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1085       int stack_offset = ra_->reg2offset(src_first);
 1086       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1087     } else {
 1088       ShouldNotReachHere();
 1089     }
 1090     return 0;
 1091   }
 1092 
 1093   // --------------------------------------
 1094   // Check for mem-mem move.  push/pop to move.
 1095   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1096     if( src_second == dst_first ) { // overlapping stack copy ranges
 1097       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1098       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1099       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1100       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1101     }
 1102     // move low bits
 1103     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1104     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1105     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1106       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1107       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1108     }
 1109     return size;
 1110   }
 1111 
 1112   // --------------------------------------
 1113   // Check for integer reg-reg copy
 1114   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1115     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1116 
 1117   // Check for integer store
 1118   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1119     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1120 
 1121   // Check for integer load
 1122   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1123     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1124 
 1125   // Check for integer reg-xmm reg copy
 1126   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1127     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1128             "no 64 bit integer-float reg moves" );
 1129     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1130   }
 1131   // --------------------------------------
 1132   // Check for float reg-reg copy
 1133   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1134     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1135             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1136     if( cbuf ) {
 1137 
 1138       // Note the mucking with the register encode to compensate for the 0/1
 1139       // indexing issue mentioned in a comment in the reg_def sections
 1140       // for FPR registers many lines above here.
 1141 
 1142       if( src_first != FPR1L_num ) {
 1143         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1144         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1145         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1146         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1147      } else {
 1148         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1149         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1150      }
 1151 #ifndef PRODUCT
 1152     } else if( !do_size ) {
 1153       if( size != 0 ) st->print("\n\t");
 1154       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1155       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1156 #endif
 1157     }
 1158     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1159   }
 1160 
 1161   // Check for float store
 1162   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1163     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1164   }
 1165 
 1166   // Check for float load
 1167   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1168     int offset = ra_->reg2offset(src_first);
 1169     const char *op_str;
 1170     int op;
 1171     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1172       op_str = "FLD_D";
 1173       op = 0xDD;
 1174     } else {                   // 32-bit load
 1175       op_str = "FLD_S";
 1176       op = 0xD9;
 1177       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1178     }
 1179     if( cbuf ) {
 1180       emit_opcode  (*cbuf, op );
 1181       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1182       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1183       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1184 #ifndef PRODUCT
 1185     } else if( !do_size ) {
 1186       if( size != 0 ) st->print("\n\t");
 1187       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1188 #endif
 1189     }
 1190     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1191     return size + 3+offset_size+2;
 1192   }
 1193 
 1194   // Check for xmm reg-reg copy
 1195   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1196     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1197             (src_first+1 == src_second && dst_first+1 == dst_second),
 1198             "no non-adjacent float-moves" );
 1199     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1200   }
 1201 
 1202   // Check for xmm reg-integer reg copy
 1203   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1204     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1205             "no 64 bit float-integer reg moves" );
 1206     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1207   }
 1208 
 1209   // Check for xmm store
 1210   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1211     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1212   }
 1213 
 1214   // Check for float xmm load
 1215   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1216     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1217   }
 1218 
 1219   // Copy from float reg to xmm reg
 1220   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1221     // copy to the top of stack from floating point reg
 1222     // and use LEA to preserve flags
 1223     if( cbuf ) {
 1224       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1225       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1226       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1227       emit_d8(*cbuf,0xF8);
 1228 #ifndef PRODUCT
 1229     } else if( !do_size ) {
 1230       if( size != 0 ) st->print("\n\t");
 1231       st->print("LEA    ESP,[ESP-8]");
 1232 #endif
 1233     }
 1234     size += 4;
 1235 
 1236     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1237 
 1238     // Copy from the temp memory to the xmm reg.
 1239     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1240 
 1241     if( cbuf ) {
 1242       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1243       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1244       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1245       emit_d8(*cbuf,0x08);
 1246 #ifndef PRODUCT
 1247     } else if( !do_size ) {
 1248       if( size != 0 ) st->print("\n\t");
 1249       st->print("LEA    ESP,[ESP+8]");
 1250 #endif
 1251     }
 1252     size += 4;
 1253     return size;
 1254   }
 1255 
 1256   // AVX-512 opmask specific spilling.
 1257   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1258     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1259     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1260     MacroAssembler _masm(cbuf);
 1261     int offset = ra_->reg2offset(src_first);
 1262     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1263     return 0;
 1264   }
 1265 
 1266   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1267     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1268     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1269     MacroAssembler _masm(cbuf);
 1270     int offset = ra_->reg2offset(dst_first);
 1271     __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1272     return 0;
 1273   }
 1274 
 1275   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1276     Unimplemented();
 1277     return 0;
 1278   }
 1279 
 1280   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1281     Unimplemented();
 1282     return 0;
 1283   }
 1284 
 1285   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1286     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1287     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1288     MacroAssembler _masm(cbuf);
 1289     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1290     return 0;
 1291   }
 1292 
 1293   assert( size > 0, "missed a case" );
 1294 
 1295   // --------------------------------------------------------------------
 1296   // Check for second bits still needing moving.
 1297   if( src_second == dst_second )
 1298     return size;               // Self copy; no move
 1299   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1300 
 1301   // Check for second word int-int move
 1302   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1303     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1304 
 1305   // Check for second word integer store
 1306   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1307     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1308 
 1309   // Check for second word integer load
 1310   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1311     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1312 
 1313   Unimplemented();
 1314   return 0; // Mute compiler
 1315 }
 1316 
 1317 #ifndef PRODUCT
 1318 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1319   implementation( NULL, ra_, false, st );
 1320 }
 1321 #endif
 1322 
 1323 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1324   implementation( &cbuf, ra_, false, NULL );
 1325 }
 1326 
 1327 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1328   return MachNode::size(ra_);
 1329 }
 1330 
 1331 
 1332 //=============================================================================
 1333 #ifndef PRODUCT
 1334 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1335   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1336   int reg = ra_->get_reg_first(this);
 1337   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1338 }
 1339 #endif
 1340 
 1341 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1342   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1343   int reg = ra_->get_encode(this);
 1344   if( offset >= 128 ) {
 1345     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1346     emit_rm(cbuf, 0x2, reg, 0x04);
 1347     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1348     emit_d32(cbuf, offset);
 1349   }
 1350   else {
 1351     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1352     emit_rm(cbuf, 0x1, reg, 0x04);
 1353     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1354     emit_d8(cbuf, offset);
 1355   }
 1356 }
 1357 
 1358 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1359   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
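  // LEA reg,[ESP+disp] encodes as opcode + ModRM + SIB plus a 1-byte or
  // 4-byte displacement, i.e. 4 or 7 bytes.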
 1360   if( offset >= 128 ) {
 1361     return 7;
 1362   }
 1363   else {
 1364     return 4;
 1365   }
 1366 }
 1367 
 1368 //=============================================================================
 1369 #ifndef PRODUCT
 1370 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1371   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1372   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1373   st->print_cr("\tNOP");
 1374   st->print_cr("\tNOP");
 1375   if( !OptoBreakpoint )
 1376     st->print_cr("\tNOP");
 1377 }
 1378 #endif
 1379 
 1380 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1381   MacroAssembler masm(&cbuf);
 1382 #ifdef ASSERT
 1383   uint insts_size = cbuf.insts_size();
 1384 #endif
 1385   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1386   masm.jump_cc(Assembler::notEqual,
 1387                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 1388   /* WARNING these NOPs are critical so that verified entry point is properly
 1389      aligned for patching by NativeJump::patch_verified_entry() */
 1390   int nops_cnt = 2;
 1391   if( !OptoBreakpoint ) // Leave space for int3
 1392      nops_cnt += 1;
 1393   masm.nop(nops_cnt);
 1394 
 1395   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1396 }
 1397 
 1398 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
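  // CMP EAX,[ECX+4] is 3 bytes, JNE rel32 is 6 bytes, plus 2 or 3 NOPs.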
 1399   return OptoBreakpoint ? 11 : 12;
 1400 }
 1401 
 1402 
 1403 //=============================================================================
 1404 
 1405 // Vector calling convention not supported.
 1406 const bool Matcher::supports_vector_calling_convention() {
 1407   return false;
 1408 }
 1409 
 1410 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1411   Unimplemented();
 1412   return OptoRegPair(0, 0);
 1413 }
 1414 
 1415 // Is this branch offset short enough that a short branch can be used?
 1416 //
 1417 // NOTE: If the platform does not provide any short branch variants, then
 1418 //       this method should return false for offset 0.
 1419 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
 1423   offset -= br_size;
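  // For example, a 2-byte short branch (br_size == 2) can reach raw offsets
  // in [-126, 129], which become [-128, 127] relative to the next instruction.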
 1424 
 1425   // the short version of jmpConUCF2 contains multiple branches,
 1426   // making the reach slightly less
 1427   if (rule == jmpConUCF2_rule)
 1428     return (-126 <= offset && offset <= 125);
 1429   return (-128 <= offset && offset <= 127);
 1430 }
 1431 
 1432 // Return whether or not this register is ever used as an argument.  This
 1433 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1434 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1436 bool Matcher::can_be_java_arg( int reg ) {
 1437   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1438   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1439   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1440   return false;
 1441 }
 1442 
 1443 bool Matcher::is_spillable_arg( int reg ) {
 1444   return can_be_java_arg(reg);
 1445 }
 1446 
 1447 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses a multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded so that negating it still yields
  // a correct positive 32-bit value).
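  // For example, divisors such as 10 or -10 qualify, while a divisor of
  // 0x100000000 or min_jint does not.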
 1453   return VM_Version::has_fast_idiv() &&
 1454          (divisor == (int)divisor && divisor != min_jint);
 1455 }
 1456 
 1457 // Register for DIVI projection of divmodI
 1458 RegMask Matcher::divI_proj_mask() {
 1459   return EAX_REG_mask();
 1460 }
 1461 
 1462 // Register for MODI projection of divmodI
 1463 RegMask Matcher::modI_proj_mask() {
 1464   return EDX_REG_mask();
 1465 }
 1466 
 1467 // Register for DIVL projection of divmodL
 1468 RegMask Matcher::divL_proj_mask() {
 1469   ShouldNotReachHere();
 1470   return RegMask();
 1471 }
 1472 
 1473 // Register for MODL projection of divmodL
 1474 RegMask Matcher::modL_proj_mask() {
 1475   ShouldNotReachHere();
 1476   return RegMask();
 1477 }
 1478 
 1479 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1480   return NO_REG_mask();
 1481 }
 1482 
// Returns true if the high 32 bits of the value are known to be zero.
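// For example, (AndL x, ConL 0x00000000FFFFFFFF) or a ConL with its high
// 32 bits clear qualifies; a ConL such as 0x100000000 does not.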
 1484 bool is_operand_hi32_zero(Node* n) {
 1485   int opc = n->Opcode();
 1486   if (opc == Op_AndL) {
 1487     Node* o2 = n->in(2);
 1488     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1489       return true;
 1490     }
 1491   }
 1492   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1493     return true;
 1494   }
 1495   return false;
 1496 }
 1497 
 1498 %}
 1499 
 1500 //----------ENCODING BLOCK-----------------------------------------------------
 1501 // This block specifies the encoding classes used by the compiler to output
 1502 // byte streams.  Encoding classes generate functions which are called by
 1503 // Machine Instruction Nodes in order to generate the bit encoding of the
 1504 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
 1506 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1507 // operand to generate a function which returns its register number when
 1508 // queried.   CONST_INTER causes an operand to generate a function which
 1509 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1510 // operand to generate four functions which return the Base Register, the
 1511 // Index Register, the Scale Value, and the Offset Value of the operand when
 1512 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e. the encoding bits for the instruction)
 1514 // associated with each basic boolean condition for a conditional instruction.
 1515 // Instructions specify two basic values for encoding.  They use the
 1516 // ins_encode keyword to specify their encoding class (which must be one of
 1517 // the class names specified in the encoding block), and they use the
 1518 // opcode keyword to specify, in order, their primary, secondary, and
 1519 // tertiary opcode.  Only the opcode sections which a particular instruction
 1520 // needs for encoding need to be specified.
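//
// As an illustrative sketch only (the instruction name, match rule, opcode
// values, and pipeline class below are an example, not a specific rule taken
// from this file), an add of an immediate to a register might pair the
// OpcSErm and Con8or32 classes defined below like this:
//
//   instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     opcode(0x81, 0x00);                    // primary opcode, /0 extension
//     ins_encode(OpcSErm(dst, src), Con8or32(src));
//     ins_pipe(ialu_reg);
//   %}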
 1521 encode %{
 1522   // Build emit functions for each basic byte or larger field in the intel
 1523   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1524   // code in the enc_class source block.  Emit functions will live in the
 1525   // main source block for now.  In future, we can generalize this by
 1526   // adding a syntax that specifies the sizes of fields in an order,
 1527   // so that the adlc can build the emit functions automagically
 1528 
 1529   // Emit primary opcode
 1530   enc_class OpcP %{
 1531     emit_opcode(cbuf, $primary);
 1532   %}
 1533 
 1534   // Emit secondary opcode
 1535   enc_class OpcS %{
 1536     emit_opcode(cbuf, $secondary);
 1537   %}
 1538 
 1539   // Emit opcode directly
 1540   enc_class Opcode(immI d8) %{
 1541     emit_opcode(cbuf, $d8$$constant);
 1542   %}
 1543 
 1544   enc_class SizePrefix %{
 1545     emit_opcode(cbuf,0x66);
 1546   %}
 1547 
 1548   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1549     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1550   %}
 1551 
 1552   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1553     emit_opcode(cbuf,$opcode$$constant);
 1554     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1555   %}
 1556 
 1557   enc_class mov_r32_imm0( rRegI dst ) %{
 1558     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1559     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1560   %}
 1561 
 1562   enc_class cdq_enc %{
 1563     // Full implementation of Java idiv and irem; checks for
 1564     // special case as described in JVM spec., p.243 & p.271.
 1565     //
 1566     //         normal case                           special case
 1567     //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
 1573     //
    //  Code sequence:
 1575     //
 1576     //  81 F8 00 00 00 80    cmp         rax,80000000h
 1577     //  0F 85 0B 00 00 00    jne         normal_case
 1578     //  33 D2                xor         rdx,edx
 1579     //  83 F9 FF             cmp         rcx,0FFh
 1580     //  0F 84 03 00 00 00    je          done
 1581     //                  normal_case:
 1582     //  99                   cdq
 1583     //  F7 F9                idiv        rax,ecx
 1584     //                  done:
 1585     //
 1586     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1587     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1588     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1589     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1590     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1591     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1592     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
 1593     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
 1594     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1595     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1596     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1597     // normal_case:
 1598     emit_opcode(cbuf,0x99);                                         // cdq
 1599     // idiv (note: must be emitted by the user of this rule)
 1600     // normal:
 1601   %}
 1602 
 1603   // Dense encoding for older common ops
 1604   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1605     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1606   %}
 1607 
 1608 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
 1610   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1611     // Check for 8-bit immediate, and set sign extend bit in opcode
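    // e.g. a primary opcode of 0x81 (imm32 form) becomes 0x83 (sign-extended
    // imm8 form) when the immediate fits in 8 bits.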
 1612     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1613       emit_opcode(cbuf, $primary | 0x02);
 1614     }
 1615     else {                          // If 32-bit immediate
 1616       emit_opcode(cbuf, $primary);
 1617     }
 1618   %}
 1619 
 1620   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1621     // Emit primary opcode and set sign-extend bit
 1622     // Check for 8-bit immediate, and set sign extend bit in opcode
 1623     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1624       emit_opcode(cbuf, $primary | 0x02);    }
 1625     else {                          // If 32-bit immediate
 1626       emit_opcode(cbuf, $primary);
 1627     }
 1628     // Emit r/m byte with secondary opcode, after primary opcode.
 1629     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1630   %}
 1631 
 1632   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1633     // Check for 8-bit immediate, and set sign extend bit in opcode
 1634     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1635       $$$emit8$imm$$constant;
 1636     }
 1637     else {                          // If 32-bit immediate
 1638       // Output immediate
 1639       $$$emit32$imm$$constant;
 1640     }
 1641   %}
 1642 
 1643   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1644     // Emit primary opcode and set sign-extend bit
 1645     // Check for 8-bit immediate, and set sign extend bit in opcode
 1646     int con = (int)$imm$$constant; // Throw away top bits
 1647     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1648     // Emit r/m byte with secondary opcode, after primary opcode.
 1649     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1650     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1651     else                               emit_d32(cbuf,con);
 1652   %}
 1653 
 1654   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1655     // Emit primary opcode and set sign-extend bit
 1656     // Check for 8-bit immediate, and set sign extend bit in opcode
 1657     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1658     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1659     // Emit r/m byte with tertiary opcode, after primary opcode.
 1660     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
 1661     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1662     else                               emit_d32(cbuf,con);
 1663   %}
 1664 
 1665   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1666     emit_cc(cbuf, $secondary, $dst$$reg );
 1667   %}
 1668 
 1669   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1670     int destlo = $dst$$reg;
 1671     int desthi = HIGH_FROM_LOW(destlo);
 1672     // bswap lo
 1673     emit_opcode(cbuf, 0x0F);
 1674     emit_cc(cbuf, 0xC8, destlo);
 1675     // bswap hi
 1676     emit_opcode(cbuf, 0x0F);
 1677     emit_cc(cbuf, 0xC8, desthi);
 1678     // xchg lo and hi
 1679     emit_opcode(cbuf, 0x87);
 1680     emit_rm(cbuf, 0x3, destlo, desthi);
 1681   %}
 1682 
 1683   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1684     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1685   %}
 1686 
 1687   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1688     $$$emit8$primary;
 1689     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1690   %}
 1691 
 1692   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1693     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1694     emit_d8(cbuf, op >> 8 );
 1695     emit_d8(cbuf, op & 255);
 1696   %}
 1697 
 1698   // emulate a CMOV with a conditional branch around a MOV
 1699   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1700     // Invert sense of branch from sense of CMOV
 1701     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1702     emit_d8( cbuf, $brOffs$$constant );
 1703   %}
 1704 
 1705   enc_class enc_PartialSubtypeCheck( ) %{
 1706     Register Redi = as_Register(EDI_enc); // result register
 1707     Register Reax = as_Register(EAX_enc); // super class
 1708     Register Recx = as_Register(ECX_enc); // killed
 1709     Register Resi = as_Register(ESI_enc); // sub class
 1710     Label miss;
 1711 
 1712     MacroAssembler _masm(&cbuf);
 1713     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1714                                      NULL, &miss,
 1715                                      /*set_cond_codes:*/ true);
 1716     if ($primary) {
 1717       __ xorptr(Redi, Redi);
 1718     }
 1719     __ bind(miss);
 1720   %}
 1721 
 1722   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1723     MacroAssembler masm(&cbuf);
 1724     int start = masm.offset();
 1725     if (UseSSE >= 2) {
 1726       if (VerifyFPU) {
 1727         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1728       }
 1729     } else {
 1730       // External c_calling_convention expects the FPU stack to be 'clean'.
 1731       // Compiled code leaves it dirty.  Do cleanup now.
 1732       masm.empty_FPU_stack();
 1733     }
 1734     if (sizeof_FFree_Float_Stack_All == -1) {
 1735       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1736     } else {
 1737       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1738     }
 1739   %}
 1740 
 1741   enc_class Verify_FPU_For_Leaf %{
 1742     if( VerifyFPU ) {
 1743       MacroAssembler masm(&cbuf);
 1744       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1745     }
 1746   %}
 1747 
 1748   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1749     // This is the instruction starting address for relocation info.
 1750     cbuf.set_insts_mark();
 1751     $$$emit8$primary;
 1752     // CALL directly to the runtime
 1753     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1754                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1755 
 1756     if (UseSSE >= 2) {
 1757       MacroAssembler _masm(&cbuf);
 1758       BasicType rt = tf()->return_type();
 1759 
 1760       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1761         // A C runtime call where the return value is unused.  In SSE2+
 1762         // mode the result needs to be removed from the FPU stack.  It's
 1763         // likely that this function call could be removed by the
 1764         // optimizer if the C function is a pure function.
 1765         __ ffree(0);
 1766       } else if (rt == T_FLOAT) {
 1767         __ lea(rsp, Address(rsp, -4));
 1768         __ fstp_s(Address(rsp, 0));
 1769         __ movflt(xmm0, Address(rsp, 0));
 1770         __ lea(rsp, Address(rsp,  4));
 1771       } else if (rt == T_DOUBLE) {
 1772         __ lea(rsp, Address(rsp, -8));
 1773         __ fstp_d(Address(rsp, 0));
 1774         __ movdbl(xmm0, Address(rsp, 0));
 1775         __ lea(rsp, Address(rsp,  8));
 1776       }
 1777     }
 1778   %}
 1779 
 1780   enc_class pre_call_resets %{
 1781     // If method sets FPU control word restore it here
 1782     debug_only(int off0 = cbuf.insts_size());
 1783     if (ra_->C->in_24_bit_fp_mode()) {
 1784       MacroAssembler _masm(&cbuf);
 1785       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1786     }
 1787     // Clear upper bits of YMM registers when current compiled code uses
 1788     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1789     MacroAssembler _masm(&cbuf);
 1790     __ vzeroupper();
 1791     debug_only(int off1 = cbuf.insts_size());
 1792     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1793   %}
 1794 
 1795   enc_class post_call_FPU %{
 1796     // If method sets FPU control word do it here also
 1797     if (Compile::current()->in_24_bit_fp_mode()) {
 1798       MacroAssembler masm(&cbuf);
 1799       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1800     }
 1801   %}
 1802 
 1803   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1804     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1805     // who we intended to call.
 1806     cbuf.set_insts_mark();
 1807     $$$emit8$primary;
 1808 
 1809     if (!_method) {
 1810       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1811                      runtime_call_Relocation::spec(),
 1812                      RELOC_IMM32);
 1813     } else {
 1814       int method_index = resolved_method_index(cbuf);
 1815       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1816                                                   : static_call_Relocation::spec(method_index);
 1817       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1818                      rspec, RELOC_DISP32);
 1819       // Emit stubs for static call.
 1820       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
 1821       if (stub == NULL) {
 1822         ciEnv::current()->record_failure("CodeCache is full");
 1823         return;
 1824       }
 1825     }
 1826   %}
 1827 
 1828   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1829     MacroAssembler _masm(&cbuf);
 1830     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1831   %}
 1832 
 1833   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1834     int disp = in_bytes(Method::from_compiled_offset());
 1835     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1836 
 1837     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
 1838     cbuf.set_insts_mark();
 1839     $$$emit8$primary;
 1840     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1841     emit_d8(cbuf, disp);             // Displacement
 1842 
 1843   %}
 1844 
 1845 //   Following encoding is no longer used, but may be restored if calling
 1846 //   convention changes significantly.
 1847 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1848 //
 1849 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1850 //     // int ic_reg     = Matcher::inline_cache_reg();
 1851 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1852 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1853 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1854 //
 1855 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1856 //     // // so we load it immediately before the call
 1857 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1858 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1859 //
 1860 //     // xor rbp,ebp
 1861 //     emit_opcode(cbuf, 0x33);
 1862 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1863 //
 1864 //     // CALL to interpreter.
 1865 //     cbuf.set_insts_mark();
 1866 //     $$$emit8$primary;
 1867 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1868 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1869 //   %}
 1870 
 1871   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1872     $$$emit8$primary;
 1873     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1874     $$$emit8$shift$$constant;
 1875   %}
 1876 
 1877   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1878     // Load immediate does not have a zero or sign extended version
 1879     // for 8-bit immediates
 1880     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1881     $$$emit32$src$$constant;
 1882   %}
 1883 
 1884   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1885     // Load immediate does not have a zero or sign extended version
 1886     // for 8-bit immediates
 1887     emit_opcode(cbuf, $primary + $dst$$reg);
 1888     $$$emit32$src$$constant;
 1889   %}
 1890 
 1891   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1892     // Load immediate does not have a zero or sign extended version
 1893     // for 8-bit immediates
 1894     int dst_enc = $dst$$reg;
 1895     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1896     if (src_con == 0) {
 1897       // xor dst, dst
 1898       emit_opcode(cbuf, 0x33);
 1899       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1900     } else {
 1901       emit_opcode(cbuf, $primary + dst_enc);
 1902       emit_d32(cbuf, src_con);
 1903     }
 1904   %}
 1905 
 1906   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1907     // Load immediate does not have a zero or sign extended version
 1908     // for 8-bit immediates
 1909     int dst_enc = $dst$$reg + 2;
 1910     int src_con = ((julong)($src$$constant)) >> 32;
 1911     if (src_con == 0) {
 1912       // xor dst, dst
 1913       emit_opcode(cbuf, 0x33);
 1914       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1915     } else {
 1916       emit_opcode(cbuf, $primary + dst_enc);
 1917       emit_d32(cbuf, src_con);
 1918     }
 1919   %}
 1920 
 1921 
 1922   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1923   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1924     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1925   %}
 1926 
 1927   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1928     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1929   %}
 1930 
 1931   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1932     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1933   %}
 1934 
 1935   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1936     $$$emit8$primary;
 1937     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1938   %}
 1939 
 1940   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1941     $$$emit8$secondary;
 1942     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1943   %}
 1944 
 1945   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1946     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1947   %}
 1948 
 1949   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1950     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
 1951   %}
 1952 
 1953   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1954     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
 1955   %}
 1956 
 1957   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1958     // Output immediate
 1959     $$$emit32$src$$constant;
 1960   %}
 1961 
 1962   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1963     // Output Float immediate bits
 1964     jfloat jf = $src$$constant;
 1965     int    jf_as_bits = jint_cast( jf );
 1966     emit_d32(cbuf, jf_as_bits);
 1967   %}
 1968 
 1969   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1970     // Output Float immediate bits
 1971     jfloat jf = $src$$constant;
 1972     int    jf_as_bits = jint_cast( jf );
 1973     emit_d32(cbuf, jf_as_bits);
 1974   %}
 1975 
 1976   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1977     // Output immediate
 1978     $$$emit16$src$$constant;
 1979   %}
 1980 
 1981   enc_class Con_d32(immI src) %{
 1982     emit_d32(cbuf,$src$$constant);
 1983   %}
 1984 
 1985   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1986     // Output immediate memory reference
 1987     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 1988     emit_d32(cbuf, 0x00);
 1989   %}
 1990 
 1991   enc_class lock_prefix( ) %{
 1992     emit_opcode(cbuf,0xF0);         // [Lock]
 1993   %}
 1994 
 1995   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
 2000   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2001 
 2002     // XCHG  rbx,ecx
 2003     emit_opcode(cbuf,0x87);
 2004     emit_opcode(cbuf,0xD9);
 2005     // [Lock]
 2006     emit_opcode(cbuf,0xF0);
 2007     // CMPXCHG8 [Eptr]
 2008     emit_opcode(cbuf,0x0F);
 2009     emit_opcode(cbuf,0xC7);
 2010     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2011     // XCHG  rbx,ecx
 2012     emit_opcode(cbuf,0x87);
 2013     emit_opcode(cbuf,0xD9);
 2014   %}
 2015 
 2016   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2017     // [Lock]
 2018     emit_opcode(cbuf,0xF0);
 2019 
 2020     // CMPXCHG [Eptr]
 2021     emit_opcode(cbuf,0x0F);
 2022     emit_opcode(cbuf,0xB1);
 2023     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2024   %}
 2025 
 2026   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2027     // [Lock]
 2028     emit_opcode(cbuf,0xF0);
 2029 
 2030     // CMPXCHGB [Eptr]
 2031     emit_opcode(cbuf,0x0F);
 2032     emit_opcode(cbuf,0xB0);
 2033     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2034   %}
 2035 
 2036   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2037     // [Lock]
 2038     emit_opcode(cbuf,0xF0);
 2039 
    // operand-size prefix (16-bit operand)
 2041     emit_opcode(cbuf, 0x66);
 2042 
 2043     // CMPXCHGW [Eptr]
 2044     emit_opcode(cbuf,0x0F);
 2045     emit_opcode(cbuf,0xB1);
 2046     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2047   %}
 2048 
 2049   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2050     int res_encoding = $res$$reg;
 2051 
 2052     // MOV  res,0
 2053     emit_opcode( cbuf, 0xB8 + res_encoding);
 2054     emit_d32( cbuf, 0 );
 2055     // JNE,s  fail
 2056     emit_opcode(cbuf,0x75);
 2057     emit_d8(cbuf, 5 );
 2058     // MOV  res,1
 2059     emit_opcode( cbuf, 0xB8 + res_encoding);
 2060     emit_d32( cbuf, 1 );
 2061     // fail:
 2062   %}
 2063 
 2064   enc_class set_instruction_start( ) %{
 2065     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2066   %}
 2067 
 2068   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2069     int reg_encoding = $ereg$$reg;
 2070     int base  = $mem$$base;
 2071     int index = $mem$$index;
 2072     int scale = $mem$$scale;
 2073     int displace = $mem$$disp;
 2074     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2075     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2076   %}
 2077 
 2078   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2079     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
 2080     int base  = $mem$$base;
 2081     int index = $mem$$index;
 2082     int scale = $mem$$scale;
 2083     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2084     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2085     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2086   %}
 2087 
 2088   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
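    // $tertiary selects the double-shift opcode: 0xA4 (SHLD) shifts bits from
    // the low word into the high word (left shift); otherwise the SHRD form
    // shifts bits from the high word into the low word (right shift).
    // $primary/$secondary then encode the plain shift of the remaining word.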
 2089     int r1, r2;
 2090     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2091     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2092     emit_opcode(cbuf,0x0F);
 2093     emit_opcode(cbuf,$tertiary);
 2094     emit_rm(cbuf, 0x3, r1, r2);
 2095     emit_d8(cbuf,$cnt$$constant);
 2096     emit_d8(cbuf,$primary);
 2097     emit_rm(cbuf, 0x3, $secondary, r1);
 2098     emit_d8(cbuf,$cnt$$constant);
 2099   %}
 2100 
 2101   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2102     emit_opcode( cbuf, 0x8B ); // Move
 2103     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
 2104     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2105       emit_d8(cbuf,$primary);
 2106       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2107       emit_d8(cbuf,$cnt$$constant-32);
 2108     }
 2109     emit_d8(cbuf,$primary);
 2110     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
 2111     emit_d8(cbuf,31);
 2112   %}
 2113 
 2114   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2115     int r1, r2;
 2116     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
 2117     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
 2118 
 2119     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2120     emit_rm(cbuf, 0x3, r1, r2);
 2121     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2122       emit_opcode(cbuf,$primary);
 2123       emit_rm(cbuf, 0x3, $secondary, r1);
 2124       emit_d8(cbuf,$cnt$$constant-32);
 2125     }
 2126     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2127     emit_rm(cbuf, 0x3, r2, r2);
 2128   %}
 2129 
 2130   // Clone of RegMem but accepts an extra parameter to access each
 2131   // half of a double in memory; it never needs relocation info.
 2132   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2133     emit_opcode(cbuf,$opcode$$constant);
 2134     int reg_encoding = $rm_reg$$reg;
 2135     int base     = $mem$$base;
 2136     int index    = $mem$$index;
 2137     int scale    = $mem$$scale;
 2138     int displace = $mem$$disp + $disp_for_half$$constant;
 2139     relocInfo::relocType disp_reloc = relocInfo::none;
 2140     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2141   %}
 2142 
 2143   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2144   //
 2145   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2146   // and it never needs relocation information.
 2147   // Frequently used to move data between FPU's Stack Top and memory.
 2148   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2149     int rm_byte_opcode = $rm_opcode$$constant;
 2150     int base     = $mem$$base;
 2151     int index    = $mem$$index;
 2152     int scale    = $mem$$scale;
 2153     int displace = $mem$$disp;
 2154     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2155     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2156   %}
 2157 
 2158   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2159     int rm_byte_opcode = $rm_opcode$$constant;
 2160     int base     = $mem$$base;
 2161     int index    = $mem$$index;
 2162     int scale    = $mem$$scale;
 2163     int displace = $mem$$disp;
 2164     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2165     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2166   %}
 2167 
 2168   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2169     int reg_encoding = $dst$$reg;
 2170     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2171     int index        = 0x04;            // 0x04 indicates no index
 2172     int scale        = 0x00;            // 0x00 indicates no scale
 2173     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2174     relocInfo::relocType disp_reloc = relocInfo::none;
 2175     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2176   %}
 2177 
 2178   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2179     // Compare dst,src
 2180     emit_opcode(cbuf,0x3B);
 2181     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2182     // jmp dst < src around move
 2183     emit_opcode(cbuf,0x7C);
 2184     emit_d8(cbuf,2);
 2185     // move dst,src
 2186     emit_opcode(cbuf,0x8B);
 2187     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2188   %}
 2189 
 2190   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2191     // Compare dst,src
 2192     emit_opcode(cbuf,0x3B);
 2193     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2194     // jmp dst > src around move
 2195     emit_opcode(cbuf,0x7F);
 2196     emit_d8(cbuf,2);
 2197     // move dst,src
 2198     emit_opcode(cbuf,0x8B);
 2199     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2200   %}
 2201 
 2202   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2203     // If src is FPR1, we can just FST to store it.
 2204     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2205     int reg_encoding = 0x2; // Just store
 2206     int base  = $mem$$base;
 2207     int index = $mem$$index;
 2208     int scale = $mem$$scale;
 2209     int displace = $mem$$disp;
 2210     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2211     if( $src$$reg != FPR1L_enc ) {
 2212       reg_encoding = 0x3;  // Store & pop
 2213       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2214       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2215     }
 2216     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2217     emit_opcode(cbuf,$primary);
 2218     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2219   %}
 2220 
 2221   enc_class neg_reg(rRegI dst) %{
 2222     // NEG $dst
 2223     emit_opcode(cbuf,0xF7);
 2224     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2225   %}
 2226 
 2227   enc_class setLT_reg(eCXRegI dst) %{
 2228     // SETLT $dst
 2229     emit_opcode(cbuf,0x0F);
 2230     emit_opcode(cbuf,0x9C);
 2231     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2232   %}
 2233 
 2234   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2235     int tmpReg = $tmp$$reg;
 2236 
 2237     // SUB $p,$q
 2238     emit_opcode(cbuf,0x2B);
 2239     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2240     // SBB $tmp,$tmp
 2241     emit_opcode(cbuf,0x1B);
 2242     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2243     // AND $tmp,$y
 2244     emit_opcode(cbuf,0x23);
 2245     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2246     // ADD $p,$tmp
 2247     emit_opcode(cbuf,0x03);
 2248     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2249   %}
 2250 
 2251   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2252     // TEST shift,32
 2253     emit_opcode(cbuf,0xF7);
 2254     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2255     emit_d32(cbuf,0x20);
 2256     // JEQ,s small
 2257     emit_opcode(cbuf, 0x74);
 2258     emit_d8(cbuf, 0x04);
 2259     // MOV    $dst.hi,$dst.lo
 2260     emit_opcode( cbuf, 0x8B );
 2261     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
 2262     // CLR    $dst.lo
 2263     emit_opcode(cbuf, 0x33);
 2264     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2265 // small:
 2266     // SHLD   $dst.hi,$dst.lo,$shift
 2267     emit_opcode(cbuf,0x0F);
 2268     emit_opcode(cbuf,0xA5);
 2269     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift
 2271     emit_opcode(cbuf,0xD3);
 2272     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2273   %}
 2274 
 2275   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2276     // TEST shift,32
 2277     emit_opcode(cbuf,0xF7);
 2278     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2279     emit_d32(cbuf,0x20);
 2280     // JEQ,s small
 2281     emit_opcode(cbuf, 0x74);
 2282     emit_d8(cbuf, 0x04);
 2283     // MOV    $dst.lo,$dst.hi
 2284     emit_opcode( cbuf, 0x8B );
 2285     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2286     // CLR    $dst.hi
 2287     emit_opcode(cbuf, 0x33);
 2288     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
 2289 // small:
 2290     // SHRD   $dst.lo,$dst.hi,$shift
 2291     emit_opcode(cbuf,0x0F);
 2292     emit_opcode(cbuf,0xAD);
 2293     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
 2295     emit_opcode(cbuf,0xD3);
 2296     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
 2297   %}
 2298 
 2299   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2300     // TEST shift,32
 2301     emit_opcode(cbuf,0xF7);
 2302     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2303     emit_d32(cbuf,0x20);
 2304     // JEQ,s small
 2305     emit_opcode(cbuf, 0x74);
 2306     emit_d8(cbuf, 0x05);
 2307     // MOV    $dst.lo,$dst.hi
 2308     emit_opcode( cbuf, 0x8B );
 2309     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
 2310     // SAR    $dst.hi,31
 2311     emit_opcode(cbuf, 0xC1);
 2312     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
 2313     emit_d8(cbuf, 0x1F );
 2314 // small:
 2315     // SHRD   $dst.lo,$dst.hi,$shift
 2316     emit_opcode(cbuf,0x0F);
 2317     emit_opcode(cbuf,0xAD);
 2318     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
 2320     emit_opcode(cbuf,0xD3);
 2321     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
 2322   %}
 2323 
 2324 
 2325   // ----------------- Encodings for floating point unit -----------------
 2326   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2327   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2328     $$$emit8$primary;
 2329     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2330   %}
 2331 
 2332   // Pop argument in FPR0 with FSTP ST(0)
 2333   enc_class PopFPU() %{
 2334     emit_opcode( cbuf, 0xDD );
 2335     emit_d8( cbuf, 0xD8 );
 2336   %}
 2337 
 2338   // !!!!! equivalent to Pop_Reg_F
 2339   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2340     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2341     emit_d8( cbuf, 0xD8+$dst$$reg );
 2342   %}
 2343 
 2344   enc_class Push_Reg_DPR( regDPR dst ) %{
 2345     emit_opcode( cbuf, 0xD9 );
 2346     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2347   %}
 2348 
 2349   enc_class strictfp_bias1( regDPR dst ) %{
 2350     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2351     emit_opcode( cbuf, 0x2D );
 2352     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2353     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2354     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2355   %}
 2356 
 2357   enc_class strictfp_bias2( regDPR dst ) %{
 2358     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2359     emit_opcode( cbuf, 0x2D );
 2360     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2361     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2362     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2363   %}
 2364 
 2365   // Special case for moving an integer register to a stack slot.
 2366   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2367     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2368   %}
 2369 
 2370   // Special case for moving a register to a stack slot.
 2371   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2372     // Opcode already emitted
 2373     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2374     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2375     emit_d32(cbuf, $dst$$disp);   // Displacement
 2376   %}
 2377 
 2378   // Push the integer in stackSlot 'src' onto FP-stack
 2379   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2380     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2381   %}
 2382 
 2383   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2384   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2385     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2386   %}
 2387 
 2388   // Same as Pop_Mem_F except for opcode
 2389   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2390   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2391     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2392   %}
 2393 
 2394   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2395     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2396     emit_d8( cbuf, 0xD8+$dst$$reg );
 2397   %}
 2398 
 2399   enc_class Push_Reg_FPR( regFPR dst ) %{
 2400     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2401     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2402   %}
 2403 
 2404   // Push FPU's float to a stack-slot, and pop FPU-stack
 2405   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2406     int pop = 0x02;
 2407     if ($src$$reg != FPR1L_enc) {
 2408       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2409       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2410       pop = 0x03;
 2411     }
 2412     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2413   %}
 2414 
 2415   // Push FPU's double to a stack-slot, and pop FPU-stack
 2416   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2417     int pop = 0x02;
 2418     if ($src$$reg != FPR1L_enc) {
 2419       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2420       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2421       pop = 0x03;
 2422     }
 2423     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2424   %}
 2425 
 2426   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2427   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2428     int pop = 0xD0 - 1; // -1 since we skip FLD
 2429     if ($src$$reg != FPR1L_enc) {
 2430       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2431       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2432       pop = 0xD8;
 2433     }
 2434     emit_opcode( cbuf, 0xDD );
 2435     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2436   %}
 2437 
 2438 
 2439   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2440     // load dst in FPR0
 2441     emit_opcode( cbuf, 0xD9 );
 2442     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2443     if ($src$$reg != FPR1L_enc) {
 2444       // fincstp
 2445       emit_opcode (cbuf, 0xD9);
 2446       emit_opcode (cbuf, 0xF7);
 2447       // swap src with FPR1:
 2448       // FXCH FPR1 with src
 2449       emit_opcode(cbuf, 0xD9);
 2450       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2451       // fdecstp
 2452       emit_opcode (cbuf, 0xD9);
 2453       emit_opcode (cbuf, 0xF6);
 2454     }
 2455   %}
 2456 
 2457   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2458     MacroAssembler _masm(&cbuf);
 2459     __ subptr(rsp, 8);
 2460     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2461     __ fld_d(Address(rsp, 0));
 2462     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2463     __ fld_d(Address(rsp, 0));
 2464   %}
 2465 
 2466   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2467     MacroAssembler _masm(&cbuf);
 2468     __ subptr(rsp, 4);
 2469     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2470     __ fld_s(Address(rsp, 0));
 2471     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2472     __ fld_s(Address(rsp, 0));
 2473   %}
 2474 
 2475   enc_class Push_ResultD(regD dst) %{
 2476     MacroAssembler _masm(&cbuf);
 2477     __ fstp_d(Address(rsp, 0));
 2478     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2479     __ addptr(rsp, 8);
 2480   %}
 2481 
 2482   enc_class Push_ResultF(regF dst, immI d8) %{
 2483     MacroAssembler _masm(&cbuf);
 2484     __ fstp_s(Address(rsp, 0));
 2485     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2486     __ addptr(rsp, $d8$$constant);
 2487   %}
 2488 
 2489   enc_class Push_SrcD(regD src) %{
 2490     MacroAssembler _masm(&cbuf);
 2491     __ subptr(rsp, 8);
 2492     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2493     __ fld_d(Address(rsp, 0));
 2494   %}
 2495 
 2496   enc_class push_stack_temp_qword() %{
 2497     MacroAssembler _masm(&cbuf);
 2498     __ subptr(rsp, 8);
 2499   %}
 2500 
 2501   enc_class pop_stack_temp_qword() %{
 2502     MacroAssembler _masm(&cbuf);
 2503     __ addptr(rsp, 8);
 2504   %}
 2505 
 2506   enc_class push_xmm_to_fpr1(regD src) %{
 2507     MacroAssembler _masm(&cbuf);
 2508     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2509     __ fld_d(Address(rsp, 0));
 2510   %}
 2511 
 2512   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2513     if ($src$$reg != FPR1L_enc) {
 2514       // fincstp
 2515       emit_opcode (cbuf, 0xD9);
 2516       emit_opcode (cbuf, 0xF7);
 2517       // FXCH FPR1 with src
 2518       emit_opcode(cbuf, 0xD9);
 2519       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2520       // fdecstp
 2521       emit_opcode (cbuf, 0xD9);
 2522       emit_opcode (cbuf, 0xF6);
 2523     }
 2524     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2525     // // FSTP   FPR$dst$$reg
 2526     // emit_opcode( cbuf, 0xDD );
 2527     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2528   %}
 2529 
 2530   enc_class fnstsw_sahf_skip_parity() %{
 2531     // fnstsw ax
 2532     emit_opcode( cbuf, 0xDF );
 2533     emit_opcode( cbuf, 0xE0 );
 2534     // sahf
 2535     emit_opcode( cbuf, 0x9E );
 2536     // jnp  ::skip
 2537     emit_opcode( cbuf, 0x7B );
 2538     emit_opcode( cbuf, 0x05 );
 2539   %}
 2540 
 2541   enc_class emitModDPR() %{
 2542     // fprem must be iterative
 2543     // :: loop
 2544     // fprem
 2545     emit_opcode( cbuf, 0xD9 );
 2546     emit_opcode( cbuf, 0xF8 );
 2547     // wait
 2548     emit_opcode( cbuf, 0x9b );
 2549     // fnstsw ax
 2550     emit_opcode( cbuf, 0xDF );
 2551     emit_opcode( cbuf, 0xE0 );
 2552     // sahf
 2553     emit_opcode( cbuf, 0x9E );
 2554     // jp  ::loop
 2555     emit_opcode( cbuf, 0x0F );
 2556     emit_opcode( cbuf, 0x8A );
 2557     emit_opcode( cbuf, 0xF4 );
 2558     emit_opcode( cbuf, 0xFF );
 2559     emit_opcode( cbuf, 0xFF );
 2560     emit_opcode( cbuf, 0xFF );
 2561   %}
 2562 
 2563   enc_class fpu_flags() %{
 2564     // fnstsw_ax
 2565     emit_opcode( cbuf, 0xDF);
 2566     emit_opcode( cbuf, 0xE0);
 2567     // test ax,0x0400
 2568     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2569     emit_opcode( cbuf, 0xA9 );
 2570     emit_d16   ( cbuf, 0x0400 );
 2571     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2572     // // test rax,0x0400
 2573     // emit_opcode( cbuf, 0xA9 );
 2574     // emit_d32   ( cbuf, 0x00000400 );
 2575     //
 2576     // jz exit (no unordered comparison)
 2577     emit_opcode( cbuf, 0x74 );
 2578     emit_d8    ( cbuf, 0x02 );
 2579     // mov ah,1 - treat as LT case (set carry flag)
 2580     emit_opcode( cbuf, 0xB4 );
 2581     emit_d8    ( cbuf, 0x01 );
 2582     // sahf
 2583     emit_opcode( cbuf, 0x9E);
 2584   %}
 2585 
 2586   enc_class cmpF_P6_fixup() %{
 2587     // Fixup the integer flags in case comparison involved a NaN
 2588     //
 2589     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2590     emit_opcode( cbuf, 0x7B );
 2591     emit_d8    ( cbuf, 0x03 );
 2592     // MOV AH,1 - treat as LT case (set carry flag)
 2593     emit_opcode( cbuf, 0xB4 );
 2594     emit_d8    ( cbuf, 0x01 );
 2595     // SAHF
 2596     emit_opcode( cbuf, 0x9E);
 2597     // NOP     // target for branch to avoid branch to branch
 2598     emit_opcode( cbuf, 0x90);
 2599   %}
 2600 
 2601 //     fnstsw_ax();
 2602 //     sahf();
 2603 //     movl(dst, nan_result);
 2604 //     jcc(Assembler::parity, exit);
 2605 //     movl(dst, less_result);
 2606 //     jcc(Assembler::below, exit);
 2607 //     movl(dst, equal_result);
 2608 //     jcc(Assembler::equal, exit);
 2609 //     movl(dst, greater_result);
 2610 
 2611 // less_result     =  1;
 2612 // greater_result  = -1;
 2613 // equal_result    = 0;
 2614 // nan_result      = -1;
 2615 
 2616   enc_class CmpF_Result(rRegI dst) %{
 2617     // fnstsw_ax();
 2618     emit_opcode( cbuf, 0xDF);
 2619     emit_opcode( cbuf, 0xE0);
 2620     // sahf
 2621     emit_opcode( cbuf, 0x9E);
 2622     // movl(dst, nan_result);
 2623     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2624     emit_d32( cbuf, -1 );
 2625     // jcc(Assembler::parity, exit);
 2626     emit_opcode( cbuf, 0x7A );
 2627     emit_d8    ( cbuf, 0x13 );
 2628     // movl(dst, less_result);
 2629     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2630     emit_d32( cbuf, -1 );
 2631     // jcc(Assembler::below, exit);
 2632     emit_opcode( cbuf, 0x72 );
 2633     emit_d8    ( cbuf, 0x0C );
 2634     // movl(dst, equal_result);
 2635     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2636     emit_d32( cbuf, 0 );
 2637     // jcc(Assembler::equal, exit);
 2638     emit_opcode( cbuf, 0x74 );
 2639     emit_d8    ( cbuf, 0x05 );
 2640     // movl(dst, greater_result);
 2641     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2642     emit_d32( cbuf, 1 );
 2643   %}
 2644 
 2645 
 2646   // Compare the longs and set flags
 2647   // BROKEN!  Do Not use as-is
 2648   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2649     // CMP    $src1.hi,$src2.hi
 2650     emit_opcode( cbuf, 0x3B );
 2651     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2652     // JNE,s  done
 2653     emit_opcode(cbuf,0x75);
 2654     emit_d8(cbuf, 2 );
 2655     // CMP    $src1.lo,$src2.lo
 2656     emit_opcode( cbuf, 0x3B );
 2657     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2658 // done:
 2659   %}
 2660 
 2661   enc_class convert_int_long( regL dst, rRegI src ) %{
 2662     // mov $dst.lo,$src
 2663     int dst_encoding = $dst$$reg;
 2664     int src_encoding = $src$$reg;
 2665     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2666     // mov $dst.hi,$src
 2667     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
 2668     // sar $dst.hi,31
 2669     emit_opcode( cbuf, 0xC1 );
 2670     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
 2671     emit_d8(cbuf, 0x1F );
 2672   %}
 2673 
 2674   enc_class convert_long_double( eRegL src ) %{
 2675     // push $src.hi
 2676     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2677     // push $src.lo
 2678     emit_opcode(cbuf, 0x50+$src$$reg  );
 2679     // fild 64-bits at [SP]
 2680     emit_opcode(cbuf,0xdf);
 2681     emit_d8(cbuf, 0x6C);
 2682     emit_d8(cbuf, 0x24);
 2683     emit_d8(cbuf, 0x00);
 2684     // pop stack
 2685     emit_opcode(cbuf, 0x83); // add  SP, #8
 2686     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2687     emit_d8(cbuf, 0x8);
 2688   %}
 2689 
 2690   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2691     // IMUL   EDX:EAX,$src1
 2692     emit_opcode( cbuf, 0xF7 );
 2693     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2694     // SAR    EDX,$cnt-32
 2695     int shift_count = ((int)$cnt$$constant) - 32;
 2696     if (shift_count > 0) {
 2697       emit_opcode(cbuf, 0xC1);
 2698       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2699       emit_d8(cbuf, shift_count);
 2700     }
 2701   %}
 2702 
 2703   // Same as convert_long_double, but without the trailing ADD ESP,8 to pop the stack
 2704   enc_class convert_long_double2( eRegL src ) %{
 2705     // push $src.hi
 2706     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
 2707     // push $src.lo
 2708     emit_opcode(cbuf, 0x50+$src$$reg  );
 2709     // fild 64-bits at [SP]
 2710     emit_opcode(cbuf,0xdf);
 2711     emit_d8(cbuf, 0x6C);
 2712     emit_d8(cbuf, 0x24);
 2713     emit_d8(cbuf, 0x00);
 2714   %}
 2715 
 2716   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2717     // Basic idea: long = (long)int * (long)int
 2718     // IMUL EDX:EAX, src
 2719     emit_opcode( cbuf, 0xF7 );
 2720     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2721   %}
 2722 
 2723   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2724     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2725     // MUL EDX:EAX, src
 2726     emit_opcode( cbuf, 0xF7 );
 2727     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2728   %}
 2729 
 2730   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2731     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2732     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2733     // MOV    $tmp,$src.lo
 2734     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2735     // IMUL   $tmp,EDX
 2736     emit_opcode( cbuf, 0x0F );
 2737     emit_opcode( cbuf, 0xAF );
 2738     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2739     // MOV    EDX,$src.hi
 2740     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
 2741     // IMUL   EDX,EAX
 2742     emit_opcode( cbuf, 0x0F );
 2743     emit_opcode( cbuf, 0xAF );
 2744     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
 2745     // ADD    $tmp,EDX
 2746     emit_opcode( cbuf, 0x03 );
 2747     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2748     // MUL   EDX:EAX,$src.lo
 2749     emit_opcode( cbuf, 0xF7 );
 2750     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
 2751     // ADD    EDX,ESI
 2752     emit_opcode( cbuf, 0x03 );
 2753     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
 2754   %}
 2755 
 2756   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2757     // Basic idea: lo(result) = lo(src * y_lo)
 2758     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2759     // IMUL   $tmp,EDX,$src
 2760     emit_opcode( cbuf, 0x6B );
 2761     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
 2762     emit_d8( cbuf, (int)$src$$constant );
 2763     // MOV    EDX,$src
 2764     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2765     emit_d32( cbuf, (int)$src$$constant );
 2766     // MUL   EDX:EAX,EDX
 2767     emit_opcode( cbuf, 0xF7 );
 2768     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
 2769     // ADD    EDX,ESI
 2770     emit_opcode( cbuf, 0x03 );
 2771     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2772   %}
 2773 
 2774   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2775     // PUSH src1.hi
 2776     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2777     // PUSH src1.lo
 2778     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2779     // PUSH src2.hi
 2780     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2781     // PUSH src2.lo
 2782     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2783     // CALL directly to the runtime
 2784     cbuf.set_insts_mark();
 2785     emit_opcode(cbuf,0xE8);       // Call into runtime
 2786     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2787     // Restore stack
 2788     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2789     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2790     emit_d8(cbuf, 4*4);
 2791   %}
 2792 
 2793   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2794     // PUSH src1.hi
 2795     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
 2796     // PUSH src1.lo
 2797     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2798     // PUSH src2.hi
 2799     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
 2800     // PUSH src2.lo
 2801     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2802     // CALL directly to the runtime
 2803     cbuf.set_insts_mark();
 2804     emit_opcode(cbuf,0xE8);       // Call into runtime
 2805     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2806     // Restore stack
 2807     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2808     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2809     emit_d8(cbuf, 4*4);
 2810   %}
 2811 
 2812   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
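          // Test a long against zero: OR the low and high halves into $tmp so
          // the zero flag is set exactly when the whole 64-bit value is zero.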
 2813     // MOV   $tmp,$src.lo
 2814     emit_opcode(cbuf, 0x8B);
 2815     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2816     // OR    $tmp,$src.hi
 2817     emit_opcode(cbuf, 0x0B);
 2818     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
 2819   %}
 2820 
 2821   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
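          // Long equality test: compare the low halves and, only if they are
          // equal, fall through and compare the high halves.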
 2822     // CMP    $src1.lo,$src2.lo
 2823     emit_opcode( cbuf, 0x3B );
 2824     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2825     // JNE,s  skip
 2826     emit_cc(cbuf, 0x70, 0x5);
 2827     emit_d8(cbuf,2);
 2828     // CMP    $src1.hi,$src2.hi
 2829     emit_opcode( cbuf, 0x3B );
 2830     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
 2831   %}
 2832 
 2833   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
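          // Signed long compare: CMP the low halves, then subtract-with-borrow
          // the high halves into $tmp; the resulting flags order the 64-bit
          // values for signed less-than / greater-or-equal tests.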
 2834     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2835     emit_opcode( cbuf, 0x3B );
 2836     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2837     // MOV    $tmp,$src1.hi
 2838     emit_opcode( cbuf, 0x8B );
 2839     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
 2840     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2841     emit_opcode( cbuf, 0x1B );
 2842     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
 2843   %}
 2844 
 2845   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
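          // Compare zero against a long: compute 0 - $src via CMP/SBB through
          // $tmp, leaving flags that reflect the signed comparison of zero with
          // the 64-bit value.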
 2846     // XOR    $tmp,$tmp
 2847     emit_opcode(cbuf,0x33);  // XOR
 2848     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2849     // CMP    $tmp,$src.lo
 2850     emit_opcode( cbuf, 0x3B );
 2851     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2852     // SBB    $tmp,$src.hi
 2853     emit_opcode( cbuf, 0x1B );
 2854     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
 2855   %}
 2856 
 2857  // Sniff, sniff... smells like Gnu Superoptimizer
 2858   enc_class neg_long( eRegL dst ) %{
 2859     emit_opcode(cbuf,0xF7);    // NEG hi
 2860     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2861     emit_opcode(cbuf,0xF7);    // NEG lo
 2862     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2863     emit_opcode(cbuf,0x83);    // SBB hi,0
 2864     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
 2865     emit_d8    (cbuf,0 );
 2866   %}
 2867 
 2868   enc_class enc_pop_rdx() %{
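          // POP EDX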
 2869     emit_opcode(cbuf,0x5A);
 2870   %}
 2871 
 2872   enc_class enc_rethrow() %{
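          // Tail-jump (JMP) to the shared rethrow stub; the displacement is
          // PC-relative to the end of this 5-byte instruction.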
 2873     cbuf.set_insts_mark();
 2874     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2875     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2876                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2877   %}
 2878 
 2879 
 2880   // Convert a double to an int.  Java semantics require we do complex
 2881   // manglelations in the corner cases.  So we set the rounding mode to
 2882   // 'zero', store the darned double down as an int, and reset the
 2883   // rounding mode to 'nearest'.  The hardware throws an exception which
 2884   // patches up the correct value directly to the stack.
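        // For example, Java requires (int)Double.NaN == 0 and saturation to
        // Integer.MIN_VALUE/MAX_VALUE for out-of-range values, whereas FIST
        // stores the 'integer indefinite' pattern 0x80000000 in those cases;
        // hence the compare against 0x80000000 and the slow call below.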
 2885   enc_class DPR2I_encoding( regDPR src ) %{
 2886     // Flip to round-to-zero mode.  We attempted to allow invalid-op
 2887     // exceptions here, so that a NaN or other corner-case value will
 2888     // throw an exception (but normal values get converted at full speed).
 2889     // However, I2C adapters and other float-stack manglers leave pending
 2890     // invalid-op exceptions hanging.  We would have to clear them before
 2891     // enabling them and that is more expensive than just testing for the
 2892     // invalid value Intel stores down in the corner cases.
 2893     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2894     emit_opcode(cbuf,0x2D);
 2895     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2896     // Allocate a word
 2897     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2898     emit_opcode(cbuf,0xEC);
 2899     emit_d8(cbuf,0x04);
 2900     // Encoding assumes a double has been pushed into FPR0.
 2901     // Store down the double as an int, popping the FPU stack
 2902     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2903     emit_opcode(cbuf,0x1C);
 2904     emit_d8(cbuf,0x24);
 2905     // Restore the rounding mode; mask the exception
 2906     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2907     emit_opcode(cbuf,0x2D);
 2908     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2909         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2910         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2911 
 2912     // Load the converted int; adjust CPU stack
 2913     emit_opcode(cbuf,0x58);       // POP EAX
 2914     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2915     emit_d32   (cbuf,0x80000000); //         0x80000000
 2916     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2917     emit_d8    (cbuf,0x07);       // Size of slow_call
 2918     // Push src onto stack slow-path
 2919     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2920     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2921     // CALL directly to the runtime
 2922     cbuf.set_insts_mark();
 2923     emit_opcode(cbuf,0xE8);       // Call into runtime
 2924     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2925     // Carry on here...
 2926   %}
 2927 
 2928   enc_class DPR2L_encoding( regDPR src ) %{
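          // Same round-to-zero trick as DPR2I_encoding above, but for a long
          // result; the slow-path call is taken when FISTP stored the 'integer
          // indefinite' pattern (EDX == 0x80000000 and EAX == 0).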
 2929     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2930     emit_opcode(cbuf,0x2D);
 2931     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2932     // Allocate two words (8 bytes) for the 64-bit result
 2933     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2934     emit_opcode(cbuf,0xEC);
 2935     emit_d8(cbuf,0x08);
 2936     // Encoding assumes a double has been pushed into FPR0.
 2937     // Store down the double as a long, popping the FPU stack
 2938     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2939     emit_opcode(cbuf,0x3C);
 2940     emit_d8(cbuf,0x24);
 2941     // Restore the rounding mode; mask the exception
 2942     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2943     emit_opcode(cbuf,0x2D);
 2944     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2945         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2946         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2947 
 2948     // Load the converted long; adjust CPU stack
 2949     emit_opcode(cbuf,0x58);       // POP EAX
 2950     emit_opcode(cbuf,0x5A);       // POP EDX
 2951     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2952     emit_d8    (cbuf,0xFA);       // rdx
 2953     emit_d32   (cbuf,0x80000000); //         0x80000000
 2954     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2955     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2956     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2957     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2958     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2959     emit_d8    (cbuf,0x07);       // Size of slow_call
 2960     // Push src onto stack slow-path
 2961     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2962     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2963     // CALL directly to the runtime
 2964     cbuf.set_insts_mark();
 2965     emit_opcode(cbuf,0xE8);       // Call into runtime
 2966     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2967     // Carry on here...
 2968   %}
 2969 
 2970   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2971     // Operand was loaded from memory into fp ST (stack top)
 2972     // FMUL   ST,$src  /* D8 C8+i */
 2973     emit_opcode(cbuf, 0xD8);
 2974     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 2975   %}
 2976 
 2977   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
 2978     // FADDP  ST,src2  /* D8 C0+i */
 2979     emit_opcode(cbuf, 0xD8);
 2980     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 2981     // Could use FADDP  src2,fpST  /* DE C0+i */
 2982   %}
 2983 
 2984   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 2985     // FADDP  src2,ST  /* DE C0+i */
 2986     emit_opcode(cbuf, 0xDE);
 2987     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 2988   %}
 2989 
 2990   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 2991     // Operand has been loaded into fp ST (stack top)
 2992       // FSUB   ST,$src1
 2993       emit_opcode(cbuf, 0xD8);
 2994       emit_opcode(cbuf, 0xE0 + $src1$$reg);
 2995 
 2996       // FDIV
 2997       emit_opcode(cbuf, 0xD8);
 2998       emit_opcode(cbuf, 0xF0 + $src2$$reg);
 2999   %}
 3000 
 3001   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3002     // Operand was loaded from memory into fp ST (stack top)
 3003     // FADD   ST,$src  /* D8 C0+i */
 3004     emit_opcode(cbuf, 0xD8);
 3005     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3006 
 3007     // FMUL  ST,src2  /* D8 C*+i */
 3008     emit_opcode(cbuf, 0xD8);
 3009     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3010   %}
 3011 
 3012 
 3013   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3014     // Operand was loaded from memory into fp ST (stack top)
 3015     // FADD   ST,$src  /* D8 C0+i */
 3016     emit_opcode(cbuf, 0xD8);
 3017     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3018 
 3019     // FMULP  src2,ST  /* DE C8+i */
 3020     emit_opcode(cbuf, 0xDE);
 3021     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3022   %}
 3023 
 3024   // Atomically load the volatile long
 3025   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3026     emit_opcode(cbuf,0xDF);
 3027     int rm_byte_opcode = 0x05;
 3028     int base     = $mem$$base;
 3029     int index    = $mem$$index;
 3030     int scale    = $mem$$scale;
 3031     int displace = $mem$$disp;
 3032     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3033     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3034     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3035   %}
 3036 
 3037   // Volatile Store Long.  Must be atomic, so move it into
 3038   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3039   // target address before the store (for null-ptr checks)
 3040   // so the memory operand is used twice in the encoding.
 3041   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3042     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3043     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3044     emit_opcode(cbuf,0xDF);
 3045     int rm_byte_opcode = 0x07;
 3046     int base     = $mem$$base;
 3047     int index    = $mem$$index;
 3048     int scale    = $mem$$scale;
 3049     int displace = $mem$$disp;
 3050     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3051     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3052   %}
 3053 
 3054 %}
 3055 
 3056 
 3057 //----------FRAME--------------------------------------------------------------
 3058 // Definition of frame structure and management information.
 3059 //
 3060 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3061 //                             |   (to get allocators register number
 3062 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3063 //  r   CALLER     |        |
 3064 //  o     |        +--------+      pad to even-align allocators stack-slot
 3065 //  w     V        |  pad0  |        numbers; owned by CALLER
 3066 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3067 //  h     ^        |   in   |  5
 3068 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3069 //  |     |        |        |  3
 3070 //  |     |        +--------+
 3071 //  V     |        | old out|      Empty on Intel, window on Sparc
 3072 //        |    old |preserve|      Must be even aligned.
 3073 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3074 //        |        |   in   |  3   area for Intel ret address
 3075 //     Owned by    |preserve|      Empty on Sparc.
 3076 //       SELF      +--------+
 3077 //        |        |  pad2  |  2   pad to align old SP
 3078 //        |        +--------+  1
 3079 //        |        | locks  |  0
 3080 //        |        +--------+----> OptoReg::stack0(), even aligned
 3081 //        |        |  pad1  | 11   pad to align new SP
 3082 //        |        +--------+
 3083 //        |        |        | 10
 3084 //        |        | spills |  9   spills
 3085 //        V        |        |  8   (pad0 slot for callee)
 3086 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3087 //        ^        |  out   |  7
 3088 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3089 //     Owned by    +--------+
 3090 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3091 //        |    new |preserve|      Must be even-aligned.
 3092 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3093 //        |        |        |
 3094 //
 3095 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3096 //         known from SELF's arguments and the Java calling convention.
 3097 //         Region 6-7 is determined per call site.
 3098 // Note 2: If the calling convention leaves holes in the incoming argument
 3099 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3100 //         are owned by the CALLEE.  Holes should not be necessary in the
 3101 //         incoming area, as the Java calling convention is completely under
 3102 //         the control of the AD file.  Doubles can be sorted and packed to
 3103 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3104 //         varargs C calling conventions.
 3105 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3106 //         even aligned with pad0 as needed.
 3107 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3108 //         region 6-11 is even aligned; it may be padded out more so that
 3109 //         the region from SP to FP meets the minimum stack alignment.
 3110 
 3111 frame %{
 3112   // These three registers define part of the calling convention
 3113   // between compiled code and the interpreter.
 3114   inline_cache_reg(EAX);                // Inline Cache Register
 3115 
 3116   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3117   cisc_spilling_operand_name(indOffset32);
 3118 
 3119   // Number of stack slots consumed by locking an object
 3120   sync_stack_slots(1);
 3121 
 3122   // Compiled code's Frame Pointer
 3123   frame_pointer(ESP);
 3124   // The interpreter stores its frame pointer in a register, which is
 3125   // stored to the stack by I2C adapters.
 3126   // I2C adapters convert from interpreted Java to compiled Java.
 3127   interpreter_frame_pointer(EBP);
 3128 
 3129   // Stack alignment requirement
 3130   // Alignment size in bytes (128-bit -> 16 bytes)
 3131   stack_alignment(StackAlignmentInBytes);
 3132 
 3133   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3134   // for calls to C.  Supports the var-args backing area for register parms.
 3135   varargs_C_out_slots_killed(0);
 3136 
 3137   // The after-PROLOG location of the return address.  Location of
 3138   // return address specifies a type (REG or STACK) and a number
 3139   // representing the register number (i.e. - use a register name) or
 3140   // stack slot.
 3141   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3142   // Otherwise, it is above the locks and verification slot and alignment word
 3143   return_addr(STACK - 1 +
 3144               align_up((Compile::current()->in_preserve_stack_slots() +
 3145                         Compile::current()->fixed_slots()),
 3146                        stack_alignment_in_slots()));
 3147 
 3148   // Location of C & interpreter return values
 3149   c_return_value %{
 3150     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3151     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3152     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3153 
 3154     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3155     // that C functions return float and double results in XMM0.
 3156     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3157       return OptoRegPair(XMM0b_num,XMM0_num);
 3158     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3159       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3160 
 3161     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3162   %}
 3163 
 3164   // Location of return values
 3165   return_value %{
 3166     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3167     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3168     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3169     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3170       return OptoRegPair(XMM0b_num,XMM0_num);
 3171     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3172       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3173     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3174   %}
 3175 
 3176 %}
 3177 
 3178 //----------ATTRIBUTES---------------------------------------------------------
 3179 //----------Operand Attributes-------------------------------------------------
 3180 op_attrib op_cost(0);        // Required cost attribute
 3181 
 3182 //----------Instruction Attributes---------------------------------------------
 3183 ins_attrib ins_cost(100);       // Required cost attribute
 3184 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3185 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3186                                 // non-matching short branch variant of some
 3187                                 // long branch?
 3188 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3189                                 // specifies the alignment that some part of the instruction (not
 3190                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3191                                 // function must be provided for the instruction
 3192 
 3193 //----------OPERANDS-----------------------------------------------------------
 3194 // Operand definitions must precede instruction definitions for correct parsing
 3195 // in the ADLC because operands constitute user defined types which are used in
 3196 // instruction definitions.
 3197 
 3198 //----------Simple Operands----------------------------------------------------
 3199 // Immediate Operands
 3200 // Integer Immediate
 3201 operand immI() %{
 3202   match(ConI);
 3203 
 3204   op_cost(10);
 3205   format %{ %}
 3206   interface(CONST_INTER);
 3207 %}
 3208 
 3209 // Constant for test vs zero
 3210 operand immI_0() %{
 3211   predicate(n->get_int() == 0);
 3212   match(ConI);
 3213 
 3214   op_cost(0);
 3215   format %{ %}
 3216   interface(CONST_INTER);
 3217 %}
 3218 
 3219 // Constant for increment
 3220 operand immI_1() %{
 3221   predicate(n->get_int() == 1);
 3222   match(ConI);
 3223 
 3224   op_cost(0);
 3225   format %{ %}
 3226   interface(CONST_INTER);
 3227 %}
 3228 
 3229 // Constant for decrement
 3230 operand immI_M1() %{
 3231   predicate(n->get_int() == -1);
 3232   match(ConI);
 3233 
 3234   op_cost(0);
 3235   format %{ %}
 3236   interface(CONST_INTER);
 3237 %}
 3238 
 3239 // Valid scale values for addressing modes
 3240 operand immI2() %{
 3241   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3242   match(ConI);
 3243 
 3244   format %{ %}
 3245   interface(CONST_INTER);
 3246 %}
 3247 
 3248 operand immI8() %{
 3249   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3250   match(ConI);
 3251 
 3252   op_cost(5);
 3253   format %{ %}
 3254   interface(CONST_INTER);
 3255 %}
 3256 
 3257 operand immU8() %{
 3258   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3259   match(ConI);
 3260 
 3261   op_cost(5);
 3262   format %{ %}
 3263   interface(CONST_INTER);
 3264 %}
 3265 
 3266 operand immI16() %{
 3267   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3268   match(ConI);
 3269 
 3270   op_cost(10);
 3271   format %{ %}
 3272   interface(CONST_INTER);
 3273 %}
 3274 
 3275 // Int Immediate non-negative
 3276 operand immU31()
 3277 %{
 3278   predicate(n->get_int() >= 0);
 3279   match(ConI);
 3280 
 3281   op_cost(0);
 3282   format %{ %}
 3283   interface(CONST_INTER);
 3284 %}
 3285 
 3286 // Constant for long shifts
 3287 operand immI_32() %{
 3288   predicate( n->get_int() == 32 );
 3289   match(ConI);
 3290 
 3291   op_cost(0);
 3292   format %{ %}
 3293   interface(CONST_INTER);
 3294 %}
 3295 
 3296 operand immI_1_31() %{
 3297   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3298   match(ConI);
 3299 
 3300   op_cost(0);
 3301   format %{ %}
 3302   interface(CONST_INTER);
 3303 %}
 3304 
 3305 operand immI_32_63() %{
 3306   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3307   match(ConI);
 3308   op_cost(0);
 3309 
 3310   format %{ %}
 3311   interface(CONST_INTER);
 3312 %}
 3313 
 3314 operand immI_2() %{
 3315   predicate( n->get_int() == 2 );
 3316   match(ConI);
 3317 
 3318   op_cost(0);
 3319   format %{ %}
 3320   interface(CONST_INTER);
 3321 %}
 3322 
 3323 operand immI_3() %{
 3324   predicate( n->get_int() == 3 );
 3325   match(ConI);
 3326 
 3327   op_cost(0);
 3328   format %{ %}
 3329   interface(CONST_INTER);
 3330 %}
 3331 
 3332 operand immI_4()
 3333 %{
 3334   predicate(n->get_int() == 4);
 3335   match(ConI);
 3336 
 3337   op_cost(0);
 3338   format %{ %}
 3339   interface(CONST_INTER);
 3340 %}
 3341 
 3342 operand immI_8()
 3343 %{
 3344   predicate(n->get_int() == 8);
 3345   match(ConI);
 3346 
 3347   op_cost(0);
 3348   format %{ %}
 3349   interface(CONST_INTER);
 3350 %}
 3351 
 3352 // Pointer Immediate
 3353 operand immP() %{
 3354   match(ConP);
 3355 
 3356   op_cost(10);
 3357   format %{ %}
 3358   interface(CONST_INTER);
 3359 %}
 3360 
 3361 // NULL Pointer Immediate
 3362 operand immP0() %{
 3363   predicate( n->get_ptr() == 0 );
 3364   match(ConP);
 3365   op_cost(0);
 3366 
 3367   format %{ %}
 3368   interface(CONST_INTER);
 3369 %}
 3370 
 3371 // Long Immediate
 3372 operand immL() %{
 3373   match(ConL);
 3374 
 3375   op_cost(20);
 3376   format %{ %}
 3377   interface(CONST_INTER);
 3378 %}
 3379 
 3380 // Long Immediate zero
 3381 operand immL0() %{
 3382   predicate( n->get_long() == 0L );
 3383   match(ConL);
 3384   op_cost(0);
 3385 
 3386   format %{ %}
 3387   interface(CONST_INTER);
 3388 %}
 3389 
 3390 // Long Immediate minus one
 3391 operand immL_M1() %{
 3392   predicate( n->get_long() == -1L );
 3393   match(ConL);
 3394   op_cost(0);
 3395 
 3396   format %{ %}
 3397   interface(CONST_INTER);
 3398 %}
 3399 
 3400 // Long immediate from 0 to 127.
 3401 // Used for a shorter form of long mul by 10.
 3402 operand immL_127() %{
 3403   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3404   match(ConL);
 3405   op_cost(0);
 3406 
 3407   format %{ %}
 3408   interface(CONST_INTER);
 3409 %}
 3410 
 3411 // Long Immediate: low 32-bit mask
 3412 operand immL_32bits() %{
 3413   predicate(n->get_long() == 0xFFFFFFFFL);
 3414   match(ConL);
 3415   op_cost(0);
 3416 
 3417   format %{ %}
 3418   interface(CONST_INTER);
 3419 %}
 3420 
 3421 // Long Immediate: 32-bit signed value
 3422 operand immL32() %{
 3423   predicate(n->get_long() == (int)(n->get_long()));
 3424   match(ConL);
 3425   op_cost(20);
 3426 
 3427   format %{ %}
 3428   interface(CONST_INTER);
 3429 %}
 3430 
 3431 // Double Immediate zero
 3432 operand immDPR0() %{
 3433   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3434   // bug that generates code such that NaNs compare equal to 0.0
 3435   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3436   match(ConD);
 3437 
 3438   op_cost(5);
 3439   format %{ %}
 3440   interface(CONST_INTER);
 3441 %}
 3442 
 3443 // Double Immediate one
 3444 operand immDPR1() %{
 3445   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3446   match(ConD);
 3447 
 3448   op_cost(5);
 3449   format %{ %}
 3450   interface(CONST_INTER);
 3451 %}
 3452 
 3453 // Double Immediate
 3454 operand immDPR() %{
 3455   predicate(UseSSE<=1);
 3456   match(ConD);
 3457 
 3458   op_cost(5);
 3459   format %{ %}
 3460   interface(CONST_INTER);
 3461 %}
 3462 
 3463 operand immD() %{
 3464   predicate(UseSSE>=2);
 3465   match(ConD);
 3466 
 3467   op_cost(5);
 3468   format %{ %}
 3469   interface(CONST_INTER);
 3470 %}
 3471 
 3472 // Double Immediate zero
 3473 operand immD0() %{
 3474   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3475   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3476   // compare equal to -0.0.
 3477   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3478   match(ConD);
 3479 
 3480   format %{ %}
 3481   interface(CONST_INTER);
 3482 %}
 3483 
 3484 // Float Immediate zero
 3485 operand immFPR0() %{
 3486   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3487   match(ConF);
 3488 
 3489   op_cost(5);
 3490   format %{ %}
 3491   interface(CONST_INTER);
 3492 %}
 3493 
 3494 // Float Immediate one
 3495 operand immFPR1() %{
 3496   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3497   match(ConF);
 3498 
 3499   op_cost(5);
 3500   format %{ %}
 3501   interface(CONST_INTER);
 3502 %}
 3503 
 3504 // Float Immediate
 3505 operand immFPR() %{
 3506   predicate( UseSSE == 0 );
 3507   match(ConF);
 3508 
 3509   op_cost(5);
 3510   format %{ %}
 3511   interface(CONST_INTER);
 3512 %}
 3513 
 3514 // Float Immediate
 3515 operand immF() %{
 3516   predicate(UseSSE >= 1);
 3517   match(ConF);
 3518 
 3519   op_cost(5);
 3520   format %{ %}
 3521   interface(CONST_INTER);
 3522 %}
 3523 
 3524 // Float Immediate zero.  Zero and not -0.0
 3525 operand immF0() %{
 3526   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3527   match(ConF);
 3528 
 3529   op_cost(5);
 3530   format %{ %}
 3531   interface(CONST_INTER);
 3532 %}
 3533 
 3534 // Immediates for special shifts (sign extend)
 3535 
 3536 // Constants for sign-extending shifts
 3537 operand immI_16() %{
 3538   predicate( n->get_int() == 16 );
 3539   match(ConI);
 3540 
 3541   format %{ %}
 3542   interface(CONST_INTER);
 3543 %}
 3544 
 3545 operand immI_24() %{
 3546   predicate( n->get_int() == 24 );
 3547   match(ConI);
 3548 
 3549   format %{ %}
 3550   interface(CONST_INTER);
 3551 %}
 3552 
 3553 // Constant for byte-wide masking
 3554 operand immI_255() %{
 3555   predicate( n->get_int() == 255 );
 3556   match(ConI);
 3557 
 3558   format %{ %}
 3559   interface(CONST_INTER);
 3560 %}
 3561 
 3562 // Constant for short-wide masking
 3563 operand immI_65535() %{
 3564   predicate(n->get_int() == 65535);
 3565   match(ConI);
 3566 
 3567   format %{ %}
 3568   interface(CONST_INTER);
 3569 %}
 3570 
 3571 operand kReg()
 3572 %{
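        // Vector mask register (AVX-512 opmask)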
 3573   constraint(ALLOC_IN_RC(vectmask_reg));
 3574   match(RegVectMask);
 3575   format %{%}
 3576   interface(REG_INTER);
 3577 %}
 3578 
 3579 operand kReg_K1()
 3580 %{
 3581   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3582   match(RegVectMask);
 3583   format %{%}
 3584   interface(REG_INTER);
 3585 %}
 3586 
 3587 operand kReg_K2()
 3588 %{
 3589   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3590   match(RegVectMask);
 3591   format %{%}
 3592   interface(REG_INTER);
 3593 %}
 3594 
 3595 // Special Registers
 3596 operand kReg_K3()
 3597 %{
 3598   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3599   match(RegVectMask);
 3600   format %{%}
 3601   interface(REG_INTER);
 3602 %}
 3603 
 3604 operand kReg_K4()
 3605 %{
 3606   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3607   match(RegVectMask);
 3608   format %{%}
 3609   interface(REG_INTER);
 3610 %}
 3611 
 3612 operand kReg_K5()
 3613 %{
 3614   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3615   match(RegVectMask);
 3616   format %{%}
 3617   interface(REG_INTER);
 3618 %}
 3619 
 3620 operand kReg_K6()
 3621 %{
 3622   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3623   match(RegVectMask);
 3624   format %{%}
 3625   interface(REG_INTER);
 3626 %}
 3627 
 3628 // Special Registers
 3629 operand kReg_K7()
 3630 %{
 3631   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3632   match(RegVectMask);
 3633   format %{%}
 3634   interface(REG_INTER);
 3635 %}
 3636 
 3637 // Register Operands
 3638 // Integer Register
 3639 operand rRegI() %{
 3640   constraint(ALLOC_IN_RC(int_reg));
 3641   match(RegI);
 3642   match(xRegI);
 3643   match(eAXRegI);
 3644   match(eBXRegI);
 3645   match(eCXRegI);
 3646   match(eDXRegI);
 3647   match(eDIRegI);
 3648   match(eSIRegI);
 3649 
 3650   format %{ %}
 3651   interface(REG_INTER);
 3652 %}
 3653 
 3654 // Subset of Integer Register
 3655 operand xRegI(rRegI reg) %{
 3656   constraint(ALLOC_IN_RC(int_x_reg));
 3657   match(reg);
 3658   match(eAXRegI);
 3659   match(eBXRegI);
 3660   match(eCXRegI);
 3661   match(eDXRegI);
 3662 
 3663   format %{ %}
 3664   interface(REG_INTER);
 3665 %}
 3666 
 3667 // Special Registers
 3668 operand eAXRegI(xRegI reg) %{
 3669   constraint(ALLOC_IN_RC(eax_reg));
 3670   match(reg);
 3671   match(rRegI);
 3672 
 3673   format %{ "EAX" %}
 3674   interface(REG_INTER);
 3675 %}
 3676 
 3677 // Special Registers
 3678 operand eBXRegI(xRegI reg) %{
 3679   constraint(ALLOC_IN_RC(ebx_reg));
 3680   match(reg);
 3681   match(rRegI);
 3682 
 3683   format %{ "EBX" %}
 3684   interface(REG_INTER);
 3685 %}
 3686 
 3687 operand eCXRegI(xRegI reg) %{
 3688   constraint(ALLOC_IN_RC(ecx_reg));
 3689   match(reg);
 3690   match(rRegI);
 3691 
 3692   format %{ "ECX" %}
 3693   interface(REG_INTER);
 3694 %}
 3695 
 3696 operand eDXRegI(xRegI reg) %{
 3697   constraint(ALLOC_IN_RC(edx_reg));
 3698   match(reg);
 3699   match(rRegI);
 3700 
 3701   format %{ "EDX" %}
 3702   interface(REG_INTER);
 3703 %}
 3704 
 3705 operand eDIRegI(xRegI reg) %{
 3706   constraint(ALLOC_IN_RC(edi_reg));
 3707   match(reg);
 3708   match(rRegI);
 3709 
 3710   format %{ "EDI" %}
 3711   interface(REG_INTER);
 3712 %}
 3713 
 3714 operand naxRegI() %{
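        // Integer register excluding EAX (the 'nax' register class)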
 3715   constraint(ALLOC_IN_RC(nax_reg));
 3716   match(RegI);
 3717   match(eCXRegI);
 3718   match(eDXRegI);
 3719   match(eSIRegI);
 3720   match(eDIRegI);
 3721 
 3722   format %{ %}
 3723   interface(REG_INTER);
 3724 %}
 3725 
 3726 operand nadxRegI() %{
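        // Integer register excluding EAX and EDX; used where EDX:EAX is an
        // implicit operand (e.g. one-operand IMUL/MUL), so the other operand
        // must not overlap that pair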
 3727   constraint(ALLOC_IN_RC(nadx_reg));
 3728   match(RegI);
 3729   match(eBXRegI);
 3730   match(eCXRegI);
 3731   match(eSIRegI);
 3732   match(eDIRegI);
 3733 
 3734   format %{ %}
 3735   interface(REG_INTER);
 3736 %}
 3737 
 3738 operand ncxRegI() %{
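        // Integer register excluding ECX, e.g. so ECX stays available as the
        // implicit shift-count register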
 3739   constraint(ALLOC_IN_RC(ncx_reg));
 3740   match(RegI);
 3741   match(eAXRegI);
 3742   match(eDXRegI);
 3743   match(eSIRegI);
 3744   match(eDIRegI);
 3745 
 3746   format %{ %}
 3747   interface(REG_INTER);
 3748 %}
 3749 
 3750 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3751 // //
 3752 operand eSIRegI(xRegI reg) %{
 3753    constraint(ALLOC_IN_RC(esi_reg));
 3754    match(reg);
 3755    match(rRegI);
 3756 
 3757    format %{ "ESI" %}
 3758    interface(REG_INTER);
 3759 %}
 3760 
 3761 // Pointer Register
 3762 operand anyRegP() %{
 3763   constraint(ALLOC_IN_RC(any_reg));
 3764   match(RegP);
 3765   match(eAXRegP);
 3766   match(eBXRegP);
 3767   match(eCXRegP);
 3768   match(eDIRegP);
 3769   match(eRegP);
 3770 
 3771   format %{ %}
 3772   interface(REG_INTER);
 3773 %}
 3774 
 3775 operand eRegP() %{
 3776   constraint(ALLOC_IN_RC(int_reg));
 3777   match(RegP);
 3778   match(eAXRegP);
 3779   match(eBXRegP);
 3780   match(eCXRegP);
 3781   match(eDIRegP);
 3782 
 3783   format %{ %}
 3784   interface(REG_INTER);
 3785 %}
 3786 
 3787 operand rRegP() %{
 3788   constraint(ALLOC_IN_RC(int_reg));
 3789   match(RegP);
 3790   match(eAXRegP);
 3791   match(eBXRegP);
 3792   match(eCXRegP);
 3793   match(eDIRegP);
 3794 
 3795   format %{ %}
 3796   interface(REG_INTER);
 3797 %}
 3798 
 3799 // On Windows 95, EBP is not safe to use for implicit null tests.
 3800 operand eRegP_no_EBP() %{
 3801   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3802   match(RegP);
 3803   match(eAXRegP);
 3804   match(eBXRegP);
 3805   match(eCXRegP);
 3806   match(eDIRegP);
 3807 
 3808   op_cost(100);
 3809   format %{ %}
 3810   interface(REG_INTER);
 3811 %}
 3812 
 3813 operand naxRegP() %{
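        // Pointer register excluding EAX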
 3814   constraint(ALLOC_IN_RC(nax_reg));
 3815   match(RegP);
 3816   match(eBXRegP);
 3817   match(eDXRegP);
 3818   match(eCXRegP);
 3819   match(eSIRegP);
 3820   match(eDIRegP);
 3821 
 3822   format %{ %}
 3823   interface(REG_INTER);
 3824 %}
 3825 
 3826 operand nabxRegP() %{
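        // Pointer register excluding EAX and EBX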
 3827   constraint(ALLOC_IN_RC(nabx_reg));
 3828   match(RegP);
 3829   match(eCXRegP);
 3830   match(eDXRegP);
 3831   match(eSIRegP);
 3832   match(eDIRegP);
 3833 
 3834   format %{ %}
 3835   interface(REG_INTER);
 3836 %}
 3837 
 3838 operand pRegP() %{
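        // Pointer register excluding EAX and ECX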
 3839   constraint(ALLOC_IN_RC(p_reg));
 3840   match(RegP);
 3841   match(eBXRegP);
 3842   match(eDXRegP);
 3843   match(eSIRegP);
 3844   match(eDIRegP);
 3845 
 3846   format %{ %}
 3847   interface(REG_INTER);
 3848 %}
 3849 
 3850 // Special Registers
 3851 // Return a pointer value
 3852 operand eAXRegP(eRegP reg) %{
 3853   constraint(ALLOC_IN_RC(eax_reg));
 3854   match(reg);
 3855   format %{ "EAX" %}
 3856   interface(REG_INTER);
 3857 %}
 3858 
 3859 // Used in AtomicAdd
 3860 operand eBXRegP(eRegP reg) %{
 3861   constraint(ALLOC_IN_RC(ebx_reg));
 3862   match(reg);
 3863   format %{ "EBX" %}
 3864   interface(REG_INTER);
 3865 %}
 3866 
 3867 // Tail-call (interprocedural jump) to interpreter
 3868 operand eCXRegP(eRegP reg) %{
 3869   constraint(ALLOC_IN_RC(ecx_reg));
 3870   match(reg);
 3871   format %{ "ECX" %}
 3872   interface(REG_INTER);
 3873 %}
 3874 
 3875 operand eDXRegP(eRegP reg) %{
 3876   constraint(ALLOC_IN_RC(edx_reg));
 3877   match(reg);
 3878   format %{ "EDX" %}
 3879   interface(REG_INTER);
 3880 %}
 3881 
 3882 operand eSIRegP(eRegP reg) %{
 3883   constraint(ALLOC_IN_RC(esi_reg));
 3884   match(reg);
 3885   format %{ "ESI" %}
 3886   interface(REG_INTER);
 3887 %}
 3888 
 3889 // Used in rep stosw
 3890 operand eDIRegP(eRegP reg) %{
 3891   constraint(ALLOC_IN_RC(edi_reg));
 3892   match(reg);
 3893   format %{ "EDI" %}
 3894   interface(REG_INTER);
 3895 %}
 3896 
 3897 operand eRegL() %{
 3898   constraint(ALLOC_IN_RC(long_reg));
 3899   match(RegL);
 3900   match(eADXRegL);
 3901 
 3902   format %{ %}
 3903   interface(REG_INTER);
 3904 %}
 3905 
 3906 operand eADXRegL( eRegL reg ) %{
 3907   constraint(ALLOC_IN_RC(eadx_reg));
 3908   match(reg);
 3909 
 3910   format %{ "EDX:EAX" %}
 3911   interface(REG_INTER);
 3912 %}
 3913 
 3914 operand eBCXRegL( eRegL reg ) %{
 3915   constraint(ALLOC_IN_RC(ebcx_reg));
 3916   match(reg);
 3917 
 3918   format %{ "EBX:ECX" %}
 3919   interface(REG_INTER);
 3920 %}
 3921 
 3922 // Special case for integer high multiply
 3923 operand eADXRegL_low_only() %{
 3924   constraint(ALLOC_IN_RC(eadx_reg));
 3925   match(RegL);
 3926 
 3927   format %{ "EAX" %}
 3928   interface(REG_INTER);
 3929 %}
 3930 
 3931 // Flags register, used as output of compare instructions
 3932 operand rFlagsReg() %{
 3933   constraint(ALLOC_IN_RC(int_flags));
 3934   match(RegFlags);
 3935 
 3936   format %{ "EFLAGS" %}
 3937   interface(REG_INTER);
 3938 %}
 3939 
 3940 // Flags register, used as output of compare instructions
 3941 operand eFlagsReg() %{
 3942   constraint(ALLOC_IN_RC(int_flags));
 3943   match(RegFlags);
 3944 
 3945   format %{ "EFLAGS" %}
 3946   interface(REG_INTER);
 3947 %}
 3948 
 3949 // Flags register, used as output of FLOATING POINT compare instructions
 3950 operand eFlagsRegU() %{
 3951   constraint(ALLOC_IN_RC(int_flags));
 3952   match(RegFlags);
 3953 
 3954   format %{ "EFLAGS_U" %}
 3955   interface(REG_INTER);
 3956 %}
 3957 
 3958 operand eFlagsRegUCF() %{
 3959   constraint(ALLOC_IN_RC(int_flags));
 3960   match(RegFlags);
 3961   predicate(false);
 3962 
 3963   format %{ "EFLAGS_U_CF" %}
 3964   interface(REG_INTER);
 3965 %}
 3966 
 3967 // Condition Code Register used by long compare
 3968 operand flagsReg_long_LTGE() %{
 3969   constraint(ALLOC_IN_RC(int_flags));
 3970   match(RegFlags);
 3971   format %{ "FLAGS_LTGE" %}
 3972   interface(REG_INTER);
 3973 %}
 3974 operand flagsReg_long_EQNE() %{
 3975   constraint(ALLOC_IN_RC(int_flags));
 3976   match(RegFlags);
 3977   format %{ "FLAGS_EQNE" %}
 3978   interface(REG_INTER);
 3979 %}
 3980 operand flagsReg_long_LEGT() %{
 3981   constraint(ALLOC_IN_RC(int_flags));
 3982   match(RegFlags);
 3983   format %{ "FLAGS_LEGT" %}
 3984   interface(REG_INTER);
 3985 %}
 3986 
 3987 // Condition Code Register used by unsigned long compare
 3988 operand flagsReg_ulong_LTGE() %{
 3989   constraint(ALLOC_IN_RC(int_flags));
 3990   match(RegFlags);
 3991   format %{ "FLAGS_U_LTGE" %}
 3992   interface(REG_INTER);
 3993 %}
 3994 operand flagsReg_ulong_EQNE() %{
 3995   constraint(ALLOC_IN_RC(int_flags));
 3996   match(RegFlags);
 3997   format %{ "FLAGS_U_EQNE" %}
 3998   interface(REG_INTER);
 3999 %}
 4000 operand flagsReg_ulong_LEGT() %{
 4001   constraint(ALLOC_IN_RC(int_flags));
 4002   match(RegFlags);
 4003   format %{ "FLAGS_U_LEGT" %}
 4004   interface(REG_INTER);
 4005 %}
 4006 
 4007 // Double register operands (FPU stack)
 4008 operand regDPR() %{
 4009   predicate( UseSSE < 2 );
 4010   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4011   match(RegD);
 4012   match(regDPR1);
 4013   match(regDPR2);
 4014   format %{ %}
 4015   interface(REG_INTER);
 4016 %}
 4017 
 4018 operand regDPR1(regDPR reg) %{
 4019   predicate( UseSSE < 2 );
 4020   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4021   match(reg);
 4022   format %{ "FPR1" %}
 4023   interface(REG_INTER);
 4024 %}
 4025 
 4026 operand regDPR2(regDPR reg) %{
 4027   predicate( UseSSE < 2 );
 4028   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4029   match(reg);
 4030   format %{ "FPR2" %}
 4031   interface(REG_INTER);
 4032 %}
 4033 
 4034 operand regnotDPR1(regDPR reg) %{
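        // Any FPU double register except FPR1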
 4035   predicate( UseSSE < 2 );
 4036   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4037   match(reg);
 4038   format %{ %}
 4039   interface(REG_INTER);
 4040 %}
 4041 
 4042 // Float register operands
 4043 operand regFPR() %{
 4044   predicate( UseSSE < 2 );
 4045   constraint(ALLOC_IN_RC(fp_flt_reg));
 4046   match(RegF);
 4047   match(regFPR1);
 4048   format %{ %}
 4049   interface(REG_INTER);
 4050 %}
 4051 
 4052 // Float register operands
 4053 operand regFPR1(regFPR reg) %{
 4054   predicate( UseSSE < 2 );
 4055   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4056   match(reg);
 4057   format %{ "FPR1" %}
 4058   interface(REG_INTER);
 4059 %}
 4060 
 4061 // XMM Float register operands
 4062 operand regF() %{
 4063   predicate( UseSSE>=1 );
 4064   constraint(ALLOC_IN_RC(float_reg_legacy));
 4065   match(RegF);
 4066   format %{ %}
 4067   interface(REG_INTER);
 4068 %}
 4069 
 4070 operand legRegF() %{
 4071   predicate( UseSSE>=1 );
 4072   constraint(ALLOC_IN_RC(float_reg_legacy));
 4073   match(RegF);
 4074   format %{ %}
 4075   interface(REG_INTER);
 4076 %}
 4077 
 4078 // Float register operands
 4079 operand vlRegF() %{
 4080    constraint(ALLOC_IN_RC(float_reg_vl));
 4081    match(RegF);
 4082 
 4083    format %{ %}
 4084    interface(REG_INTER);
 4085 %}
 4086 
 4087 // XMM Double register operands
 4088 operand regD() %{
 4089   predicate( UseSSE>=2 );
 4090   constraint(ALLOC_IN_RC(double_reg_legacy));
 4091   match(RegD);
 4092   format %{ %}
 4093   interface(REG_INTER);
 4094 %}
 4095 
 4096 // Double register operands
 4097 operand legRegD() %{
 4098   predicate( UseSSE>=2 );
 4099   constraint(ALLOC_IN_RC(double_reg_legacy));
 4100   match(RegD);
 4101   format %{ %}
 4102   interface(REG_INTER);
 4103 %}
 4104 
 4105 operand vlRegD() %{
 4106    constraint(ALLOC_IN_RC(double_reg_vl));
 4107    match(RegD);
 4108 
 4109    format %{ %}
 4110    interface(REG_INTER);
 4111 %}
 4112 
 4113 //----------Memory Operands----------------------------------------------------
 4114 // Direct Memory Operand
 4115 operand direct(immP addr) %{
 4116   match(addr);
 4117 
 4118   format %{ "[$addr]" %}
 4119   interface(MEMORY_INTER) %{
 4120     base(0xFFFFFFFF);
 4121     index(0x4);
 4122     scale(0x0);
 4123     disp($addr);
 4124   %}
 4125 %}
 4126 
 4127 // Indirect Memory Operand
 4128 operand indirect(eRegP reg) %{
 4129   constraint(ALLOC_IN_RC(int_reg));
 4130   match(reg);
 4131 
 4132   format %{ "[$reg]" %}
 4133   interface(MEMORY_INTER) %{
 4134     base($reg);
 4135     index(0x4);
 4136     scale(0x0);
 4137     disp(0x0);
 4138   %}
 4139 %}
 4140 
 4141 // Indirect Memory Plus Short Offset Operand
 4142 operand indOffset8(eRegP reg, immI8 off) %{
 4143   match(AddP reg off);
 4144 
 4145   format %{ "[$reg + $off]" %}
 4146   interface(MEMORY_INTER) %{
 4147     base($reg);
 4148     index(0x4);
 4149     scale(0x0);
 4150     disp($off);
 4151   %}
 4152 %}
 4153 
 4154 // Indirect Memory Plus Long Offset Operand
 4155 operand indOffset32(eRegP reg, immI off) %{
 4156   match(AddP reg off);
 4157 
 4158   format %{ "[$reg + $off]" %}
 4159   interface(MEMORY_INTER) %{
 4160     base($reg);
 4161     index(0x4);
 4162     scale(0x0);
 4163     disp($off);
 4164   %}
 4165 %}
 4166 
 4167 // Indirect Memory Plus Long Offset Operand
 4168 operand indOffset32X(rRegI reg, immP off) %{
 4169   match(AddP off reg);
 4170 
 4171   format %{ "[$reg + $off]" %}
 4172   interface(MEMORY_INTER) %{
 4173     base($reg);
 4174     index(0x4);
 4175     scale(0x0);
 4176     disp($off);
 4177   %}
 4178 %}
 4179 
 4180 // Indirect Memory Plus Index Register Plus Offset Operand
 4181 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4182   match(AddP (AddP reg ireg) off);
 4183 
 4184   op_cost(10);
 4185   format %{"[$reg + $off + $ireg]" %}
 4186   interface(MEMORY_INTER) %{
 4187     base($reg);
 4188     index($ireg);
 4189     scale(0x0);
 4190     disp($off);
 4191   %}
 4192 %}
 4193 
 4194 // Indirect Memory Plus Index Register Plus Offset Operand
 4195 operand indIndex(eRegP reg, rRegI ireg) %{
 4196   match(AddP reg ireg);
 4197 
 4198   op_cost(10);
 4199   format %{"[$reg + $ireg]" %}
 4200   interface(MEMORY_INTER) %{
 4201     base($reg);
 4202     index($ireg);
 4203     scale(0x0);
 4204     disp(0x0);
 4205   %}
 4206 %}
 4207 
 4208 // // -------------------------------------------------------------------------
 4209 // // 486 architecture doesn't support "scale * index + offset" without a base
 4210 // // -------------------------------------------------------------------------
 4211 // // Scaled Memory Operands
 4212 // // Indirect Memory Times Scale Plus Offset Operand
 4213 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4214 //   match(AddP off (LShiftI ireg scale));
 4215 //
 4216 //   op_cost(10);
 4217 //   format %{"[$off + $ireg << $scale]" %}
 4218 //   interface(MEMORY_INTER) %{
 4219 //     base(0x4);
 4220 //     index($ireg);
 4221 //     scale($scale);
 4222 //     disp($off);
 4223 //   %}
 4224 // %}
 4225 
 4226 // Indirect Memory Times Scale Plus Index Register
 4227 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4228   match(AddP reg (LShiftI ireg scale));
 4229 
 4230   op_cost(10);
 4231   format %{"[$reg + $ireg << $scale]" %}
 4232   interface(MEMORY_INTER) %{
 4233     base($reg);
 4234     index($ireg);
 4235     scale($scale);
 4236     disp(0x0);
 4237   %}
 4238 %}
 4239 
 4240 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4241 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4242   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4243 
 4244   op_cost(10);
 4245   format %{"[$reg + $off + $ireg << $scale]" %}
 4246   interface(MEMORY_INTER) %{
 4247     base($reg);
 4248     index($ireg);
 4249     scale($scale);
 4250     disp($off);
 4251   %}
 4252 %}
 4253 
 4254 //----------Load Long Memory Operands------------------------------------------
 4255 // The load-long idiom will use its address expression again after loading
 4256 // the first word of the long.  If the load-long destination overlaps with
 4257 // registers used in the addressing expression, the 2nd half will be loaded
 4258 // from a clobbered address.  Fix this by requiring that load-long use
 4259 // address registers that do not overlap with the load-long target.
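      // For example, if the base register of the address were also the low half
      // of the destination, the first 32-bit load would clobber the base and the
      // second load would read through a stale address.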
 4260 
 4261 // load-long support
 4262 operand load_long_RegP() %{
 4263   constraint(ALLOC_IN_RC(esi_reg));
 4264   match(RegP);
 4265   match(eSIRegP);
 4266   op_cost(100);
 4267   format %{  %}
 4268   interface(REG_INTER);
 4269 %}
 4270 
 4271 // Indirect Memory Operand Long
 4272 operand load_long_indirect(load_long_RegP reg) %{
 4273   constraint(ALLOC_IN_RC(esi_reg));
 4274   match(reg);
 4275 
 4276   format %{ "[$reg]" %}
 4277   interface(MEMORY_INTER) %{
 4278     base($reg);
 4279     index(0x4);
 4280     scale(0x0);
 4281     disp(0x0);
 4282   %}
 4283 %}
 4284 
 4285 // Indirect Memory Plus Long Offset Operand
 4286 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4287   match(AddP reg off);
 4288 
 4289   format %{ "[$reg + $off]" %}
 4290   interface(MEMORY_INTER) %{
 4291     base($reg);
 4292     index(0x4);
 4293     scale(0x0);
 4294     disp($off);
 4295   %}
 4296 %}
 4297 
 4298 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4299 
 4300 
 4301 //----------Special Memory Operands--------------------------------------------
 4302 // Stack Slot Operand - This operand is used for loading and storing temporary
 4303 //                      values on the stack where a match requires a value to
 4304 //                      flow through memory.
 4305 operand stackSlotP(sRegP reg) %{
 4306   constraint(ALLOC_IN_RC(stack_slots));
 4307   // No match rule because this operand is only generated in matching
 4308   format %{ "[$reg]" %}
 4309   interface(MEMORY_INTER) %{
 4310     base(0x4);   // ESP
 4311     index(0x4);  // No Index
 4312     scale(0x0);  // No Scale
 4313     disp($reg);  // Stack Offset
 4314   %}
 4315 %}
 4316 
 4317 operand stackSlotI(sRegI reg) %{
 4318   constraint(ALLOC_IN_RC(stack_slots));
 4319   // No match rule because this operand is only generated in matching
 4320   format %{ "[$reg]" %}
 4321   interface(MEMORY_INTER) %{
 4322     base(0x4);   // ESP
 4323     index(0x4);  // No Index
 4324     scale(0x0);  // No Scale
 4325     disp($reg);  // Stack Offset
 4326   %}
 4327 %}
 4328 
 4329 operand stackSlotF(sRegF reg) %{
 4330   constraint(ALLOC_IN_RC(stack_slots));
 4331   // No match rule because this operand is only generated in matching
 4332   format %{ "[$reg]" %}
 4333   interface(MEMORY_INTER) %{
 4334     base(0x4);   // ESP
 4335     index(0x4);  // No Index
 4336     scale(0x0);  // No Scale
 4337     disp($reg);  // Stack Offset
 4338   %}
 4339 %}
 4340 
 4341 operand stackSlotD(sRegD reg) %{
 4342   constraint(ALLOC_IN_RC(stack_slots));
 4343   // No match rule because this operand is only generated in matching
 4344   format %{ "[$reg]" %}
 4345   interface(MEMORY_INTER) %{
 4346     base(0x4);   // ESP
 4347     index(0x4);  // No Index
 4348     scale(0x0);  // No Scale
 4349     disp($reg);  // Stack Offset
 4350   %}
 4351 %}
 4352 
 4353 operand stackSlotL(sRegL reg) %{
 4354   constraint(ALLOC_IN_RC(stack_slots));
 4355   // No match rule because this operand is only generated in matching
 4356   format %{ "[$reg]" %}
 4357   interface(MEMORY_INTER) %{
 4358     base(0x4);   // ESP
 4359     index(0x4);  // No Index
 4360     scale(0x0);  // No Scale
 4361     disp($reg);  // Stack Offset
 4362   %}
 4363 %}
 4364 
 4365 //----------Conditional Branch Operands----------------------------------------
 4366 // Comparison Op  - This is the operation of the comparison, and is limited to
 4367 //                  the following set of codes:
 4368 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4369 //
 4370 // Other attributes of the comparison, such as unsignedness, are specified
 4371 // by the comparison instruction that sets a condition code flags register.
 4372 // That result is represented by a flags operand whose subtype is appropriate
 4373 // to the unsignedness (etc.) of the comparison.
 4374 //
 4375 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4376 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4377 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4378 
 4379 // Comparison Code
 4380 operand cmpOp() %{
 4381   match(Bool);
 4382 
 4383   format %{ "" %}
 4384   interface(COND_INTER) %{
 4385     equal(0x4, "e");
 4386     not_equal(0x5, "ne");
 4387     less(0xC, "l");
 4388     greater_equal(0xD, "ge");
 4389     less_equal(0xE, "le");
 4390     greater(0xF, "g");
 4391     overflow(0x0, "o");
 4392     no_overflow(0x1, "no");
 4393   %}
 4394 %}
 4395 
 4396 // Comparison Code, unsigned compare.  Used by FP also, with
 4397 // C2 (unordered) turned into GT or LT already.  The other bits
 4398 // C0 and C3 are turned into Carry & Zero flags.
 4399 operand cmpOpU() %{
 4400   match(Bool);
 4401 
 4402   format %{ "" %}
 4403   interface(COND_INTER) %{
 4404     equal(0x4, "e");
 4405     not_equal(0x5, "ne");
 4406     less(0x2, "b");
 4407     greater_equal(0x3, "nb");
 4408     less_equal(0x6, "be");
 4409     greater(0x7, "nbe");
 4410     overflow(0x0, "o");
 4411     no_overflow(0x1, "no");
 4412   %}
 4413 %}
 4414 
 4415 // Floating comparisons that don't require any fixup for the unordered case
 4416 operand cmpOpUCF() %{
 4417   match(Bool);
 4418   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4419             n->as_Bool()->_test._test == BoolTest::ge ||
 4420             n->as_Bool()->_test._test == BoolTest::le ||
 4421             n->as_Bool()->_test._test == BoolTest::gt);
 4422   format %{ "" %}
 4423   interface(COND_INTER) %{
 4424     equal(0x4, "e");
 4425     not_equal(0x5, "ne");
 4426     less(0x2, "b");
 4427     greater_equal(0x3, "nb");
 4428     less_equal(0x6, "be");
 4429     greater(0x7, "nbe");
 4430     overflow(0x0, "o");
 4431     no_overflow(0x1, "no");
 4432   %}
 4433 %}
 4434 
 4435 
 4436 // Floating comparisons that can be fixed up with extra conditional jumps
 4437 operand cmpOpUCF2() %{
 4438   match(Bool);
 4439   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4440             n->as_Bool()->_test._test == BoolTest::eq);
 4441   format %{ "" %}
 4442   interface(COND_INTER) %{
 4443     equal(0x4, "e");
 4444     not_equal(0x5, "ne");
 4445     less(0x2, "b");
 4446     greater_equal(0x3, "nb");
 4447     less_equal(0x6, "be");
 4448     greater(0x7, "nbe");
 4449     overflow(0x0, "o");
 4450     no_overflow(0x1, "no");
 4451   %}
 4452 %}
 4453 
 4454 // Comparison Code for FP conditional move
 4455 operand cmpOp_fcmov() %{
 4456   match(Bool);
 4457 
 4458   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4459             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4460   format %{ "" %}
 4461   interface(COND_INTER) %{
 4462     equal        (0x0C8);
 4463     not_equal    (0x1C8);
 4464     less         (0x0C0);
 4465     greater_equal(0x1C0);
 4466     less_equal   (0x0D0);
 4467     greater      (0x1D0);
 4468     overflow(0x0, "o"); // not really supported by the instruction
 4469     no_overflow(0x1, "no"); // not really supported by the instruction
 4470   %}
 4471 %}
 4472 
// Comparison Code used in long compares.  The encodings are deliberately
// mirrored (less maps to "g", greater to "l", etc.) because the matching
// instruction commutes the comparison operands.
 4474 operand cmpOp_commute() %{
 4475   match(Bool);
 4476 
 4477   format %{ "" %}
 4478   interface(COND_INTER) %{
 4479     equal(0x4, "e");
 4480     not_equal(0x5, "ne");
 4481     less(0xF, "g");
 4482     greater_equal(0xE, "le");
 4483     less_equal(0xD, "ge");
 4484     greater(0xC, "l");
 4485     overflow(0x0, "o");
 4486     no_overflow(0x1, "no");
 4487   %}
 4488 %}
 4489 
// Comparison Code used in unsigned long compares, with the encodings mirrored
// in the same way as cmpOp_commute above.
 4491 operand cmpOpU_commute() %{
 4492   match(Bool);
 4493 
 4494   format %{ "" %}
 4495   interface(COND_INTER) %{
 4496     equal(0x4, "e");
 4497     not_equal(0x5, "ne");
 4498     less(0x7, "nbe");
 4499     greater_equal(0x6, "be");
 4500     less_equal(0x3, "nb");
 4501     greater(0x2, "b");
 4502     overflow(0x0, "o");
 4503     no_overflow(0x1, "no");
 4504   %}
 4505 %}
 4506 
 4507 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 4509 // instruction definitions by not requiring the AD writer to specify separate
 4510 // instructions for every form of operand when the instruction accepts
 4511 // multiple operand types with the same basic encoding and format.  The classic
 4512 // case of this is memory operands.
 4513 
 4514 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4515                indIndex, indIndexScale, indIndexScaleOffset);
 4516 
// Long memory operations are encoded as 2 instructions with a +4 offset for
// the high word.  This means some kind of offset is always required, and you
// cannot use an oop as the offset (as is done when working on static globals).
 4520 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4521                     indIndex, indIndexScale, indIndexScaleOffset);
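
// Illustrative note: an instruct that declares a "memory mem" (or "long_memory
// mem") operand, as the load/store rules below do, matches any of the
// addressing forms grouped above, so one rule covers [base], [base+disp8],
// [base+index*scale+disp32], and so on.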
 4522 
 4523 
 4524 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 4526 pipeline %{
 4527 
 4528 //----------ATTRIBUTES---------------------------------------------------------
 4529 attributes %{
  variable_size_instructions;        // Variable-sized instructions
 4531   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 4533   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4534   instruction_fetch_units = 1;       // of 16 bytes
 4535 
 4536   // List of nop instructions
 4537   nops( MachNop );
 4538 %}
 4539 
 4540 //----------RESOURCES----------------------------------------------------------
 4541 // Resources are the functional units available to the machine
 4542 
 4543 // Generic P2/P3 pipeline
 4544 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4545 // 3 instructions decoded per cycle.
 4546 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU ops, only ALU0 handles mul/div instructions.
 4548 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4549            MS0, MS1, MEM = MS0 | MS1,
 4550            BR, FPU,
 4551            ALU0, ALU1, ALU = ALU0 | ALU1 );
 4552 
 4553 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4554 // Pipeline Description specifies the stages in the machine's pipeline
 4555 
 4556 // Generic P2/P3 pipeline
 4557 pipe_desc(S0, S1, S2, S3, S4, S5);
 4558 
 4559 //----------PIPELINE CLASSES---------------------------------------------------
 4560 // Pipeline Classes describe the stages in which input and output are
 4561 // referenced by the hardware pipeline.
 4562 
 4563 // Naming convention: ialu or fpu
 4564 // Then: _reg
 4565 // Then: _reg if there is a 2nd register
 4566 // Then: _long if it's a pair of instructions implementing a long
 4567 // Then: _fat if it requires the big decoder
 4568 //   Or: _mem if it requires the big decoder and a memory unit.
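//
// For example, ialu_reg_mem below is an integer ALU operation with a register
// destination and a memory source (so it needs the big decoder and a memory
// unit), while fpu_reg_reg_reg is an FPU operation reading two source
// registers and writing one destination.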
 4569 
 4570 // Integer ALU reg operation
 4571 pipe_class ialu_reg(rRegI dst) %{
 4572     single_instruction;
 4573     dst    : S4(write);
 4574     dst    : S3(read);
 4575     DECODE : S0;        // any decoder
 4576     ALU    : S3;        // any alu
 4577 %}
 4578 
 4579 // Long ALU reg operation
 4580 pipe_class ialu_reg_long(eRegL dst) %{
 4581     instruction_count(2);
 4582     dst    : S4(write);
 4583     dst    : S3(read);
 4584     DECODE : S0(2);     // any 2 decoders
 4585     ALU    : S3(2);     // both alus
 4586 %}
 4587 
 4588 // Integer ALU reg operation using big decoder
 4589 pipe_class ialu_reg_fat(rRegI dst) %{
 4590     single_instruction;
 4591     dst    : S4(write);
 4592     dst    : S3(read);
 4593     D0     : S0;        // big decoder only
 4594     ALU    : S3;        // any alu
 4595 %}
 4596 
 4597 // Long ALU reg operation using big decoder
 4598 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4599     instruction_count(2);
 4600     dst    : S4(write);
 4601     dst    : S3(read);
 4602     D0     : S0(2);     // big decoder only; twice
 4603     ALU    : S3(2);     // any 2 alus
 4604 %}
 4605 
 4606 // Integer ALU reg-reg operation
 4607 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4608     single_instruction;
 4609     dst    : S4(write);
 4610     src    : S3(read);
 4611     DECODE : S0;        // any decoder
 4612     ALU    : S3;        // any alu
 4613 %}
 4614 
 4615 // Long ALU reg-reg operation
 4616 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4617     instruction_count(2);
 4618     dst    : S4(write);
 4619     src    : S3(read);
 4620     DECODE : S0(2);     // any 2 decoders
 4621     ALU    : S3(2);     // both alus
 4622 %}
 4623 
// Integer ALU reg-reg operation using big decoder
 4625 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4626     single_instruction;
 4627     dst    : S4(write);
 4628     src    : S3(read);
 4629     D0     : S0;        // big decoder only
 4630     ALU    : S3;        // any alu
 4631 %}
 4632 
// Long ALU reg-reg operation using big decoder
 4634 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4635     instruction_count(2);
 4636     dst    : S4(write);
 4637     src    : S3(read);
 4638     D0     : S0(2);     // big decoder only; twice
 4639     ALU    : S3(2);     // both alus
 4640 %}
 4641 
 4642 // Integer ALU reg-mem operation
 4643 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4644     single_instruction;
 4645     dst    : S5(write);
 4646     mem    : S3(read);
 4647     D0     : S0;        // big decoder only
 4648     ALU    : S4;        // any alu
 4649     MEM    : S3;        // any mem
 4650 %}
 4651 
 4652 // Long ALU reg-mem operation
 4653 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4654     instruction_count(2);
 4655     dst    : S5(write);
 4656     mem    : S3(read);
 4657     D0     : S0(2);     // big decoder only; twice
 4658     ALU    : S4(2);     // any 2 alus
 4659     MEM    : S3(2);     // both mems
 4660 %}
 4661 
 4662 // Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem) %{
 4665     single_instruction;
 4666     mem    : S3(read);
 4667     D0     : S0;        // big decoder only
 4668     MEM    : S3;        // any mem
 4669 %}
 4670 
 4671 // Integer Store to Memory
 4672 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4673     single_instruction;
 4674     mem    : S3(read);
 4675     src    : S5(read);
 4676     D0     : S0;        // big decoder only
 4677     ALU    : S4;        // any alu
 4678     MEM    : S3;
 4679 %}
 4680 
 4681 // Long Store to Memory
 4682 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4683     instruction_count(2);
 4684     mem    : S3(read);
 4685     src    : S5(read);
 4686     D0     : S0(2);     // big decoder only; twice
 4687     ALU    : S4(2);     // any 2 alus
 4688     MEM    : S3(2);     // Both mems
 4689 %}
 4690 
 4691 // Integer Store to Memory
 4692 pipe_class ialu_mem_imm(memory mem) %{
 4693     single_instruction;
 4694     mem    : S3(read);
 4695     D0     : S0;        // big decoder only
 4696     ALU    : S4;        // any alu
 4697     MEM    : S3;
 4698 %}
 4699 
 4700 // Integer ALU0 reg-reg operation
 4701 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4702     single_instruction;
 4703     dst    : S4(write);
 4704     src    : S3(read);
 4705     D0     : S0;        // Big decoder only
 4706     ALU0   : S3;        // only alu0
 4707 %}
 4708 
 4709 // Integer ALU0 reg-mem operation
 4710 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4711     single_instruction;
 4712     dst    : S5(write);
 4713     mem    : S3(read);
 4714     D0     : S0;        // big decoder only
 4715     ALU0   : S4;        // ALU0 only
 4716     MEM    : S3;        // any mem
 4717 %}
 4718 
 4719 // Integer ALU reg-reg operation
 4720 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4721     single_instruction;
 4722     cr     : S4(write);
 4723     src1   : S3(read);
 4724     src2   : S3(read);
 4725     DECODE : S0;        // any decoder
 4726     ALU    : S3;        // any alu
 4727 %}
 4728 
 4729 // Integer ALU reg-imm operation
 4730 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4731     single_instruction;
 4732     cr     : S4(write);
 4733     src1   : S3(read);
 4734     DECODE : S0;        // any decoder
 4735     ALU    : S3;        // any alu
 4736 %}
 4737 
 4738 // Integer ALU reg-mem operation
 4739 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4740     single_instruction;
 4741     cr     : S4(write);
 4742     src1   : S3(read);
 4743     src2   : S3(read);
 4744     D0     : S0;        // big decoder only
 4745     ALU    : S4;        // any alu
 4746     MEM    : S3;
 4747 %}
 4748 
 4749 // Conditional move reg-reg
 4750 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4751     instruction_count(4);
 4752     y      : S4(read);
 4753     q      : S3(read);
 4754     p      : S3(read);
 4755     DECODE : S0(4);     // any decoder
 4756 %}
 4757 
 4758 // Conditional move reg-reg
 4759 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4760     single_instruction;
 4761     dst    : S4(write);
 4762     src    : S3(read);
 4763     cr     : S3(read);
 4764     DECODE : S0;        // any decoder
 4765 %}
 4766 
 4767 // Conditional move reg-mem
 4768 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4769     single_instruction;
 4770     dst    : S4(write);
 4771     src    : S3(read);
 4772     cr     : S3(read);
 4773     DECODE : S0;        // any decoder
 4774     MEM    : S3;
 4775 %}
 4776 
 4777 // Conditional move reg-reg long
 4778 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4779     single_instruction;
 4780     dst    : S4(write);
 4781     src    : S3(read);
 4782     cr     : S3(read);
 4783     DECODE : S0(2);     // any 2 decoders
 4784 %}
 4785 
 4786 // Conditional move double reg-reg
 4787 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4788     single_instruction;
 4789     dst    : S4(write);
 4790     src    : S3(read);
 4791     cr     : S3(read);
 4792     DECODE : S0;        // any decoder
 4793 %}
 4794 
 4795 // Float reg-reg operation
 4796 pipe_class fpu_reg(regDPR dst) %{
 4797     instruction_count(2);
 4798     dst    : S3(read);
 4799     DECODE : S0(2);     // any 2 decoders
 4800     FPU    : S3;
 4801 %}
 4802 
 4803 // Float reg-reg operation
 4804 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4805     instruction_count(2);
 4806     dst    : S4(write);
 4807     src    : S3(read);
 4808     DECODE : S0(2);     // any 2 decoders
 4809     FPU    : S3;
 4810 %}
 4811 
 4812 // Float reg-reg operation
 4813 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4814     instruction_count(3);
 4815     dst    : S4(write);
 4816     src1   : S3(read);
 4817     src2   : S3(read);
 4818     DECODE : S0(3);     // any 3 decoders
 4819     FPU    : S3(2);
 4820 %}
 4821 
 4822 // Float reg-reg operation
 4823 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4824     instruction_count(4);
 4825     dst    : S4(write);
 4826     src1   : S3(read);
 4827     src2   : S3(read);
 4828     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
 4830     FPU    : S3(2);
 4831 %}
 4832 
 4833 // Float reg-reg operation
 4834 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4835     instruction_count(4);
 4836     dst    : S4(write);
 4837     src1   : S3(read);
 4838     src2   : S3(read);
 4839     src3   : S3(read);
 4840     DECODE : S1(3);     // any 3 decoders
 4841     D0     : S0;        // Big decoder only
 4842     FPU    : S3(2);
 4843     MEM    : S3;
 4844 %}
 4845 
 4846 // Float reg-mem operation
 4847 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4848     instruction_count(2);
 4849     dst    : S5(write);
 4850     mem    : S3(read);
 4851     D0     : S0;        // big decoder only
 4852     DECODE : S1;        // any decoder for FPU POP
 4853     FPU    : S4;
 4854     MEM    : S3;        // any mem
 4855 %}
 4856 
 4857 // Float reg-mem operation
 4858 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4859     instruction_count(3);
 4860     dst    : S5(write);
 4861     src1   : S3(read);
 4862     mem    : S3(read);
 4863     D0     : S0;        // big decoder only
 4864     DECODE : S1(2);     // any decoder for FPU POP
 4865     FPU    : S4;
 4866     MEM    : S3;        // any mem
 4867 %}
 4868 
 4869 // Float mem-reg operation
 4870 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4871     instruction_count(2);
 4872     src    : S5(read);
 4873     mem    : S3(read);
 4874     DECODE : S0;        // any decoder for FPU PUSH
 4875     D0     : S1;        // big decoder only
 4876     FPU    : S4;
 4877     MEM    : S3;        // any mem
 4878 %}
 4879 
 4880 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4881     instruction_count(3);
 4882     src1   : S3(read);
 4883     src2   : S3(read);
 4884     mem    : S3(read);
 4885     DECODE : S0(2);     // any decoder for FPU PUSH
 4886     D0     : S1;        // big decoder only
 4887     FPU    : S4;
 4888     MEM    : S3;        // any mem
 4889 %}
 4890 
 4891 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4892     instruction_count(3);
 4893     src1   : S3(read);
 4894     src2   : S3(read);
 4895     mem    : S4(read);
 4896     DECODE : S0;        // any decoder for FPU PUSH
 4897     D0     : S0(2);     // big decoder only
 4898     FPU    : S4;
 4899     MEM    : S3(2);     // any mem
 4900 %}
 4901 
 4902 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4903     instruction_count(2);
 4904     src1   : S3(read);
 4905     dst    : S4(read);
 4906     D0     : S0(2);     // big decoder only
 4907     MEM    : S3(2);     // any mem
 4908 %}
 4909 
 4910 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4911     instruction_count(3);
 4912     src1   : S3(read);
 4913     src2   : S3(read);
 4914     dst    : S4(read);
 4915     D0     : S0(3);     // big decoder only
 4916     FPU    : S4;
 4917     MEM    : S3(3);     // any mem
 4918 %}
 4919 
 4920 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4921     instruction_count(3);
 4922     src1   : S4(read);
 4923     mem    : S4(read);
 4924     DECODE : S0;        // any decoder for FPU PUSH
 4925     D0     : S0(2);     // big decoder only
 4926     FPU    : S4;
 4927     MEM    : S3(2);     // any mem
 4928 %}
 4929 
 4930 // Float load constant
 4931 pipe_class fpu_reg_con(regDPR dst) %{
 4932     instruction_count(2);
 4933     dst    : S5(write);
 4934     D0     : S0;        // big decoder only for the load
 4935     DECODE : S1;        // any decoder for FPU POP
 4936     FPU    : S4;
 4937     MEM    : S3;        // any mem
 4938 %}
 4939 
 4940 // Float load constant
 4941 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4942     instruction_count(3);
 4943     dst    : S5(write);
 4944     src    : S3(read);
 4945     D0     : S0;        // big decoder only for the load
 4946     DECODE : S1(2);     // any decoder for FPU POP
 4947     FPU    : S4;
 4948     MEM    : S3;        // any mem
 4949 %}
 4950 
// Unconditional branch
 4952 pipe_class pipe_jmp( label labl ) %{
 4953     single_instruction;
 4954     BR   : S3;
 4955 %}
 4956 
 4957 // Conditional branch
 4958 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4959     single_instruction;
 4960     cr    : S1(read);
 4961     BR    : S3;
 4962 %}
 4963 
 4964 // Allocation idiom
 4965 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4966     instruction_count(1); force_serialization;
 4967     fixed_latency(6);
 4968     heap_ptr : S3(read);
 4969     DECODE   : S0(3);
 4970     D0       : S2;
 4971     MEM      : S3;
 4972     ALU      : S3(2);
 4973     dst      : S5(write);
 4974     BR       : S5;
 4975 %}
 4976 
 4977 // Generic big/slow expanded idiom
 4978 pipe_class pipe_slow(  ) %{
 4979     instruction_count(10); multiple_bundles; force_serialization;
 4980     fixed_latency(100);
 4981     D0  : S0(2);
 4982     MEM : S3(2);
 4983 %}
 4984 
 4985 // The real do-nothing guy
 4986 pipe_class empty( ) %{
 4987     instruction_count(0);
 4988 %}
 4989 
 4990 // Define the class for the Nop node
 4991 define %{
 4992    MachNop = empty;
 4993 %}
 4994 
 4995 %}
 4996 
 4997 //----------INSTRUCTIONS-------------------------------------------------------
 4998 //
 4999 // match      -- States which machine-independent subtree may be replaced
 5000 //               by this instruction.
 5001 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5002 //               selection to identify a minimum cost tree of machine
 5003 //               instructions that matches a tree of machine-independent
 5004 //               instructions.
 5005 // format     -- A string providing the disassembly for this instruction.
 5006 //               The value of an instruction's operand may be inserted
 5007 //               by referring to it with a '$' prefix.
 5008 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5009 //               to within an encode class as $primary, $secondary, and $tertiary
 5010 //               respectively.  The primary opcode is commonly used to
 5011 //               indicate the type of machine instruction, while secondary
 5012 //               and tertiary are often used for prefix options or addressing
 5013 //               modes.
 5014 // ins_encode -- A list of encode classes with parameters. The encode class
 5015 //               name must have been defined in an 'enc_class' specification
 5016 //               in the encode section of the architecture description.
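//
// As a concrete reading of the first rule below: bytes_reverse_int matches the
// ideal ReverseBytesI node, its opcode() supplies $primary = 0x0F and
// $secondary = 0xC8, and ins_encode emits them through the OpcP and OpcSReg
// encode classes from the encode section (BSWAP r32 is encoded as 0F C8+rd).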
 5017 
 5018 //----------BSWAP-Instruction--------------------------------------------------
 5019 instruct bytes_reverse_int(rRegI dst) %{
 5020   match(Set dst (ReverseBytesI dst));
 5021 
 5022   format %{ "BSWAP  $dst" %}
 5023   opcode(0x0F, 0xC8);
 5024   ins_encode( OpcP, OpcSReg(dst) );
 5025   ins_pipe( ialu_reg );
 5026 %}
 5027 
 5028 instruct bytes_reverse_long(eRegL dst) %{
 5029   match(Set dst (ReverseBytesL dst));
 5030 
 5031   format %{ "BSWAP  $dst.lo\n\t"
 5032             "BSWAP  $dst.hi\n\t"
 5033             "XCHG   $dst.lo $dst.hi" %}
 5034 
 5035   ins_cost(125);
 5036   ins_encode( bswap_long_bytes(dst) );
 5037   ins_pipe( ialu_reg_reg);
 5038 %}
 5039 
 5040 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5041   match(Set dst (ReverseBytesUS dst));
 5042   effect(KILL cr);
 5043 
 5044   format %{ "BSWAP  $dst\n\t"
 5045             "SHR    $dst,16\n\t" %}
 5046   ins_encode %{
 5047     __ bswapl($dst$$Register);
 5048     __ shrl($dst$$Register, 16);
 5049   %}
 5050   ins_pipe( ialu_reg );
 5051 %}
 5052 
 5053 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5054   match(Set dst (ReverseBytesS dst));
 5055   effect(KILL cr);
 5056 
 5057   format %{ "BSWAP  $dst\n\t"
 5058             "SAR    $dst,16\n\t" %}
 5059   ins_encode %{
 5060     __ bswapl($dst$$Register);
 5061     __ sarl($dst$$Register, 16);
 5062   %}
 5063   ins_pipe( ialu_reg );
 5064 %}
 5065 
 5066 
 5067 //---------- Zeros Count Instructions ------------------------------------------
 5068 
 5069 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5070   predicate(UseCountLeadingZerosInstruction);
 5071   match(Set dst (CountLeadingZerosI src));
 5072   effect(KILL cr);
 5073 
 5074   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5075   ins_encode %{
 5076     __ lzcntl($dst$$Register, $src$$Register);
 5077   %}
 5078   ins_pipe(ialu_reg);
 5079 %}
 5080 
 5081 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5082   predicate(!UseCountLeadingZerosInstruction);
 5083   match(Set dst (CountLeadingZerosI src));
 5084   effect(KILL cr);
 5085 
 5086   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5087             "JNZ    skip\n\t"
 5088             "MOV    $dst, -1\n"
 5089       "skip:\n\t"
 5090             "NEG    $dst\n\t"
 5091             "ADD    $dst, 31" %}
 5092   ins_encode %{
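    // BSR returns the bit index of the most significant set bit, so for a
    // non-zero source the leading-zero count is 31 - index, computed below as
    // NEG followed by ADD 31.  A zero source sets ZF and the index is forced
    // to -1, which the same arithmetic turns into 32.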
 5093     Register Rdst = $dst$$Register;
 5094     Register Rsrc = $src$$Register;
 5095     Label skip;
 5096     __ bsrl(Rdst, Rsrc);
 5097     __ jccb(Assembler::notZero, skip);
 5098     __ movl(Rdst, -1);
 5099     __ bind(skip);
 5100     __ negl(Rdst);
 5101     __ addl(Rdst, BitsPerInt - 1);
 5102   %}
 5103   ins_pipe(ialu_reg);
 5104 %}
 5105 
 5106 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5107   predicate(UseCountLeadingZerosInstruction);
 5108   match(Set dst (CountLeadingZerosL src));
 5109   effect(TEMP dst, KILL cr);
 5110 
 5111   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5112             "JNC    done\n\t"
 5113             "LZCNT  $dst, $src.lo\n\t"
 5114             "ADD    $dst, 32\n"
 5115       "done:" %}
 5116   ins_encode %{
 5117     Register Rdst = $dst$$Register;
 5118     Register Rsrc = $src$$Register;
 5119     Label done;
 5120     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5121     __ jccb(Assembler::carryClear, done);
 5122     __ lzcntl(Rdst, Rsrc);
 5123     __ addl(Rdst, BitsPerInt);
 5124     __ bind(done);
 5125   %}
 5126   ins_pipe(ialu_reg);
 5127 %}
 5128 
 5129 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5130   predicate(!UseCountLeadingZerosInstruction);
 5131   match(Set dst (CountLeadingZerosL src));
 5132   effect(TEMP dst, KILL cr);
 5133 
 5134   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5135             "JZ     msw_is_zero\n\t"
 5136             "ADD    $dst, 32\n\t"
 5137             "JMP    not_zero\n"
 5138       "msw_is_zero:\n\t"
 5139             "BSR    $dst, $src.lo\n\t"
 5140             "JNZ    not_zero\n\t"
 5141             "MOV    $dst, -1\n"
 5142       "not_zero:\n\t"
 5143             "NEG    $dst\n\t"
 5144             "ADD    $dst, 63\n" %}
 5145  ins_encode %{
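    // Same BSR idiom as the 32-bit case, applied to the register pair: the bit
    // index found in either half is biased so that NEG followed by ADD 63
    // yields 63 - (index within the 64-bit value), and an all-zero input
    // produces 64.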
 5146     Register Rdst = $dst$$Register;
 5147     Register Rsrc = $src$$Register;
 5148     Label msw_is_zero;
 5149     Label not_zero;
 5150     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5151     __ jccb(Assembler::zero, msw_is_zero);
 5152     __ addl(Rdst, BitsPerInt);
 5153     __ jmpb(not_zero);
 5154     __ bind(msw_is_zero);
 5155     __ bsrl(Rdst, Rsrc);
 5156     __ jccb(Assembler::notZero, not_zero);
 5157     __ movl(Rdst, -1);
 5158     __ bind(not_zero);
 5159     __ negl(Rdst);
 5160     __ addl(Rdst, BitsPerLong - 1);
 5161   %}
 5162   ins_pipe(ialu_reg);
 5163 %}
 5164 
 5165 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5166   predicate(UseCountTrailingZerosInstruction);
 5167   match(Set dst (CountTrailingZerosI src));
 5168   effect(KILL cr);
 5169 
 5170   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5171   ins_encode %{
 5172     __ tzcntl($dst$$Register, $src$$Register);
 5173   %}
 5174   ins_pipe(ialu_reg);
 5175 %}
 5176 
 5177 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5178   predicate(!UseCountTrailingZerosInstruction);
 5179   match(Set dst (CountTrailingZerosI src));
 5180   effect(KILL cr);
 5181 
 5182   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5183             "JNZ    done\n\t"
 5184             "MOV    $dst, 32\n"
 5185       "done:" %}
 5186   ins_encode %{
 5187     Register Rdst = $dst$$Register;
 5188     Label done;
 5189     __ bsfl(Rdst, $src$$Register);
 5190     __ jccb(Assembler::notZero, done);
 5191     __ movl(Rdst, BitsPerInt);
 5192     __ bind(done);
 5193   %}
 5194   ins_pipe(ialu_reg);
 5195 %}
 5196 
 5197 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5198   predicate(UseCountTrailingZerosInstruction);
 5199   match(Set dst (CountTrailingZerosL src));
 5200   effect(TEMP dst, KILL cr);
 5201 
 5202   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5203             "JNC    done\n\t"
 5204             "TZCNT  $dst, $src.hi\n\t"
 5205             "ADD    $dst, 32\n"
 5206             "done:" %}
 5207   ins_encode %{
 5208     Register Rdst = $dst$$Register;
 5209     Register Rsrc = $src$$Register;
 5210     Label done;
 5211     __ tzcntl(Rdst, Rsrc);
 5212     __ jccb(Assembler::carryClear, done);
 5213     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5214     __ addl(Rdst, BitsPerInt);
 5215     __ bind(done);
 5216   %}
 5217   ins_pipe(ialu_reg);
 5218 %}
 5219 
 5220 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5221   predicate(!UseCountTrailingZerosInstruction);
 5222   match(Set dst (CountTrailingZerosL src));
 5223   effect(TEMP dst, KILL cr);
 5224 
 5225   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5226             "JNZ    done\n\t"
 5227             "BSF    $dst, $src.hi\n\t"
 5228             "JNZ    msw_not_zero\n\t"
 5229             "MOV    $dst, 32\n"
 5230       "msw_not_zero:\n\t"
 5231             "ADD    $dst, 32\n"
 5232       "done:" %}
 5233   ins_encode %{
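    // BSF finds the lowest set bit.  If the low word is non-zero its index is
    // the answer; otherwise the high word is searched and 32 is added, with an
    // all-zero input collapsing to 32 + 32 = 64.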
 5234     Register Rdst = $dst$$Register;
 5235     Register Rsrc = $src$$Register;
 5236     Label msw_not_zero;
 5237     Label done;
 5238     __ bsfl(Rdst, Rsrc);
 5239     __ jccb(Assembler::notZero, done);
 5240     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5241     __ jccb(Assembler::notZero, msw_not_zero);
 5242     __ movl(Rdst, BitsPerInt);
 5243     __ bind(msw_not_zero);
 5244     __ addl(Rdst, BitsPerInt);
 5245     __ bind(done);
 5246   %}
 5247   ins_pipe(ialu_reg);
 5248 %}
 5249 
 5250 
 5251 //---------- Population Count Instructions -------------------------------------
 5252 
 5253 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5254   predicate(UsePopCountInstruction);
 5255   match(Set dst (PopCountI src));
 5256   effect(KILL cr);
 5257 
 5258   format %{ "POPCNT $dst, $src" %}
 5259   ins_encode %{
 5260     __ popcntl($dst$$Register, $src$$Register);
 5261   %}
 5262   ins_pipe(ialu_reg);
 5263 %}
 5264 
 5265 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5266   predicate(UsePopCountInstruction);
 5267   match(Set dst (PopCountI (LoadI mem)));
 5268   effect(KILL cr);
 5269 
 5270   format %{ "POPCNT $dst, $mem" %}
 5271   ins_encode %{
 5272     __ popcntl($dst$$Register, $mem$$Address);
 5273   %}
 5274   ins_pipe(ialu_reg);
 5275 %}
 5276 
 5277 // Note: Long.bitCount(long) returns an int.
 5278 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5279   predicate(UsePopCountInstruction);
 5280   match(Set dst (PopCountL src));
 5281   effect(KILL cr, TEMP tmp, TEMP dst);
 5282 
 5283   format %{ "POPCNT $dst, $src.lo\n\t"
 5284             "POPCNT $tmp, $src.hi\n\t"
 5285             "ADD    $dst, $tmp" %}
 5286   ins_encode %{
 5287     __ popcntl($dst$$Register, $src$$Register);
 5288     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5289     __ addl($dst$$Register, $tmp$$Register);
 5290   %}
 5291   ins_pipe(ialu_reg);
 5292 %}
 5293 
 5294 // Note: Long.bitCount(long) returns an int.
 5295 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5296   predicate(UsePopCountInstruction);
 5297   match(Set dst (PopCountL (LoadL mem)));
 5298   effect(KILL cr, TEMP tmp, TEMP dst);
 5299 
 5300   format %{ "POPCNT $dst, $mem\n\t"
 5301             "POPCNT $tmp, $mem+4\n\t"
 5302             "ADD    $dst, $tmp" %}
 5303   ins_encode %{
 5304     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5305     //__ popcntl($tmp$$Register, $mem$$Address$$second);
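    // Count the low and high 32-bit halves separately; the high half of the
    // little-endian long is at the same address plus 4 bytes.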
 5306     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5307     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5308     __ addl($dst$$Register, $tmp$$Register);
 5309   %}
 5310   ins_pipe(ialu_reg);
 5311 %}
 5312 
 5313 
 5314 //----------Load/Store/Move Instructions---------------------------------------
 5315 //----------Load Instructions--------------------------------------------------
 5316 // Load Byte (8bit signed)
 5317 instruct loadB(xRegI dst, memory mem) %{
 5318   match(Set dst (LoadB mem));
 5319 
 5320   ins_cost(125);
 5321   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5322 
 5323   ins_encode %{
 5324     __ movsbl($dst$$Register, $mem$$Address);
 5325   %}
 5326 
 5327   ins_pipe(ialu_reg_mem);
 5328 %}
 5329 
 5330 // Load Byte (8bit signed) into Long Register
 5331 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5332   match(Set dst (ConvI2L (LoadB mem)));
 5333   effect(KILL cr);
 5334 
 5335   ins_cost(375);
 5336   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5337             "MOV    $dst.hi,$dst.lo\n\t"
 5338             "SAR    $dst.hi,7" %}
 5339 
 5340   ins_encode %{
 5341     __ movsbl($dst$$Register, $mem$$Address);
 5342     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSBs are already sign-extended.
 5344   %}
 5345 
 5346   ins_pipe(ialu_reg_mem);
 5347 %}
 5348 
 5349 // Load Unsigned Byte (8bit UNsigned)
 5350 instruct loadUB(xRegI dst, memory mem) %{
 5351   match(Set dst (LoadUB mem));
 5352 
 5353   ins_cost(125);
 5354   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5355 
 5356   ins_encode %{
 5357     __ movzbl($dst$$Register, $mem$$Address);
 5358   %}
 5359 
 5360   ins_pipe(ialu_reg_mem);
 5361 %}
 5362 
 5363 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5364 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5365   match(Set dst (ConvI2L (LoadUB mem)));
 5366   effect(KILL cr);
 5367 
 5368   ins_cost(250);
 5369   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5370             "XOR    $dst.hi,$dst.hi" %}
 5371 
 5372   ins_encode %{
 5373     Register Rdst = $dst$$Register;
 5374     __ movzbl(Rdst, $mem$$Address);
 5375     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5376   %}
 5377 
 5378   ins_pipe(ialu_reg_mem);
 5379 %}
 5380 
 5381 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5382 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5383   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5384   effect(KILL cr);
 5385 
 5386   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5387             "XOR    $dst.hi,$dst.hi\n\t"
 5388             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5389   ins_encode %{
 5390     Register Rdst = $dst$$Register;
 5391     __ movzbl(Rdst, $mem$$Address);
 5392     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5393     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5394   %}
 5395   ins_pipe(ialu_reg_mem);
 5396 %}
 5397 
 5398 // Load Short (16bit signed)
 5399 instruct loadS(rRegI dst, memory mem) %{
 5400   match(Set dst (LoadS mem));
 5401 
 5402   ins_cost(125);
 5403   format %{ "MOVSX  $dst,$mem\t# short" %}
 5404 
 5405   ins_encode %{
 5406     __ movswl($dst$$Register, $mem$$Address);
 5407   %}
 5408 
 5409   ins_pipe(ialu_reg_mem);
 5410 %}
 5411 
 5412 // Load Short (16 bit signed) to Byte (8 bit signed)
 5413 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5414   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5415 
 5416   ins_cost(125);
 5417   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5418   ins_encode %{
 5419     __ movsbl($dst$$Register, $mem$$Address);
 5420   %}
 5421   ins_pipe(ialu_reg_mem);
 5422 %}
 5423 
 5424 // Load Short (16bit signed) into Long Register
 5425 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5426   match(Set dst (ConvI2L (LoadS mem)));
 5427   effect(KILL cr);
 5428 
 5429   ins_cost(375);
 5430   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5431             "MOV    $dst.hi,$dst.lo\n\t"
 5432             "SAR    $dst.hi,15" %}
 5433 
 5434   ins_encode %{
 5435     __ movswl($dst$$Register, $mem$$Address);
 5436     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSBs are already sign-extended.
 5438   %}
 5439 
 5440   ins_pipe(ialu_reg_mem);
 5441 %}
 5442 
 5443 // Load Unsigned Short/Char (16bit unsigned)
 5444 instruct loadUS(rRegI dst, memory mem) %{
 5445   match(Set dst (LoadUS mem));
 5446 
 5447   ins_cost(125);
 5448   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5449 
 5450   ins_encode %{
 5451     __ movzwl($dst$$Register, $mem$$Address);
 5452   %}
 5453 
 5454   ins_pipe(ialu_reg_mem);
 5455 %}
 5456 
 5457 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5458 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5459   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5460 
 5461   ins_cost(125);
 5462   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5463   ins_encode %{
 5464     __ movsbl($dst$$Register, $mem$$Address);
 5465   %}
 5466   ins_pipe(ialu_reg_mem);
 5467 %}
 5468 
 5469 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5470 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5471   match(Set dst (ConvI2L (LoadUS mem)));
 5472   effect(KILL cr);
 5473 
 5474   ins_cost(250);
 5475   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5476             "XOR    $dst.hi,$dst.hi" %}
 5477 
 5478   ins_encode %{
 5479     __ movzwl($dst$$Register, $mem$$Address);
 5480     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5481   %}
 5482 
 5483   ins_pipe(ialu_reg_mem);
 5484 %}
 5485 
 5486 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5487 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5488   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5489   effect(KILL cr);
 5490 
 5491   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5492             "XOR    $dst.hi,$dst.hi" %}
 5493   ins_encode %{
 5494     Register Rdst = $dst$$Register;
 5495     __ movzbl(Rdst, $mem$$Address);
 5496     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5497   %}
 5498   ins_pipe(ialu_reg_mem);
 5499 %}
 5500 
 5501 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5502 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5503   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5504   effect(KILL cr);
 5505 
 5506   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5507             "XOR    $dst.hi,$dst.hi\n\t"
 5508             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5509   ins_encode %{
 5510     Register Rdst = $dst$$Register;
 5511     __ movzwl(Rdst, $mem$$Address);
 5512     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5513     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5514   %}
 5515   ins_pipe(ialu_reg_mem);
 5516 %}
 5517 
 5518 // Load Integer
 5519 instruct loadI(rRegI dst, memory mem) %{
 5520   match(Set dst (LoadI mem));
 5521 
 5522   ins_cost(125);
 5523   format %{ "MOV    $dst,$mem\t# int" %}
 5524 
 5525   ins_encode %{
 5526     __ movl($dst$$Register, $mem$$Address);
 5527   %}
 5528 
 5529   ins_pipe(ialu_reg_mem);
 5530 %}
 5531 
 5532 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5533 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5534   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5535 
 5536   ins_cost(125);
 5537   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5538   ins_encode %{
 5539     __ movsbl($dst$$Register, $mem$$Address);
 5540   %}
 5541   ins_pipe(ialu_reg_mem);
 5542 %}
 5543 
 5544 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5545 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5546   match(Set dst (AndI (LoadI mem) mask));
 5547 
 5548   ins_cost(125);
 5549   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5550   ins_encode %{
 5551     __ movzbl($dst$$Register, $mem$$Address);
 5552   %}
 5553   ins_pipe(ialu_reg_mem);
 5554 %}
 5555 
 5556 // Load Integer (32 bit signed) to Short (16 bit signed)
 5557 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5558   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5559 
 5560   ins_cost(125);
 5561   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5562   ins_encode %{
 5563     __ movswl($dst$$Register, $mem$$Address);
 5564   %}
 5565   ins_pipe(ialu_reg_mem);
 5566 %}
 5567 
 5568 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5569 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5570   match(Set dst (AndI (LoadI mem) mask));
 5571 
 5572   ins_cost(125);
 5573   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5574   ins_encode %{
 5575     __ movzwl($dst$$Register, $mem$$Address);
 5576   %}
 5577   ins_pipe(ialu_reg_mem);
 5578 %}
 5579 
 5580 // Load Integer into Long Register
 5581 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5582   match(Set dst (ConvI2L (LoadI mem)));
 5583   effect(KILL cr);
 5584 
 5585   ins_cost(375);
 5586   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5587             "MOV    $dst.hi,$dst.lo\n\t"
 5588             "SAR    $dst.hi,31" %}
 5589 
 5590   ins_encode %{
 5591     __ movl($dst$$Register, $mem$$Address);
 5592     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5593     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5594   %}
 5595 
 5596   ins_pipe(ialu_reg_mem);
 5597 %}
 5598 
 5599 // Load Integer with mask 0xFF into Long Register
 5600 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5601   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5602   effect(KILL cr);
 5603 
 5604   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5605             "XOR    $dst.hi,$dst.hi" %}
 5606   ins_encode %{
 5607     Register Rdst = $dst$$Register;
 5608     __ movzbl(Rdst, $mem$$Address);
 5609     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5610   %}
 5611   ins_pipe(ialu_reg_mem);
 5612 %}
 5613 
 5614 // Load Integer with mask 0xFFFF into Long Register
 5615 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5616   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5617   effect(KILL cr);
 5618 
 5619   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5620             "XOR    $dst.hi,$dst.hi" %}
 5621   ins_encode %{
 5622     Register Rdst = $dst$$Register;
 5623     __ movzwl(Rdst, $mem$$Address);
 5624     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5625   %}
 5626   ins_pipe(ialu_reg_mem);
 5627 %}
 5628 
 5629 // Load Integer with 31-bit mask into Long Register
 5630 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5631   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5632   effect(KILL cr);
 5633 
 5634   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5635             "XOR    $dst.hi,$dst.hi\n\t"
 5636             "AND    $dst.lo,$mask" %}
 5637   ins_encode %{
 5638     Register Rdst = $dst$$Register;
 5639     __ movl(Rdst, $mem$$Address);
 5640     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5641     __ andl(Rdst, $mask$$constant);
 5642   %}
 5643   ins_pipe(ialu_reg_mem);
 5644 %}
 5645 
 5646 // Load Unsigned Integer into Long Register
 5647 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5648   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5649   effect(KILL cr);
 5650 
 5651   ins_cost(250);
 5652   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5653             "XOR    $dst.hi,$dst.hi" %}
 5654 
 5655   ins_encode %{
 5656     __ movl($dst$$Register, $mem$$Address);
 5657     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5658   %}
 5659 
 5660   ins_pipe(ialu_reg_mem);
 5661 %}
 5662 
 5663 // Load Long.  Cannot clobber address while loading, so restrict address
 5664 // register to ESI
 5665 instruct loadL(eRegL dst, load_long_memory mem) %{
 5666   predicate(!((LoadLNode*)n)->require_atomic_access());
 5667   match(Set dst (LoadL mem));
 5668 
 5669   ins_cost(250);
 5670   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5671             "MOV    $dst.hi,$mem+4" %}
 5672 
 5673   ins_encode %{
 5674     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5675     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5676     __ movl($dst$$Register, Amemlo);
 5677     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5678   %}
 5679 
 5680   ins_pipe(ialu_reg_long_mem);
 5681 %}
 5682 
 5683 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5684 // then store it down to the stack and reload on the int
 5685 // side.
 5686 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5687   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5688   match(Set dst (LoadL mem));
 5689 
 5690   ins_cost(200);
 5691   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5692             "FISTp  $dst" %}
 5693   ins_encode(enc_loadL_volatile(mem,dst));
 5694   ins_pipe( fpu_reg_mem );
 5695 %}
 5696 
 5697 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5698   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5699   match(Set dst (LoadL mem));
 5700   effect(TEMP tmp);
 5701   ins_cost(180);
 5702   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5703             "MOVSD  $dst,$tmp" %}
 5704   ins_encode %{
 5705     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5706     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5707   %}
 5708   ins_pipe( pipe_slow );
 5709 %}
 5710 
 5711 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5712   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5713   match(Set dst (LoadL mem));
 5714   effect(TEMP tmp);
 5715   ins_cost(160);
 5716   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5717             "MOVD   $dst.lo,$tmp\n\t"
 5718             "PSRLQ  $tmp,32\n\t"
 5719             "MOVD   $dst.hi,$tmp" %}
 5720   ins_encode %{
 5721     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5722     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5723     __ psrlq($tmp$$XMMRegister, 32);
 5724     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5725   %}
 5726   ins_pipe( pipe_slow );
 5727 %}
 5728 
 5729 // Load Range
 5730 instruct loadRange(rRegI dst, memory mem) %{
 5731   match(Set dst (LoadRange mem));
 5732 
 5733   ins_cost(125);
 5734   format %{ "MOV    $dst,$mem" %}
 5735   opcode(0x8B);
 5736   ins_encode( OpcP, RegMem(dst,mem));
 5737   ins_pipe( ialu_reg_mem );
 5738 %}
 5739 
 5740 
 5741 // Load Pointer
 5742 instruct loadP(eRegP dst, memory mem) %{
 5743   match(Set dst (LoadP mem));
 5744 
 5745   ins_cost(125);
 5746   format %{ "MOV    $dst,$mem" %}
 5747   opcode(0x8B);
 5748   ins_encode( OpcP, RegMem(dst,mem));
 5749   ins_pipe( ialu_reg_mem );
 5750 %}
 5751 
 5752 // Load Klass Pointer
 5753 instruct loadKlass(eRegP dst, memory mem) %{
 5754   match(Set dst (LoadKlass mem));
 5755 
 5756   ins_cost(125);
 5757   format %{ "MOV    $dst,$mem" %}
 5758   opcode(0x8B);
 5759   ins_encode( OpcP, RegMem(dst,mem));
 5760   ins_pipe( ialu_reg_mem );
 5761 %}
 5762 
// Move Float between XMM register classes
 5764 instruct MoveF2LEG(legRegF dst, regF src) %{
 5765   match(Set dst src);
 5766   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5767   ins_encode %{
 5768     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 5769   %}
 5770   ins_pipe( fpu_reg_reg );
 5771 %}
 5772 
// Move Float between XMM register classes
 5774 instruct MoveLEG2F(regF dst, legRegF src) %{
 5775   match(Set dst src);
 5776   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5777   ins_encode %{
 5778     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 5779   %}
 5780   ins_pipe( fpu_reg_reg );
 5781 %}
 5782 
// Move Double between XMM register classes
 5784 instruct MoveD2LEG(legRegD dst, regD src) %{
 5785   match(Set dst src);
 5786   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5787   ins_encode %{
 5788     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 5789   %}
 5790   ins_pipe( fpu_reg_reg );
 5791 %}
 5792 
// Move Double between XMM register classes
 5794 instruct MoveLEG2D(regD dst, legRegD src) %{
 5795   match(Set dst src);
 5796   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5797   ins_encode %{
 5798     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 5799   %}
 5800   ins_pipe( fpu_reg_reg );
 5801 %}
 5802 
 5803 // Load Double
 5804 instruct loadDPR(regDPR dst, memory mem) %{
 5805   predicate(UseSSE<=1);
 5806   match(Set dst (LoadD mem));
 5807 
 5808   ins_cost(150);
 5809   format %{ "FLD_D  ST,$mem\n\t"
 5810             "FSTP   $dst" %}
 5811   opcode(0xDD);               /* DD /0 */
 5812   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5813               Pop_Reg_DPR(dst) );
 5814   ins_pipe( fpu_reg_mem );
 5815 %}
 5816 
 5817 // Load Double to XMM
 5818 instruct loadD(regD dst, memory mem) %{
 5819   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5820   match(Set dst (LoadD mem));
 5821   ins_cost(145);
 5822   format %{ "MOVSD  $dst,$mem" %}
 5823   ins_encode %{
 5824     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5825   %}
 5826   ins_pipe( pipe_slow );
 5827 %}
 5828 
 5829 instruct loadD_partial(regD dst, memory mem) %{
 5830   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5831   match(Set dst (LoadD mem));
 5832   ins_cost(145);
 5833   format %{ "MOVLPD $dst,$mem" %}
 5834   ins_encode %{
 5835     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5836   %}
 5837   ins_pipe( pipe_slow );
 5838 %}
 5839 
 5840 // Load to XMM register (single-precision floating point)
 5841 // MOVSS instruction
 5842 instruct loadF(regF dst, memory mem) %{
 5843   predicate(UseSSE>=1);
 5844   match(Set dst (LoadF mem));
 5845   ins_cost(145);
 5846   format %{ "MOVSS  $dst,$mem" %}
 5847   ins_encode %{
 5848     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5849   %}
 5850   ins_pipe( pipe_slow );
 5851 %}
 5852 
 5853 // Load Float
 5854 instruct loadFPR(regFPR dst, memory mem) %{
 5855   predicate(UseSSE==0);
 5856   match(Set dst (LoadF mem));
 5857 
 5858   ins_cost(150);
 5859   format %{ "FLD_S  ST,$mem\n\t"
 5860             "FSTP   $dst" %}
 5861   opcode(0xD9);               /* D9 /0 */
 5862   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5863               Pop_Reg_FPR(dst) );
 5864   ins_pipe( fpu_reg_mem );
 5865 %}
 5866 
 5867 // Load Effective Address
 5868 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5869   match(Set dst mem);
 5870 
 5871   ins_cost(110);
 5872   format %{ "LEA    $dst,$mem" %}
 5873   opcode(0x8D);
 5874   ins_encode( OpcP, RegMem(dst,mem));
 5875   ins_pipe( ialu_reg_reg_fat );
 5876 %}
 5877 
 5878 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5879   match(Set dst mem);
 5880 
 5881   ins_cost(110);
 5882   format %{ "LEA    $dst,$mem" %}
 5883   opcode(0x8D);
 5884   ins_encode( OpcP, RegMem(dst,mem));
 5885   ins_pipe( ialu_reg_reg_fat );
 5886 %}
 5887 
 5888 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5889   match(Set dst mem);
 5890 
 5891   ins_cost(110);
 5892   format %{ "LEA    $dst,$mem" %}
 5893   opcode(0x8D);
 5894   ins_encode( OpcP, RegMem(dst,mem));
 5895   ins_pipe( ialu_reg_reg_fat );
 5896 %}
 5897 
 5898 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5899   match(Set dst mem);
 5900 
 5901   ins_cost(110);
 5902   format %{ "LEA    $dst,$mem" %}
 5903   opcode(0x8D);
 5904   ins_encode( OpcP, RegMem(dst,mem));
 5905   ins_pipe( ialu_reg_reg_fat );
 5906 %}
 5907 
 5908 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5909   match(Set dst mem);
 5910 
 5911   ins_cost(110);
 5912   format %{ "LEA    $dst,$mem" %}
 5913   opcode(0x8D);
 5914   ins_encode( OpcP, RegMem(dst,mem));
 5915   ins_pipe( ialu_reg_reg_fat );
 5916 %}
 5917 
 5918 // Load Constant
 5919 instruct loadConI(rRegI dst, immI src) %{
 5920   match(Set dst src);
 5921 
 5922   format %{ "MOV    $dst,$src" %}
 5923   ins_encode( LdImmI(dst, src) );
 5924   ins_pipe( ialu_reg_fat );
 5925 %}
 5926 
 5927 // Load Constant zero
 5928 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 5929   match(Set dst src);
 5930   effect(KILL cr);
 5931 
 5932   ins_cost(50);
 5933   format %{ "XOR    $dst,$dst" %}
 5934   opcode(0x33);  /* + rd */
 5935   ins_encode( OpcP, RegReg( dst, dst ) );
 5936   ins_pipe( ialu_reg );
 5937 %}
 5938 
 5939 instruct loadConP(eRegP dst, immP src) %{
 5940   match(Set dst src);
 5941 
 5942   format %{ "MOV    $dst,$src" %}
 5943   opcode(0xB8);  /* + rd */
 5944   ins_encode( LdImmP(dst, src) );
 5945   ins_pipe( ialu_reg_fat );
 5946 %}
 5947 
 5948 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 5949   match(Set dst src);
 5950   effect(KILL cr);
 5951   ins_cost(200);
 5952   format %{ "MOV    $dst.lo,$src.lo\n\t"
 5953             "MOV    $dst.hi,$src.hi" %}
 5954   opcode(0xB8);
 5955   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 5956   ins_pipe( ialu_reg_long_fat );
 5957 %}
 5958 
 5959 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 5960   match(Set dst src);
 5961   effect(KILL cr);
 5962   ins_cost(150);
 5963   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 5964             "XOR    $dst.hi,$dst.hi" %}
 5965   opcode(0x33,0x33);
 5966   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 5967   ins_pipe( ialu_reg_long );
 5968 %}
 5969 
 5970 // The instruction usage is guarded by predicate in operand immFPR().
 5971 instruct loadConFPR(regFPR dst, immFPR con) %{
 5972   match(Set dst con);
 5973   ins_cost(125);
 5974   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 5975             "FSTP   $dst" %}
 5976   ins_encode %{
 5977     __ fld_s($constantaddress($con));
 5978     __ fstp_d($dst$$reg);
 5979   %}
 5980   ins_pipe(fpu_reg_con);
 5981 %}
 5982 
 5983 // The instruction usage is guarded by predicate in operand immFPR0().
 5984 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 5985   match(Set dst con);
 5986   ins_cost(125);
 5987   format %{ "FLDZ   ST\n\t"
 5988             "FSTP   $dst" %}
 5989   ins_encode %{
 5990     __ fldz();
 5991     __ fstp_d($dst$$reg);
 5992   %}
 5993   ins_pipe(fpu_reg_con);
 5994 %}
 5995 
 5996 // The instruction usage is guarded by predicate in operand immFPR1().
 5997 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 5998   match(Set dst con);
 5999   ins_cost(125);
 6000   format %{ "FLD1   ST\n\t"
 6001             "FSTP   $dst" %}
 6002   ins_encode %{
 6003     __ fld1();
 6004     __ fstp_d($dst$$reg);
 6005   %}
 6006   ins_pipe(fpu_reg_con);
 6007 %}
 6008 
 6009 // The instruction usage is guarded by predicate in operand immF().
 6010 instruct loadConF(regF dst, immF con) %{
 6011   match(Set dst con);
 6012   ins_cost(125);
 6013   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6014   ins_encode %{
 6015     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6016   %}
 6017   ins_pipe(pipe_slow);
 6018 %}
 6019 
 6020 // The instruction usage is guarded by predicate in operand immF0().
 6021 instruct loadConF0(regF dst, immF0 src) %{
 6022   match(Set dst src);
 6023   ins_cost(100);
 6024   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6025   ins_encode %{
 6026     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6027   %}
 6028   ins_pipe(pipe_slow);
 6029 %}
 6030 
 6031 // The instruction usage is guarded by predicate in operand immDPR().
 6032 instruct loadConDPR(regDPR dst, immDPR con) %{
 6033   match(Set dst con);
 6034   ins_cost(125);
 6035 
 6036   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6037             "FSTP   $dst" %}
 6038   ins_encode %{
 6039     __ fld_d($constantaddress($con));
 6040     __ fstp_d($dst$$reg);
 6041   %}
 6042   ins_pipe(fpu_reg_con);
 6043 %}
 6044 
 6045 // The instruction usage is guarded by predicate in operand immDPR0().
 6046 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6047   match(Set dst con);
 6048   ins_cost(125);
 6049 
 6050   format %{ "FLDZ   ST\n\t"
 6051             "FSTP   $dst" %}
 6052   ins_encode %{
 6053     __ fldz();
 6054     __ fstp_d($dst$$reg);
 6055   %}
 6056   ins_pipe(fpu_reg_con);
 6057 %}
 6058 
 6059 // The instruction usage is guarded by predicate in operand immDPR1().
 6060 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6061   match(Set dst con);
 6062   ins_cost(125);
 6063 
 6064   format %{ "FLD1   ST\n\t"
 6065             "FSTP   $dst" %}
 6066   ins_encode %{
 6067     __ fld1();
 6068     __ fstp_d($dst$$reg);
 6069   %}
 6070   ins_pipe(fpu_reg_con);
 6071 %}
 6072 
 6073 // The instruction usage is guarded by predicate in operand immD().
 6074 instruct loadConD(regD dst, immD con) %{
 6075   match(Set dst con);
 6076   ins_cost(125);
 6077   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6078   ins_encode %{
 6079     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6080   %}
 6081   ins_pipe(pipe_slow);
 6082 %}
 6083 
 6084 // The instruction usage is guarded by predicate in operand immD0().
 6085 instruct loadConD0(regD dst, immD0 src) %{
 6086   match(Set dst src);
 6087   ins_cost(100);
 6088   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6089   ins_encode %{
 6090     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
 6091   %}
 6092   ins_pipe( pipe_slow );
 6093 %}
 6094 
 6095 // Load Stack Slot
 6096 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6097   match(Set dst src);
 6098   ins_cost(125);
 6099 
 6100   format %{ "MOV    $dst,$src" %}
 6101   opcode(0x8B);
 6102   ins_encode( OpcP, RegMem(dst,src));
 6103   ins_pipe( ialu_reg_mem );
 6104 %}
 6105 
 6106 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6107   match(Set dst src);
 6108 
 6109   ins_cost(200);
 6110   format %{ "MOV    $dst,$src.lo\n\t"
 6111             "MOV    $dst+4,$src.hi" %}
 6112   opcode(0x8B, 0x8B);
 6113   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6114   ins_pipe( ialu_mem_long_reg );
 6115 %}
 6116 
 6117 // Load Stack Slot
 6118 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6119   match(Set dst src);
 6120   ins_cost(125);
 6121 
 6122   format %{ "MOV    $dst,$src" %}
 6123   opcode(0x8B);
 6124   ins_encode( OpcP, RegMem(dst,src));
 6125   ins_pipe( ialu_reg_mem );
 6126 %}
 6127 
 6128 // Load Stack Slot
 6129 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6130   match(Set dst src);
 6131   ins_cost(125);
 6132 
 6133   format %{ "FLD_S  $src\n\t"
 6134             "FSTP   $dst" %}
 6135   opcode(0xD9);               /* D9 /0, FLD m32real */
 6136   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6137               Pop_Reg_FPR(dst) );
 6138   ins_pipe( fpu_reg_mem );
 6139 %}
 6140 
 6141 // Load Stack Slot
 6142 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6143   match(Set dst src);
 6144   ins_cost(125);
 6145 
 6146   format %{ "FLD_D  $src\n\t"
 6147             "FSTP   $dst" %}
 6148   opcode(0xDD);               /* DD /0, FLD m64real */
 6149   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6150               Pop_Reg_DPR(dst) );
 6151   ins_pipe( fpu_reg_mem );
 6152 %}
 6153 
 6154 // Prefetch instructions for allocation.
 6155 // Must be safe to execute with invalid address (cannot fault).
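      // Summary of the variants below, as selected by their predicates (a quick
      // restatement of the predicates only):
      //   AllocatePrefetchInstr == 0, UseSSE >= 1  ->  PREFETCHNTA
      //   AllocatePrefetchInstr == 1, UseSSE >= 1  ->  PREFETCHT0
      //   AllocatePrefetchInstr == 2, UseSSE >= 1  ->  PREFETCHT2
      //   AllocatePrefetchInstr == 3               ->  PREFETCHW
      //   UseSSE == 0, AllocatePrefetchInstr != 3  ->  nothing is emitted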
 6156 
 6157 instruct prefetchAlloc0( memory mem ) %{
 6158   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6159   match(PrefetchAllocation mem);
 6160   ins_cost(0);
 6161   size(0);
 6162   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6163   ins_encode();
 6164   ins_pipe(empty);
 6165 %}
 6166 
 6167 instruct prefetchAlloc( memory mem ) %{
 6168   predicate(AllocatePrefetchInstr==3);
 6169   match( PrefetchAllocation mem );
 6170   ins_cost(100);
 6171 
 6172   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6173   ins_encode %{
 6174     __ prefetchw($mem$$Address);
 6175   %}
 6176   ins_pipe(ialu_mem);
 6177 %}
 6178 
 6179 instruct prefetchAllocNTA( memory mem ) %{
 6180   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6181   match(PrefetchAllocation mem);
 6182   ins_cost(100);
 6183 
 6184   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6185   ins_encode %{
 6186     __ prefetchnta($mem$$Address);
 6187   %}
 6188   ins_pipe(ialu_mem);
 6189 %}
 6190 
 6191 instruct prefetchAllocT0( memory mem ) %{
 6192   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6193   match(PrefetchAllocation mem);
 6194   ins_cost(100);
 6195 
 6196   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6197   ins_encode %{
 6198     __ prefetcht0($mem$$Address);
 6199   %}
 6200   ins_pipe(ialu_mem);
 6201 %}
 6202 
 6203 instruct prefetchAllocT2( memory mem ) %{
 6204   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6205   match(PrefetchAllocation mem);
 6206   ins_cost(100);
 6207 
 6208   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6209   ins_encode %{
 6210     __ prefetcht2($mem$$Address);
 6211   %}
 6212   ins_pipe(ialu_mem);
 6213 %}
 6214 
 6215 //----------Store Instructions-------------------------------------------------
 6216 
 6217 // Store Byte
 6218 instruct storeB(memory mem, xRegI src) %{
 6219   match(Set mem (StoreB mem src));
 6220 
 6221   ins_cost(125);
 6222   format %{ "MOV8   $mem,$src" %}
 6223   opcode(0x88);
 6224   ins_encode( OpcP, RegMem( src, mem ) );
 6225   ins_pipe( ialu_mem_reg );
 6226 %}
 6227 
 6228 // Store Char/Short
 6229 instruct storeC(memory mem, rRegI src) %{
 6230   match(Set mem (StoreC mem src));
 6231 
 6232   ins_cost(125);
 6233   format %{ "MOV16  $mem,$src" %}
 6234   opcode(0x89, 0x66);
 6235   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6236   ins_pipe( ialu_mem_reg );
 6237 %}
 6238 
 6239 // Store Integer
 6240 instruct storeI(memory mem, rRegI src) %{
 6241   match(Set mem (StoreI mem src));
 6242 
 6243   ins_cost(125);
 6244   format %{ "MOV    $mem,$src" %}
 6245   opcode(0x89);
 6246   ins_encode( OpcP, RegMem( src, mem ) );
 6247   ins_pipe( ialu_mem_reg );
 6248 %}
 6249 
 6250 // Store Long
 6251 instruct storeL(long_memory mem, eRegL src) %{
 6252   predicate(!((StoreLNode*)n)->require_atomic_access());
 6253   match(Set mem (StoreL mem src));
 6254 
 6255   ins_cost(200);
 6256   format %{ "MOV    $mem,$src.lo\n\t"
 6257             "MOV    $mem+4,$src.hi" %}
 6258   opcode(0x89, 0x89);
 6259   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6260   ins_pipe( ialu_mem_long_reg );
 6261 %}
 6262 
 6263 // Store Long to Integer
 6264 instruct storeL2I(memory mem, eRegL src) %{
 6265   match(Set mem (StoreI mem (ConvL2I src)));
 6266 
 6267   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6268   ins_encode %{
 6269     __ movl($mem$$Address, $src$$Register);
 6270   %}
 6271   ins_pipe(ialu_mem_reg);
 6272 %}
 6273 
 6274 // Volatile Store Long.  Must be atomic, so move it into
 6275 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6276 // target address before the store (for null-ptr checks)
 6277 // so the memory operand is used twice in the encoding.
 6278 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6279   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6280   match(Set mem (StoreL mem src));
 6281   effect( KILL cr );
 6282   ins_cost(400);
 6283   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6284             "FILD   $src\n\t"
 6285             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6286   opcode(0x3B);
 6287   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6288   ins_pipe( fpu_reg_mem );
 6289 %}
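
      // Why the sequence above is atomic (explanatory note): FILD/FISTP move all
      // 64 bits of the long in a single x87 memory access, so the store cannot be
      // torn into two 32-bit halves the way a MOV/MOV pair could be, and the
      // leading CMP only touches $mem so that a null pointer faults at a known
      // instruction before any data is written.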
 6290 
 6291 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6292   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6293   match(Set mem (StoreL mem src));
 6294   effect( TEMP tmp, KILL cr );
 6295   ins_cost(380);
 6296   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6297             "MOVSD  $tmp,$src\n\t"
 6298             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6299   ins_encode %{
 6300     __ cmpl(rax, $mem$$Address);
 6301     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6302     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6303   %}
 6304   ins_pipe( pipe_slow );
 6305 %}
 6306 
 6307 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6308   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6309   match(Set mem (StoreL mem src));
 6310   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6311   ins_cost(360);
 6312   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6313             "MOVD   $tmp,$src.lo\n\t"
 6314             "MOVD   $tmp2,$src.hi\n\t"
 6315             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6316             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6317   ins_encode %{
 6318     __ cmpl(rax, $mem$$Address);
 6319     __ movdl($tmp$$XMMRegister, $src$$Register);
 6320     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6321     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6322     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6323   %}
 6324   ins_pipe( pipe_slow );
 6325 %}
 6326 
 6327 // Store Pointer; for storing unknown oops and raw pointers
 6328 instruct storeP(memory mem, anyRegP src) %{
 6329   match(Set mem (StoreP mem src));
 6330 
 6331   ins_cost(125);
 6332   format %{ "MOV    $mem,$src" %}
 6333   opcode(0x89);
 6334   ins_encode( OpcP, RegMem( src, mem ) );
 6335   ins_pipe( ialu_mem_reg );
 6336 %}
 6337 
 6338 // Store Integer Immediate
 6339 instruct storeImmI(memory mem, immI src) %{
 6340   match(Set mem (StoreI mem src));
 6341 
 6342   ins_cost(150);
 6343   format %{ "MOV    $mem,$src" %}
 6344   opcode(0xC7);               /* C7 /0 */
 6345   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6346   ins_pipe( ialu_mem_imm );
 6347 %}
 6348 
 6349 // Store Short/Char Immediate
 6350 instruct storeImmI16(memory mem, immI16 src) %{
 6351   predicate(UseStoreImmI16);
 6352   match(Set mem (StoreC mem src));
 6353 
 6354   ins_cost(150);
 6355   format %{ "MOV16  $mem,$src" %}
 6356   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6357   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6358   ins_pipe( ialu_mem_imm );
 6359 %}
 6360 
 6361 // Store Pointer Immediate; null pointers or constant oops that do not
 6362 // need card-mark barriers.
 6363 instruct storeImmP(memory mem, immP src) %{
 6364   match(Set mem (StoreP mem src));
 6365 
 6366   ins_cost(150);
 6367   format %{ "MOV    $mem,$src" %}
 6368   opcode(0xC7);               /* C7 /0 */
 6369   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6370   ins_pipe( ialu_mem_imm );
 6371 %}
 6372 
 6373 // Store Byte Immediate
 6374 instruct storeImmB(memory mem, immI8 src) %{
 6375   match(Set mem (StoreB mem src));
 6376 
 6377   ins_cost(150);
 6378   format %{ "MOV8   $mem,$src" %}
 6379   opcode(0xC6);               /* C6 /0 */
 6380   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6381   ins_pipe( ialu_mem_imm );
 6382 %}
 6383 
 6384 // Store CMS card-mark Immediate
 6385 instruct storeImmCM(memory mem, immI8 src) %{
 6386   match(Set mem (StoreCM mem src));
 6387 
 6388   ins_cost(150);
 6389   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6390   opcode(0xC6);               /* C6 /0 */
 6391   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6392   ins_pipe( ialu_mem_imm );
 6393 %}
 6394 
 6395 // Store Double
 6396 instruct storeDPR( memory mem, regDPR1 src) %{
 6397   predicate(UseSSE<=1);
 6398   match(Set mem (StoreD mem src));
 6399 
 6400   ins_cost(100);
 6401   format %{ "FST_D  $mem,$src" %}
 6402   opcode(0xDD);       /* DD /2 */
 6403   ins_encode( enc_FPR_store(mem,src) );
 6404   ins_pipe( fpu_mem_reg );
 6405 %}
 6406 
 6407 // Store double does rounding on x86
 6408 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6409   predicate(UseSSE<=1);
 6410   match(Set mem (StoreD mem (RoundDouble src)));
 6411 
 6412   ins_cost(100);
 6413   format %{ "FST_D  $mem,$src\t# round" %}
 6414   opcode(0xDD);       /* DD /2 */
 6415   ins_encode( enc_FPR_store(mem,src) );
 6416   ins_pipe( fpu_mem_reg );
 6417 %}
 6418 
 6419 // Store XMM register to memory (double-precision floating point)
 6420 // MOVSD instruction
 6421 instruct storeD(memory mem, regD src) %{
 6422   predicate(UseSSE>=2);
 6423   match(Set mem (StoreD mem src));
 6424   ins_cost(95);
 6425   format %{ "MOVSD  $mem,$src" %}
 6426   ins_encode %{
 6427     __ movdbl($mem$$Address, $src$$XMMRegister);
 6428   %}
 6429   ins_pipe( pipe_slow );
 6430 %}
 6431 
 6432 // Move Double between register classes (regD -> vlRegD)
 6433 instruct MoveD2VL(vlRegD dst, regD src) %{
 6434   match(Set dst src);
 6435   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6436   ins_encode %{
 6437     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6438   %}
 6439   ins_pipe( fpu_reg_reg );
 6440 %}
 6441 
 6442 // Move Double between register classes (vlRegD -> regD)
 6443 instruct MoveVL2D(regD dst, vlRegD src) %{
 6444   match(Set dst src);
 6445   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 6446   ins_encode %{
 6447     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6448   %}
 6449   ins_pipe( fpu_reg_reg );
 6450 %}
 6451 
 6452 // Store XMM register to memory (single-precision floating point)
 6453 // MOVSS instruction
 6454 instruct storeF(memory mem, regF src) %{
 6455   predicate(UseSSE>=1);
 6456   match(Set mem (StoreF mem src));
 6457   ins_cost(95);
 6458   format %{ "MOVSS  $mem,$src" %}
 6459   ins_encode %{
 6460     __ movflt($mem$$Address, $src$$XMMRegister);
 6461   %}
 6462   ins_pipe( pipe_slow );
 6463 %}
 6464 
 6465 // Move Float between register classes (regF -> vlRegF)
 6466 instruct MoveF2VL(vlRegF dst, regF src) %{
 6467   match(Set dst src);
 6468   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6469   ins_encode %{
 6470     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6471   %}
 6472   ins_pipe( fpu_reg_reg );
 6473 %}
 6474 
 6475 // Move Float between register classes (vlRegF -> regF)
 6476 instruct MoveVL2F(regF dst, vlRegF src) %{
 6477   match(Set dst src);
 6478   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 6479   ins_encode %{
 6480     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6481   %}
 6482   ins_pipe( fpu_reg_reg );
 6483 %}
 6484 
 6485 // Store Float
 6486 instruct storeFPR( memory mem, regFPR1 src) %{
 6487   predicate(UseSSE==0);
 6488   match(Set mem (StoreF mem src));
 6489 
 6490   ins_cost(100);
 6491   format %{ "FST_S  $mem,$src" %}
 6492   opcode(0xD9);       /* D9 /2 */
 6493   ins_encode( enc_FPR_store(mem,src) );
 6494   ins_pipe( fpu_mem_reg );
 6495 %}
 6496 
 6497 // Store Float does rounding on x86
 6498 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6499   predicate(UseSSE==0);
 6500   match(Set mem (StoreF mem (RoundFloat src)));
 6501 
 6502   ins_cost(100);
 6503   format %{ "FST_S  $mem,$src\t# round" %}
 6504   opcode(0xD9);       /* D9 /2 */
 6505   ins_encode( enc_FPR_store(mem,src) );
 6506   ins_pipe( fpu_mem_reg );
 6507 %}
 6508 
 6509 // Store Float converted from a Double; the single-precision store does the rounding on x86
 6510 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6511   predicate(UseSSE<=1);
 6512   match(Set mem (StoreF mem (ConvD2F src)));
 6513 
 6514   ins_cost(100);
 6515   format %{ "FST_S  $mem,$src\t# D-round" %}
 6516   opcode(0xD9);       /* D9 /2 */
 6517   ins_encode( enc_FPR_store(mem,src) );
 6518   ins_pipe( fpu_mem_reg );
 6519 %}
 6520 
 6521 // Store immediate Float value (it is faster than store from FPU register)
 6522 // The instruction usage is guarded by predicate in operand immFPR().
 6523 instruct storeFPR_imm( memory mem, immFPR src) %{
 6524   match(Set mem (StoreF mem src));
 6525 
 6526   ins_cost(50);
 6527   format %{ "MOV    $mem,$src\t# store float" %}
 6528   opcode(0xC7);               /* C7 /0 */
 6529   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6530   ins_pipe( ialu_mem_imm );
 6531 %}
 6532 
 6533 // Store immediate Float value (it is faster than store from XMM register)
 6534 // The instruction usage is guarded by predicate in operand immF().
 6535 instruct storeF_imm( memory mem, immF src) %{
 6536   match(Set mem (StoreF mem src));
 6537 
 6538   ins_cost(50);
 6539   format %{ "MOV    $mem,$src\t# store float" %}
 6540   opcode(0xC7);               /* C7 /0 */
 6541   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6542   ins_pipe( ialu_mem_imm );
 6543 %}
 6544 
 6545 // Store Integer to stack slot
 6546 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6547   match(Set dst src);
 6548 
 6549   ins_cost(100);
 6550   format %{ "MOV    $dst,$src" %}
 6551   opcode(0x89);
 6552   ins_encode( OpcPRegSS( dst, src ) );
 6553   ins_pipe( ialu_mem_reg );
 6554 %}
 6555 
 6556 // Store Integer to stack slot
 6557 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6558   match(Set dst src);
 6559 
 6560   ins_cost(100);
 6561   format %{ "MOV    $dst,$src" %}
 6562   opcode(0x89);
 6563   ins_encode( OpcPRegSS( dst, src ) );
 6564   ins_pipe( ialu_mem_reg );
 6565 %}
 6566 
 6567 // Store Long to stack slot
 6568 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6569   match(Set dst src);
 6570 
 6571   ins_cost(200);
 6572   format %{ "MOV    $dst,$src.lo\n\t"
 6573             "MOV    $dst+4,$src.hi" %}
 6574   opcode(0x89, 0x89);
 6575   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6576   ins_pipe( ialu_mem_long_reg );
 6577 %}
 6578 
 6579 //----------MemBar Instructions-----------------------------------------------
 6580 // Memory barrier flavors
 6581 
 6582 instruct membar_acquire() %{
 6583   match(MemBarAcquire);
 6584   match(LoadFence);
 6585   ins_cost(400);
 6586 
 6587   size(0);
 6588   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6589   ins_encode();
 6590   ins_pipe(empty);
 6591 %}
 6592 
 6593 instruct membar_acquire_lock() %{
 6594   match(MemBarAcquireLock);
 6595   ins_cost(0);
 6596 
 6597   size(0);
 6598   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6599   ins_encode( );
 6600   ins_pipe(empty);
 6601 %}
 6602 
 6603 instruct membar_release() %{
 6604   match(MemBarRelease);
 6605   match(StoreFence);
 6606   ins_cost(400);
 6607 
 6608   size(0);
 6609   format %{ "MEMBAR-release ! (empty encoding)" %}
 6610   ins_encode( );
 6611   ins_pipe(empty);
 6612 %}
 6613 
 6614 instruct membar_release_lock() %{
 6615   match(MemBarReleaseLock);
 6616   ins_cost(0);
 6617 
 6618   size(0);
 6619   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6620   ins_encode( );
 6621   ins_pipe(empty);
 6622 %}
 6623 
 6624 instruct membar_volatile(eFlagsReg cr) %{
 6625   match(MemBarVolatile);
 6626   effect(KILL cr);
 6627   ins_cost(400);
 6628 
 6629   format %{
 6630     $$template
 6631     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6632   %}
 6633   ins_encode %{
 6634     __ membar(Assembler::StoreLoad);
 6635   %}
 6636   ins_pipe(pipe_slow);
 6637 %}
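
      // Note: the LOCK ADDL of zero to the top of the stack shown above serves as
      // a full StoreLoad fence; the locked read-modify-write orders earlier stores
      // against later loads and is typically cheaper than MFENCE, needing no
      // scratch register.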
 6638 
 6639 instruct unnecessary_membar_volatile() %{
 6640   match(MemBarVolatile);
 6641   predicate(Matcher::post_store_load_barrier(n));
 6642   ins_cost(0);
 6643 
 6644   size(0);
 6645   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6646   ins_encode( );
 6647   ins_pipe(empty);
 6648 %}
 6649 
 6650 instruct membar_storestore() %{
 6651   match(MemBarStoreStore);
 6652   ins_cost(0);
 6653 
 6654   size(0);
 6655   format %{ "MEMBAR-storestore (empty encoding)" %}
 6656   ins_encode( );
 6657   ins_pipe(empty);
 6658 %}
 6659 
 6660 //----------Move Instructions--------------------------------------------------
 6661 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6662   match(Set dst (CastX2P src));
 6663   format %{ "# X2P  $dst, $src" %}
 6664   ins_encode( /*empty encoding*/ );
 6665   ins_cost(0);
 6666   ins_pipe(empty);
 6667 %}
 6668 
 6669 instruct castP2X(rRegI dst, eRegP src ) %{
 6670   match(Set dst (CastP2X src));
 6671   ins_cost(50);
 6672   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6673   ins_encode( enc_Copy( dst, src) );
 6674   ins_pipe( ialu_reg_reg );
 6675 %}
 6676 
 6677 //----------Conditional Move---------------------------------------------------
 6678 // Conditional move
 6679 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6680   predicate(!VM_Version::supports_cmov() );
 6681   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6682   ins_cost(200);
 6683   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6684             "MOV    $dst,$src\n"
 6685       "skip:" %}
 6686   ins_encode %{
 6687     Label Lskip;
 6688     // Invert sense of branch from sense of CMOV
 6689     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6690     __ movl($dst$$Register, $src$$Register);
 6691     __ bind(Lskip);
 6692   %}
 6693   ins_pipe( pipe_cmov_reg );
 6694 %}
 6695 
 6696 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6697   predicate(!VM_Version::supports_cmov() );
 6698   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6699   ins_cost(200);
 6700   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6701             "MOV    $dst,$src\n"
 6702       "skip:" %}
 6703   ins_encode %{
 6704     Label Lskip;
 6705     // Invert sense of branch from sense of CMOV
 6706     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6707     __ movl($dst$$Register, $src$$Register);
 6708     __ bind(Lskip);
 6709   %}
 6710   ins_pipe( pipe_cmov_reg );
 6711 %}
 6712 
 6713 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6714   predicate(VM_Version::supports_cmov() );
 6715   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6716   ins_cost(200);
 6717   format %{ "CMOV$cop $dst,$src" %}
 6718   opcode(0x0F,0x40);
 6719   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6720   ins_pipe( pipe_cmov_reg );
 6721 %}
 6722 
 6723 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6724   predicate(VM_Version::supports_cmov() );
 6725   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6726   ins_cost(200);
 6727   format %{ "CMOV$cop $dst,$src" %}
 6728   opcode(0x0F,0x40);
 6729   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6730   ins_pipe( pipe_cmov_reg );
 6731 %}
 6732 
 6733 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6734   predicate(VM_Version::supports_cmov() );
 6735   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6736   ins_cost(200);
 6737   expand %{
 6738     cmovI_regU(cop, cr, dst, src);
 6739   %}
 6740 %}
 6741 
 6742 // Conditional move
 6743 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6744   predicate(VM_Version::supports_cmov() );
 6745   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6746   ins_cost(250);
 6747   format %{ "CMOV$cop $dst,$src" %}
 6748   opcode(0x0F,0x40);
 6749   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6750   ins_pipe( pipe_cmov_mem );
 6751 %}
 6752 
 6753 // Conditional move
 6754 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6755   predicate(VM_Version::supports_cmov() );
 6756   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6757   ins_cost(250);
 6758   format %{ "CMOV$cop $dst,$src" %}
 6759   opcode(0x0F,0x40);
 6760   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6761   ins_pipe( pipe_cmov_mem );
 6762 %}
 6763 
 6764 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6765   predicate(VM_Version::supports_cmov() );
 6766   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6767   ins_cost(250);
 6768   expand %{
 6769     cmovI_memU(cop, cr, dst, src);
 6770   %}
 6771 %}
 6772 
 6773 // Conditional move
 6774 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6775   predicate(VM_Version::supports_cmov() );
 6776   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6777   ins_cost(200);
 6778   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6779   opcode(0x0F,0x40);
 6780   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6781   ins_pipe( pipe_cmov_reg );
 6782 %}
 6783 
 6784 // Conditional move (non-P6 version)
 6785 // Note: a CMoveP is generated for stubs and native wrappers
 6786 //       regardless of whether we are on a P6, so we
 6787 //       emulate a cmov here
 6788 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6789   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6790   ins_cost(300);
 6791   format %{ "Jn$cop   skip\n\t"
 6792           "MOV    $dst,$src\t# pointer\n"
 6793       "skip:" %}
 6794   opcode(0x8b);
 6795   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6796   ins_pipe( pipe_cmov_reg );
 6797 %}
 6798 
 6799 // Conditional move
 6800 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6801   predicate(VM_Version::supports_cmov() );
 6802   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6803   ins_cost(200);
 6804   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6805   opcode(0x0F,0x40);
 6806   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6807   ins_pipe( pipe_cmov_reg );
 6808 %}
 6809 
 6810 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6811   predicate(VM_Version::supports_cmov() );
 6812   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6813   ins_cost(200);
 6814   expand %{
 6815     cmovP_regU(cop, cr, dst, src);
 6816   %}
 6817 %}
 6818 
 6819 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6820 // correctly meets the two pointer arguments; one is an incoming
 6821 // register but the other is a memory operand.  ALSO appears to
 6822 // be buggy with implicit null checks.
 6823 //
 6824 //// Conditional move
 6825 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6826 //  predicate(VM_Version::supports_cmov() );
 6827 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6828 //  ins_cost(250);
 6829 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6830 //  opcode(0x0F,0x40);
 6831 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6832 //  ins_pipe( pipe_cmov_mem );
 6833 //%}
 6834 //
 6835 //// Conditional move
 6836 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6837 //  predicate(VM_Version::supports_cmov() );
 6838 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6839 //  ins_cost(250);
 6840 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6841 //  opcode(0x0F,0x40);
 6842 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6843 //  ins_pipe( pipe_cmov_mem );
 6844 //%}
 6845 
 6846 // Conditional move
 6847 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6848   predicate(UseSSE<=1);
 6849   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6850   ins_cost(200);
 6851   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6852   opcode(0xDA);
 6853   ins_encode( enc_cmov_dpr(cop,src) );
 6854   ins_pipe( pipe_cmovDPR_reg );
 6855 %}
 6856 
 6857 // Conditional move
 6858 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6859   predicate(UseSSE==0);
 6860   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6861   ins_cost(200);
 6862   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6863   opcode(0xDA);
 6864   ins_encode( enc_cmov_dpr(cop,src) );
 6865   ins_pipe( pipe_cmovDPR_reg );
 6866 %}
 6867 
 6868 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6869 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6870   predicate(UseSSE<=1);
 6871   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6872   ins_cost(200);
 6873   format %{ "Jn$cop   skip\n\t"
 6874             "MOV    $dst,$src\t# double\n"
 6875       "skip:" %}
 6876   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6877   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6878   ins_pipe( pipe_cmovDPR_reg );
 6879 %}
 6880 
 6881 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6882 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6883   predicate(UseSSE==0);
 6884   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6885   ins_cost(200);
 6886   format %{ "Jn$cop    skip\n\t"
 6887             "MOV    $dst,$src\t# float\n"
 6888       "skip:" %}
 6889   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
 6890   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6891   ins_pipe( pipe_cmovDPR_reg );
 6892 %}
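
      // Illustrative Java shape for the two rules above (a sketch only; whether C2
      // actually forms a CMoveF/CMoveD here depends on its own heuristics):
      //   double r = (i > 0) ? a : b;   // signed int compare selecting an FP value
      // Because FCMOV understands only the unsigned/parity conditions, the signed
      // case is emulated with an inverted short branch around a register-to-register
      // move, as the formats above show.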
 6893 
 6894 // There is no FP CMOV for SSE/SSE2 (XMM) registers, so emulate with a short branch
 6895 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6896   predicate (UseSSE>=1);
 6897   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6898   ins_cost(200);
 6899   format %{ "Jn$cop   skip\n\t"
 6900             "MOVSS  $dst,$src\t# float\n"
 6901       "skip:" %}
 6902   ins_encode %{
 6903     Label skip;
 6904     // Invert sense of branch from sense of CMOV
 6905     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6906     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6907     __ bind(skip);
 6908   %}
 6909   ins_pipe( pipe_slow );
 6910 %}
 6911 
 6912 // There is no FP CMOV for SSE/SSE2 (XMM) registers, so emulate with a short branch
 6913 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6914   predicate (UseSSE>=2);
 6915   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6916   ins_cost(200);
 6917   format %{ "Jn$cop   skip\n\t"
 6918             "MOVSD  $dst,$src\t# double\n"
 6919       "skip:" %}
 6920   ins_encode %{
 6921     Label skip;
 6922     // Invert sense of branch from sense of CMOV
 6923     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6924     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6925     __ bind(skip);
 6926   %}
 6927   ins_pipe( pipe_slow );
 6928 %}
 6929 
 6930 // unsigned version
 6931 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6932   predicate (UseSSE>=1);
 6933   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6934   ins_cost(200);
 6935   format %{ "Jn$cop   skip\n\t"
 6936             "MOVSS  $dst,$src\t# float\n"
 6937       "skip:" %}
 6938   ins_encode %{
 6939     Label skip;
 6940     // Invert sense of branch from sense of CMOV
 6941     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6942     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6943     __ bind(skip);
 6944   %}
 6945   ins_pipe( pipe_slow );
 6946 %}
 6947 
 6948 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6949   predicate (UseSSE>=1);
 6950   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6951   ins_cost(200);
 6952   expand %{
 6953     fcmovF_regU(cop, cr, dst, src);
 6954   %}
 6955 %}
 6956 
 6957 // unsigned version
 6958 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6959   predicate (UseSSE>=2);
 6960   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6961   ins_cost(200);
 6962   format %{ "Jn$cop   skip\n\t"
 6963             "MOVSD  $dst,$src\t# double\n"
 6964       "skip:" %}
 6965   ins_encode %{
 6966     Label skip;
 6967     // Invert sense of branch from sense of CMOV
 6968     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6969     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6970     __ bind(skip);
 6971   %}
 6972   ins_pipe( pipe_slow );
 6973 %}
 6974 
 6975 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 6976   predicate (UseSSE>=2);
 6977   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6978   ins_cost(200);
 6979   expand %{
 6980     fcmovD_regU(cop, cr, dst, src);
 6981   %}
 6982 %}
 6983 
 6984 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 6985   predicate(VM_Version::supports_cmov() );
 6986   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6987   ins_cost(200);
 6988   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 6989             "CMOV$cop $dst.hi,$src.hi" %}
 6990   opcode(0x0F,0x40);
 6991   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 6992   ins_pipe( pipe_cmov_reg_long );
 6993 %}
 6994 
 6995 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 6996   predicate(VM_Version::supports_cmov() );
 6997   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 6998   ins_cost(200);
 6999   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7000             "CMOV$cop $dst.hi,$src.hi" %}
 7001   opcode(0x0F,0x40);
 7002   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7003   ins_pipe( pipe_cmov_reg_long );
 7004 %}
 7005 
 7006 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7007   predicate(VM_Version::supports_cmov() );
 7008   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7009   ins_cost(200);
 7010   expand %{
 7011     cmovL_regU(cop, cr, dst, src);
 7012   %}
 7013 %}
 7014 
 7015 //----------Arithmetic Instructions--------------------------------------------
 7016 //----------Addition Instructions----------------------------------------------
 7017 
 7018 // Integer Addition Instructions
 7019 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7020   match(Set dst (AddI dst src));
 7021   effect(KILL cr);
 7022 
 7023   size(2);
 7024   format %{ "ADD    $dst,$src" %}
 7025   opcode(0x03);
 7026   ins_encode( OpcP, RegReg( dst, src) );
 7027   ins_pipe( ialu_reg_reg );
 7028 %}
 7029 
 7030 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7031   match(Set dst (AddI dst src));
 7032   effect(KILL cr);
 7033 
 7034   format %{ "ADD    $dst,$src" %}
 7035   opcode(0x81, 0x00); /* /0 id */
 7036   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7037   ins_pipe( ialu_reg );
 7038 %}
 7039 
 7040 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7041   predicate(UseIncDec);
 7042   match(Set dst (AddI dst src));
 7043   effect(KILL cr);
 7044 
 7045   size(1);
 7046   format %{ "INC    $dst" %}
 7047   opcode(0x40); /* 0x40 + reg encodes INC r32 */
 7048   ins_encode( Opc_plus( primary, dst ) );
 7049   ins_pipe( ialu_reg );
 7050 %}
 7051 
 7052 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7053   match(Set dst (AddI src0 src1));
 7054   ins_cost(110);
 7055 
 7056   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7057   opcode(0x8D); /* 0x8D /r */
 7058   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7059   ins_pipe( ialu_reg_reg );
 7060 %}
 7061 
 7062 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7063   match(Set dst (AddP src0 src1));
 7064   ins_cost(110);
 7065 
 7066   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7067   opcode(0x8D); /* 0x8D /r */
 7068   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7069   ins_pipe( ialu_reg_reg );
 7070 %}
 7071 
 7072 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7073   predicate(UseIncDec);
 7074   match(Set dst (AddI dst src));
 7075   effect(KILL cr);
 7076 
 7077   size(1);
 7078   format %{ "DEC    $dst" %}
 7079   opcode(0x48); /* 0x48 + reg encodes DEC r32 */
 7080   ins_encode( Opc_plus( primary, dst ) );
 7081   ins_pipe( ialu_reg );
 7082 %}
 7083 
 7084 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7085   match(Set dst (AddP dst src));
 7086   effect(KILL cr);
 7087 
 7088   size(2);
 7089   format %{ "ADD    $dst,$src" %}
 7090   opcode(0x03);
 7091   ins_encode( OpcP, RegReg( dst, src) );
 7092   ins_pipe( ialu_reg_reg );
 7093 %}
 7094 
 7095 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7096   match(Set dst (AddP dst src));
 7097   effect(KILL cr);
 7098 
 7099   format %{ "ADD    $dst,$src" %}
 7100   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7101   // ins_encode( RegImm( dst, src) );
 7102   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7103   ins_pipe( ialu_reg );
 7104 %}
 7105 
 7106 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7107   match(Set dst (AddI dst (LoadI src)));
 7108   effect(KILL cr);
 7109 
 7110   ins_cost(125);
 7111   format %{ "ADD    $dst,$src" %}
 7112   opcode(0x03);
 7113   ins_encode( OpcP, RegMem( dst, src) );
 7114   ins_pipe( ialu_reg_mem );
 7115 %}
 7116 
 7117 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7118   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7119   effect(KILL cr);
 7120 
 7121   ins_cost(150);
 7122   format %{ "ADD    $dst,$src" %}
 7123   opcode(0x01);  /* Opcode 01 /r */
 7124   ins_encode( OpcP, RegMem( src, dst ) );
 7125   ins_pipe( ialu_mem_reg );
 7126 %}
 7127 
 7128 // Add Memory with Immediate
 7129 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7130   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7131   effect(KILL cr);
 7132 
 7133   ins_cost(125);
 7134   format %{ "ADD    $dst,$src" %}
 7135   opcode(0x81);               /* Opcode 81 /0 id */
 7136   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7137   ins_pipe( ialu_mem_imm );
 7138 %}
 7139 
 7140 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7141   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7142   effect(KILL cr);
 7143 
 7144   ins_cost(125);
 7145   format %{ "INC    $dst" %}
 7146   opcode(0xFF);               /* Opcode FF /0 */
 7147   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7148   ins_pipe( ialu_mem_imm );
 7149 %}
 7150 
 7151 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7152   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7153   effect(KILL cr);
 7154 
 7155   ins_cost(125);
 7156   format %{ "DEC    $dst" %}
 7157   opcode(0xFF);               /* Opcode FF /1 */
 7158   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7159   ins_pipe( ialu_mem_imm );
 7160 %}
 7161 
 7162 
 7163 instruct checkCastPP( eRegP dst ) %{
 7164   match(Set dst (CheckCastPP dst));
 7165 
 7166   size(0);
 7167   format %{ "#checkcastPP of $dst" %}
 7168   ins_encode( /*empty encoding*/ );
 7169   ins_pipe( empty );
 7170 %}
 7171 
 7172 instruct castPP( eRegP dst ) %{
 7173   match(Set dst (CastPP dst));
 7174   format %{ "#castPP of $dst" %}
 7175   ins_encode( /*empty encoding*/ );
 7176   ins_pipe( empty );
 7177 %}
 7178 
 7179 instruct castII( rRegI dst ) %{
 7180   match(Set dst (CastII dst));
 7181   format %{ "#castII of $dst" %}
 7182   ins_encode( /*empty encoding*/ );
 7183   ins_cost(0);
 7184   ins_pipe( empty );
 7185 %}
 7186 
 7187 instruct castLL( eRegL dst ) %{
 7188   match(Set dst (CastLL dst));
 7189   format %{ "#castLL of $dst" %}
 7190   ins_encode( /*empty encoding*/ );
 7191   ins_cost(0);
 7192   ins_pipe( empty );
 7193 %}
 7194 
 7195 instruct castFF( regF dst ) %{
 7196   predicate(UseSSE >= 1);
 7197   match(Set dst (CastFF dst));
 7198   format %{ "#castFF of $dst" %}
 7199   ins_encode( /*empty encoding*/ );
 7200   ins_cost(0);
 7201   ins_pipe( empty );
 7202 %}
 7203 
 7204 instruct castDD( regD dst ) %{
 7205   predicate(UseSSE >= 2);
 7206   match(Set dst (CastDD dst));
 7207   format %{ "#castDD of $dst" %}
 7208   ins_encode( /*empty encoding*/ );
 7209   ins_cost(0);
 7210   ins_pipe( empty );
 7211 %}
 7212 
 7213 instruct castFF_PR( regFPR dst ) %{
 7214   predicate(UseSSE < 1);
 7215   match(Set dst (CastFF dst));
 7216   format %{ "#castFF of $dst" %}
 7217   ins_encode( /*empty encoding*/ );
 7218   ins_cost(0);
 7219   ins_pipe( empty );
 7220 %}
 7221 
 7222 instruct castDD_PR( regDPR dst ) %{
 7223   predicate(UseSSE < 2);
 7224   match(Set dst (CastDD dst));
 7225   format %{ "#castDD of $dst" %}
 7226   ins_encode( /*empty encoding*/ );
 7227   ins_cost(0);
 7228   ins_pipe( empty );
 7229 %}
 7230 
 7231 // Load-locked - same as a regular pointer load when used with compare-swap
 7232 instruct loadPLocked(eRegP dst, memory mem) %{
 7233   match(Set dst (LoadPLocked mem));
 7234 
 7235   ins_cost(125);
 7236   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
 7237   opcode(0x8B);
 7238   ins_encode( OpcP, RegMem(dst,mem));
 7239   ins_pipe( ialu_reg_mem );
 7240 %}
 7241 
 7242 // Conditional-store of the updated heap-top.
 7243 // Used during allocation of the shared heap.
 7244 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
 7245 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
 7246   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
 7247   // EAX is killed if there is contention, but then it's also unused.
 7248   // In the common case of no contention, EAX holds the new oop address.
 7249   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
 7250   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
 7251   ins_pipe( pipe_cmpxchg );
 7252 %}
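
      // Illustrative allocation fast path that this rule supports (pseudocode
      // sketch; the names are placeholders, not from this file):
      //   new_top = old_top + size          // old_top is in EAX
      //   CMPXCHG [heap_top], new_top       // ZF set only if heap_top == old_top
      //   JNE  slow_path                    // contention: retry or call the runtime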
 7253 
 7254 // Conditional-store of an int value.
 7255 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
 7256 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
 7257   match(Set cr (StoreIConditional mem (Binary oldval newval)));
 7258   effect(KILL oldval);
 7259   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
 7260   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
 7261   ins_pipe( pipe_cmpxchg );
 7262 %}
 7263 
 7264 // Conditional-store of a long value.
 7265 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
 7266 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7267   match(Set cr (StoreLConditional mem (Binary oldval newval)));
 7268   effect(KILL oldval);
 7269   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
 7270             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
 7271             "XCHG   EBX,ECX"
 7272   %}
 7273   ins_encode %{
 7274     // Note: we need to swap rbx, and rcx before and after the
 7275     //       cmpxchg8 instruction because the instruction uses
 7276     //       rcx as the high order word of the new value to store but
 7277     //       our register encoding uses rbx.
 7278     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7279     __ lock();
 7280     __ cmpxchg8($mem$$Address);
 7281     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
 7282   %}
 7283   ins_pipe( pipe_cmpxchg );
 7284 %}
 7285 
 7286 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 7287 
 7288 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7289   predicate(VM_Version::supports_cx8());
 7290   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7291   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7292   effect(KILL cr, KILL oldval);
 7293   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7294             "MOV    $res,0\n\t"
 7295             "JNE,s  fail\n\t"
 7296             "MOV    $res,1\n"
 7297           "fail:" %}
 7298   ins_encode( enc_cmpxchg8(mem_ptr),
 7299               enc_flags_ne_to_boolean(res) );
 7300   ins_pipe( pipe_cmpxchg );
 7301 %}
 7302 
 7303 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7304   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7305   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7306   effect(KILL cr, KILL oldval);
 7307   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7308             "MOV    $res,0\n\t"
 7309             "JNE,s  fail\n\t"
 7310             "MOV    $res,1\n"
 7311           "fail:" %}
 7312   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7313   ins_pipe( pipe_cmpxchg );
 7314 %}
 7315 
 7316 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7317   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7318   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7319   effect(KILL cr, KILL oldval);
 7320   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7321             "MOV    $res,0\n\t"
 7322             "JNE,s  fail\n\t"
 7323             "MOV    $res,1\n"
 7324           "fail:" %}
 7325   ins_encode( enc_cmpxchgb(mem_ptr),
 7326               enc_flags_ne_to_boolean(res) );
 7327   ins_pipe( pipe_cmpxchg );
 7328 %}
 7329 
 7330 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7331   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7332   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7333   effect(KILL cr, KILL oldval);
 7334   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7335             "MOV    $res,0\n\t"
 7336             "JNE,s  fail\n\t"
 7337             "MOV    $res,1\n"
 7338           "fail:" %}
 7339   ins_encode( enc_cmpxchgw(mem_ptr),
 7340               enc_flags_ne_to_boolean(res) );
 7341   ins_pipe( pipe_cmpxchg );
 7342 %}
 7343 
 7344 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7345   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7346   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7347   effect(KILL cr, KILL oldval);
 7348   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7349             "MOV    $res,0\n\t"
 7350             "JNE,s  fail\n\t"
 7351             "MOV    $res,1\n"
 7352           "fail:" %}
 7353   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7354   ins_pipe( pipe_cmpxchg );
 7355 %}
 7356 
 7357 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7358   predicate(VM_Version::supports_cx8());
 7359   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7360   effect(KILL cr);
 7361   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7362   ins_encode( enc_cmpxchg8(mem_ptr) );
 7363   ins_pipe( pipe_cmpxchg );
 7364 %}
 7365 
 7366 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7367   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7368   effect(KILL cr);
 7369   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7370   ins_encode( enc_cmpxchg(mem_ptr) );
 7371   ins_pipe( pipe_cmpxchg );
 7372 %}
 7373 
 7374 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7375   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7376   effect(KILL cr);
 7377   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7378   ins_encode( enc_cmpxchgb(mem_ptr) );
 7379   ins_pipe( pipe_cmpxchg );
 7380 %}
 7381 
 7382 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7383   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7384   effect(KILL cr);
 7385   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7386   ins_encode( enc_cmpxchgw(mem_ptr) );
 7387   ins_pipe( pipe_cmpxchg );
 7388 %}
 7389 
 7390 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7391   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7392   effect(KILL cr);
 7393   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7394   ins_encode( enc_cmpxchg(mem_ptr) );
 7395   ins_pipe( pipe_cmpxchg );
 7396 %}
 7397 
 7398 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7399   predicate(n->as_LoadStore()->result_not_used());
 7400   match(Set dummy (GetAndAddB mem add));
 7401   effect(KILL cr);
 7402   format %{ "ADDB  [$mem],$add" %}
 7403   ins_encode %{
 7404     __ lock();
 7405     __ addb($mem$$Address, $add$$constant);
 7406   %}
 7407   ins_pipe( pipe_cmpxchg );
 7408 %}
 7409 
 7410 // Important to match to xRegI: only 8-bit regs.
 7411 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7412   match(Set newval (GetAndAddB mem newval));
 7413   effect(KILL cr);
 7414   format %{ "XADDB  [$mem],$newval" %}
 7415   ins_encode %{
 7416     __ lock();
 7417     __ xaddb($mem$$Address, $newval$$Register);
 7418   %}
 7419   ins_pipe( pipe_cmpxchg );
 7420 %}
 7421 
 7422 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7423   predicate(n->as_LoadStore()->result_not_used());
 7424   match(Set dummy (GetAndAddS mem add));
 7425   effect(KILL cr);
 7426   format %{ "ADDW  [$mem],$add" %}
 7427   ins_encode %{
 7428     __ lock();
 7429     __ addw($mem$$Address, $add$$constant);
 7430   %}
 7431   ins_pipe( pipe_cmpxchg );
 7432 %}
 7433 
 7434 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7435   match(Set newval (GetAndAddS mem newval));
 7436   effect(KILL cr);
 7437   format %{ "XADDW  [$mem],$newval" %}
 7438   ins_encode %{
 7439     __ lock();
 7440     __ xaddw($mem$$Address, $newval$$Register);
 7441   %}
 7442   ins_pipe( pipe_cmpxchg );
 7443 %}
 7444 
 7445 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7446   predicate(n->as_LoadStore()->result_not_used());
 7447   match(Set dummy (GetAndAddI mem add));
 7448   effect(KILL cr);
 7449   format %{ "ADDL  [$mem],$add" %}
 7450   ins_encode %{
 7451     __ lock();
 7452     __ addl($mem$$Address, $add$$constant);
 7453   %}
 7454   ins_pipe( pipe_cmpxchg );
 7455 %}
 7456 
 7457 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7458   match(Set newval (GetAndAddI mem newval));
 7459   effect(KILL cr);
 7460   format %{ "XADDL  [$mem],$newval" %}
 7461   ins_encode %{
 7462     __ lock();
 7463     __ xaddl($mem$$Address, $newval$$Register);
 7464   %}
 7465   ins_pipe( pipe_cmpxchg );
 7466 %}
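
      // Illustrative mapping for the XADD rules above (an assumption about typical
      // callers, not something stated in this file): Unsafe.getAndAddInt - and via
      // it AtomicInteger.getAndAdd/getAndIncrement - becomes a GetAndAddI node and
      // matches here as a LOCK XADDL.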
 7467 
 7468 // Important to match to xRegI: only 8-bit regs.
 7469 instruct xchgB( memory mem, xRegI newval) %{
 7470   match(Set newval (GetAndSetB mem newval));
 7471   format %{ "XCHGB  $newval,[$mem]" %}
 7472   ins_encode %{
 7473     __ xchgb($newval$$Register, $mem$$Address);
 7474   %}
 7475   ins_pipe( pipe_cmpxchg );
 7476 %}
 7477 
 7478 instruct xchgS( memory mem, rRegI newval) %{
 7479   match(Set newval (GetAndSetS mem newval));
 7480   format %{ "XCHGW  $newval,[$mem]" %}
 7481   ins_encode %{
 7482     __ xchgw($newval$$Register, $mem$$Address);
 7483   %}
 7484   ins_pipe( pipe_cmpxchg );
 7485 %}
 7486 
 7487 instruct xchgI( memory mem, rRegI newval) %{
 7488   match(Set newval (GetAndSetI mem newval));
 7489   format %{ "XCHGL  $newval,[$mem]" %}
 7490   ins_encode %{
 7491     __ xchgl($newval$$Register, $mem$$Address);
 7492   %}
 7493   ins_pipe( pipe_cmpxchg );
 7494 %}
 7495 
 7496 instruct xchgP( memory mem, pRegP newval) %{
 7497   match(Set newval (GetAndSetP mem newval));
 7498   format %{ "XCHGL  $newval,[$mem]" %}
 7499   ins_encode %{
 7500     __ xchgl($newval$$Register, $mem$$Address);
 7501   %}
 7502   ins_pipe( pipe_cmpxchg );
 7503 %}
 7504 
 7505 //----------Subtraction Instructions-------------------------------------------
 7506 
 7507 // Integer Subtraction Instructions
 7508 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7509   match(Set dst (SubI dst src));
 7510   effect(KILL cr);
 7511 
 7512   size(2);
 7513   format %{ "SUB    $dst,$src" %}
 7514   opcode(0x2B);
 7515   ins_encode( OpcP, RegReg( dst, src) );
 7516   ins_pipe( ialu_reg_reg );
 7517 %}
 7518 
 7519 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7520   match(Set dst (SubI dst src));
 7521   effect(KILL cr);
 7522 
 7523   format %{ "SUB    $dst,$src" %}
 7524   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7525   // ins_encode( RegImm( dst, src) );
 7526   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7527   ins_pipe( ialu_reg );
 7528 %}
 7529 
 7530 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7531   match(Set dst (SubI dst (LoadI src)));
 7532   effect(KILL cr);
 7533 
 7534   ins_cost(125);
 7535   format %{ "SUB    $dst,$src" %}
 7536   opcode(0x2B);
 7537   ins_encode( OpcP, RegMem( dst, src) );
 7538   ins_pipe( ialu_reg_mem );
 7539 %}
 7540 
 7541 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7542   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7543   effect(KILL cr);
 7544 
 7545   ins_cost(150);
 7546   format %{ "SUB    $dst,$src" %}
 7547   opcode(0x29);  /* Opcode 29 /r */
 7548   ins_encode( OpcP, RegMem( src, dst ) );
 7549   ins_pipe( ialu_mem_reg );
 7550 %}
 7551 
 7552 // Subtract from a pointer
 7553 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7554   match(Set dst (AddP dst (SubI zero src)));
 7555   effect(KILL cr);
 7556 
 7557   size(2);
 7558   format %{ "SUB    $dst,$src" %}
 7559   opcode(0x2B);
 7560   ins_encode( OpcP, RegReg( dst, src) );
 7561   ins_pipe( ialu_reg_reg );
 7562 %}
 7563 
 7564 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7565   match(Set dst (SubI zero dst));
 7566   effect(KILL cr);
 7567 
 7568   size(2);
 7569   format %{ "NEG    $dst" %}
 7570   opcode(0xF7,0x03);  // Opcode F7 /3
 7571   ins_encode( OpcP, RegOpc( dst ) );
 7572   ins_pipe( ialu_reg );
 7573 %}
 7574 
 7575 //----------Multiplication/Division Instructions-------------------------------
 7576 // Integer Multiplication Instructions
 7577 // Multiply Register
 7578 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7579   match(Set dst (MulI dst src));
 7580   effect(KILL cr);
 7581 
 7582   size(3);
 7583   ins_cost(300);
 7584   format %{ "IMUL   $dst,$src" %}
 7585   opcode(0xAF, 0x0F);
 7586   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7587   ins_pipe( ialu_reg_reg_alu0 );
 7588 %}
 7589 
 7590 // Multiply 32-bit Immediate
 7591 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7592   match(Set dst (MulI src imm));
 7593   effect(KILL cr);
 7594 
 7595   ins_cost(300);
 7596   format %{ "IMUL   $dst,$src,$imm" %}
 7597   opcode(0x69);  /* 69 /r id */
 7598   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7599   ins_pipe( ialu_reg_reg_alu0 );
 7600 %}
 7601 
 7602 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7603   match(Set dst src);
 7604   effect(KILL cr);
 7605 
 7606   // Note that this is artificially increased to make it more expensive than loadConL
 7607   ins_cost(250);
 7608   format %{ "MOV    EAX,$src\t// low word only" %}
 7609   opcode(0xB8);
 7610   ins_encode( LdImmL_Lo(dst, src) );
 7611   ins_pipe( ialu_reg_fat );
 7612 %}
 7613 
 7614 // Multiply by 32-bit Immediate, taking the shifted high order results
 7615 //  (special case for shift by 32)
 7616 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7617   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7618   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7619              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7620              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7621   effect(USE src1, KILL cr);
 7622 
 7623   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7624   ins_cost(0*100 + 1*400 - 150);
 7625   format %{ "IMUL   EDX:EAX,$src1" %}
 7626   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7627   ins_pipe( pipe_slow );
 7628 %}
 7629 
 7630 // Multiply by 32-bit Immediate, taking the shifted high order results
 7631 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7632   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7633   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7634              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7635              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7636   effect(USE src1, KILL cr);
 7637 
 7638   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7639   ins_cost(1*100 + 1*400 - 150);
 7640   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7641             "SAR    EDX,$cnt-32" %}
 7642   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7643   ins_pipe( pipe_slow );
 7644 %}
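
      // Illustrative Java shape matched by the two rules above (sketch; x and C are
      // placeholder names, with C a constant that fits in 32 bits):
      //   int hi = (int)(((long)x * C) >> 32);   // shift by exactly 32
      //   int hi = (int)(((long)x * C) >> 35);   // any shift in 32..63
      // IMUL leaves the 64-bit product in EDX:EAX, so a shift of 32 simply selects
      // EDX, and larger shifts need only the extra SAR of EDX by (cnt - 32).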
 7645 
 7646 // Multiply Memory 32-bit Immediate
 7647 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7648   match(Set dst (MulI (LoadI src) imm));
 7649   effect(KILL cr);
 7650 
 7651   ins_cost(300);
 7652   format %{ "IMUL   $dst,$src,$imm" %}
 7653   opcode(0x69);  /* 69 /r id */
 7654   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7655   ins_pipe( ialu_reg_mem_alu0 );
 7656 %}
 7657 
 7658 // Multiply Memory
 7659 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7660   match(Set dst (MulI dst (LoadI src)));
 7661   effect(KILL cr);
 7662 
 7663   ins_cost(350);
 7664   format %{ "IMUL   $dst,$src" %}
 7665   opcode(0xAF, 0x0F);
 7666   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7667   ins_pipe( ialu_reg_mem_alu0 );
 7668 %}
 7669 
 7670 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7671 %{
 7672   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7673   effect(KILL cr, KILL src2);
 7674 
 7675   expand %{ mulI_eReg(dst, src1, cr);
 7676            mulI_eReg(src2, src3, cr);
 7677            addI_eReg(dst, src2, cr); %}
 7678 %}
 7679 
 7680 // Multiply Register Int to Long
 7681 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7682   // Basic Idea: long = (long)int * (long)int
 7683   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7684   effect(DEF dst, USE src, USE src1, KILL flags);
 7685 
 7686   ins_cost(300);
 7687   format %{ "IMUL   $dst,$src1" %}
 7688 
 7689   ins_encode( long_int_multiply( dst, src1 ) );
 7690   ins_pipe( ialu_reg_reg_alu0 );
 7691 %}
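
      // Illustrative Java source for the rule above (sketch):
      //   long r = (long)a * (long)b;   // widening signed multiply
      // A single IMUL yields the full 64-bit product directly in EDX:EAX.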
 7692 
 7693 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7694   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7695   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7696   effect(KILL flags);
 7697 
 7698   ins_cost(300);
 7699   format %{ "MUL    $dst,$src1" %}
 7700 
 7701   ins_encode( long_uint_multiply(dst, src1) );
 7702   ins_pipe( ialu_reg_reg_alu0 );
 7703 %}
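
      // Illustrative Java source for the rule above (sketch):
      //   long r = (a & 0xFFFFFFFFL) * (b & 0xFFFFFFFFL);   // unsigned widening multiply
      // Zero-extended operands call for the unsigned MUL rather than IMUL.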
 7704 
 7705 // Multiply Register Long
 7706 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7707   match(Set dst (MulL dst src));
 7708   effect(KILL cr, TEMP tmp);
 7709   ins_cost(4*100+3*400);
 7710 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7711 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
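// With x = 2^32*x_hi + x_lo and y = 2^32*y_hi + y_lo:
//   x*y = 2^64*(x_hi*y_hi) + 2^32*(x_hi*y_lo + x_lo*y_hi) + x_lo*y_lo
// and the 2^64 term is discarded because only the low 64 bits are kept.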
 7712   format %{ "MOV    $tmp,$src.lo\n\t"
 7713             "IMUL   $tmp,EDX\n\t"
 7714             "MOV    EDX,$src.hi\n\t"
 7715             "IMUL   EDX,EAX\n\t"
 7716             "ADD    $tmp,EDX\n\t"
 7717             "MUL    EDX:EAX,$src.lo\n\t"
 7718             "ADD    EDX,$tmp" %}
 7719   ins_encode( long_multiply( dst, src, tmp ) );
 7720   ins_pipe( pipe_slow );
 7721 %}
 7722 
 7723 // Multiply Register Long where the left operand's high 32 bits are zero
 7724 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7725   predicate(is_operand_hi32_zero(n->in(1)));
 7726   match(Set dst (MulL dst src));
 7727   effect(KILL cr, TEMP tmp);
 7728   ins_cost(2*100+2*400);
 7729 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7730 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7731   format %{ "MOV    $tmp,$src.hi\n\t"
 7732             "IMUL   $tmp,EAX\n\t"
 7733             "MUL    EDX:EAX,$src.lo\n\t"
 7734             "ADD    EDX,$tmp" %}
 7735   ins_encode %{
 7736     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7737     __ imull($tmp$$Register, rax);
 7738     __ mull($src$$Register);
 7739     __ addl(rdx, $tmp$$Register);
 7740   %}
 7741   ins_pipe( pipe_slow );
 7742 %}
 7743 
 7744 // Multiply Register Long where the right operand's high 32 bits are zero
 7745 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7746   predicate(is_operand_hi32_zero(n->in(2)));
 7747   match(Set dst (MulL dst src));
 7748   effect(KILL cr, TEMP tmp);
 7749   ins_cost(2*100+2*400);
 7750 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7751 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7752   format %{ "MOV    $tmp,$src.lo\n\t"
 7753             "IMUL   $tmp,EDX\n\t"
 7754             "MUL    EDX:EAX,$src.lo\n\t"
 7755             "ADD    EDX,$tmp" %}
 7756   ins_encode %{
 7757     __ movl($tmp$$Register, $src$$Register);
 7758     __ imull($tmp$$Register, rdx);
 7759     __ mull($src$$Register);
 7760     __ addl(rdx, $tmp$$Register);
 7761   %}
 7762   ins_pipe( pipe_slow );
 7763 %}
 7764 
 7765 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7766 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7767   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7768   match(Set dst (MulL dst src));
 7769   effect(KILL cr);
 7770   ins_cost(1*400);
 7771 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7772 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7773   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7774   ins_encode %{
 7775     __ mull($src$$Register);
 7776   %}
 7777   ins_pipe( pipe_slow );
 7778 %}
 7779 
 7780 // Multiply Register Long by small constant
 7781 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7782   match(Set dst (MulL dst src));
 7783   effect(KILL cr, TEMP tmp);
 7784   ins_cost(2*100+2*400);
 7785   size(12);
 7786 // Basic idea: lo(result) = lo(src * EAX)
 7787 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7788   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7789             "MOV    EDX,$src\n\t"
 7790             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7791             "ADD    EDX,$tmp" %}
 7792   ins_encode( long_multiply_con( dst, src, tmp ) );
 7793   ins_pipe( pipe_slow );
 7794 %}
 7795 
 7796 // Integer DIV with Register
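// IDIV raises a fault when the quotient does not fit (EAX == 0x80000000 with a
// divisor of -1), so that case is tested up front and the IDIV is skipped: EAX
// already holds the Java-mandated result Integer.MIN_VALUE and EDX is zeroed.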
 7797 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7798   match(Set rax (DivI rax div));
 7799   effect(KILL rdx, KILL cr);
 7800   size(26);
 7801   ins_cost(30*100+10*100);
 7802   format %{ "CMP    EAX,0x80000000\n\t"
 7803             "JNE,s  normal\n\t"
 7804             "XOR    EDX,EDX\n\t"
 7805             "CMP    ECX,-1\n\t"
 7806             "JE,s   done\n"
 7807     "normal: CDQ\n\t"
 7808             "IDIV   $div\n\t"
 7809     "done:"        %}
 7810   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7811   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7812   ins_pipe( ialu_reg_reg_alu0 );
 7813 %}
 7814 
 7815 // Divide Register Long
 7816 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7817   match(Set dst (DivL src1 src2));
 7818   effect(CALL);
 7819   ins_cost(10000);
 7820   format %{ "PUSH   $src1.hi\n\t"
 7821             "PUSH   $src1.lo\n\t"
 7822             "PUSH   $src2.hi\n\t"
 7823             "PUSH   $src2.lo\n\t"
 7824             "CALL   SharedRuntime::ldiv\n\t"
 7825             "ADD    ESP,16" %}
 7826   ins_encode( long_div(src1,src2) );
 7827   ins_pipe( pipe_slow );
 7828 %}
 7829 
 7830 // Integer DIVMOD with Register, both quotient and mod results
 7831 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7832   match(DivModI rax div);
 7833   effect(KILL cr);
 7834   size(26);
 7835   ins_cost(30*100+10*100);
 7836   format %{ "CMP    EAX,0x80000000\n\t"
 7837             "JNE,s  normal\n\t"
 7838             "XOR    EDX,EDX\n\t"
 7839             "CMP    ECX,-1\n\t"
 7840             "JE,s   done\n"
 7841     "normal: CDQ\n\t"
 7842             "IDIV   $div\n\t"
 7843     "done:"        %}
 7844   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7845   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7846   ins_pipe( pipe_slow );
 7847 %}
 7848 
 7849 // Integer MOD with Register
 7850 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7851   match(Set rdx (ModI rax div));
 7852   effect(KILL rax, KILL cr);
 7853 
 7854   size(26);
 7855   ins_cost(300);
 7856   format %{ "CDQ\n\t"
 7857             "IDIV   $div" %}
 7858   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7859   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7860   ins_pipe( ialu_reg_reg_alu0 );
 7861 %}
 7862 
 7863 // Remainder Register Long
 7864 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7865   match(Set dst (ModL src1 src2));
 7866   effect(CALL);
 7867   ins_cost(10000);
 7868   format %{ "PUSH   $src1.hi\n\t"
 7869             "PUSH   $src1.lo\n\t"
 7870             "PUSH   $src2.hi\n\t"
 7871             "PUSH   $src2.lo\n\t"
 7872             "CALL   SharedRuntime::lrem\n\t"
 7873             "ADD    ESP,16" %}
 7874   ins_encode( long_mod(src1,src2) );
 7875   ins_pipe( pipe_slow );
 7876 %}
 7877 
 7878 // Divide Register Long (no special case since divisor != -1)
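// Schoolbook long division in base 2^32 with a positive 32-bit divisor d (|$imm|):
// if the high word of the dividend is below d (unsigned compare, which also rules
// out negative dividends), the quotient fits in 32 bits and a single unsigned DIV
// suffices (fast path); otherwise the high word is divided first and its remainder
// is combined with the low word for a second DIV. Negative dividends are negated
// before and after, and the final quotient is negated when $imm itself is negative.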
 7879 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7880   match(Set dst (DivL dst imm));
 7881   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7882   ins_cost(1000);
 7883   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7884             "XOR    $tmp2,$tmp2\n\t"
 7885             "CMP    $tmp,EDX\n\t"
 7886             "JA,s   fast\n\t"
 7887             "MOV    $tmp2,EAX\n\t"
 7888             "MOV    EAX,EDX\n\t"
 7889             "MOV    EDX,0\n\t"
 7890             "JLE,s  pos\n\t"
 7891             "LNEG   EAX : $tmp2\n\t"
 7892             "DIV    $tmp # unsigned division\n\t"
 7893             "XCHG   EAX,$tmp2\n\t"
 7894             "DIV    $tmp\n\t"
 7895             "LNEG   $tmp2 : EAX\n\t"
 7896             "JMP,s  done\n"
 7897     "pos:\n\t"
 7898             "DIV    $tmp\n\t"
 7899             "XCHG   EAX,$tmp2\n"
 7900     "fast:\n\t"
 7901             "DIV    $tmp\n"
 7902     "done:\n\t"
 7903             "MOV    EDX,$tmp2\n\t"
 7904             "NEG    EDX:EAX # if $imm < 0" %}
 7905   ins_encode %{
 7906     int con = (int)$imm$$constant;
 7907     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7908     int pcon = (con > 0) ? con : -con;
 7909     Label Lfast, Lpos, Ldone;
 7910 
 7911     __ movl($tmp$$Register, pcon);
 7912     __ xorl($tmp2$$Register,$tmp2$$Register);
 7913     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7914     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7915 
 7916     __ movl($tmp2$$Register, $dst$$Register); // save
 7917     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7918     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7919     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7920 
 7921     // Negative dividend.
 7922     // convert value to positive to use unsigned division
 7923     __ lneg($dst$$Register, $tmp2$$Register);
 7924     __ divl($tmp$$Register);
 7925     __ xchgl($dst$$Register, $tmp2$$Register);
 7926     __ divl($tmp$$Register);
 7927     // revert result back to negative
 7928     __ lneg($tmp2$$Register, $dst$$Register);
 7929     __ jmpb(Ldone);
 7930 
 7931     __ bind(Lpos);
 7932     __ divl($tmp$$Register); // Use unsigned division
 7933     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 holds the 32-bit hi result
 7935 
 7936     __ bind(Lfast);
 7937     // fast path: src is positive
 7938     __ divl($tmp$$Register); // Use unsigned division
 7939 
 7940     __ bind(Ldone);
 7941     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7942     if (con < 0) {
 7943       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7944     }
 7945   %}
 7946   ins_pipe( pipe_slow );
 7947 %}
 7948 
// Remainder Register Long (remainder fits into 32 bits)
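// Same structure as divL_eReg_imm32 above, but keeping the remainder: since the
// divisor is a 32-bit value the remainder always fits in 32 bits, so the result
// is the (possibly negated) 32-bit remainder sign-extended into EDX at the end.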
 7950 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7951   match(Set dst (ModL dst imm));
 7952   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7953   ins_cost(1000);
 7954   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7955             "CMP    $tmp,EDX\n\t"
 7956             "JA,s   fast\n\t"
 7957             "MOV    $tmp2,EAX\n\t"
 7958             "MOV    EAX,EDX\n\t"
 7959             "MOV    EDX,0\n\t"
 7960             "JLE,s  pos\n\t"
 7961             "LNEG   EAX : $tmp2\n\t"
 7962             "DIV    $tmp # unsigned division\n\t"
 7963             "MOV    EAX,$tmp2\n\t"
 7964             "DIV    $tmp\n\t"
 7965             "NEG    EDX\n\t"
 7966             "JMP,s  done\n"
 7967     "pos:\n\t"
 7968             "DIV    $tmp\n\t"
 7969             "MOV    EAX,$tmp2\n"
 7970     "fast:\n\t"
 7971             "DIV    $tmp\n"
 7972     "done:\n\t"
 7973             "MOV    EAX,EDX\n\t"
 7974             "SAR    EDX,31\n\t" %}
 7975   ins_encode %{
 7976     int con = (int)$imm$$constant;
 7977     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7978     int pcon = (con > 0) ? con : -con;
 7979     Label  Lfast, Lpos, Ldone;
 7980 
 7981     __ movl($tmp$$Register, pcon);
 7982     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7983     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7984 
 7985     __ movl($tmp2$$Register, $dst$$Register); // save
 7986     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7987     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7988     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7989 
 7990     // Negative dividend.
 7991     // convert value to positive to use unsigned division
 7992     __ lneg($dst$$Register, $tmp2$$Register);
 7993     __ divl($tmp$$Register);
 7994     __ movl($dst$$Register, $tmp2$$Register);
 7995     __ divl($tmp$$Register);
 7996     // revert remainder back to negative
 7997     __ negl(HIGH_FROM_LOW($dst$$Register));
 7998     __ jmpb(Ldone);
 7999 
 8000     __ bind(Lpos);
 8001     __ divl($tmp$$Register);
 8002     __ movl($dst$$Register, $tmp2$$Register);
 8003 
 8004     __ bind(Lfast);
 8005     // fast path: src is positive
 8006     __ divl($tmp$$Register);
 8007 
 8008     __ bind(Ldone);
 8009     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 8010     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 8011 
 8012   %}
 8013   ins_pipe( pipe_slow );
 8014 %}
 8015 
 8016 // Integer Shift Instructions
 8017 // Shift Left by one
 8018 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8019   match(Set dst (LShiftI dst shift));
 8020   effect(KILL cr);
 8021 
 8022   size(2);
 8023   format %{ "SHL    $dst,$shift" %}
 8024   opcode(0xD1, 0x4);  /* D1 /4 */
 8025   ins_encode( OpcP, RegOpc( dst ) );
 8026   ins_pipe( ialu_reg );
 8027 %}
 8028 
 8029 // Shift Left by 8-bit immediate
 8030 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8031   match(Set dst (LShiftI dst shift));
 8032   effect(KILL cr);
 8033 
 8034   size(3);
 8035   format %{ "SHL    $dst,$shift" %}
 8036   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8037   ins_encode( RegOpcImm( dst, shift) );
 8038   ins_pipe( ialu_reg );
 8039 %}
 8040 
 8041 // Shift Left by variable
 8042 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8043   match(Set dst (LShiftI dst shift));
 8044   effect(KILL cr);
 8045 
 8046   size(2);
 8047   format %{ "SHL    $dst,$shift" %}
 8048   opcode(0xD3, 0x4);  /* D3 /4 */
 8049   ins_encode( OpcP, RegOpc( dst ) );
 8050   ins_pipe( ialu_reg_reg );
 8051 %}
 8052 
 8053 // Arithmetic shift right by one
 8054 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8055   match(Set dst (RShiftI dst shift));
 8056   effect(KILL cr);
 8057 
 8058   size(2);
 8059   format %{ "SAR    $dst,$shift" %}
 8060   opcode(0xD1, 0x7);  /* D1 /7 */
 8061   ins_encode( OpcP, RegOpc( dst ) );
 8062   ins_pipe( ialu_reg );
 8063 %}
 8064 
// Arithmetic shift right by one (memory operand)
 8066 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8067   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8068   effect(KILL cr);
 8069   format %{ "SAR    $dst,$shift" %}
 8070   opcode(0xD1, 0x7);  /* D1 /7 */
 8071   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8072   ins_pipe( ialu_mem_imm );
 8073 %}
 8074 
 8075 // Arithmetic Shift Right by 8-bit immediate
 8076 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8077   match(Set dst (RShiftI dst shift));
 8078   effect(KILL cr);
 8079 
 8080   size(3);
 8081   format %{ "SAR    $dst,$shift" %}
 8082   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8083   ins_encode( RegOpcImm( dst, shift ) );
 8084   ins_pipe( ialu_mem_imm );
 8085 %}
 8086 
// Arithmetic Shift Right by 8-bit immediate (memory operand)
 8088 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8089   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8090   effect(KILL cr);
 8091 
 8092   format %{ "SAR    $dst,$shift" %}
 8093   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8094   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8095   ins_pipe( ialu_mem_imm );
 8096 %}
 8097 
 8098 // Arithmetic Shift Right by variable
 8099 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8100   match(Set dst (RShiftI dst shift));
 8101   effect(KILL cr);
 8102 
 8103   size(2);
 8104   format %{ "SAR    $dst,$shift" %}
 8105   opcode(0xD3, 0x7);  /* D3 /7 */
 8106   ins_encode( OpcP, RegOpc( dst ) );
 8107   ins_pipe( ialu_reg_reg );
 8108 %}
 8109 
 8110 // Logical shift right by one
 8111 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8112   match(Set dst (URShiftI dst shift));
 8113   effect(KILL cr);
 8114 
 8115   size(2);
 8116   format %{ "SHR    $dst,$shift" %}
 8117   opcode(0xD1, 0x5);  /* D1 /5 */
 8118   ins_encode( OpcP, RegOpc( dst ) );
 8119   ins_pipe( ialu_reg );
 8120 %}
 8121 
 8122 // Logical Shift Right by 8-bit immediate
 8123 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8124   match(Set dst (URShiftI dst shift));
 8125   effect(KILL cr);
 8126 
 8127   size(3);
 8128   format %{ "SHR    $dst,$shift" %}
 8129   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8130   ins_encode( RegOpcImm( dst, shift) );
 8131   ins_pipe( ialu_reg );
 8132 %}
 8133 
 8134 
 8135 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
 8136 // This idiom is used by the compiler for the i2b bytecode.
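// (A Java "(byte) x" cast shows up in the ideal graph as (x << 24) >> 24; it is
// collapsed here into a single sign-extending move. $src must be a byte-addressable
// register, hence the restricted xRegI operand.)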
 8137 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8138   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8139 
 8140   size(3);
 8141   format %{ "MOVSX  $dst,$src :8" %}
 8142   ins_encode %{
 8143     __ movsbl($dst$$Register, $src$$Register);
 8144   %}
 8145   ins_pipe(ialu_reg_reg);
 8146 %}
 8147 
 8148 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
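// (Likewise, a Java "(short) x" cast appears as (x << 16) >> 16 and becomes one
// sign-extending 16-bit move.)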
 8150 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8151   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8152 
 8153   size(3);
 8154   format %{ "MOVSX  $dst,$src :16" %}
 8155   ins_encode %{
 8156     __ movswl($dst$$Register, $src$$Register);
 8157   %}
 8158   ins_pipe(ialu_reg_reg);
 8159 %}
 8160 
 8161 
 8162 // Logical Shift Right by variable
 8163 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8164   match(Set dst (URShiftI dst shift));
 8165   effect(KILL cr);
 8166 
 8167   size(2);
 8168   format %{ "SHR    $dst,$shift" %}
 8169   opcode(0xD3, 0x5);  /* D3 /5 */
 8170   ins_encode( OpcP, RegOpc( dst ) );
 8171   ins_pipe( ialu_reg_reg );
 8172 %}
 8173 
 8174 
 8175 //----------Logical Instructions-----------------------------------------------
 8176 //----------Integer Logical Instructions---------------------------------------
 8177 // And Instructions
 8178 // And Register with Register
 8179 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8180   match(Set dst (AndI dst src));
 8181   effect(KILL cr);
 8182 
 8183   size(2);
 8184   format %{ "AND    $dst,$src" %}
 8185   opcode(0x23);
 8186   ins_encode( OpcP, RegReg( dst, src) );
 8187   ins_pipe( ialu_reg_reg );
 8188 %}
 8189 
 8190 // And Register with Immediate
 8191 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8192   match(Set dst (AndI dst src));
 8193   effect(KILL cr);
 8194 
 8195   format %{ "AND    $dst,$src" %}
 8196   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8197   // ins_encode( RegImm( dst, src) );
 8198   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8199   ins_pipe( ialu_reg );
 8200 %}
 8201 
 8202 // And Register with Memory
 8203 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8204   match(Set dst (AndI dst (LoadI src)));
 8205   effect(KILL cr);
 8206 
 8207   ins_cost(125);
 8208   format %{ "AND    $dst,$src" %}
 8209   opcode(0x23);
 8210   ins_encode( OpcP, RegMem( dst, src) );
 8211   ins_pipe( ialu_reg_mem );
 8212 %}
 8213 
 8214 // And Memory with Register
 8215 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8216   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8217   effect(KILL cr);
 8218 
 8219   ins_cost(150);
 8220   format %{ "AND    $dst,$src" %}
 8221   opcode(0x21);  /* Opcode 21 /r */
 8222   ins_encode( OpcP, RegMem( src, dst ) );
 8223   ins_pipe( ialu_mem_reg );
 8224 %}
 8225 
 8226 // And Memory with Immediate
 8227 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8228   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8229   effect(KILL cr);
 8230 
 8231   ins_cost(125);
 8232   format %{ "AND    $dst,$src" %}
 8233   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8234   // ins_encode( MemImm( dst, src) );
 8235   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8236   ins_pipe( ialu_mem_imm );
 8237 %}
 8238 
 8239 // BMI1 instructions
 8240 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8241   match(Set dst (AndI (XorI src1 minus_1) src2));
 8242   predicate(UseBMI1Instructions);
 8243   effect(KILL cr);
 8244 
 8245   format %{ "ANDNL  $dst, $src1, $src2" %}
 8246 
 8247   ins_encode %{
 8248     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8249   %}
 8250   ins_pipe(ialu_reg);
 8251 %}
 8252 
 8253 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8254   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8255   predicate(UseBMI1Instructions);
 8256   effect(KILL cr);
 8257 
 8258   ins_cost(125);
 8259   format %{ "ANDNL  $dst, $src1, $src2" %}
 8260 
 8261   ins_encode %{
 8262     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8263   %}
 8264   ins_pipe(ialu_reg_mem);
 8265 %}
 8266 
 8267 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8268   match(Set dst (AndI (SubI imm_zero src) src));
 8269   predicate(UseBMI1Instructions);
 8270   effect(KILL cr);
 8271 
 8272   format %{ "BLSIL  $dst, $src" %}
 8273 
 8274   ins_encode %{
 8275     __ blsil($dst$$Register, $src$$Register);
 8276   %}
 8277   ins_pipe(ialu_reg);
 8278 %}
 8279 
 8280 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8281   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8282   predicate(UseBMI1Instructions);
 8283   effect(KILL cr);
 8284 
 8285   ins_cost(125);
 8286   format %{ "BLSIL  $dst, $src" %}
 8287 
 8288   ins_encode %{
 8289     __ blsil($dst$$Register, $src$$Address);
 8290   %}
 8291   ins_pipe(ialu_reg_mem);
 8292 %}
 8293 
 8294 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8295 %{
 8296   match(Set dst (XorI (AddI src minus_1) src));
 8297   predicate(UseBMI1Instructions);
 8298   effect(KILL cr);
 8299 
 8300   format %{ "BLSMSKL $dst, $src" %}
 8301 
 8302   ins_encode %{
 8303     __ blsmskl($dst$$Register, $src$$Register);
 8304   %}
 8305 
 8306   ins_pipe(ialu_reg);
 8307 %}
 8308 
 8309 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8310 %{
 8311   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8312   predicate(UseBMI1Instructions);
 8313   effect(KILL cr);
 8314 
 8315   ins_cost(125);
 8316   format %{ "BLSMSKL $dst, $src" %}
 8317 
 8318   ins_encode %{
 8319     __ blsmskl($dst$$Register, $src$$Address);
 8320   %}
 8321 
 8322   ins_pipe(ialu_reg_mem);
 8323 %}
 8324 
 8325 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8326 %{
 8327   match(Set dst (AndI (AddI src minus_1) src) );
 8328   predicate(UseBMI1Instructions);
 8329   effect(KILL cr);
 8330 
 8331   format %{ "BLSRL  $dst, $src" %}
 8332 
 8333   ins_encode %{
 8334     __ blsrl($dst$$Register, $src$$Register);
 8335   %}
 8336 
 8337   ins_pipe(ialu_reg);
 8338 %}
 8339 
 8340 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8341 %{
 8342   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8343   predicate(UseBMI1Instructions);
 8344   effect(KILL cr);
 8345 
 8346   ins_cost(125);
 8347   format %{ "BLSRL  $dst, $src" %}
 8348 
 8349   ins_encode %{
 8350     __ blsrl($dst$$Register, $src$$Address);
 8351   %}
 8352 
 8353   ins_pipe(ialu_reg_mem);
 8354 %}
 8355 
 8356 // Or Instructions
 8357 // Or Register with Register
 8358 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8359   match(Set dst (OrI dst src));
 8360   effect(KILL cr);
 8361 
 8362   size(2);
 8363   format %{ "OR     $dst,$src" %}
 8364   opcode(0x0B);
 8365   ins_encode( OpcP, RegReg( dst, src) );
 8366   ins_pipe( ialu_reg_reg );
 8367 %}
 8368 
 8369 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8370   match(Set dst (OrI dst (CastP2X src)));
 8371   effect(KILL cr);
 8372 
 8373   size(2);
 8374   format %{ "OR     $dst,$src" %}
 8375   opcode(0x0B);
 8376   ins_encode( OpcP, RegReg( dst, src) );
 8377   ins_pipe( ialu_reg_reg );
 8378 %}
 8379 
 8380 
 8381 // Or Register with Immediate
 8382 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8383   match(Set dst (OrI dst src));
 8384   effect(KILL cr);
 8385 
 8386   format %{ "OR     $dst,$src" %}
 8387   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8388   // ins_encode( RegImm( dst, src) );
 8389   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8390   ins_pipe( ialu_reg );
 8391 %}
 8392 
 8393 // Or Register with Memory
 8394 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8395   match(Set dst (OrI dst (LoadI src)));
 8396   effect(KILL cr);
 8397 
 8398   ins_cost(125);
 8399   format %{ "OR     $dst,$src" %}
 8400   opcode(0x0B);
 8401   ins_encode( OpcP, RegMem( dst, src) );
 8402   ins_pipe( ialu_reg_mem );
 8403 %}
 8404 
 8405 // Or Memory with Register
 8406 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8407   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8408   effect(KILL cr);
 8409 
 8410   ins_cost(150);
 8411   format %{ "OR     $dst,$src" %}
 8412   opcode(0x09);  /* Opcode 09 /r */
 8413   ins_encode( OpcP, RegMem( src, dst ) );
 8414   ins_pipe( ialu_mem_reg );
 8415 %}
 8416 
 8417 // Or Memory with Immediate
 8418 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8419   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8420   effect(KILL cr);
 8421 
 8422   ins_cost(125);
 8423   format %{ "OR     $dst,$src" %}
 8424   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8425   // ins_encode( MemImm( dst, src) );
 8426   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8427   ins_pipe( ialu_mem_imm );
 8428 %}
 8429 
 8430 // ROL/ROR
 8431 // ROL expand
 8432 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8433   effect(USE_DEF dst, USE shift, KILL cr);
 8434 
 8435   format %{ "ROL    $dst, $shift" %}
 8436   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8437   ins_encode( OpcP, RegOpc( dst ));
 8438   ins_pipe( ialu_reg );
 8439 %}
 8440 
 8441 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8442   effect(USE_DEF dst, USE shift, KILL cr);
 8443 
 8444   format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
 8446   ins_encode( RegOpcImm(dst, shift) );
 8447   ins_pipe(ialu_reg);
 8448 %}
 8449 
 8450 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8451   effect(USE_DEF dst, USE shift, KILL cr);
 8452 
 8453   format %{ "ROL    $dst, $shift" %}
 8454   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8455   ins_encode(OpcP, RegOpc(dst));
 8456   ins_pipe( ialu_reg_reg );
 8457 %}
 8458 // end of ROL expand
 8459 
 8460 // ROL 32bit by one once
 8461 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8462   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8463 
 8464   expand %{
 8465     rolI_eReg_imm1(dst, lshift, cr);
 8466   %}
 8467 %}
 8468 
 8469 // ROL 32bit var by imm8 once
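// The predicate requires the two shift counts to sum to 0 mod 32, i.e. they are
// complementary, so (x << lshift) | (x >>> rshift) is a rotate left by lshift.
// (The rorI_eReg_i8 rule below uses the same check for rotate right.)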
 8470 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8471   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8472   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8473 
 8474   expand %{
 8475     rolI_eReg_imm8(dst, lshift, cr);
 8476   %}
 8477 %}
 8478 
 8479 // ROL 32bit var by var once
 8480 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8481   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8482 
 8483   expand %{
 8484     rolI_eReg_CL(dst, shift, cr);
 8485   %}
 8486 %}
 8487 
 8488 // ROL 32bit var by var once
 8489 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8490   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8491 
 8492   expand %{
 8493     rolI_eReg_CL(dst, shift, cr);
 8494   %}
 8495 %}
 8496 
 8497 // ROR expand
 8498 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8499   effect(USE_DEF dst, USE shift, KILL cr);
 8500 
 8501   format %{ "ROR    $dst, $shift" %}
 8502   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8503   ins_encode( OpcP, RegOpc( dst ) );
 8504   ins_pipe( ialu_reg );
 8505 %}
 8506 
 8507 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8508   effect (USE_DEF dst, USE shift, KILL cr);
 8509 
 8510   format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
 8512   ins_encode( RegOpcImm(dst, shift) );
 8513   ins_pipe( ialu_reg );
 8514 %}
 8515 
 8516 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8517   effect(USE_DEF dst, USE shift, KILL cr);
 8518 
 8519   format %{ "ROR    $dst, $shift" %}
 8520   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8521   ins_encode(OpcP, RegOpc(dst));
 8522   ins_pipe( ialu_reg_reg );
 8523 %}
 8524 // end of ROR expand
 8525 
// ROR 32bit by one once
 8527 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8528   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8529 
 8530   expand %{
 8531     rorI_eReg_imm1(dst, rshift, cr);
 8532   %}
 8533 %}
 8534 
 8535 // ROR 32bit by immI8 once
 8536 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8537   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8538   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8539 
 8540   expand %{
 8541     rorI_eReg_imm8(dst, rshift, cr);
 8542   %}
 8543 %}
 8544 
 8545 // ROR 32bit var by var once
 8546 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8547   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8548 
 8549   expand %{
 8550     rorI_eReg_CL(dst, shift, cr);
 8551   %}
 8552 %}
 8553 
 8554 // ROR 32bit var by var once
 8555 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8556   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8557 
 8558   expand %{
 8559     rorI_eReg_CL(dst, shift, cr);
 8560   %}
 8561 %}
 8562 
 8563 // Xor Instructions
 8564 // Xor Register with Register
 8565 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8566   match(Set dst (XorI dst src));
 8567   effect(KILL cr);
 8568 
 8569   size(2);
 8570   format %{ "XOR    $dst,$src" %}
 8571   opcode(0x33);
 8572   ins_encode( OpcP, RegReg( dst, src) );
 8573   ins_pipe( ialu_reg_reg );
 8574 %}
 8575 
 8576 // Xor Register with Immediate -1
 8577 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8578   match(Set dst (XorI dst imm));
 8579 
 8580   size(2);
 8581   format %{ "NOT    $dst" %}
 8582   ins_encode %{
 8583      __ notl($dst$$Register);
 8584   %}
 8585   ins_pipe( ialu_reg );
 8586 %}
 8587 
 8588 // Xor Register with Immediate
 8589 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8590   match(Set dst (XorI dst src));
 8591   effect(KILL cr);
 8592 
 8593   format %{ "XOR    $dst,$src" %}
 8594   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8595   // ins_encode( RegImm( dst, src) );
 8596   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8597   ins_pipe( ialu_reg );
 8598 %}
 8599 
 8600 // Xor Register with Memory
 8601 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8602   match(Set dst (XorI dst (LoadI src)));
 8603   effect(KILL cr);
 8604 
 8605   ins_cost(125);
 8606   format %{ "XOR    $dst,$src" %}
 8607   opcode(0x33);
 8608   ins_encode( OpcP, RegMem(dst, src) );
 8609   ins_pipe( ialu_reg_mem );
 8610 %}
 8611 
 8612 // Xor Memory with Register
 8613 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8614   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8615   effect(KILL cr);
 8616 
 8617   ins_cost(150);
 8618   format %{ "XOR    $dst,$src" %}
 8619   opcode(0x31);  /* Opcode 31 /r */
 8620   ins_encode( OpcP, RegMem( src, dst ) );
 8621   ins_pipe( ialu_mem_reg );
 8622 %}
 8623 
 8624 // Xor Memory with Immediate
 8625 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8626   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8627   effect(KILL cr);
 8628 
 8629   ins_cost(125);
 8630   format %{ "XOR    $dst,$src" %}
 8631   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8632   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8633   ins_pipe( ialu_mem_imm );
 8634 %}
 8635 
 8636 //----------Convert Int to Boolean---------------------------------------------
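// Conv2B turns a non-zero int or pointer into 1 and zero into 0. The NEG/ADC pair
// below does this without a branch: NEG dst sets CF exactly when the value is
// non-zero, and ADC dst,src then computes (-src) + src + CF = CF.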
 8637 
 8638 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8639   effect( DEF dst, USE src );
 8640   format %{ "MOV    $dst,$src" %}
 8641   ins_encode( enc_Copy( dst, src) );
 8642   ins_pipe( ialu_reg_reg );
 8643 %}
 8644 
 8645 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8646   effect( USE_DEF dst, USE src, KILL cr );
 8647 
 8648   size(4);
 8649   format %{ "NEG    $dst\n\t"
 8650             "ADC    $dst,$src" %}
 8651   ins_encode( neg_reg(dst),
 8652               OpcRegReg(0x13,dst,src) );
 8653   ins_pipe( ialu_reg_reg_long );
 8654 %}
 8655 
 8656 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8657   match(Set dst (Conv2B src));
 8658 
 8659   expand %{
 8660     movI_nocopy(dst,src);
 8661     ci2b(dst,src,cr);
 8662   %}
 8663 %}
 8664 
 8665 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8666   effect( DEF dst, USE src );
 8667   format %{ "MOV    $dst,$src" %}
 8668   ins_encode( enc_Copy( dst, src) );
 8669   ins_pipe( ialu_reg_reg );
 8670 %}
 8671 
 8672 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8673   effect( USE_DEF dst, USE src, KILL cr );
 8674   format %{ "NEG    $dst\n\t"
 8675             "ADC    $dst,$src" %}
 8676   ins_encode( neg_reg(dst),
 8677               OpcRegReg(0x13,dst,src) );
 8678   ins_pipe( ialu_reg_reg_long );
 8679 %}
 8680 
 8681 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8682   match(Set dst (Conv2B src));
 8683 
 8684   expand %{
 8685     movP_nocopy(dst,src);
 8686     cp2b(dst,src,cr);
 8687   %}
 8688 %}
 8689 
 8690 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8691   match(Set dst (CmpLTMask p q));
 8692   effect(KILL cr);
 8693   ins_cost(400);
 8694 
 8695   // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
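  // The sequence computes dst = (p < q) ? -1 : 0: SETlt writes 1 when p < q
  // (signed), and NEG turns that 1 into the all-ones mask.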
 8696   format %{ "XOR    $dst,$dst\n\t"
 8697             "CMP    $p,$q\n\t"
 8698             "SETlt  $dst\n\t"
 8699             "NEG    $dst" %}
 8700   ins_encode %{
 8701     Register Rp = $p$$Register;
 8702     Register Rq = $q$$Register;
 8703     Register Rd = $dst$$Register;
 8705     __ xorl(Rd, Rd);
 8706     __ cmpl(Rp, Rq);
 8707     __ setb(Assembler::less, Rd);
 8708     __ negl(Rd);
 8709   %}
 8710 
 8711   ins_pipe(pipe_slow);
 8712 %}
 8713 
 8714 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8715   match(Set dst (CmpLTMask dst zero));
 8716   effect(DEF dst, KILL cr);
 8717   ins_cost(100);
 8718 
 8719   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8720   ins_encode %{
    __ sarl($dst$$Register, 31);
 8722   %}
 8723   ins_pipe(ialu_reg);
 8724 %}
 8725 
 8726 /* better to save a register than avoid a branch */
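// The matched tree is p = (p - q) + ((p < q) ? y : 0); rather than materializing
// the -1/0 mask, the flags from the SUB drive a short branch around the ADD.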
 8727 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8728   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8729   effect(KILL cr);
 8730   ins_cost(400);
 8731   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8732             "JGE    done\n\t"
 8733             "ADD    $p,$y\n"
 8734             "done:  " %}
 8735   ins_encode %{
 8736     Register Rp = $p$$Register;
 8737     Register Rq = $q$$Register;
 8738     Register Ry = $y$$Register;
 8739     Label done;
 8740     __ subl(Rp, Rq);
 8741     __ jccb(Assembler::greaterEqual, done);
 8742     __ addl(Rp, Ry);
 8743     __ bind(done);
 8744   %}
 8745 
 8746   ins_pipe(pipe_cmplt);
 8747 %}
 8748 
 8749 /* better to save a register than avoid a branch */
 8750 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8751   match(Set y (AndI (CmpLTMask p q) y));
 8752   effect(KILL cr);
 8753 
 8754   ins_cost(300);
 8755 
 8756   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8757             "JLT      done\n\t"
 8758             "XORL     $y, $y\n"
 8759             "done:  " %}
 8760   ins_encode %{
 8761     Register Rp = $p$$Register;
 8762     Register Rq = $q$$Register;
 8763     Register Ry = $y$$Register;
 8764     Label done;
 8765     __ cmpl(Rp, Rq);
 8766     __ jccb(Assembler::less, done);
 8767     __ xorl(Ry, Ry);
 8768     __ bind(done);
 8769   %}
 8770 
 8771   ins_pipe(pipe_cmplt);
 8772 %}
 8773 
 8774 /* If I enable this, I encourage spilling in the inner loop of compress.
 8775 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8776   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8777 */
 8778 //----------Overflow Math Instructions-----------------------------------------
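// These rules only establish the condition codes: the arithmetic (or compare) is
// performed so that a later branch on the overflow condition can consume $cr,
// e.g. for the Math.addExact/subtractExact/multiplyExact intrinsics.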
 8779 
 8780 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8781 %{
 8782   match(Set cr (OverflowAddI op1 op2));
 8783   effect(DEF cr, USE_KILL op1, USE op2);
 8784 
 8785   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8786 
 8787   ins_encode %{
 8788     __ addl($op1$$Register, $op2$$Register);
 8789   %}
 8790   ins_pipe(ialu_reg_reg);
 8791 %}
 8792 
 8793 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8794 %{
 8795   match(Set cr (OverflowAddI op1 op2));
 8796   effect(DEF cr, USE_KILL op1, USE op2);
 8797 
 8798   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8799 
 8800   ins_encode %{
 8801     __ addl($op1$$Register, $op2$$constant);
 8802   %}
 8803   ins_pipe(ialu_reg_reg);
 8804 %}
 8805 
 8806 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8807 %{
 8808   match(Set cr (OverflowSubI op1 op2));
 8809 
 8810   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8811   ins_encode %{
 8812     __ cmpl($op1$$Register, $op2$$Register);
 8813   %}
 8814   ins_pipe(ialu_reg_reg);
 8815 %}
 8816 
 8817 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8818 %{
 8819   match(Set cr (OverflowSubI op1 op2));
 8820 
 8821   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8822   ins_encode %{
 8823     __ cmpl($op1$$Register, $op2$$constant);
 8824   %}
 8825   ins_pipe(ialu_reg_reg);
 8826 %}
 8827 
 8828 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8829 %{
 8830   match(Set cr (OverflowSubI zero op2));
 8831   effect(DEF cr, USE_KILL op2);
 8832 
 8833   format %{ "NEG    $op2\t# overflow check int" %}
 8834   ins_encode %{
 8835     __ negl($op2$$Register);
 8836   %}
 8837   ins_pipe(ialu_reg_reg);
 8838 %}
 8839 
 8840 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8841 %{
 8842   match(Set cr (OverflowMulI op1 op2));
 8843   effect(DEF cr, USE_KILL op1, USE op2);
 8844 
 8845   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8846   ins_encode %{
 8847     __ imull($op1$$Register, $op2$$Register);
 8848   %}
 8849   ins_pipe(ialu_reg_reg_alu0);
 8850 %}
 8851 
 8852 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8853 %{
 8854   match(Set cr (OverflowMulI op1 op2));
 8855   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8856 
 8857   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8858   ins_encode %{
 8859     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8860   %}
 8861   ins_pipe(ialu_reg_reg_alu0);
 8862 %}
 8863 
 8864 // Integer Absolute Instructions
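// Branchless absolute value: tmp = src >> 31 is 0 for non-negative src and -1 for
// negative src, and (src ^ tmp) - tmp then yields either src or -src.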
 8865 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8866 %{
 8867   match(Set dst (AbsI src));
 8868   effect(TEMP dst, TEMP tmp, KILL cr);
 8869   format %{ "movl $tmp, $src\n\t"
 8870             "sarl $tmp, 31\n\t"
 8871             "movl $dst, $src\n\t"
 8872             "xorl $dst, $tmp\n\t"
 8873             "subl $dst, $tmp\n"
 8874           %}
 8875   ins_encode %{
 8876     __ movl($tmp$$Register, $src$$Register);
 8877     __ sarl($tmp$$Register, 31);
 8878     __ movl($dst$$Register, $src$$Register);
 8879     __ xorl($dst$$Register, $tmp$$Register);
 8880     __ subl($dst$$Register, $tmp$$Register);
 8881   %}
 8882 
 8883   ins_pipe(ialu_reg_reg);
 8884 %}
 8885 
 8886 //----------Long Instructions------------------------------------------------
 8887 // Add Long Register with Register
 8888 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8889   match(Set dst (AddL dst src));
 8890   effect(KILL cr);
 8891   ins_cost(200);
 8892   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8893             "ADC    $dst.hi,$src.hi" %}
 8894   opcode(0x03, 0x13);
 8895   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8896   ins_pipe( ialu_reg_reg_long );
 8897 %}
 8898 
 8899 // Add Long Register with Immediate
 8900 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8901   match(Set dst (AddL dst src));
 8902   effect(KILL cr);
 8903   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8904             "ADC    $dst.hi,$src.hi" %}
 8905   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8906   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8907   ins_pipe( ialu_reg_long );
 8908 %}
 8909 
 8910 // Add Long Register with Memory
 8911 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8912   match(Set dst (AddL dst (LoadL mem)));
 8913   effect(KILL cr);
 8914   ins_cost(125);
 8915   format %{ "ADD    $dst.lo,$mem\n\t"
 8916             "ADC    $dst.hi,$mem+4" %}
 8917   opcode(0x03, 0x13);
 8918   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8919   ins_pipe( ialu_reg_long_mem );
 8920 %}
 8921 
 8922 // Subtract Long Register with Register.
 8923 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8924   match(Set dst (SubL dst src));
 8925   effect(KILL cr);
 8926   ins_cost(200);
 8927   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8928             "SBB    $dst.hi,$src.hi" %}
 8929   opcode(0x2B, 0x1B);
 8930   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8931   ins_pipe( ialu_reg_reg_long );
 8932 %}
 8933 
 8934 // Subtract Long Register with Immediate
 8935 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8936   match(Set dst (SubL dst src));
 8937   effect(KILL cr);
 8938   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8939             "SBB    $dst.hi,$src.hi" %}
 8940   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8941   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8942   ins_pipe( ialu_reg_long );
 8943 %}
 8944 
 8945 // Subtract Long Register with Memory
 8946 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8947   match(Set dst (SubL dst (LoadL mem)));
 8948   effect(KILL cr);
 8949   ins_cost(125);
 8950   format %{ "SUB    $dst.lo,$mem\n\t"
 8951             "SBB    $dst.hi,$mem+4" %}
 8952   opcode(0x2B, 0x1B);
 8953   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8954   ins_pipe( ialu_reg_long_mem );
 8955 %}
 8956 
 8957 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8958   match(Set dst (SubL zero dst));
 8959   effect(KILL cr);
 8960   ins_cost(300);
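// Two-word negate: NEG the high word, NEG the low word (which sets CF when the
// low word was non-zero), then subtract that borrow from the high word with SBB.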
 8961   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8962   ins_encode( neg_long(dst) );
 8963   ins_pipe( ialu_reg_reg_long );
 8964 %}
 8965 
 8966 // And Long Register with Register
 8967 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8968   match(Set dst (AndL dst src));
 8969   effect(KILL cr);
 8970   format %{ "AND    $dst.lo,$src.lo\n\t"
 8971             "AND    $dst.hi,$src.hi" %}
 8972   opcode(0x23,0x23);
 8973   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8974   ins_pipe( ialu_reg_reg_long );
 8975 %}
 8976 
 8977 // And Long Register with Immediate
 8978 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8979   match(Set dst (AndL dst src));
 8980   effect(KILL cr);
 8981   format %{ "AND    $dst.lo,$src.lo\n\t"
 8982             "AND    $dst.hi,$src.hi" %}
 8983   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8984   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8985   ins_pipe( ialu_reg_long );
 8986 %}
 8987 
 8988 // And Long Register with Memory
 8989 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8990   match(Set dst (AndL dst (LoadL mem)));
 8991   effect(KILL cr);
 8992   ins_cost(125);
 8993   format %{ "AND    $dst.lo,$mem\n\t"
 8994             "AND    $dst.hi,$mem+4" %}
 8995   opcode(0x23, 0x23);
 8996   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8997   ins_pipe( ialu_reg_long_mem );
 8998 %}
 8999 
 9000 // BMI1 instructions
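// The BLSI/BLSMSK/BLSR rules below synthesize the 64-bit operation from two 32-bit
// ones: the instruction is applied to the low word first, and the flags it produces
// indicate whether the lowest set bit lies in that word; only when it does not is
// the instruction repeated on the high word.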
 9001 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 9002   match(Set dst (AndL (XorL src1 minus_1) src2));
 9003   predicate(UseBMI1Instructions);
 9004   effect(KILL cr, TEMP dst);
 9005 
 9006   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 9007             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 9008          %}
 9009 
 9010   ins_encode %{
 9011     Register Rdst = $dst$$Register;
 9012     Register Rsrc1 = $src1$$Register;
 9013     Register Rsrc2 = $src2$$Register;
 9014     __ andnl(Rdst, Rsrc1, Rsrc2);
 9015     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 9016   %}
 9017   ins_pipe(ialu_reg_reg_long);
 9018 %}
 9019 
 9020 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 9021   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 9022   predicate(UseBMI1Instructions);
 9023   effect(KILL cr, TEMP dst);
 9024 
 9025   ins_cost(125);
 9026   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9027             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9028          %}
 9029 
 9030   ins_encode %{
 9031     Register Rdst = $dst$$Register;
 9032     Register Rsrc1 = $src1$$Register;
 9033     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9034 
 9035     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9036     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9037   %}
 9038   ins_pipe(ialu_reg_mem);
 9039 %}
 9040 
 9041 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9042   match(Set dst (AndL (SubL imm_zero src) src));
 9043   predicate(UseBMI1Instructions);
 9044   effect(KILL cr, TEMP dst);
 9045 
 9046   format %{ "MOVL   $dst.hi, 0\n\t"
 9047             "BLSIL  $dst.lo, $src.lo\n\t"
 9048             "JNZ    done\n\t"
 9049             "BLSIL  $dst.hi, $src.hi\n"
 9050             "done:"
 9051          %}
 9052 
 9053   ins_encode %{
 9054     Label done;
 9055     Register Rdst = $dst$$Register;
 9056     Register Rsrc = $src$$Register;
 9057     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9058     __ blsil(Rdst, Rsrc);
 9059     __ jccb(Assembler::notZero, done);
 9060     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9061     __ bind(done);
 9062   %}
 9063   ins_pipe(ialu_reg);
 9064 %}
 9065 
 9066 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9067   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9068   predicate(UseBMI1Instructions);
 9069   effect(KILL cr, TEMP dst);
 9070 
 9071   ins_cost(125);
 9072   format %{ "MOVL   $dst.hi, 0\n\t"
 9073             "BLSIL  $dst.lo, $src\n\t"
 9074             "JNZ    done\n\t"
 9075             "BLSIL  $dst.hi, $src+4\n"
 9076             "done:"
 9077          %}
 9078 
 9079   ins_encode %{
 9080     Label done;
 9081     Register Rdst = $dst$$Register;
 9082     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9083 
 9084     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9085     __ blsil(Rdst, $src$$Address);
 9086     __ jccb(Assembler::notZero, done);
 9087     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9088     __ bind(done);
 9089   %}
 9090   ins_pipe(ialu_reg_mem);
 9091 %}
 9092 
 9093 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9094 %{
 9095   match(Set dst (XorL (AddL src minus_1) src));
 9096   predicate(UseBMI1Instructions);
 9097   effect(KILL cr, TEMP dst);
 9098 
 9099   format %{ "MOVL    $dst.hi, 0\n\t"
 9100             "BLSMSKL $dst.lo, $src.lo\n\t"
 9101             "JNC     done\n\t"
 9102             "BLSMSKL $dst.hi, $src.hi\n"
 9103             "done:"
 9104          %}
 9105 
 9106   ins_encode %{
 9107     Label done;
 9108     Register Rdst = $dst$$Register;
 9109     Register Rsrc = $src$$Register;
 9110     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9111     __ blsmskl(Rdst, Rsrc);
 9112     __ jccb(Assembler::carryClear, done);
 9113     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9114     __ bind(done);
 9115   %}
 9116 
 9117   ins_pipe(ialu_reg);
 9118 %}
 9119 
 9120 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9121 %{
 9122   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9123   predicate(UseBMI1Instructions);
 9124   effect(KILL cr, TEMP dst);
 9125 
 9126   ins_cost(125);
 9127   format %{ "MOVL    $dst.hi, 0\n\t"
 9128             "BLSMSKL $dst.lo, $src\n\t"
 9129             "JNC     done\n\t"
 9130             "BLSMSKL $dst.hi, $src+4\n"
 9131             "done:"
 9132          %}
 9133 
 9134   ins_encode %{
 9135     Label done;
 9136     Register Rdst = $dst$$Register;
 9137     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9138 
 9139     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9140     __ blsmskl(Rdst, $src$$Address);
 9141     __ jccb(Assembler::carryClear, done);
 9142     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9143     __ bind(done);
 9144   %}
 9145 
 9146   ins_pipe(ialu_reg_mem);
 9147 %}
 9148 
 9149 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9150 %{
 9151   match(Set dst (AndL (AddL src minus_1) src) );
 9152   predicate(UseBMI1Instructions);
 9153   effect(KILL cr, TEMP dst);
 9154 
 9155   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9156             "BLSRL  $dst.lo, $src.lo\n\t"
 9157             "JNC    done\n\t"
 9158             "BLSRL  $dst.hi, $src.hi\n"
 9159             "done:"
 9160   %}
 9161 
 9162   ins_encode %{
 9163     Label done;
 9164     Register Rdst = $dst$$Register;
 9165     Register Rsrc = $src$$Register;
 9166     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9167     __ blsrl(Rdst, Rsrc);
 9168     __ jccb(Assembler::carryClear, done);
 9169     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9170     __ bind(done);
 9171   %}
 9172 
 9173   ins_pipe(ialu_reg);
 9174 %}
 9175 
 9176 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9177 %{
 9178   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9179   predicate(UseBMI1Instructions);
 9180   effect(KILL cr, TEMP dst);
 9181 
 9182   ins_cost(125);
 9183   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9184             "BLSRL  $dst.lo, $src\n\t"
 9185             "JNC    done\n\t"
 9186             "BLSRL  $dst.hi, $src+4\n"
 9187             "done:"
 9188   %}
 9189 
 9190   ins_encode %{
 9191     Label done;
 9192     Register Rdst = $dst$$Register;
 9193     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9194     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9195     __ blsrl(Rdst, $src$$Address);
 9196     __ jccb(Assembler::carryClear, done);
 9197     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9198     __ bind(done);
 9199   %}
 9200 
 9201   ins_pipe(ialu_reg_mem);
 9202 %}
 9203 
 9204 // Or Long Register with Register
 9205 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9206   match(Set dst (OrL dst src));
 9207   effect(KILL cr);
 9208   format %{ "OR     $dst.lo,$src.lo\n\t"
 9209             "OR     $dst.hi,$src.hi" %}
 9210   opcode(0x0B,0x0B);
 9211   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9212   ins_pipe( ialu_reg_reg_long );
 9213 %}
 9214 
 9215 // Or Long Register with Immediate
 9216 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9217   match(Set dst (OrL dst src));
 9218   effect(KILL cr);
 9219   format %{ "OR     $dst.lo,$src.lo\n\t"
 9220             "OR     $dst.hi,$src.hi" %}
 9221   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9222   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9223   ins_pipe( ialu_reg_long );
 9224 %}
 9225 
 9226 // Or Long Register with Memory
 9227 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9228   match(Set dst (OrL dst (LoadL mem)));
 9229   effect(KILL cr);
 9230   ins_cost(125);
 9231   format %{ "OR     $dst.lo,$mem\n\t"
 9232             "OR     $dst.hi,$mem+4" %}
 9233   opcode(0x0B,0x0B);
 9234   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9235   ins_pipe( ialu_reg_long_mem );
 9236 %}
 9237 
 9238 // Xor Long Register with Register
 9239 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9240   match(Set dst (XorL dst src));
 9241   effect(KILL cr);
 9242   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9243             "XOR    $dst.hi,$src.hi" %}
 9244   opcode(0x33,0x33);
 9245   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9246   ins_pipe( ialu_reg_reg_long );
 9247 %}
 9248 
 9249 // Xor Long Register with Immediate -1
 9250 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9251   match(Set dst (XorL dst imm));
 9252   format %{ "NOT    $dst.lo\n\t"
 9253             "NOT    $dst.hi" %}
 9254   ins_encode %{
 9255      __ notl($dst$$Register);
 9256      __ notl(HIGH_FROM_LOW($dst$$Register));
 9257   %}
 9258   ins_pipe( ialu_reg_long );
 9259 %}
 9260 
 9261 // Xor Long Register with Immediate
 9262 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9263   match(Set dst (XorL dst src));
 9264   effect(KILL cr);
 9265   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9266             "XOR    $dst.hi,$src.hi" %}
 9267   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9268   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9269   ins_pipe( ialu_reg_long );
 9270 %}
 9271 
 9272 // Xor Long Register with Memory
 9273 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9274   match(Set dst (XorL dst (LoadL mem)));
 9275   effect(KILL cr);
 9276   ins_cost(125);
 9277   format %{ "XOR    $dst.lo,$mem\n\t"
 9278             "XOR    $dst.hi,$mem+4" %}
 9279   opcode(0x33,0x33);
 9280   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9281   ins_pipe( ialu_reg_long_mem );
 9282 %}
 9283 
 9284 // Shift Left Long by 1
 9285 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9286   predicate(UseNewLongLShift);
 9287   match(Set dst (LShiftL dst cnt));
 9288   effect(KILL cr);
 9289   ins_cost(100);
 9290   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9291             "ADC    $dst.hi,$dst.hi" %}
 9292   ins_encode %{
 9293     __ addl($dst$$Register,$dst$$Register);
 9294     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9295   %}
 9296   ins_pipe( ialu_reg_long );
 9297 %}
 9298 
 9299 // Shift Left Long by 2
 9300 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9301   predicate(UseNewLongLShift);
 9302   match(Set dst (LShiftL dst cnt));
 9303   effect(KILL cr);
 9304   ins_cost(100);
 9305   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9306             "ADC    $dst.hi,$dst.hi\n\t"
 9307             "ADD    $dst.lo,$dst.lo\n\t"
 9308             "ADC    $dst.hi,$dst.hi" %}
 9309   ins_encode %{
 9310     __ addl($dst$$Register,$dst$$Register);
 9311     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9312     __ addl($dst$$Register,$dst$$Register);
 9313     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9314   %}
 9315   ins_pipe( ialu_reg_long );
 9316 %}
 9317 
 9318 // Shift Left Long by 3
 9319 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9320   predicate(UseNewLongLShift);
 9321   match(Set dst (LShiftL dst cnt));
 9322   effect(KILL cr);
 9323   ins_cost(100);
 9324   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9325             "ADC    $dst.hi,$dst.hi\n\t"
 9326             "ADD    $dst.lo,$dst.lo\n\t"
 9327             "ADC    $dst.hi,$dst.hi\n\t"
 9328             "ADD    $dst.lo,$dst.lo\n\t"
 9329             "ADC    $dst.hi,$dst.hi" %}
 9330   ins_encode %{
 9331     __ addl($dst$$Register,$dst$$Register);
 9332     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9333     __ addl($dst$$Register,$dst$$Register);
 9334     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9335     __ addl($dst$$Register,$dst$$Register);
 9336     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9337   %}
 9338   ins_pipe( ialu_reg_long );
 9339 %}
 9340 
 9341 // Shift Left Long by 1-31
 9342 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9343   match(Set dst (LShiftL dst cnt));
 9344   effect(KILL cr);
 9345   ins_cost(200);
 9346   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9347             "SHL    $dst.lo,$cnt" %}
 9348   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9349   ins_encode( move_long_small_shift(dst,cnt) );
 9350   ins_pipe( ialu_reg_long );
 9351 %}
 9352 
 9353 // Shift Left Long by 32-63
 9354 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9355   match(Set dst (LShiftL dst cnt));
 9356   effect(KILL cr);
 9357   ins_cost(300);
 9358   format %{ "MOV    $dst.hi,$dst.lo\n"
 9359           "\tSHL    $dst.hi,$cnt-32\n"
 9360           "\tXOR    $dst.lo,$dst.lo" %}
 9361   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9362   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9363   ins_pipe( ialu_reg_long );
 9364 %}
 9365 
 9366 // Shift Left Long by variable
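// The variable-count rules below rely on 32-bit SHL/SHR/SAR and SHLD/SHRD
// masking the CL count to its low five bits, so counts of 32..63 need an
// explicit word swap first.  TEST $shift,32 picks the case: counts below 32
// jump straight to the SHLD/SHL (or SHRD/SHR) pair, while counts of 32 and up
// first move the low word into the high word (or the reverse for right
// shifts), clear or sign-fill the vacated word, and then let the masked
// (count & 31) shift finish the job.  Illustrative example (not generated
// code), a left shift by 40:
//   MOV hi,lo ; XOR lo,lo ; SHLD hi,lo,8 ; SHL lo,8   // 40 & 31 == 8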
 9367 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9368   match(Set dst (LShiftL dst shift));
 9369   effect(KILL cr);
 9370   ins_cost(500+200);
 9371   size(17);
 9372   format %{ "TEST   $shift,32\n\t"
 9373             "JEQ,s  small\n\t"
 9374             "MOV    $dst.hi,$dst.lo\n\t"
 9375             "XOR    $dst.lo,$dst.lo\n"
 9376     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9377             "SHL    $dst.lo,$shift" %}
 9378   ins_encode( shift_left_long( dst, shift ) );
 9379   ins_pipe( pipe_slow );
 9380 %}
 9381 
 9382 // Shift Right Long by 1-31
 9383 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9384   match(Set dst (URShiftL dst cnt));
 9385   effect(KILL cr);
 9386   ins_cost(200);
 9387   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9388             "SHR    $dst.hi,$cnt" %}
 9389   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9390   ins_encode( move_long_small_shift(dst,cnt) );
 9391   ins_pipe( ialu_reg_long );
 9392 %}
 9393 
 9394 // Shift Right Long by 32-63
 9395 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9396   match(Set dst (URShiftL dst cnt));
 9397   effect(KILL cr);
 9398   ins_cost(300);
 9399   format %{ "MOV    $dst.lo,$dst.hi\n"
 9400           "\tSHR    $dst.lo,$cnt-32\n"
 9401           "\tXOR    $dst.hi,$dst.hi" %}
 9402   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9403   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9404   ins_pipe( ialu_reg_long );
 9405 %}
 9406 
 9407 // Shift Right Long by variable
 9408 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9409   match(Set dst (URShiftL dst shift));
 9410   effect(KILL cr);
 9411   ins_cost(600);
 9412   size(17);
 9413   format %{ "TEST   $shift,32\n\t"
 9414             "JEQ,s  small\n\t"
 9415             "MOV    $dst.lo,$dst.hi\n\t"
 9416             "XOR    $dst.hi,$dst.hi\n"
 9417     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9418             "SHR    $dst.hi,$shift" %}
 9419   ins_encode( shift_right_long( dst, shift ) );
 9420   ins_pipe( pipe_slow );
 9421 %}
 9422 
// Shift Right arithmetic Long by 1-31
 9424 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9425   match(Set dst (RShiftL dst cnt));
 9426   effect(KILL cr);
 9427   ins_cost(200);
 9428   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9429             "SAR    $dst.hi,$cnt" %}
 9430   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9431   ins_encode( move_long_small_shift(dst,cnt) );
 9432   ins_pipe( ialu_reg_long );
 9433 %}
 9434 
// Shift Right arithmetic Long by 32-63
 9436 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9437   match(Set dst (RShiftL dst cnt));
 9438   effect(KILL cr);
 9439   ins_cost(300);
 9440   format %{ "MOV    $dst.lo,$dst.hi\n"
 9441           "\tSAR    $dst.lo,$cnt-32\n"
 9442           "\tSAR    $dst.hi,31" %}
 9443   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9444   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9445   ins_pipe( ialu_reg_long );
 9446 %}
 9447 
 9448 // Shift Right arithmetic Long by variable
 9449 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9450   match(Set dst (RShiftL dst shift));
 9451   effect(KILL cr);
 9452   ins_cost(600);
 9453   size(18);
 9454   format %{ "TEST   $shift,32\n\t"
 9455             "JEQ,s  small\n\t"
 9456             "MOV    $dst.lo,$dst.hi\n\t"
 9457             "SAR    $dst.hi,31\n"
 9458     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9459             "SAR    $dst.hi,$shift" %}
 9460   ins_encode( shift_right_arith_long( dst, shift ) );
 9461   ins_pipe( pipe_slow );
 9462 %}
 9463 
 9464 
 9465 //----------Double Instructions------------------------------------------------
 9466 // Double Math
 9467 
 9468 // Compare & branch
 9469 
// P6 version of double compare, sets condition codes in EFLAGS
 9471 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9472   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9473   match(Set cr (CmpD src1 src2));
 9474   effect(KILL rax);
 9475   ins_cost(150);
 9476   format %{ "FLD    $src1\n\t"
 9477             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9478             "JNP    exit\n\t"
 9479             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9480             "SAHF\n"
 9481      "exit:\tNOP               // avoid branch to branch" %}
 9482   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9483   ins_encode( Push_Reg_DPR(src1),
 9484               OpcP, RegOpc(src2),
 9485               cmpF_P6_fixup );
 9486   ins_pipe( pipe_slow );
 9487 %}
 9488 
 9489 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9490   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9491   match(Set cr (CmpD src1 src2));
 9492   ins_cost(150);
 9493   format %{ "FLD    $src1\n\t"
 9494             "FUCOMIP ST,$src2  // P6 instruction" %}
 9495   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9496   ins_encode( Push_Reg_DPR(src1),
 9497               OpcP, RegOpc(src2));
 9498   ins_pipe( pipe_slow );
 9499 %}
 9500 
 9501 // Compare & branch
 9502 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9503   predicate(UseSSE<=1);
 9504   match(Set cr (CmpD src1 src2));
 9505   effect(KILL rax);
 9506   ins_cost(200);
 9507   format %{ "FLD    $src1\n\t"
 9508             "FCOMp  $src2\n\t"
 9509             "FNSTSW AX\n\t"
 9510             "TEST   AX,0x400\n\t"
 9511             "JZ,s   flags\n\t"
 9512             "MOV    AH,1\t# unordered treat as LT\n"
 9513     "flags:\tSAHF" %}
 9514   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9515   ins_encode( Push_Reg_DPR(src1),
 9516               OpcP, RegOpc(src2),
 9517               fpu_flags);
 9518   ins_pipe( pipe_slow );
 9519 %}
 9520 
 9521 // Compare vs zero into -1,0,1
 9522 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9523   predicate(UseSSE<=1);
 9524   match(Set dst (CmpD3 src1 zero));
 9525   effect(KILL cr, KILL rax);
 9526   ins_cost(280);
 9527   format %{ "FTSTD  $dst,$src1" %}
 9528   opcode(0xE4, 0xD9);
 9529   ins_encode( Push_Reg_DPR(src1),
 9530               OpcS, OpcP, PopFPU,
 9531               CmpF_Result(dst));
 9532   ins_pipe( pipe_slow );
 9533 %}
 9534 
 9535 // Compare into -1,0,1
 9536 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9537   predicate(UseSSE<=1);
 9538   match(Set dst (CmpD3 src1 src2));
 9539   effect(KILL cr, KILL rax);
 9540   ins_cost(300);
 9541   format %{ "FCMPD  $dst,$src1,$src2" %}
 9542   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9543   ins_encode( Push_Reg_DPR(src1),
 9544               OpcP, RegOpc(src2),
 9545               CmpF_Result(dst));
 9546   ins_pipe( pipe_slow );
 9547 %}
 9548 
// double compare and set condition codes in EFLAGS by XMM regs
 9550 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9551   predicate(UseSSE>=2);
 9552   match(Set cr (CmpD src1 src2));
 9553   ins_cost(145);
 9554   format %{ "UCOMISD $src1,$src2\n\t"
 9555             "JNP,s   exit\n\t"
 9556             "PUSHF\t# saw NaN, set CF\n\t"
 9557             "AND     [rsp], #0xffffff2b\n\t"
 9558             "POPF\n"
 9559     "exit:" %}
 9560   ins_encode %{
 9561     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9562     emit_cmpfp_fixup(_masm);
 9563   %}
 9564   ins_pipe( pipe_slow );
 9565 %}
 9566 
 9567 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9568   predicate(UseSSE>=2);
 9569   match(Set cr (CmpD src1 src2));
 9570   ins_cost(100);
 9571   format %{ "UCOMISD $src1,$src2" %}
 9572   ins_encode %{
 9573     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9574   %}
 9575   ins_pipe( pipe_slow );
 9576 %}
 9577 
// double compare and set condition codes in EFLAGS by XMM regs
 9579 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9580   predicate(UseSSE>=2);
 9581   match(Set cr (CmpD src1 (LoadD src2)));
 9582   ins_cost(145);
 9583   format %{ "UCOMISD $src1,$src2\n\t"
 9584             "JNP,s   exit\n\t"
 9585             "PUSHF\t# saw NaN, set CF\n\t"
 9586             "AND     [rsp], #0xffffff2b\n\t"
 9587             "POPF\n"
 9588     "exit:" %}
 9589   ins_encode %{
 9590     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9591     emit_cmpfp_fixup(_masm);
 9592   %}
 9593   ins_pipe( pipe_slow );
 9594 %}
 9595 
 9596 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9597   predicate(UseSSE>=2);
 9598   match(Set cr (CmpD src1 (LoadD src2)));
 9599   ins_cost(100);
 9600   format %{ "UCOMISD $src1,$src2" %}
 9601   ins_encode %{
 9602     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9603   %}
 9604   ins_pipe( pipe_slow );
 9605 %}
 9606 
 9607 // Compare into -1,0,1 in XMM
 9608 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9609   predicate(UseSSE>=2);
 9610   match(Set dst (CmpD3 src1 src2));
 9611   effect(KILL cr);
 9612   ins_cost(255);
 9613   format %{ "UCOMISD $src1, $src2\n\t"
 9614             "MOV     $dst, #-1\n\t"
 9615             "JP,s    done\n\t"
 9616             "JB,s    done\n\t"
 9617             "SETNE   $dst\n\t"
 9618             "MOVZB   $dst, $dst\n"
 9619     "done:" %}
 9620   ins_encode %{
 9621     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9622     emit_cmpfp3(_masm, $dst$$Register);
 9623   %}
 9624   ins_pipe( pipe_slow );
 9625 %}
 9626 
 9627 // Compare into -1,0,1 in XMM and memory
 9628 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9629   predicate(UseSSE>=2);
 9630   match(Set dst (CmpD3 src1 (LoadD src2)));
 9631   effect(KILL cr);
 9632   ins_cost(275);
 9633   format %{ "UCOMISD $src1, $src2\n\t"
 9634             "MOV     $dst, #-1\n\t"
 9635             "JP,s    done\n\t"
 9636             "JB,s    done\n\t"
 9637             "SETNE   $dst\n\t"
 9638             "MOVZB   $dst, $dst\n"
 9639     "done:" %}
 9640   ins_encode %{
 9641     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9642     emit_cmpfp3(_masm, $dst$$Register);
 9643   %}
 9644   ins_pipe( pipe_slow );
 9645 %}
 9646 
 9647 
 9648 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9649   predicate (UseSSE <=1);
 9650   match(Set dst (SubD dst src));
 9651 
 9652   format %{ "FLD    $src\n\t"
 9653             "DSUBp  $dst,ST" %}
 9654   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9655   ins_cost(150);
 9656   ins_encode( Push_Reg_DPR(src),
 9657               OpcP, RegOpc(dst) );
 9658   ins_pipe( fpu_reg_reg );
 9659 %}
 9660 
 9661 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9662   predicate (UseSSE <=1);
 9663   match(Set dst (RoundDouble (SubD src1 src2)));
 9664   ins_cost(250);
 9665 
 9666   format %{ "FLD    $src2\n\t"
 9667             "DSUB   ST,$src1\n\t"
 9668             "FSTP_D $dst\t# D-round" %}
 9669   opcode(0xD8, 0x5);
 9670   ins_encode( Push_Reg_DPR(src2),
 9671               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9672   ins_pipe( fpu_mem_reg_reg );
 9673 %}
 9674 
 9675 
 9676 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9677   predicate (UseSSE <=1);
 9678   match(Set dst (SubD dst (LoadD src)));
 9679   ins_cost(150);
 9680 
 9681   format %{ "FLD    $src\n\t"
 9682             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9684   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9685               OpcP, RegOpc(dst) );
 9686   ins_pipe( fpu_reg_mem );
 9687 %}
 9688 
 9689 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9690   predicate (UseSSE<=1);
 9691   match(Set dst (AbsD src));
 9692   ins_cost(100);
 9693   format %{ "FABS" %}
 9694   opcode(0xE1, 0xD9);
 9695   ins_encode( OpcS, OpcP );
 9696   ins_pipe( fpu_reg_reg );
 9697 %}
 9698 
 9699 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9700   predicate(UseSSE<=1);
 9701   match(Set dst (NegD src));
 9702   ins_cost(100);
 9703   format %{ "FCHS" %}
 9704   opcode(0xE0, 0xD9);
 9705   ins_encode( OpcS, OpcP );
 9706   ins_pipe( fpu_reg_reg );
 9707 %}
 9708 
 9709 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9710   predicate(UseSSE<=1);
 9711   match(Set dst (AddD dst src));
 9712   format %{ "FLD    $src\n\t"
 9713             "DADD   $dst,ST" %}
 9714   size(4);
 9715   ins_cost(150);
 9716   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9717   ins_encode( Push_Reg_DPR(src),
 9718               OpcP, RegOpc(dst) );
 9719   ins_pipe( fpu_reg_reg );
 9720 %}
 9721 
 9722 
 9723 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9724   predicate(UseSSE<=1);
 9725   match(Set dst (RoundDouble (AddD src1 src2)));
 9726   ins_cost(250);
 9727 
 9728   format %{ "FLD    $src2\n\t"
 9729             "DADD   ST,$src1\n\t"
 9730             "FSTP_D $dst\t# D-round" %}
 9731   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9732   ins_encode( Push_Reg_DPR(src2),
 9733               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9734   ins_pipe( fpu_mem_reg_reg );
 9735 %}
 9736 
 9737 
 9738 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9739   predicate(UseSSE<=1);
 9740   match(Set dst (AddD dst (LoadD src)));
 9741   ins_cost(150);
 9742 
 9743   format %{ "FLD    $src\n\t"
 9744             "DADDp  $dst,ST" %}
 9745   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9746   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9747               OpcP, RegOpc(dst) );
 9748   ins_pipe( fpu_reg_mem );
 9749 %}
 9750 
 9751 // add-to-memory
 9752 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9753   predicate(UseSSE<=1);
 9754   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9755   ins_cost(150);
 9756 
 9757   format %{ "FLD_D  $dst\n\t"
 9758             "DADD   ST,$src\n\t"
 9759             "FST_D  $dst" %}
 9760   opcode(0xDD, 0x0);
 9761   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9762               Opcode(0xD8), RegOpc(src),
 9763               set_instruction_start,
 9764               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9765   ins_pipe( fpu_reg_mem );
 9766 %}
 9767 
 9768 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9769   predicate(UseSSE<=1);
 9770   match(Set dst (AddD dst con));
 9771   ins_cost(125);
 9772   format %{ "FLD1\n\t"
 9773             "DADDp  $dst,ST" %}
 9774   ins_encode %{
 9775     __ fld1();
 9776     __ faddp($dst$$reg);
 9777   %}
 9778   ins_pipe(fpu_reg);
 9779 %}
 9780 
 9781 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9782   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9783   match(Set dst (AddD dst con));
 9784   ins_cost(200);
 9785   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9786             "DADDp  $dst,ST" %}
 9787   ins_encode %{
 9788     __ fld_d($constantaddress($con));
 9789     __ faddp($dst$$reg);
 9790   %}
 9791   ins_pipe(fpu_reg_mem);
 9792 %}
 9793 
 9794 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9795   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9796   match(Set dst (RoundDouble (AddD src con)));
 9797   ins_cost(200);
 9798   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9799             "DADD   ST,$src\n\t"
 9800             "FSTP_D $dst\t# D-round" %}
 9801   ins_encode %{
 9802     __ fld_d($constantaddress($con));
 9803     __ fadd($src$$reg);
 9804     __ fstp_d(Address(rsp, $dst$$disp));
 9805   %}
 9806   ins_pipe(fpu_mem_reg_con);
 9807 %}
 9808 
 9809 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9810   predicate(UseSSE<=1);
 9811   match(Set dst (MulD dst src));
 9812   format %{ "FLD    $src\n\t"
 9813             "DMULp  $dst,ST" %}
 9814   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9815   ins_cost(150);
 9816   ins_encode( Push_Reg_DPR(src),
 9817               OpcP, RegOpc(dst) );
 9818   ins_pipe( fpu_reg_reg );
 9819 %}
 9820 
 9821 // Strict FP instruction biases argument before multiply then
 9822 // biases result to avoid double rounding of subnormals.
 9823 //
 9824 // scale arg1 by multiplying arg1 by 2^(-15360)
 9825 // load arg2
 9826 // multiply scaled arg1 by arg2
 9827 // rescale product by 2^(15360)
 9828 //
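// Written out, the emitted sequence computes
//   ((arg1 * 2^-15360) * arg2) * 2^15360  ==  arg1 * arg2
// The two power-of-two bias factors cancel exactly; 15360 == 16382 - 1022, so
// the down-scaling moves the double underflow threshold onto the extended-
// precision one and a result that would be a subnormal double is rounded just
// once, avoiding the double rounding mentioned above.  bias1/bias2 are the
// StubRoutines::x86 constants named in the format string below.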
 9829 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9830   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9831   match(Set dst (MulD dst src));
 9832   ins_cost(1);   // Select this instruction for all FP double multiplies
 9833 
 9834   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9835             "DMULp  $dst,ST\n\t"
 9836             "FLD    $src\n\t"
 9837             "DMULp  $dst,ST\n\t"
 9838             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9839             "DMULp  $dst,ST\n\t" %}
 9840   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9841   ins_encode( strictfp_bias1(dst),
 9842               Push_Reg_DPR(src),
 9843               OpcP, RegOpc(dst),
 9844               strictfp_bias2(dst) );
 9845   ins_pipe( fpu_reg_reg );
 9846 %}
 9847 
 9848 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9849   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9850   match(Set dst (MulD dst con));
 9851   ins_cost(200);
 9852   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9853             "DMULp  $dst,ST" %}
 9854   ins_encode %{
 9855     __ fld_d($constantaddress($con));
 9856     __ fmulp($dst$$reg);
 9857   %}
 9858   ins_pipe(fpu_reg_mem);
 9859 %}
 9860 
 9861 
 9862 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9863   predicate( UseSSE<=1 );
 9864   match(Set dst (MulD dst (LoadD src)));
 9865   ins_cost(200);
 9866   format %{ "FLD_D  $src\n\t"
 9867             "DMULp  $dst,ST" %}
 9868   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9869   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9870               OpcP, RegOpc(dst) );
 9871   ins_pipe( fpu_reg_mem );
 9872 %}
 9873 
 9874 //
 9875 // Cisc-alternate to reg-reg multiply
 9876 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9877   predicate( UseSSE<=1 );
 9878   match(Set dst (MulD src (LoadD mem)));
 9879   ins_cost(250);
 9880   format %{ "FLD_D  $mem\n\t"
 9881             "DMUL   ST,$src\n\t"
 9882             "FSTP_D $dst" %}
 9883   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9884   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9885               OpcReg_FPR(src),
 9886               Pop_Reg_DPR(dst) );
 9887   ins_pipe( fpu_reg_reg_mem );
 9888 %}
 9889 
 9890 
 9891 // MACRO3 -- addDPR a mulDPR
 9892 // This instruction is a '2-address' instruction in that the result goes
 9893 // back to src2.  This eliminates a move from the macro; possibly the
 9894 // register allocator will have to add it back (and maybe not).
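// Illustrative only: this pattern corresponds to an accumulate such as the
// Java statement  acc += a * b;  (ideal subtree AddD (MulD a b) acc), covered
// here by a single FLD/DMUL/DADDp sequence instead of a separate multiply
// whose result would then have to be moved and added.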
 9895 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9896   predicate( UseSSE<=1 );
 9897   match(Set src2 (AddD (MulD src0 src1) src2));
 9898   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9899             "DMUL   ST,$src1\n\t"
 9900             "DADDp  $src2,ST" %}
 9901   ins_cost(250);
 9902   opcode(0xDD); /* LoadD DD /0 */
 9903   ins_encode( Push_Reg_FPR(src0),
 9904               FMul_ST_reg(src1),
 9905               FAddP_reg_ST(src2) );
 9906   ins_pipe( fpu_reg_reg_reg );
 9907 %}
 9908 
 9909 
 9910 // MACRO3 -- subDPR a mulDPR
 9911 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9912   predicate( UseSSE<=1 );
 9913   match(Set src2 (SubD (MulD src0 src1) src2));
 9914   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9915             "DMUL   ST,$src1\n\t"
 9916             "DSUBRp $src2,ST" %}
 9917   ins_cost(250);
 9918   ins_encode( Push_Reg_FPR(src0),
 9919               FMul_ST_reg(src1),
 9920               Opcode(0xDE), Opc_plus(0xE0,src2));
 9921   ins_pipe( fpu_reg_reg_reg );
 9922 %}
 9923 
 9924 
 9925 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9926   predicate( UseSSE<=1 );
 9927   match(Set dst (DivD dst src));
 9928 
 9929   format %{ "FLD    $src\n\t"
 9930             "FDIVp  $dst,ST" %}
 9931   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9932   ins_cost(150);
 9933   ins_encode( Push_Reg_DPR(src),
 9934               OpcP, RegOpc(dst) );
 9935   ins_pipe( fpu_reg_reg );
 9936 %}
 9937 
 9938 // Strict FP instruction biases argument before division then
 9939 // biases result, to avoid double rounding of subnormals.
 9940 //
 9941 // scale dividend by multiplying dividend by 2^(-15360)
 9942 // load divisor
 9943 // divide scaled dividend by divisor
 9944 // rescale quotient by 2^(15360)
 9945 //
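// As with the strict multiply above, the net effect is
//   ((dividend * 2^-15360) / divisor) * 2^15360  ==  dividend / divisor
// with the same power-of-two exponent-bias trick ensuring a quotient that
// underflows to a subnormal double is rounded only once.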
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all FP double divides
 9951 
 9952   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9953             "DMULp  $dst,ST\n\t"
 9954             "FLD    $src\n\t"
 9955             "FDIVp  $dst,ST\n\t"
 9956             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9957             "DMULp  $dst,ST\n\t" %}
 9958   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9959   ins_encode( strictfp_bias1(dst),
 9960               Push_Reg_DPR(src),
 9961               OpcP, RegOpc(dst),
 9962               strictfp_bias2(dst) );
 9963   ins_pipe( fpu_reg_reg );
 9964 %}
 9965 
 9966 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9967   predicate(UseSSE<=1);
 9968   match(Set dst (ModD dst src));
 9969   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9970 
 9971   format %{ "DMOD   $dst,$src" %}
 9972   ins_cost(250);
 9973   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9974               emitModDPR(),
 9975               Push_Result_Mod_DPR(src),
 9976               Pop_Reg_DPR(dst));
 9977   ins_pipe( pipe_slow );
 9978 %}
 9979 
 9980 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9981   predicate(UseSSE>=2);
 9982   match(Set dst (ModD src0 src1));
 9983   effect(KILL rax, KILL cr);
 9984 
 9985   format %{ "SUB    ESP,8\t # DMOD\n"
 9986           "\tMOVSD  [ESP+0],$src1\n"
 9987           "\tFLD_D  [ESP+0]\n"
 9988           "\tMOVSD  [ESP+0],$src0\n"
 9989           "\tFLD_D  [ESP+0]\n"
 9990      "loop:\tFPREM\n"
 9991           "\tFWAIT\n"
 9992           "\tFNSTSW AX\n"
 9993           "\tSAHF\n"
 9994           "\tJP     loop\n"
 9995           "\tFSTP_D [ESP+0]\n"
 9996           "\tMOVSD  $dst,[ESP+0]\n"
 9997           "\tADD    ESP,8\n"
 9998           "\tFSTP   ST0\t # Restore FPU Stack"
 9999     %}
10000   ins_cost(250);
10001   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10002   ins_pipe( pipe_slow );
10003 %}
10004 
10005 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10006   predicate (UseSSE<=1);
10007   match(Set dst(AtanD dst src));
10008   format %{ "DATA   $dst,$src" %}
10009   opcode(0xD9, 0xF3);
10010   ins_encode( Push_Reg_DPR(src),
10011               OpcP, OpcS, RegOpc(dst) );
10012   ins_pipe( pipe_slow );
10013 %}
10014 
10015 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10016   predicate (UseSSE>=2);
10017   match(Set dst(AtanD dst src));
10018   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10019   format %{ "DATA   $dst,$src" %}
10020   opcode(0xD9, 0xF3);
10021   ins_encode( Push_SrcD(src),
10022               OpcP, OpcS, Push_ResultD(dst) );
10023   ins_pipe( pipe_slow );
10024 %}
10025 
10026 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10027   predicate (UseSSE<=1);
10028   match(Set dst (SqrtD src));
10029   format %{ "DSQRT  $dst,$src" %}
10030   opcode(0xFA, 0xD9);
10031   ins_encode( Push_Reg_DPR(src),
10032               OpcS, OpcP, Pop_Reg_DPR(dst) );
10033   ins_pipe( pipe_slow );
10034 %}
10035 
10036 //-------------Float Instructions-------------------------------
10037 // Float Math
10038 
10039 // Code for float compare:
10040 //     fcompp();
10041 //     fwait(); fnstsw_ax();
10042 //     sahf();
10043 //     movl(dst, unordered_result);
10044 //     jcc(Assembler::parity, exit);
10045 //     movl(dst, less_result);
10046 //     jcc(Assembler::below, exit);
10047 //     movl(dst, equal_result);
10048 //     jcc(Assembler::equal, exit);
10049 //     movl(dst, greater_result);
10050 //   exit:
10051 
10052 // P6 version of float compare, sets condition codes in EFLAGS
10053 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10054   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10055   match(Set cr (CmpF src1 src2));
10056   effect(KILL rax);
10057   ins_cost(150);
10058   format %{ "FLD    $src1\n\t"
10059             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10060             "JNP    exit\n\t"
10061             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10062             "SAHF\n"
10063      "exit:\tNOP               // avoid branch to branch" %}
10064   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10065   ins_encode( Push_Reg_DPR(src1),
10066               OpcP, RegOpc(src2),
10067               cmpF_P6_fixup );
10068   ins_pipe( pipe_slow );
10069 %}
10070 
10071 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10072   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10073   match(Set cr (CmpF src1 src2));
10074   ins_cost(100);
10075   format %{ "FLD    $src1\n\t"
10076             "FUCOMIP ST,$src2  // P6 instruction" %}
10077   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10078   ins_encode( Push_Reg_DPR(src1),
10079               OpcP, RegOpc(src2));
10080   ins_pipe( pipe_slow );
10081 %}
10082 
10083 
10084 // Compare & branch
10085 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10086   predicate(UseSSE == 0);
10087   match(Set cr (CmpF src1 src2));
10088   effect(KILL rax);
10089   ins_cost(200);
10090   format %{ "FLD    $src1\n\t"
10091             "FCOMp  $src2\n\t"
10092             "FNSTSW AX\n\t"
10093             "TEST   AX,0x400\n\t"
10094             "JZ,s   flags\n\t"
10095             "MOV    AH,1\t# unordered treat as LT\n"
10096     "flags:\tSAHF" %}
10097   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10098   ins_encode( Push_Reg_DPR(src1),
10099               OpcP, RegOpc(src2),
10100               fpu_flags);
10101   ins_pipe( pipe_slow );
10102 %}
10103 
10104 // Compare vs zero into -1,0,1
10105 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10106   predicate(UseSSE == 0);
10107   match(Set dst (CmpF3 src1 zero));
10108   effect(KILL cr, KILL rax);
10109   ins_cost(280);
10110   format %{ "FTSTF  $dst,$src1" %}
10111   opcode(0xE4, 0xD9);
10112   ins_encode( Push_Reg_DPR(src1),
10113               OpcS, OpcP, PopFPU,
10114               CmpF_Result(dst));
10115   ins_pipe( pipe_slow );
10116 %}
10117 
10118 // Compare into -1,0,1
10119 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10120   predicate(UseSSE == 0);
10121   match(Set dst (CmpF3 src1 src2));
10122   effect(KILL cr, KILL rax);
10123   ins_cost(300);
10124   format %{ "FCMPF  $dst,$src1,$src2" %}
10125   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10126   ins_encode( Push_Reg_DPR(src1),
10127               OpcP, RegOpc(src2),
10128               CmpF_Result(dst));
10129   ins_pipe( pipe_slow );
10130 %}
10131 
10132 // float compare and set condition codes in EFLAGS by XMM regs
10133 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10134   predicate(UseSSE>=1);
10135   match(Set cr (CmpF src1 src2));
10136   ins_cost(145);
10137   format %{ "UCOMISS $src1,$src2\n\t"
10138             "JNP,s   exit\n\t"
10139             "PUSHF\t# saw NaN, set CF\n\t"
10140             "AND     [rsp], #0xffffff2b\n\t"
10141             "POPF\n"
10142     "exit:" %}
10143   ins_encode %{
10144     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10145     emit_cmpfp_fixup(_masm);
10146   %}
10147   ins_pipe( pipe_slow );
10148 %}
10149 
10150 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10151   predicate(UseSSE>=1);
10152   match(Set cr (CmpF src1 src2));
10153   ins_cost(100);
10154   format %{ "UCOMISS $src1,$src2" %}
10155   ins_encode %{
10156     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10157   %}
10158   ins_pipe( pipe_slow );
10159 %}
10160 
10161 // float compare and set condition codes in EFLAGS by XMM regs
10162 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10163   predicate(UseSSE>=1);
10164   match(Set cr (CmpF src1 (LoadF src2)));
10165   ins_cost(165);
10166   format %{ "UCOMISS $src1,$src2\n\t"
10167             "JNP,s   exit\n\t"
10168             "PUSHF\t# saw NaN, set CF\n\t"
10169             "AND     [rsp], #0xffffff2b\n\t"
10170             "POPF\n"
10171     "exit:" %}
10172   ins_encode %{
10173     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10174     emit_cmpfp_fixup(_masm);
10175   %}
10176   ins_pipe( pipe_slow );
10177 %}
10178 
10179 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10180   predicate(UseSSE>=1);
10181   match(Set cr (CmpF src1 (LoadF src2)));
10182   ins_cost(100);
10183   format %{ "UCOMISS $src1,$src2" %}
10184   ins_encode %{
10185     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10186   %}
10187   ins_pipe( pipe_slow );
10188 %}
10189 
10190 // Compare into -1,0,1 in XMM
10191 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10192   predicate(UseSSE>=1);
10193   match(Set dst (CmpF3 src1 src2));
10194   effect(KILL cr);
10195   ins_cost(255);
10196   format %{ "UCOMISS $src1, $src2\n\t"
10197             "MOV     $dst, #-1\n\t"
10198             "JP,s    done\n\t"
10199             "JB,s    done\n\t"
10200             "SETNE   $dst\n\t"
10201             "MOVZB   $dst, $dst\n"
10202     "done:" %}
10203   ins_encode %{
10204     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10205     emit_cmpfp3(_masm, $dst$$Register);
10206   %}
10207   ins_pipe( pipe_slow );
10208 %}
10209 
10210 // Compare into -1,0,1 in XMM and memory
10211 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10212   predicate(UseSSE>=1);
10213   match(Set dst (CmpF3 src1 (LoadF src2)));
10214   effect(KILL cr);
10215   ins_cost(275);
10216   format %{ "UCOMISS $src1, $src2\n\t"
10217             "MOV     $dst, #-1\n\t"
10218             "JP,s    done\n\t"
10219             "JB,s    done\n\t"
10220             "SETNE   $dst\n\t"
10221             "MOVZB   $dst, $dst\n"
10222     "done:" %}
10223   ins_encode %{
10224     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10225     emit_cmpfp3(_masm, $dst$$Register);
10226   %}
10227   ins_pipe( pipe_slow );
10228 %}
10229 
10230 // Spill to obtain 24-bit precision
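// (These *24* forms route the result through a 32-bit stack slot: FSTP_S into
// the stackSlotF destination rounds the x87 value to IEEE single precision,
// which is what Java float semantics require.  Hypothetical example:
// 16777216.0f + 1.0f must still be 16777216.0f after rounding to a 24-bit
// significand, even though the wider x87 stack register holds 16777217.0
// exactly.)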
10231 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10232   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10233   match(Set dst (SubF src1 src2));
10234 
10235   format %{ "FSUB   $dst,$src1 - $src2" %}
10236   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10237   ins_encode( Push_Reg_FPR(src1),
10238               OpcReg_FPR(src2),
10239               Pop_Mem_FPR(dst) );
10240   ins_pipe( fpu_mem_reg_reg );
10241 %}
10242 //
10243 // This instruction does not round to 24-bits
10244 instruct subFPR_reg(regFPR dst, regFPR src) %{
10245   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10246   match(Set dst (SubF dst src));
10247 
10248   format %{ "FSUB   $dst,$src" %}
10249   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10250   ins_encode( Push_Reg_FPR(src),
10251               OpcP, RegOpc(dst) );
10252   ins_pipe( fpu_reg_reg );
10253 %}
10254 
10255 // Spill to obtain 24-bit precision
10256 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10257   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10258   match(Set dst (AddF src1 src2));
10259 
10260   format %{ "FADD   $dst,$src1,$src2" %}
10261   opcode(0xD8, 0x0); /* D8 C0+i */
10262   ins_encode( Push_Reg_FPR(src2),
10263               OpcReg_FPR(src1),
10264               Pop_Mem_FPR(dst) );
10265   ins_pipe( fpu_mem_reg_reg );
10266 %}
10267 //
10268 // This instruction does not round to 24-bits
10269 instruct addFPR_reg(regFPR dst, regFPR src) %{
10270   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10271   match(Set dst (AddF dst src));
10272 
10273   format %{ "FLD    $src\n\t"
10274             "FADDp  $dst,ST" %}
10275   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10276   ins_encode( Push_Reg_FPR(src),
10277               OpcP, RegOpc(dst) );
10278   ins_pipe( fpu_reg_reg );
10279 %}
10280 
10281 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10282   predicate(UseSSE==0);
10283   match(Set dst (AbsF src));
10284   ins_cost(100);
10285   format %{ "FABS" %}
10286   opcode(0xE1, 0xD9);
10287   ins_encode( OpcS, OpcP );
10288   ins_pipe( fpu_reg_reg );
10289 %}
10290 
10291 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10292   predicate(UseSSE==0);
10293   match(Set dst (NegF src));
10294   ins_cost(100);
10295   format %{ "FCHS" %}
10296   opcode(0xE0, 0xD9);
10297   ins_encode( OpcS, OpcP );
10298   ins_pipe( fpu_reg_reg );
10299 %}
10300 
10301 // Cisc-alternate to addFPR_reg
10302 // Spill to obtain 24-bit precision
10303 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10304   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10305   match(Set dst (AddF src1 (LoadF src2)));
10306 
10307   format %{ "FLD    $src2\n\t"
10308             "FADD   ST,$src1\n\t"
10309             "FSTP_S $dst" %}
10310   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10311   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10312               OpcReg_FPR(src1),
10313               Pop_Mem_FPR(dst) );
10314   ins_pipe( fpu_mem_reg_mem );
10315 %}
10316 //
10317 // Cisc-alternate to addFPR_reg
10318 // This instruction does not round to 24-bits
10319 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10320   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10321   match(Set dst (AddF dst (LoadF src)));
10322 
10323   format %{ "FADD   $dst,$src" %}
10324   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10325   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10326               OpcP, RegOpc(dst) );
10327   ins_pipe( fpu_reg_mem );
10328 %}
10329 
// Following two instructions for _222_mpegaudio
10331 // Spill to obtain 24-bit precision
10332 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10333   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10334   match(Set dst (AddF src1 src2));
10335 
10336   format %{ "FADD   $dst,$src1,$src2" %}
10337   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10338   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10339               OpcReg_FPR(src2),
10340               Pop_Mem_FPR(dst) );
10341   ins_pipe( fpu_mem_reg_mem );
10342 %}
10343 
10344 // Cisc-spill variant
10345 // Spill to obtain 24-bit precision
10346 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10347   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10348   match(Set dst (AddF src1 (LoadF src2)));
10349 
10350   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10351   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10352   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10353               set_instruction_start,
10354               OpcP, RMopc_Mem(secondary,src1),
10355               Pop_Mem_FPR(dst) );
10356   ins_pipe( fpu_mem_mem_mem );
10357 %}
10358 
10359 // Spill to obtain 24-bit precision
10360 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10361   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10362   match(Set dst (AddF src1 src2));
10363 
10364   format %{ "FADD   $dst,$src1,$src2" %}
10365   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10366   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10367               set_instruction_start,
10368               OpcP, RMopc_Mem(secondary,src1),
10369               Pop_Mem_FPR(dst) );
10370   ins_pipe( fpu_mem_mem_mem );
10371 %}
10372 
10373 
10374 // Spill to obtain 24-bit precision
10375 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10376   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10377   match(Set dst (AddF src con));
10378   format %{ "FLD    $src\n\t"
10379             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10380             "FSTP_S $dst"  %}
10381   ins_encode %{
10382     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10383     __ fadd_s($constantaddress($con));
10384     __ fstp_s(Address(rsp, $dst$$disp));
10385   %}
10386   ins_pipe(fpu_mem_reg_con);
10387 %}
10388 //
10389 // This instruction does not round to 24-bits
10390 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10391   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10392   match(Set dst (AddF src con));
10393   format %{ "FLD    $src\n\t"
10394             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10395             "FSTP   $dst"  %}
10396   ins_encode %{
10397     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10398     __ fadd_s($constantaddress($con));
10399     __ fstp_d($dst$$reg);
10400   %}
10401   ins_pipe(fpu_reg_reg_con);
10402 %}
10403 
10404 // Spill to obtain 24-bit precision
10405 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10406   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10407   match(Set dst (MulF src1 src2));
10408 
10409   format %{ "FLD    $src1\n\t"
10410             "FMUL   $src2\n\t"
10411             "FSTP_S $dst"  %}
10412   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10413   ins_encode( Push_Reg_FPR(src1),
10414               OpcReg_FPR(src2),
10415               Pop_Mem_FPR(dst) );
10416   ins_pipe( fpu_mem_reg_reg );
10417 %}
10418 //
10419 // This instruction does not round to 24-bits
10420 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10421   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10422   match(Set dst (MulF src1 src2));
10423 
10424   format %{ "FLD    $src1\n\t"
10425             "FMUL   $src2\n\t"
10426             "FSTP_S $dst"  %}
10427   opcode(0xD8, 0x1); /* D8 C8+i */
10428   ins_encode( Push_Reg_FPR(src2),
10429               OpcReg_FPR(src1),
10430               Pop_Reg_FPR(dst) );
10431   ins_pipe( fpu_reg_reg_reg );
10432 %}
10433 
10434 
10435 // Spill to obtain 24-bit precision
10436 // Cisc-alternate to reg-reg multiply
10437 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10438   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10439   match(Set dst (MulF src1 (LoadF src2)));
10440 
10441   format %{ "FLD_S  $src2\n\t"
10442             "FMUL   $src1\n\t"
10443             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10445   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10446               OpcReg_FPR(src1),
10447               Pop_Mem_FPR(dst) );
10448   ins_pipe( fpu_mem_reg_mem );
10449 %}
10450 //
10451 // This instruction does not round to 24-bits
10452 // Cisc-alternate to reg-reg multiply
10453 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10454   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10455   match(Set dst (MulF src1 (LoadF src2)));
10456 
10457   format %{ "FMUL   $dst,$src1,$src2" %}
10458   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10459   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10460               OpcReg_FPR(src1),
10461               Pop_Reg_FPR(dst) );
10462   ins_pipe( fpu_reg_reg_mem );
10463 %}
10464 
10465 // Spill to obtain 24-bit precision
10466 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10467   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10468   match(Set dst (MulF src1 src2));
10469 
10470   format %{ "FMUL   $dst,$src1,$src2" %}
10471   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10472   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10473               set_instruction_start,
10474               OpcP, RMopc_Mem(secondary,src1),
10475               Pop_Mem_FPR(dst) );
10476   ins_pipe( fpu_mem_mem_mem );
10477 %}
10478 
10479 // Spill to obtain 24-bit precision
10480 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10481   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10482   match(Set dst (MulF src con));
10483 
10484   format %{ "FLD    $src\n\t"
10485             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10486             "FSTP_S $dst"  %}
10487   ins_encode %{
10488     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10489     __ fmul_s($constantaddress($con));
10490     __ fstp_s(Address(rsp, $dst$$disp));
10491   %}
10492   ins_pipe(fpu_mem_reg_con);
10493 %}
10494 //
10495 // This instruction does not round to 24-bits
10496 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10497   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10498   match(Set dst (MulF src con));
10499 
10500   format %{ "FLD    $src\n\t"
10501             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10502             "FSTP   $dst"  %}
10503   ins_encode %{
10504     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10505     __ fmul_s($constantaddress($con));
10506     __ fstp_d($dst$$reg);
10507   %}
10508   ins_pipe(fpu_reg_reg_con);
10509 %}
10510 
10511 
10512 //
10513 // MACRO1 -- subsume unshared load into mulFPR
10514 // This instruction does not round to 24-bits
10515 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10516   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10517   match(Set dst (MulF (LoadF mem1) src));
10518 
10519   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10520             "FMUL   ST,$src\n\t"
10521             "FSTP   $dst" %}
10522   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10523   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10524               OpcReg_FPR(src),
10525               Pop_Reg_FPR(dst) );
10526   ins_pipe( fpu_reg_reg_mem );
10527 %}
10528 //
10529 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10530 // This instruction does not round to 24-bits
10531 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10532   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10533   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10534   ins_cost(95);
10535 
10536   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10537             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10538             "FADD   ST,$src2\n\t"
10539             "FSTP   $dst" %}
10540   opcode(0xD9); /* LoadF D9 /0 */
10541   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10542               FMul_ST_reg(src1),
10543               FAdd_ST_reg(src2),
10544               Pop_Reg_FPR(dst) );
10545   ins_pipe( fpu_reg_mem_reg_reg );
10546 %}
10547 
10548 // MACRO3 -- addFPR a mulFPR
10549 // This instruction does not round to 24-bits.  It is a '2-address'
10550 // instruction in that the result goes back to src2.  This eliminates
10551 // a move from the macro; possibly the register allocator will have
10552 // to add it back (and maybe not).
10553 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10554   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10555   match(Set src2 (AddF (MulF src0 src1) src2));
10556 
10557   format %{ "FLD    $src0     ===MACRO3===\n\t"
10558             "FMUL   ST,$src1\n\t"
10559             "FADDP  $src2,ST" %}
10560   opcode(0xD9); /* LoadF D9 /0 */
10561   ins_encode( Push_Reg_FPR(src0),
10562               FMul_ST_reg(src1),
10563               FAddP_reg_ST(src2) );
10564   ins_pipe( fpu_reg_reg_reg );
10565 %}
10566 
10567 // MACRO4 -- divFPR subFPR
10568 // This instruction does not round to 24-bits
10569 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10570   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10571   match(Set dst (DivF (SubF src2 src1) src3));
10572 
10573   format %{ "FLD    $src2   ===MACRO4===\n\t"
10574             "FSUB   ST,$src1\n\t"
10575             "FDIV   ST,$src3\n\t"
10576             "FSTP  $dst" %}
10577   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10578   ins_encode( Push_Reg_FPR(src2),
10579               subFPR_divFPR_encode(src1,src3),
10580               Pop_Reg_FPR(dst) );
10581   ins_pipe( fpu_reg_reg_reg_reg );
10582 %}
10583 
10584 // Spill to obtain 24-bit precision
10585 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10586   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10587   match(Set dst (DivF src1 src2));
10588 
10589   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10591   ins_encode( Push_Reg_FPR(src1),
10592               OpcReg_FPR(src2),
10593               Pop_Mem_FPR(dst) );
10594   ins_pipe( fpu_mem_reg_reg );
10595 %}
10596 //
10597 // This instruction does not round to 24-bits
10598 instruct divFPR_reg(regFPR dst, regFPR src) %{
10599   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10600   match(Set dst (DivF dst src));
10601 
10602   format %{ "FDIV   $dst,$src" %}
10603   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10604   ins_encode( Push_Reg_FPR(src),
10605               OpcP, RegOpc(dst) );
10606   ins_pipe( fpu_reg_reg );
10607 %}
10608 
10609 
10610 // Spill to obtain 24-bit precision
10611 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10612   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10613   match(Set dst (ModF src1 src2));
10614   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10615 
10616   format %{ "FMOD   $dst,$src1,$src2" %}
10617   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10618               emitModDPR(),
10619               Push_Result_Mod_DPR(src2),
10620               Pop_Mem_FPR(dst));
10621   ins_pipe( pipe_slow );
10622 %}
10623 //
10624 // This instruction does not round to 24-bits
10625 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10626   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10627   match(Set dst (ModF dst src));
10628   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10629 
10630   format %{ "FMOD   $dst,$src" %}
10631   ins_encode(Push_Reg_Mod_DPR(dst, src),
10632               emitModDPR(),
10633               Push_Result_Mod_DPR(src),
10634               Pop_Reg_FPR(dst));
10635   ins_pipe( pipe_slow );
10636 %}
10637 
10638 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10639   predicate(UseSSE>=1);
10640   match(Set dst (ModF src0 src1));
10641   effect(KILL rax, KILL cr);
10642   format %{ "SUB    ESP,4\t # FMOD\n"
10643           "\tMOVSS  [ESP+0],$src1\n"
10644           "\tFLD_S  [ESP+0]\n"
10645           "\tMOVSS  [ESP+0],$src0\n"
10646           "\tFLD_S  [ESP+0]\n"
10647      "loop:\tFPREM\n"
10648           "\tFWAIT\n"
10649           "\tFNSTSW AX\n"
10650           "\tSAHF\n"
10651           "\tJP     loop\n"
10652           "\tFSTP_S [ESP+0]\n"
10653           "\tMOVSS  $dst,[ESP+0]\n"
10654           "\tADD    ESP,4\n"
10655           "\tFSTP   ST0\t # Restore FPU Stack"
10656     %}
10657   ins_cost(250);
10658   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10659   ins_pipe( pipe_slow );
10660 %}
10661 
10662 
10663 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10665 
10666 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10667   predicate(UseSSE==0);
10668   match(Set dst (RoundFloat src));
10669   ins_cost(125);
10670   format %{ "FST_S  $dst,$src\t# F-round" %}
10671   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10672   ins_pipe( fpu_mem_reg );
10673 %}
10674 
10675 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10676   predicate(UseSSE<=1);
10677   match(Set dst (RoundDouble src));
10678   ins_cost(125);
10679   format %{ "FST_D  $dst,$src\t# D-round" %}
10680   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10681   ins_pipe( fpu_mem_reg );
10682 %}
10683 
// Force rounding to 24-bit precision and 8-bit exponent
10685 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10686   predicate(UseSSE==0);
10687   match(Set dst (ConvD2F src));
10688   format %{ "FST_S  $dst,$src\t# F-round" %}
10689   expand %{
10690     roundFloat_mem_reg(dst,src);
10691   %}
10692 %}
10693 
// Force rounding to 24-bit precision and 8-bit exponent
10695 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10696   predicate(UseSSE==1);
10697   match(Set dst (ConvD2F src));
10698   effect( KILL cr );
10699   format %{ "SUB    ESP,4\n\t"
10700             "FST_S  [ESP],$src\t# F-round\n\t"
10701             "MOVSS  $dst,[ESP]\n\t"
10702             "ADD ESP,4" %}
10703   ins_encode %{
10704     __ subptr(rsp, 4);
10705     if ($src$$reg != FPR1L_enc) {
10706       __ fld_s($src$$reg-1);
10707       __ fstp_s(Address(rsp, 0));
10708     } else {
10709       __ fst_s(Address(rsp, 0));
10710     }
10711     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10712     __ addptr(rsp, 4);
10713   %}
10714   ins_pipe( pipe_slow );
10715 %}
10716 
10717 // Force rounding double precision to single precision
10718 instruct convD2F_reg(regF dst, regD src) %{
10719   predicate(UseSSE>=2);
10720   match(Set dst (ConvD2F src));
10721   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10722   ins_encode %{
10723     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10724   %}
10725   ins_pipe( pipe_slow );
10726 %}
10727 
10728 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10729   predicate(UseSSE==0);
10730   match(Set dst (ConvF2D src));
10731   format %{ "FST_S  $dst,$src\t# D-round" %}
10732   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10733   ins_pipe( fpu_reg_reg );
10734 %}
10735 
10736 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10737   predicate(UseSSE==1);
10738   match(Set dst (ConvF2D src));
10739   format %{ "FST_D  $dst,$src\t# D-round" %}
10740   expand %{
10741     roundDouble_mem_reg(dst,src);
10742   %}
10743 %}
10744 
10745 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10746   predicate(UseSSE==1);
10747   match(Set dst (ConvF2D src));
10748   effect( KILL cr );
10749   format %{ "SUB    ESP,4\n\t"
10750             "MOVSS  [ESP] $src\n\t"
10751             "FLD_S  [ESP]\n\t"
10752             "ADD    ESP,4\n\t"
10753             "FSTP   $dst\t# D-round" %}
10754   ins_encode %{
10755     __ subptr(rsp, 4);
10756     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10757     __ fld_s(Address(rsp, 0));
10758     __ addptr(rsp, 4);
10759     __ fstp_d($dst$$reg);
10760   %}
10761   ins_pipe( pipe_slow );
10762 %}
10763 
10764 instruct convF2D_reg(regD dst, regF src) %{
10765   predicate(UseSSE>=2);
10766   match(Set dst (ConvF2D src));
10767   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10768   ins_encode %{
10769     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10770   %}
10771   ins_pipe( pipe_slow );
10772 %}
10773 
10774 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10775 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10776   predicate(UseSSE<=1);
10777   match(Set dst (ConvD2I src));
10778   effect( KILL tmp, KILL cr );
10779   format %{ "FLD    $src\t# Convert double to int \n\t"
10780             "FLDCW  trunc mode\n\t"
10781             "SUB    ESP,4\n\t"
10782             "FISTp  [ESP + #0]\n\t"
10783             "FLDCW  std/24-bit mode\n\t"
10784             "POP    EAX\n\t"
10785             "CMP    EAX,0x80000000\n\t"
10786             "JNE,s  fast\n\t"
10787             "FLD_D  $src\n\t"
10788             "CALL   d2i_wrapper\n"
10789       "fast:" %}
10790   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10791   ins_pipe( pipe_slow );
10792 %}
10793 
10794 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10795 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10796   predicate(UseSSE>=2);
10797   match(Set dst (ConvD2I src));
10798   effect( KILL tmp, KILL cr );
10799   format %{ "CVTTSD2SI $dst, $src\n\t"
10800             "CMP    $dst,0x80000000\n\t"
10801             "JNE,s  fast\n\t"
10802             "SUB    ESP, 8\n\t"
10803             "MOVSD  [ESP], $src\n\t"
10804             "FLD_D  [ESP]\n\t"
10805             "ADD    ESP, 8\n\t"
10806             "CALL   d2i_wrapper\n"
10807       "fast:" %}
10808   ins_encode %{
10809     Label fast;
10810     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10811     __ cmpl($dst$$Register, 0x80000000);
10812     __ jccb(Assembler::notEqual, fast);
10813     __ subptr(rsp, 8);
10814     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10815     __ fld_d(Address(rsp, 0));
10816     __ addptr(rsp, 8);
10817     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10818     __ bind(fast);
10819   %}
10820   ins_pipe( pipe_slow );
10821 %}
10822 
10823 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10824   predicate(UseSSE<=1);
10825   match(Set dst (ConvD2L src));
10826   effect( KILL cr );
10827   format %{ "FLD    $src\t# Convert double to long\n\t"
10828             "FLDCW  trunc mode\n\t"
10829             "SUB    ESP,8\n\t"
10830             "FISTp  [ESP + #0]\n\t"
10831             "FLDCW  std/24-bit mode\n\t"
10832             "POP    EAX\n\t"
10833             "POP    EDX\n\t"
10834             "CMP    EDX,0x80000000\n\t"
10835             "JNE,s  fast\n\t"
10836             "TEST   EAX,EAX\n\t"
10837             "JNE,s  fast\n\t"
10838             "FLD    $src\n\t"
10839             "CALL   d2l_wrapper\n"
10840       "fast:" %}
10841   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10842   ins_pipe( pipe_slow );
10843 %}
10844 
10845 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10846 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10847   predicate (UseSSE>=2);
10848   match(Set dst (ConvD2L src));
10849   effect( KILL cr );
10850   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10851             "MOVSD  [ESP],$src\n\t"
10852             "FLD_D  [ESP]\n\t"
10853             "FLDCW  trunc mode\n\t"
10854             "FISTp  [ESP + #0]\n\t"
10855             "FLDCW  std/24-bit mode\n\t"
10856             "POP    EAX\n\t"
10857             "POP    EDX\n\t"
10858             "CMP    EDX,0x80000000\n\t"
10859             "JNE,s  fast\n\t"
10860             "TEST   EAX,EAX\n\t"
10861             "JNE,s  fast\n\t"
10862             "SUB    ESP,8\n\t"
10863             "MOVSD  [ESP],$src\n\t"
10864             "FLD_D  [ESP]\n\t"
10865             "ADD    ESP,8\n\t"
10866             "CALL   d2l_wrapper\n"
10867       "fast:" %}
10868   ins_encode %{
10869     Label fast;
10870     __ subptr(rsp, 8);
10871     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10872     __ fld_d(Address(rsp, 0));
10873     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10874     __ fistp_d(Address(rsp, 0));
10875     // Restore the rounding mode, mask the exception
10876     if (Compile::current()->in_24_bit_fp_mode()) {
10877       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10878     } else {
10879       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10880     }
10881     // Load the converted long, adjust CPU stack
10882     __ pop(rax);
10883     __ pop(rdx);
10884     __ cmpl(rdx, 0x80000000);
10885     __ jccb(Assembler::notEqual, fast);
10886     __ testl(rax, rax);
10887     __ jccb(Assembler::notEqual, fast);
10888     __ subptr(rsp, 8);
10889     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10890     __ fld_d(Address(rsp, 0));
10891     __ addptr(rsp, 8);
10892     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10893     __ bind(fast);
10894   %}
10895   ins_pipe( pipe_slow );
10896 %}
10897 
// Convert a float to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
10904 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10905   predicate(UseSSE==0);
10906   match(Set dst (ConvF2I src));
10907   effect( KILL tmp, KILL cr );
10908   format %{ "FLD    $src\t# Convert float to int \n\t"
10909             "FLDCW  trunc mode\n\t"
10910             "SUB    ESP,4\n\t"
10911             "FISTp  [ESP + #0]\n\t"
10912             "FLDCW  std/24-bit mode\n\t"
10913             "POP    EAX\n\t"
10914             "CMP    EAX,0x80000000\n\t"
10915             "JNE,s  fast\n\t"
10916             "FLD    $src\n\t"
10917             "CALL   d2i_wrapper\n"
10918       "fast:" %}
10919   // DPR2I_encoding works for FPR2I
10920   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10921   ins_pipe( pipe_slow );
10922 %}
10923 
10924 // Convert a float in xmm to an int reg.
10925 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10926   predicate(UseSSE>=1);
10927   match(Set dst (ConvF2I src));
10928   effect( KILL tmp, KILL cr );
10929   format %{ "CVTTSS2SI $dst, $src\n\t"
10930             "CMP    $dst,0x80000000\n\t"
10931             "JNE,s  fast\n\t"
10932             "SUB    ESP, 4\n\t"
10933             "MOVSS  [ESP], $src\n\t"
10934             "FLD    [ESP]\n\t"
10935             "ADD    ESP, 4\n\t"
10936             "CALL   d2i_wrapper\n"
10937       "fast:" %}
10938   ins_encode %{
10939     Label fast;
10940     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10941     __ cmpl($dst$$Register, 0x80000000);
10942     __ jccb(Assembler::notEqual, fast);
10943     __ subptr(rsp, 4);
10944     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10945     __ fld_s(Address(rsp, 0));
10946     __ addptr(rsp, 4);
10947     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10948     __ bind(fast);
10949   %}
10950   ins_pipe( pipe_slow );
10951 %}
10952 
10953 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10954   predicate(UseSSE==0);
10955   match(Set dst (ConvF2L src));
10956   effect( KILL cr );
10957   format %{ "FLD    $src\t# Convert float to long\n\t"
10958             "FLDCW  trunc mode\n\t"
10959             "SUB    ESP,8\n\t"
10960             "FISTp  [ESP + #0]\n\t"
10961             "FLDCW  std/24-bit mode\n\t"
10962             "POP    EAX\n\t"
10963             "POP    EDX\n\t"
10964             "CMP    EDX,0x80000000\n\t"
10965             "JNE,s  fast\n\t"
10966             "TEST   EAX,EAX\n\t"
10967             "JNE,s  fast\n\t"
10968             "FLD    $src\n\t"
10969             "CALL   d2l_wrapper\n"
10970       "fast:" %}
10971   // DPR2L_encoding works for FPR2L
10972   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10973   ins_pipe( pipe_slow );
10974 %}
10975 
10976 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10977 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10978   predicate (UseSSE>=1);
10979   match(Set dst (ConvF2L src));
10980   effect( KILL cr );
10981   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10982             "MOVSS  [ESP],$src\n\t"
10983             "FLD_S  [ESP]\n\t"
10984             "FLDCW  trunc mode\n\t"
10985             "FISTp  [ESP + #0]\n\t"
10986             "FLDCW  std/24-bit mode\n\t"
10987             "POP    EAX\n\t"
10988             "POP    EDX\n\t"
10989             "CMP    EDX,0x80000000\n\t"
10990             "JNE,s  fast\n\t"
10991             "TEST   EAX,EAX\n\t"
10992             "JNE,s  fast\n\t"
10993             "SUB    ESP,4\t# Convert float to long\n\t"
10994             "MOVSS  [ESP],$src\n\t"
10995             "FLD_S  [ESP]\n\t"
10996             "ADD    ESP,4\n\t"
10997             "CALL   d2l_wrapper\n"
10998       "fast:" %}
10999   ins_encode %{
11000     Label fast;
11001     __ subptr(rsp, 8);
11002     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11003     __ fld_s(Address(rsp, 0));
11004     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
11005     __ fistp_d(Address(rsp, 0));
11006     // Restore the rounding mode, mask the exception
11007     if (Compile::current()->in_24_bit_fp_mode()) {
11008       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
11009     } else {
11010       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
11011     }
11012     // Load the converted long, adjust CPU stack
11013     __ pop(rax);
11014     __ pop(rdx);
11015     __ cmpl(rdx, 0x80000000);
11016     __ jccb(Assembler::notEqual, fast);
11017     __ testl(rax, rax);
11018     __ jccb(Assembler::notEqual, fast);
11019     __ subptr(rsp, 4);
11020     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11021     __ fld_s(Address(rsp, 0));
11022     __ addptr(rsp, 4);
11023     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11024     __ bind(fast);
11025   %}
11026   ins_pipe( pipe_slow );
11027 %}
11028 
11029 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11030   predicate( UseSSE<=1 );
11031   match(Set dst (ConvI2D src));
11032   format %{ "FILD   $src\n\t"
11033             "FSTP   $dst" %}
11034   opcode(0xDB, 0x0);  /* DB /0 */
11035   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11036   ins_pipe( fpu_reg_mem );
11037 %}
11038 
11039 instruct convI2D_reg(regD dst, rRegI src) %{
11040   predicate( UseSSE>=2 && !UseXmmI2D );
11041   match(Set dst (ConvI2D src));
11042   format %{ "CVTSI2SD $dst,$src" %}
11043   ins_encode %{
11044     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11045   %}
11046   ins_pipe( pipe_slow );
11047 %}
11048 
11049 instruct convI2D_mem(regD dst, memory mem) %{
11050   predicate( UseSSE>=2 );
11051   match(Set dst (ConvI2D (LoadI mem)));
11052   format %{ "CVTSI2SD $dst,$mem" %}
11053   ins_encode %{
11054     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11055   %}
11056   ins_pipe( pipe_slow );
11057 %}
11058 
11059 instruct convXI2D_reg(regD dst, rRegI src)
11060 %{
11061   predicate( UseSSE>=2 && UseXmmI2D );
11062   match(Set dst (ConvI2D src));
11063 
11064   format %{ "MOVD  $dst,$src\n\t"
11065             "CVTDQ2PD $dst,$dst\t# i2d" %}
11066   ins_encode %{
11067     __ movdl($dst$$XMMRegister, $src$$Register);
11068     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11069   %}
11070   ins_pipe(pipe_slow); // XXX
11071 %}
11072 
11073 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11074   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11075   match(Set dst (ConvI2D (LoadI mem)));
11076   format %{ "FILD   $mem\n\t"
11077             "FSTP   $dst" %}
11078   opcode(0xDB);      /* DB /0 */
11079   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11080               Pop_Reg_DPR(dst));
11081   ins_pipe( fpu_reg_mem );
11082 %}
11083 
11084 // Convert a byte to a float; no rounding step needed.
11085 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11086   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11087   match(Set dst (ConvI2F src));
11088   format %{ "FILD   $src\n\t"
11089             "FSTP   $dst" %}
11090 
11091   opcode(0xDB, 0x0);  /* DB /0 */
11092   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11093   ins_pipe( fpu_reg_mem );
11094 %}
11095 
11096 // In 24-bit mode, force exponent rounding by storing back out
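// (x87 precision control only narrows the significand; the exponent keeps
// its extended range.  Spilling through a 32-bit memory slot is what clamps
// the exponent to IEEE single range, which is why the result is produced in
// a stackSlotF here rather than left in an FPU register.)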
11097 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11098   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11099   match(Set dst (ConvI2F src));
11100   ins_cost(200);
11101   format %{ "FILD   $src\n\t"
11102             "FSTP_S $dst" %}
11103   opcode(0xDB, 0x0);  /* DB /0 */
11104   ins_encode( Push_Mem_I(src),
11105               Pop_Mem_FPR(dst));
11106   ins_pipe( fpu_mem_mem );
11107 %}
11108 
11109 // In 24-bit mode, force exponent rounding by storing back out
11110 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11111   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11112   match(Set dst (ConvI2F (LoadI mem)));
11113   ins_cost(200);
11114   format %{ "FILD   $mem\n\t"
11115             "FSTP_S $dst" %}
11116   opcode(0xDB);  /* DB /0 */
11117   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11118               Pop_Mem_FPR(dst));
11119   ins_pipe( fpu_mem_mem );
11120 %}
11121 
// This instruction does not round to 24 bits
11123 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11124   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11125   match(Set dst (ConvI2F src));
11126   format %{ "FILD   $src\n\t"
11127             "FSTP   $dst" %}
11128   opcode(0xDB, 0x0);  /* DB /0 */
11129   ins_encode( Push_Mem_I(src),
11130               Pop_Reg_FPR(dst));
11131   ins_pipe( fpu_reg_mem );
11132 %}
11133 
// This instruction does not round to 24 bits
11135 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11136   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11137   match(Set dst (ConvI2F (LoadI mem)));
11138   format %{ "FILD   $mem\n\t"
11139             "FSTP   $dst" %}
11140   opcode(0xDB);      /* DB /0 */
11141   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11142               Pop_Reg_FPR(dst));
11143   ins_pipe( fpu_reg_mem );
11144 %}
11145 
11146 // Convert an int to a float in xmm; no rounding step needed.
11147 instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11149   match(Set dst (ConvI2F src));
11150   format %{ "CVTSI2SS $dst, $src" %}
11151   ins_encode %{
11152     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11153   %}
11154   ins_pipe( pipe_slow );
11155 %}
11156 
instruct convXI2F_reg(regF dst, rRegI src)
11158 %{
11159   predicate( UseSSE>=2 && UseXmmI2F );
11160   match(Set dst (ConvI2F src));
11161 
11162   format %{ "MOVD  $dst,$src\n\t"
11163             "CVTDQ2PS $dst,$dst\t# i2f" %}
11164   ins_encode %{
11165     __ movdl($dst$$XMMRegister, $src$$Register);
11166     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11167   %}
11168   ins_pipe(pipe_slow); // XXX
11169 %}
11170 
11171 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11172   match(Set dst (ConvI2L src));
11173   effect(KILL cr);
11174   ins_cost(375);
11175   format %{ "MOV    $dst.lo,$src\n\t"
11176             "MOV    $dst.hi,$src\n\t"
11177             "SAR    $dst.hi,31" %}
11178   ins_encode(convert_int_long(dst,src));
11179   ins_pipe( ialu_reg_reg_long );
11180 %}
11181 
11182 // Zero-extend convert int to long
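// Matched for the common unsigned-widening idiom, e.g. in Java source:
//   long u = x & 0xFFFFFFFFL;   // ConvI2L wrapped in AndL with immL_32bits
// so the low word is copied as-is and the high word is simply zeroed.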
11183 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11184   match(Set dst (AndL (ConvI2L src) mask) );
11185   effect( KILL flags );
11186   ins_cost(250);
11187   format %{ "MOV    $dst.lo,$src\n\t"
11188             "XOR    $dst.hi,$dst.hi" %}
11189   opcode(0x33); // XOR
11190   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11191   ins_pipe( ialu_reg_reg_long );
11192 %}
11193 
11194 // Zero-extend long
11195 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11196   match(Set dst (AndL src mask) );
11197   effect( KILL flags );
11198   ins_cost(250);
11199   format %{ "MOV    $dst.lo,$src.lo\n\t"
11200             "XOR    $dst.hi,$dst.hi\n\t" %}
11201   opcode(0x33); // XOR
11202   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11203   ins_pipe( ialu_reg_reg_long );
11204 %}
11205 
11206 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11207   predicate (UseSSE<=1);
11208   match(Set dst (ConvL2D src));
11209   effect( KILL cr );
11210   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11211             "PUSH   $src.lo\n\t"
11212             "FILD   ST,[ESP + #0]\n\t"
11213             "ADD    ESP,8\n\t"
11214             "FSTP_D $dst\t# D-round" %}
11215   opcode(0xDF, 0x5);  /* DF /5 */
11216   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11217   ins_pipe( pipe_slow );
11218 %}
11219 
11220 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11221   predicate (UseSSE>=2);
11222   match(Set dst (ConvL2D src));
11223   effect( KILL cr );
11224   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11225             "PUSH   $src.lo\n\t"
11226             "FILD_D [ESP]\n\t"
11227             "FSTP_D [ESP]\n\t"
11228             "MOVSD  $dst,[ESP]\n\t"
11229             "ADD    ESP,8" %}
11230   opcode(0xDF, 0x5);  /* DF /5 */
11231   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11232   ins_pipe( pipe_slow );
11233 %}
11234 
11235 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11236   predicate (UseSSE>=1);
11237   match(Set dst (ConvL2F src));
11238   effect( KILL cr );
11239   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11240             "PUSH   $src.lo\n\t"
11241             "FILD_D [ESP]\n\t"
11242             "FSTP_S [ESP]\n\t"
11243             "MOVSS  $dst,[ESP]\n\t"
11244             "ADD    ESP,8" %}
11245   opcode(0xDF, 0x5);  /* DF /5 */
11246   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11247   ins_pipe( pipe_slow );
11248 %}
11249 
11250 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11251   match(Set dst (ConvL2F src));
11252   effect( KILL cr );
11253   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11254             "PUSH   $src.lo\n\t"
11255             "FILD   ST,[ESP + #0]\n\t"
11256             "ADD    ESP,8\n\t"
11257             "FSTP_S $dst\t# F-round" %}
11258   opcode(0xDF, 0x5);  /* DF /5 */
11259   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11260   ins_pipe( pipe_slow );
11261 %}
11262 
11263 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11264   match(Set dst (ConvL2I src));
11265   effect( DEF dst, USE src );
11266   format %{ "MOV    $dst,$src.lo" %}
11267   ins_encode(enc_CopyL_Lo(dst,src));
11268   ins_pipe( ialu_reg_reg );
11269 %}
11270 
11271 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11272   match(Set dst (MoveF2I src));
11273   effect( DEF dst, USE src );
11274   ins_cost(100);
11275   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11276   ins_encode %{
11277     __ movl($dst$$Register, Address(rsp, $src$$disp));
11278   %}
11279   ins_pipe( ialu_reg_mem );
11280 %}
11281 
11282 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11283   predicate(UseSSE==0);
11284   match(Set dst (MoveF2I src));
11285   effect( DEF dst, USE src );
11286 
11287   ins_cost(125);
11288   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11289   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11290   ins_pipe( fpu_mem_reg );
11291 %}
11292 
11293 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11294   predicate(UseSSE>=1);
11295   match(Set dst (MoveF2I src));
11296   effect( DEF dst, USE src );
11297 
11298   ins_cost(95);
11299   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11300   ins_encode %{
11301     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11302   %}
11303   ins_pipe( pipe_slow );
11304 %}
11305 
11306 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11307   predicate(UseSSE>=2);
11308   match(Set dst (MoveF2I src));
11309   effect( DEF dst, USE src );
11310   ins_cost(85);
11311   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11312   ins_encode %{
11313     __ movdl($dst$$Register, $src$$XMMRegister);
11314   %}
11315   ins_pipe( pipe_slow );
11316 %}
11317 
11318 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11319   match(Set dst (MoveI2F src));
11320   effect( DEF dst, USE src );
11321 
11322   ins_cost(100);
11323   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11324   ins_encode %{
11325     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11326   %}
11327   ins_pipe( ialu_mem_reg );
11328 %}
11329 
11330 
11331 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11332   predicate(UseSSE==0);
11333   match(Set dst (MoveI2F src));
11334   effect(DEF dst, USE src);
11335 
11336   ins_cost(125);
11337   format %{ "FLD_S  $src\n\t"
11338             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11339   opcode(0xD9);               /* D9 /0, FLD m32real */
11340   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11341               Pop_Reg_FPR(dst) );
11342   ins_pipe( fpu_reg_mem );
11343 %}
11344 
11345 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11346   predicate(UseSSE>=1);
11347   match(Set dst (MoveI2F src));
11348   effect( DEF dst, USE src );
11349 
11350   ins_cost(95);
11351   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11352   ins_encode %{
11353     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11354   %}
11355   ins_pipe( pipe_slow );
11356 %}
11357 
11358 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11359   predicate(UseSSE>=2);
11360   match(Set dst (MoveI2F src));
11361   effect( DEF dst, USE src );
11362 
11363   ins_cost(85);
11364   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11365   ins_encode %{
11366     __ movdl($dst$$XMMRegister, $src$$Register);
11367   %}
11368   ins_pipe( pipe_slow );
11369 %}
11370 
11371 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11372   match(Set dst (MoveD2L src));
11373   effect(DEF dst, USE src);
11374 
11375   ins_cost(250);
11376   format %{ "MOV    $dst.lo,$src\n\t"
11377             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11378   opcode(0x8B, 0x8B);
11379   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11380   ins_pipe( ialu_mem_long_reg );
11381 %}
11382 
11383 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11384   predicate(UseSSE<=1);
11385   match(Set dst (MoveD2L src));
11386   effect(DEF dst, USE src);
11387 
11388   ins_cost(125);
11389   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11390   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11391   ins_pipe( fpu_mem_reg );
11392 %}
11393 
11394 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11395   predicate(UseSSE>=2);
11396   match(Set dst (MoveD2L src));
11397   effect(DEF dst, USE src);
11398   ins_cost(95);
11399   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11400   ins_encode %{
11401     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11402   %}
11403   ins_pipe( pipe_slow );
11404 %}
11405 
11406 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11407   predicate(UseSSE>=2);
11408   match(Set dst (MoveD2L src));
11409   effect(DEF dst, USE src, TEMP tmp);
11410   ins_cost(85);
11411   format %{ "MOVD   $dst.lo,$src\n\t"
11412             "PSHUFLW $tmp,$src,0x4E\n\t"
11413             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11414   ins_encode %{
11415     __ movdl($dst$$Register, $src$$XMMRegister);
11416     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11417     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11418   %}
11419   ins_pipe( pipe_slow );
11420 %}
11421 
11422 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11423   match(Set dst (MoveL2D src));
11424   effect(DEF dst, USE src);
11425 
11426   ins_cost(200);
11427   format %{ "MOV    $dst,$src.lo\n\t"
11428             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11429   opcode(0x89, 0x89);
11430   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11431   ins_pipe( ialu_mem_long_reg );
11432 %}
11433 
11434 
11435 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11436   predicate(UseSSE<=1);
11437   match(Set dst (MoveL2D src));
11438   effect(DEF dst, USE src);
11439   ins_cost(125);
11440 
11441   format %{ "FLD_D  $src\n\t"
11442             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11443   opcode(0xDD);               /* DD /0, FLD m64real */
11444   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11445               Pop_Reg_DPR(dst) );
11446   ins_pipe( fpu_reg_mem );
11447 %}
11448 
11449 
11450 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11451   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11452   match(Set dst (MoveL2D src));
11453   effect(DEF dst, USE src);
11454 
11455   ins_cost(95);
11456   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11457   ins_encode %{
11458     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11459   %}
11460   ins_pipe( pipe_slow );
11461 %}
11462 
11463 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11464   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11465   match(Set dst (MoveL2D src));
11466   effect(DEF dst, USE src);
11467 
11468   ins_cost(95);
11469   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11470   ins_encode %{
11471     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11472   %}
11473   ins_pipe( pipe_slow );
11474 %}
11475 
11476 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11477   predicate(UseSSE>=2);
11478   match(Set dst (MoveL2D src));
11479   effect(TEMP dst, USE src, TEMP tmp);
11480   ins_cost(85);
11481   format %{ "MOVD   $dst,$src.lo\n\t"
11482             "MOVD   $tmp,$src.hi\n\t"
11483             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11484   ins_encode %{
11485     __ movdl($dst$$XMMRegister, $src$$Register);
11486     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11487     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11488   %}
11489   ins_pipe( pipe_slow );
11490 %}
11491 
11492 
11493 // =======================================================================
11494 // fast clearing of an array
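// All four variants below funnel into MacroAssembler::clear_mem().  The
// element count appears to arrive in 8-byte units (the templates shift it
// by 3 to get bytes and by 1 to get 4-byte words), the boolean argument
// selects the small vs. large code shape, and knoreg is passed where no
// AVX-512 mask temporary is available.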
11495 // Small ClearArray non-AVX512.
11496 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11497   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11498   match(Set dummy (ClearArray cnt base));
11499   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11500 
11501   format %{ $$template
11502     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11503     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11504     $$emit$$"JG     LARGE\n\t"
11505     $$emit$$"SHL    ECX, 1\n\t"
11506     $$emit$$"DEC    ECX\n\t"
11507     $$emit$$"JS     DONE\t# Zero length\n\t"
11508     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11509     $$emit$$"DEC    ECX\n\t"
11510     $$emit$$"JGE    LOOP\n\t"
11511     $$emit$$"JMP    DONE\n\t"
11512     $$emit$$"# LARGE:\n\t"
11513     if (UseFastStosb) {
11514        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11515        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11516     } else if (UseXMMForObjInit) {
11517        $$emit$$"MOV     RDI,RAX\n\t"
11518        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11519        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11520        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11521        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11522        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11523        $$emit$$"ADD     0x40,RAX\n\t"
11524        $$emit$$"# L_zero_64_bytes:\n\t"
11525        $$emit$$"SUB     0x8,RCX\n\t"
11526        $$emit$$"JGE     L_loop\n\t"
11527        $$emit$$"ADD     0x4,RCX\n\t"
11528        $$emit$$"JL      L_tail\n\t"
11529        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11530        $$emit$$"ADD     0x20,RAX\n\t"
11531        $$emit$$"SUB     0x4,RCX\n\t"
11532        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11533        $$emit$$"ADD     0x4,RCX\n\t"
11534        $$emit$$"JLE     L_end\n\t"
11535        $$emit$$"DEC     RCX\n\t"
11536        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11537        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11538        $$emit$$"ADD     0x8,RAX\n\t"
11539        $$emit$$"DEC     RCX\n\t"
11540        $$emit$$"JGE     L_sloop\n\t"
11541        $$emit$$"# L_end:\n\t"
11542     } else {
11543        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11544        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11545     }
11546     $$emit$$"# DONE"
11547   %}
11548   ins_encode %{
11549     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11550                  $tmp$$XMMRegister, false, knoreg);
11551   %}
11552   ins_pipe( pipe_slow );
11553 %}
11554 
11555 // Small ClearArray AVX512 non-constant length.
11556 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11557   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11558   match(Set dummy (ClearArray cnt base));
11559   ins_cost(125);
11560   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11561 
11562   format %{ $$template
11563     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11564     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11565     $$emit$$"JG     LARGE\n\t"
11566     $$emit$$"SHL    ECX, 1\n\t"
11567     $$emit$$"DEC    ECX\n\t"
11568     $$emit$$"JS     DONE\t# Zero length\n\t"
11569     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11570     $$emit$$"DEC    ECX\n\t"
11571     $$emit$$"JGE    LOOP\n\t"
11572     $$emit$$"JMP    DONE\n\t"
11573     $$emit$$"# LARGE:\n\t"
11574     if (UseFastStosb) {
11575        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11576        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11577     } else if (UseXMMForObjInit) {
11578        $$emit$$"MOV     RDI,RAX\n\t"
11579        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11580        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11581        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11582        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11583        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11584        $$emit$$"ADD     0x40,RAX\n\t"
11585        $$emit$$"# L_zero_64_bytes:\n\t"
11586        $$emit$$"SUB     0x8,RCX\n\t"
11587        $$emit$$"JGE     L_loop\n\t"
11588        $$emit$$"ADD     0x4,RCX\n\t"
11589        $$emit$$"JL      L_tail\n\t"
11590        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11591        $$emit$$"ADD     0x20,RAX\n\t"
11592        $$emit$$"SUB     0x4,RCX\n\t"
11593        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11594        $$emit$$"ADD     0x4,RCX\n\t"
11595        $$emit$$"JLE     L_end\n\t"
11596        $$emit$$"DEC     RCX\n\t"
11597        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11598        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11599        $$emit$$"ADD     0x8,RAX\n\t"
11600        $$emit$$"DEC     RCX\n\t"
11601        $$emit$$"JGE     L_sloop\n\t"
11602        $$emit$$"# L_end:\n\t"
11603     } else {
11604        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11605        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11606     }
11607     $$emit$$"# DONE"
11608   %}
11609   ins_encode %{
11610     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11611                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11612   %}
11613   ins_pipe( pipe_slow );
11614 %}
11615 
11616 // Large ClearArray non-AVX512.
11617 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11618   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11619   match(Set dummy (ClearArray cnt base));
11620   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11621   format %{ $$template
11622     if (UseFastStosb) {
11623        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11624        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11625        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11626     } else if (UseXMMForObjInit) {
11627        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11628        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11629        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11630        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11631        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11632        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11633        $$emit$$"ADD     0x40,RAX\n\t"
11634        $$emit$$"# L_zero_64_bytes:\n\t"
11635        $$emit$$"SUB     0x8,RCX\n\t"
11636        $$emit$$"JGE     L_loop\n\t"
11637        $$emit$$"ADD     0x4,RCX\n\t"
11638        $$emit$$"JL      L_tail\n\t"
11639        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11640        $$emit$$"ADD     0x20,RAX\n\t"
11641        $$emit$$"SUB     0x4,RCX\n\t"
11642        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11643        $$emit$$"ADD     0x4,RCX\n\t"
11644        $$emit$$"JLE     L_end\n\t"
11645        $$emit$$"DEC     RCX\n\t"
11646        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11647        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11648        $$emit$$"ADD     0x8,RAX\n\t"
11649        $$emit$$"DEC     RCX\n\t"
11650        $$emit$$"JGE     L_sloop\n\t"
11651        $$emit$$"# L_end:\n\t"
11652     } else {
11653        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11654        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11655        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11656     }
11657     $$emit$$"# DONE"
11658   %}
11659   ins_encode %{
11660     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11661                  $tmp$$XMMRegister, true, knoreg);
11662   %}
11663   ins_pipe( pipe_slow );
11664 %}
11665 
11666 // Large ClearArray AVX512.
11667 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11668   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11669   match(Set dummy (ClearArray cnt base));
11670   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11671   format %{ $$template
11672     if (UseFastStosb) {
11673        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11674        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11675        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11676     } else if (UseXMMForObjInit) {
11677        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11678        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11679        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11680        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11681        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11682        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11683        $$emit$$"ADD     0x40,RAX\n\t"
11684        $$emit$$"# L_zero_64_bytes:\n\t"
11685        $$emit$$"SUB     0x8,RCX\n\t"
11686        $$emit$$"JGE     L_loop\n\t"
11687        $$emit$$"ADD     0x4,RCX\n\t"
11688        $$emit$$"JL      L_tail\n\t"
11689        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11690        $$emit$$"ADD     0x20,RAX\n\t"
11691        $$emit$$"SUB     0x4,RCX\n\t"
11692        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11693        $$emit$$"ADD     0x4,RCX\n\t"
11694        $$emit$$"JLE     L_end\n\t"
11695        $$emit$$"DEC     RCX\n\t"
11696        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11697        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11698        $$emit$$"ADD     0x8,RAX\n\t"
11699        $$emit$$"DEC     RCX\n\t"
11700        $$emit$$"JGE     L_sloop\n\t"
11701        $$emit$$"# L_end:\n\t"
11702     } else {
11703        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11704        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11705        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11706     }
11707     $$emit$$"# DONE"
11708   %}
11709   ins_encode %{
11710     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11711                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11712   %}
11713   ins_pipe( pipe_slow );
11714 %}
11715 
11716 // Small ClearArray AVX512 constant length.
11717 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11718 %{
11719   predicate(!((ClearArrayNode*)n)->is_large() &&
11720                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11721   match(Set dummy (ClearArray cnt base));
11722   ins_cost(100);
11723   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11724   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11725   ins_encode %{
11726    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11727   %}
11728   ins_pipe(pipe_slow);
11729 %}
11730 
11731 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11732                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11733   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11734   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11735   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11736 
11737   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11738   ins_encode %{
11739     __ string_compare($str1$$Register, $str2$$Register,
11740                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11741                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11742   %}
11743   ins_pipe( pipe_slow );
11744 %}
11745 
11746 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11747                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11748   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11749   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11750   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11751 
11752   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11753   ins_encode %{
11754     __ string_compare($str1$$Register, $str2$$Register,
11755                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11756                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11757   %}
11758   ins_pipe( pipe_slow );
11759 %}
11760 
11761 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11762                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11763   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11764   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11765   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11766 
11767   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11768   ins_encode %{
11769     __ string_compare($str1$$Register, $str2$$Register,
11770                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11771                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11772   %}
11773   ins_pipe( pipe_slow );
11774 %}
11775 
11776 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11777                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11778   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11779   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11780   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11781 
11782   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11783   ins_encode %{
11784     __ string_compare($str1$$Register, $str2$$Register,
11785                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11786                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11787   %}
11788   ins_pipe( pipe_slow );
11789 %}
11790 
11791 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11792                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11793   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11794   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11795   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11796 
11797   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11798   ins_encode %{
11799     __ string_compare($str1$$Register, $str2$$Register,
11800                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11801                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11802   %}
11803   ins_pipe( pipe_slow );
11804 %}
11805 
11806 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11807                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11808   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11809   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11810   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11811 
11812   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11813   ins_encode %{
11814     __ string_compare($str1$$Register, $str2$$Register,
11815                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11816                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11817   %}
11818   ins_pipe( pipe_slow );
11819 %}
11820 
11821 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11822                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11823   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11824   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11825   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11826 
11827   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11828   ins_encode %{
11829     __ string_compare($str2$$Register, $str1$$Register,
11830                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11831                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11832   %}
11833   ins_pipe( pipe_slow );
11834 %}
11835 
11836 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11837                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11838   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11839   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11840   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11841 
11842   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11843   ins_encode %{
11844     __ string_compare($str2$$Register, $str1$$Register,
11845                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11846                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11847   %}
11848   ins_pipe( pipe_slow );
11849 %}
11850 
11851 // fast string equals
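// StrEquals here and AryEq further below share MacroAssembler::arrays_equals().
// The leading boolean appears to distinguish whole-array comparison (true,
// with the length taken from the arrays themselves) from the raw pointer +
// length form used for string equality (false), and the later "char" flag
// selects 8-bit vs. 16-bit elements.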
11852 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11853                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11854   predicate(!VM_Version::supports_avx512vlbw());
11855   match(Set result (StrEquals (Binary str1 str2) cnt));
11856   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11857 
11858   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11859   ins_encode %{
11860     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11861                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11862                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11863   %}
11864 
11865   ins_pipe( pipe_slow );
11866 %}
11867 
11868 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11869                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11870   predicate(VM_Version::supports_avx512vlbw());
11871   match(Set result (StrEquals (Binary str1 str2) cnt));
11872   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11873 
11874   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11875   ins_encode %{
11876     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11877                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11878                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11879   %}
11880 
11881   ins_pipe( pipe_slow );
11882 %}
11883 
11884 
11885 // fast search of substring with known size.
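// Dispatch used by the constant-length forms below: when the needle fills a
// 16-byte XMM load (>= 16 Latin-1 bytes, or >= 8 UTF-16 chars) the code picks
// string_indexofC8, which keeps the constant needle resident in a register;
// shorter constants fall back to the general string_indexof, which may stage
// data through the stack near page boundaries.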
11886 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11887                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11888   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11889   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11890   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11891 
11892   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11893   ins_encode %{
11894     int icnt2 = (int)$int_cnt2$$constant;
11895     if (icnt2 >= 16) {
11896       // IndexOf for constant substrings with size >= 16 elements
11897       // which don't need to be loaded through stack.
11898       __ string_indexofC8($str1$$Register, $str2$$Register,
11899                           $cnt1$$Register, $cnt2$$Register,
11900                           icnt2, $result$$Register,
11901                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11902     } else {
11903       // Small strings are loaded through stack if they cross page boundary.
11904       __ string_indexof($str1$$Register, $str2$$Register,
11905                         $cnt1$$Register, $cnt2$$Register,
11906                         icnt2, $result$$Register,
11907                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11908     }
11909   %}
11910   ins_pipe( pipe_slow );
11911 %}
11912 
11913 // fast search of substring with known size.
11914 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11915                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11916   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11917   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11918   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11919 
11920   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11921   ins_encode %{
11922     int icnt2 = (int)$int_cnt2$$constant;
11923     if (icnt2 >= 8) {
11924       // IndexOf for constant substrings with size >= 8 elements
11925       // which don't need to be loaded through stack.
11926       __ string_indexofC8($str1$$Register, $str2$$Register,
11927                           $cnt1$$Register, $cnt2$$Register,
11928                           icnt2, $result$$Register,
11929                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11930     } else {
11931       // Small strings are loaded through stack if they cross page boundary.
11932       __ string_indexof($str1$$Register, $str2$$Register,
11933                         $cnt1$$Register, $cnt2$$Register,
11934                         icnt2, $result$$Register,
11935                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11936     }
11937   %}
11938   ins_pipe( pipe_slow );
11939 %}
11940 
11941 // fast search of substring with known size.
11942 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11943                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11944   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11945   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11946   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11947 
11948   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11949   ins_encode %{
11950     int icnt2 = (int)$int_cnt2$$constant;
11951     if (icnt2 >= 8) {
11952       // IndexOf for constant substrings with size >= 8 elements
11953       // which don't need to be loaded through stack.
11954       __ string_indexofC8($str1$$Register, $str2$$Register,
11955                           $cnt1$$Register, $cnt2$$Register,
11956                           icnt2, $result$$Register,
11957                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11958     } else {
11959       // Small strings are loaded through stack if they cross page boundary.
11960       __ string_indexof($str1$$Register, $str2$$Register,
11961                         $cnt1$$Register, $cnt2$$Register,
11962                         icnt2, $result$$Register,
11963                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11964     }
11965   %}
11966   ins_pipe( pipe_slow );
11967 %}
11968 
11969 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11970                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11971   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11972   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11973   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11974 
11975   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11976   ins_encode %{
11977     __ string_indexof($str1$$Register, $str2$$Register,
11978                       $cnt1$$Register, $cnt2$$Register,
11979                       (-1), $result$$Register,
11980                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11981   %}
11982   ins_pipe( pipe_slow );
11983 %}
11984 
11985 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11986                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11987   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11988   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11989   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11990 
11991   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11992   ins_encode %{
11993     __ string_indexof($str1$$Register, $str2$$Register,
11994                       $cnt1$$Register, $cnt2$$Register,
11995                       (-1), $result$$Register,
11996                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11997   %}
11998   ins_pipe( pipe_slow );
11999 %}
12000 
12001 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12002                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12003   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12004   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12005   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12006 
12007   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12008   ins_encode %{
12009     __ string_indexof($str1$$Register, $str2$$Register,
12010                       $cnt1$$Register, $cnt2$$Register,
12011                       (-1), $result$$Register,
12012                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12013   %}
12014   ins_pipe( pipe_slow );
12015 %}
12016 
12017 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12018                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12019   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12020   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12021   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12022   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12023   ins_encode %{
12024     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12025                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12026   %}
12027   ins_pipe( pipe_slow );
12028 %}
12029 
12030 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12031                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12032   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12033   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12034   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12035   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12036   ins_encode %{
12037     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12038                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12039   %}
12040   ins_pipe( pipe_slow );
12041 %}
12042 
12043 
12044 // fast array equals
12045 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12046                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12047 %{
12048   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12049   match(Set result (AryEq ary1 ary2));
12050   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12051   //ins_cost(300);
12052 
12053   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12054   ins_encode %{
12055     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12056                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12057                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12058   %}
12059   ins_pipe( pipe_slow );
12060 %}
12061 
12062 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12063                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12064 %{
12065   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12066   match(Set result (AryEq ary1 ary2));
12067   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12068   //ins_cost(300);
12069 
12070   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12071   ins_encode %{
12072     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12073                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12074                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12075   %}
12076   ins_pipe( pipe_slow );
12077 %}
12078 
12079 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12080                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12081 %{
12082   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12083   match(Set result (AryEq ary1 ary2));
12084   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12085   //ins_cost(300);
12086 
12087   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12088   ins_encode %{
12089     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12090                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12091                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12092   %}
12093   ins_pipe( pipe_slow );
12094 %}
12095 
12096 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12097                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12098 %{
12099   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12100   match(Set result (AryEq ary1 ary2));
12101   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12102   //ins_cost(300);
12103 
12104   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12105   ins_encode %{
12106     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12107                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12108                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12109   %}
12110   ins_pipe( pipe_slow );
12111 %}
12112 
12113 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12114                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12115 %{
12116   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12117   match(Set result (HasNegatives ary1 len));
12118   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12119 
12120   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12121   ins_encode %{
12122     __ has_negatives($ary1$$Register, $len$$Register,
12123                      $result$$Register, $tmp3$$Register,
12124                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12125   %}
12126   ins_pipe( pipe_slow );
12127 %}
12128 
12129 instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12130                            regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12131 %{
12132   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12133   match(Set result (HasNegatives ary1 len));
12134   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12135 
12136   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12137   ins_encode %{
12138     __ has_negatives($ary1$$Register, $len$$Register,
12139                      $result$$Register, $tmp3$$Register,
12140                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12141   %}
12142   ins_pipe( pipe_slow );
12143 %}
12144 
12145 
12146 // fast char[] to byte[] compression
12147 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12148                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12149   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12150   match(Set result (StrCompressedCopy src (Binary dst len)));
12151   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12152 
12153   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12154   ins_encode %{
12155     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12156                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12157                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12158                            knoreg, knoreg);
12159   %}
12160   ins_pipe( pipe_slow );
12161 %}
12162 
12163 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12164                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12165   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12166   match(Set result (StrCompressedCopy src (Binary dst len)));
12167   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12168 
12169   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12170   ins_encode %{
12171     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12172                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12173                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12174                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12175   %}
12176   ins_pipe( pipe_slow );
12177 %}
12178 
12179 // fast byte[] to char[] inflation
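//
// In essence the intrinsic does the following (an illustrative C sketch only;
// the vectorized work is emitted by MacroAssembler::byte_array_inflate below):
//
//   void inflate(const jbyte* src, jchar* dst, int len) {
//     for (int i = 0; i < len; i++) {
//       dst[i] = (jchar)(src[i] & 0xff);   // zero-extend each byte to a char
//     }
//   }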
12180 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12181                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12182   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12183   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12184   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12185 
12186   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12187   ins_encode %{
12188     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12189                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12190   %}
12191   ins_pipe( pipe_slow );
12192 %}
12193 
12194 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12195                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12196   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12197   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12198   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12199 
12200   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12201   ins_encode %{
12202     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12203                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12204   %}
12205   ins_pipe( pipe_slow );
12206 %}
12207 
12208 // encode char[] to byte[] in ISO_8859_1
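//
// In essence (an illustrative C sketch only; MacroAssembler::encode_iso_array
// below does the vectorized work): copy chars to bytes until one does not fit,
// and report how many were encoded:
//
//   int encode_iso(const jchar* src, jbyte* dst, int len) {
//     int i = 0;
//     while (i < len && src[i] <= 0xff) {   // 0x7f for the ASCII variant below
//       dst[i] = (jbyte)src[i];
//       i++;
//     }
//     return i;   // number of chars successfully encoded
//   }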
12209 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12210                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12211                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12212   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12213   match(Set result (EncodeISOArray src (Binary dst len)));
12214   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12215 
12216   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12217   ins_encode %{
12218     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12219                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12220                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12221   %}
12222   ins_pipe( pipe_slow );
12223 %}
12224 
12225 // encode char[] to byte[] in ASCII
12226 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12227                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12228                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12229   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12230   match(Set result (EncodeISOArray src (Binary dst len)));
12231   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12232 
12233   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12234   ins_encode %{
12235     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12236                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12237                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12238   %}
12239   ins_pipe( pipe_slow );
12240 %}
12241 
12242 //----------Control Flow Instructions------------------------------------------
12243 // Signed compare Instructions
12244 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12245   match(Set cr (CmpI op1 op2));
12246   effect( DEF cr, USE op1, USE op2 );
12247   format %{ "CMP    $op1,$op2" %}
12248   opcode(0x3B);  /* Opcode 3B /r */
12249   ins_encode( OpcP, RegReg( op1, op2) );
12250   ins_pipe( ialu_cr_reg_reg );
12251 %}
12252 
12253 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12254   match(Set cr (CmpI op1 op2));
12255   effect( DEF cr, USE op1 );
12256   format %{ "CMP    $op1,$op2" %}
12257   opcode(0x81,0x07);  /* Opcode 81 /7 */
12258   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12259   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12260   ins_pipe( ialu_cr_reg_imm );
12261 %}
12262 
12263 // Cisc-spilled version of cmpI_eReg
12264 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12265   match(Set cr (CmpI op1 (LoadI op2)));
12266 
12267   format %{ "CMP    $op1,$op2" %}
12268   ins_cost(500);
12269   opcode(0x3B);  /* Opcode 3B /r */
12270   ins_encode( OpcP, RegMem( op1, op2) );
12271   ins_pipe( ialu_cr_reg_mem );
12272 %}
12273 
12274 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12275   match(Set cr (CmpI src zero));
12276   effect( DEF cr, USE src );
12277 
12278   format %{ "TEST   $src,$src" %}
12279   opcode(0x85);
12280   ins_encode( OpcP, RegReg( src, src ) );
12281   ins_pipe( ialu_cr_reg_imm );
12282 %}
12283 
12284 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12285   match(Set cr (CmpI (AndI src con) zero));
12286 
12287   format %{ "TEST   $src,$con" %}
12288   opcode(0xF7,0x00);
12289   ins_encode( OpcP, RegOpc(src), Con32(con) );
12290   ins_pipe( ialu_cr_reg_imm );
12291 %}
12292 
12293 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12294   match(Set cr (CmpI (AndI src mem) zero));
12295 
12296   format %{ "TEST   $src,$mem" %}
12297   opcode(0x85);
12298   ins_encode( OpcP, RegMem( src, mem ) );
12299   ins_pipe( ialu_cr_reg_mem );
12300 %}
12301 
12302 // Unsigned compare Instructions; really, same as signed except they
12303 // produce an eFlagsRegU instead of eFlagsReg.
12304 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12305   match(Set cr (CmpU op1 op2));
12306 
12307   format %{ "CMPu   $op1,$op2" %}
12308   opcode(0x3B);  /* Opcode 3B /r */
12309   ins_encode( OpcP, RegReg( op1, op2) );
12310   ins_pipe( ialu_cr_reg_reg );
12311 %}
12312 
12313 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12314   match(Set cr (CmpU op1 op2));
12315 
12316   format %{ "CMPu   $op1,$op2" %}
12317   opcode(0x81,0x07);  /* Opcode 81 /7 */
12318   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12319   ins_pipe( ialu_cr_reg_imm );
12320 %}
12321 
// Cisc-spilled version of cmpU_eReg
12323 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12324   match(Set cr (CmpU op1 (LoadI op2)));
12325 
12326   format %{ "CMPu   $op1,$op2" %}
12327   ins_cost(500);
12328   opcode(0x3B);  /* Opcode 3B /r */
12329   ins_encode( OpcP, RegMem( op1, op2) );
12330   ins_pipe( ialu_cr_reg_mem );
12331 %}
12332 
12333 // // Cisc-spilled version of cmpU_eReg
12334 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12335 //  match(Set cr (CmpU (LoadI op1) op2));
12336 //
12337 //  format %{ "CMPu   $op1,$op2" %}
12338 //  ins_cost(500);
12339 //  opcode(0x39);  /* Opcode 39 /r */
12340 //  ins_encode( OpcP, RegMem( op1, op2) );
12341 //%}
12342 
12343 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12344   match(Set cr (CmpU src zero));
12345 
12346   format %{ "TESTu  $src,$src" %}
12347   opcode(0x85);
12348   ins_encode( OpcP, RegReg( src, src ) );
12349   ins_pipe( ialu_cr_reg_imm );
12350 %}
12351 
12352 // Unsigned pointer compare Instructions
12353 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12354   match(Set cr (CmpP op1 op2));
12355 
12356   format %{ "CMPu   $op1,$op2" %}
12357   opcode(0x3B);  /* Opcode 3B /r */
12358   ins_encode( OpcP, RegReg( op1, op2) );
12359   ins_pipe( ialu_cr_reg_reg );
12360 %}
12361 
12362 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12363   match(Set cr (CmpP op1 op2));
12364 
12365   format %{ "CMPu   $op1,$op2" %}
12366   opcode(0x81,0x07);  /* Opcode 81 /7 */
12367   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12368   ins_pipe( ialu_cr_reg_imm );
12369 %}
12370 
// Cisc-spilled version of cmpP_eReg
12372 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12373   match(Set cr (CmpP op1 (LoadP op2)));
12374 
12375   format %{ "CMPu   $op1,$op2" %}
12376   ins_cost(500);
12377   opcode(0x3B);  /* Opcode 3B /r */
12378   ins_encode( OpcP, RegMem( op1, op2) );
12379   ins_pipe( ialu_cr_reg_mem );
12380 %}
12381 
12382 // // Cisc-spilled version of cmpP_eReg
12383 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12384 //  match(Set cr (CmpP (LoadP op1) op2));
12385 //
12386 //  format %{ "CMPu   $op1,$op2" %}
12387 //  ins_cost(500);
12388 //  opcode(0x39);  /* Opcode 39 /r */
12389 //  ins_encode( OpcP, RegMem( op1, op2) );
12390 //%}
12391 
12392 // Compare raw pointer (used in out-of-heap check).
12393 // Only works because non-oop pointers must be raw pointers
12394 // and raw pointers have no anti-dependencies.
12395 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12396   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12397   match(Set cr (CmpP op1 (LoadP op2)));
12398 
12399   format %{ "CMPu   $op1,$op2" %}
12400   opcode(0x3B);  /* Opcode 3B /r */
12401   ins_encode( OpcP, RegMem( op1, op2) );
12402   ins_pipe( ialu_cr_reg_mem );
12403 %}
12404 
12405 //
12406 // This will generate a signed flags result. This should be ok
12407 // since any compare to a zero should be eq/neq.
12408 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12409   match(Set cr (CmpP src zero));
12410 
12411   format %{ "TEST   $src,$src" %}
12412   opcode(0x85);
12413   ins_encode( OpcP, RegReg( src, src ) );
12414   ins_pipe( ialu_cr_reg_imm );
12415 %}
12416 
12417 // Cisc-spilled version of testP_reg
12418 // This will generate a signed flags result. This should be ok
12419 // since any compare to a zero should be eq/neq.
12420 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12421   match(Set cr (CmpP (LoadP op) zero));
12422 
12423   format %{ "TEST   $op,0xFFFFFFFF" %}
12424   ins_cost(500);
12425   opcode(0xF7);               /* Opcode F7 /0 */
12426   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12427   ins_pipe( ialu_cr_reg_imm );
12428 %}
12429 
12430 // Yanked all unsigned pointer compare operations.
12431 // Pointer compares are done with CmpP which is already unsigned.
12432 
12433 //----------Max and Min--------------------------------------------------------
12434 // Min Instructions
12435 ////
12436 //   *** Min and Max using the conditional move are slower than the
12437 //   *** branch version on a Pentium III.
12438 // // Conditional move for min
12439 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12440 //  effect( USE_DEF op2, USE op1, USE cr );
12441 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12442 //  opcode(0x4C,0x0F);
12443 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12444 //  ins_pipe( pipe_cmov_reg );
12445 //%}
12446 //
12447 //// Min Register with Register (P6 version)
12448 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12449 //  predicate(VM_Version::supports_cmov() );
12450 //  match(Set op2 (MinI op1 op2));
12451 //  ins_cost(200);
12452 //  expand %{
12453 //    eFlagsReg cr;
12454 //    compI_eReg(cr,op1,op2);
12455 //    cmovI_reg_lt(op2,op1,cr);
12456 //  %}
12457 //%}
12458 
12459 // Min Register with Register (generic version)
12460 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12461   match(Set dst (MinI dst src));
12462   effect(KILL flags);
12463   ins_cost(300);
12464 
12465   format %{ "MIN    $dst,$src" %}
12466   opcode(0xCC);
12467   ins_encode( min_enc(dst,src) );
12468   ins_pipe( pipe_slow );
12469 %}
12470 
12471 // Max Register with Register
12472 //   *** Min and Max using the conditional move are slower than the
12473 //   *** branch version on a Pentium III.
12474 // // Conditional move for max
12475 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12476 //  effect( USE_DEF op2, USE op1, USE cr );
12477 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12478 //  opcode(0x4F,0x0F);
12479 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12480 //  ins_pipe( pipe_cmov_reg );
12481 //%}
12482 //
12483 // // Max Register with Register (P6 version)
12484 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12485 //  predicate(VM_Version::supports_cmov() );
12486 //  match(Set op2 (MaxI op1 op2));
12487 //  ins_cost(200);
12488 //  expand %{
12489 //    eFlagsReg cr;
12490 //    compI_eReg(cr,op1,op2);
12491 //    cmovI_reg_gt(op2,op1,cr);
12492 //  %}
12493 //%}
12494 
12495 // Max Register with Register (generic version)
12496 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12497   match(Set dst (MaxI dst src));
12498   effect(KILL flags);
12499   ins_cost(300);
12500 
12501   format %{ "MAX    $dst,$src" %}
12502   opcode(0xCC);
12503   ins_encode( max_enc(dst,src) );
12504   ins_pipe( pipe_slow );
12505 %}
12506 
12507 // ============================================================================
12508 // Counted Loop limit node which represents exact final iterator value.
12509 // Note: the resulting value should fit into integer range since
12510 // counted loops have limit check on overflow.
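//
// For example (illustrative only): with init = 0, limit = 10 and stride = 3
// the loop runs 4 times, so the exact final iterator value is
//   0 + ((10 - 0 + 3 - 1) / 3) * 3 = 0 + 4 * 3 = 12.
// Roughly, in C (assuming stride > 0; the encoding below handles negative
// strides by negating and restoring the sign, and uses a 64-bit intermediate
// so the subtraction cannot overflow):
//   int exact_limit(int init, int limit, int stride) {
//     return init + ((limit - init + stride - 1) / stride) * stride;
//   }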
12511 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12512   match(Set limit (LoopLimit (Binary init limit) stride));
12513   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12514   ins_cost(300);
12515 
12516   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12517   ins_encode %{
12518     int strd = (int)$stride$$constant;
12519     assert(strd != 1 && strd != -1, "sanity");
12520     int m1 = (strd > 0) ? 1 : -1;
12521     // Convert limit to long (EAX:EDX)
12522     __ cdql();
12523     // Convert init to long (init:tmp)
12524     __ movl($tmp$$Register, $init$$Register);
12525     __ sarl($tmp$$Register, 31);
12526     // $limit - $init
12527     __ subl($limit$$Register, $init$$Register);
12528     __ sbbl($limit_hi$$Register, $tmp$$Register);
12529     // + ($stride - 1)
12530     if (strd > 0) {
12531       __ addl($limit$$Register, (strd - 1));
12532       __ adcl($limit_hi$$Register, 0);
12533       __ movl($tmp$$Register, strd);
12534     } else {
12535       __ addl($limit$$Register, (strd + 1));
12536       __ adcl($limit_hi$$Register, -1);
12537       __ lneg($limit_hi$$Register, $limit$$Register);
12538       __ movl($tmp$$Register, -strd);
12539     }
    // signed division: (EAX:EDX) / pos_stride
12541     __ idivl($tmp$$Register);
12542     if (strd < 0) {
12543       // restore sign
12544       __ negl($tmp$$Register);
12545     }
12546     // (EAX) * stride
12547     __ mull($tmp$$Register);
12548     // + init (ignore upper bits)
12549     __ addl($limit$$Register, $init$$Register);
12550   %}
12551   ins_pipe( pipe_slow );
12552 %}
12553 
12554 // ============================================================================
12555 // Branch Instructions
12556 // Jump Table
12557 instruct jumpXtnd(rRegI switch_val) %{
12558   match(Jump switch_val);
12559   ins_cost(350);
12560   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12561   ins_encode %{
12562     // Jump to Address(table_base + switch_reg)
12563     Address index(noreg, $switch_val$$Register, Address::times_1);
12564     __ jump(ArrayAddress($constantaddress, index));
12565   %}
12566   ins_pipe(pipe_jmp);
12567 %}
12568 
12569 // Jump Direct - Label defines a relative address from JMP+1
12570 instruct jmpDir(label labl) %{
12571   match(Goto);
12572   effect(USE labl);
12573 
12574   ins_cost(300);
12575   format %{ "JMP    $labl" %}
12576   size(5);
12577   ins_encode %{
12578     Label* L = $labl$$label;
12579     __ jmp(*L, false); // Always long jump
12580   %}
12581   ins_pipe( pipe_jmp );
12582 %}
12583 
12584 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12585 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12586   match(If cop cr);
12587   effect(USE labl);
12588 
12589   ins_cost(300);
12590   format %{ "J$cop    $labl" %}
12591   size(6);
12592   ins_encode %{
12593     Label* L = $labl$$label;
12594     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12595   %}
12596   ins_pipe( pipe_jcc );
12597 %}
12598 
12599 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12600 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12601   predicate(!n->has_vector_mask_set());
12602   match(CountedLoopEnd cop cr);
12603   effect(USE labl);
12604 
12605   ins_cost(300);
12606   format %{ "J$cop    $labl\t# Loop end" %}
12607   size(6);
12608   ins_encode %{
12609     Label* L = $labl$$label;
12610     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12611   %}
12612   ins_pipe( pipe_jcc );
12613 %}
12614 
12615 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12616 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12617   predicate(!n->has_vector_mask_set());
12618   match(CountedLoopEnd cop cmp);
12619   effect(USE labl);
12620 
12621   ins_cost(300);
12622   format %{ "J$cop,u  $labl\t# Loop end" %}
12623   size(6);
12624   ins_encode %{
12625     Label* L = $labl$$label;
12626     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12627   %}
12628   ins_pipe( pipe_jcc );
12629 %}
12630 
12631 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12632   predicate(!n->has_vector_mask_set());
12633   match(CountedLoopEnd cop cmp);
12634   effect(USE labl);
12635 
12636   ins_cost(200);
12637   format %{ "J$cop,u  $labl\t# Loop end" %}
12638   size(6);
12639   ins_encode %{
12640     Label* L = $labl$$label;
12641     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12642   %}
12643   ins_pipe( pipe_jcc );
12644 %}
12645 
12646 // mask version
12647 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
12650 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
12651   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12652   match(CountedLoopEnd cop cr);
12653   effect(USE labl, TEMP ktmp);
12654 
12655   ins_cost(400);
12656   format %{ "J$cop    $labl\t# Loop end\n\t"
12657             "restorevectmask \t# vector mask restore for loops" %}
12658   size(10);
12659   ins_encode %{
12660     Label* L = $labl$$label;
12661     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12662     __ restorevectmask($ktmp$$KRegister);
12663   %}
12664   ins_pipe( pipe_jcc );
12665 %}
12666 
12667 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
12670 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
12671   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12672   match(CountedLoopEnd cop cmp);
12673   effect(USE labl, TEMP ktmp);
12674 
12675   ins_cost(400);
12676   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12677             "restorevectmask \t# vector mask restore for loops" %}
12678   size(10);
12679   ins_encode %{
12680     Label* L = $labl$$label;
12681     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12682     __ restorevectmask($ktmp$$KRegister);
12683   %}
12684   ins_pipe( pipe_jcc );
12685 %}
12686 
// Bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
12689 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
12690   predicate(PostLoopMultiversioning && n->has_vector_mask_set());
12691   match(CountedLoopEnd cop cmp);
12692   effect(USE labl, TEMP ktmp);
12693 
12694   ins_cost(300);
12695   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12696             "restorevectmask \t# vector mask restore for loops" %}
12697   size(10);
12698   ins_encode %{
12699     Label* L = $labl$$label;
12700     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12701     __ restorevectmask($ktmp$$KRegister);
12702   %}
12703   ins_pipe( pipe_jcc );
12704 %}
12705 
12706 // Jump Direct Conditional - using unsigned comparison
12707 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12708   match(If cop cmp);
12709   effect(USE labl);
12710 
12711   ins_cost(300);
12712   format %{ "J$cop,u  $labl" %}
12713   size(6);
12714   ins_encode %{
12715     Label* L = $labl$$label;
12716     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12717   %}
12718   ins_pipe(pipe_jcc);
12719 %}
12720 
12721 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12722   match(If cop cmp);
12723   effect(USE labl);
12724 
12725   ins_cost(200);
12726   format %{ "J$cop,u  $labl" %}
12727   size(6);
12728   ins_encode %{
12729     Label* L = $labl$$label;
12730     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12731   %}
12732   ins_pipe(pipe_jcc);
12733 %}
12734 
12735 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12736   match(If cop cmp);
12737   effect(USE labl);
12738 
12739   ins_cost(200);
12740   format %{ $$template
12741     if ($cop$$cmpcode == Assembler::notEqual) {
12742       $$emit$$"JP,u   $labl\n\t"
12743       $$emit$$"J$cop,u   $labl"
12744     } else {
12745       $$emit$$"JP,u   done\n\t"
12746       $$emit$$"J$cop,u   $labl\n\t"
12747       $$emit$$"done:"
12748     }
12749   %}
12750   ins_encode %{
12751     Label* l = $labl$$label;
12752     if ($cop$$cmpcode == Assembler::notEqual) {
12753       __ jcc(Assembler::parity, *l, false);
12754       __ jcc(Assembler::notEqual, *l, false);
12755     } else if ($cop$$cmpcode == Assembler::equal) {
12756       Label done;
12757       __ jccb(Assembler::parity, done);
12758       __ jcc(Assembler::equal, *l, false);
12759       __ bind(done);
12760     } else {
12761        ShouldNotReachHere();
12762     }
12763   %}
12764   ins_pipe(pipe_jcc);
12765 %}
12766 
12767 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden internal
// cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
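//
// Conceptually the scan is equivalent to (an illustrative sketch only; the
// actual code comes from enc_PartialSubtypeCheck(), and the field names are
// approximations of the Klass layout shown in the format string below):
//
//   for (int i = 0; i < sub->secondary_supers->length; i++) {
//     if (sub->secondary_supers->at(i) == super) {
//       sub->secondary_super_cache = super;   // remember the hit
//       return 0;                             // hit: zero result, flags Z
//     }
//   }
//   return 1;                                 // miss: non-zero result, flags NZ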
12772 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12773   match(Set result (PartialSubtypeCheck sub super));
12774   effect( KILL rcx, KILL cr );
12775 
12776   ins_cost(1100);  // slightly larger than the next version
12777   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12778             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12779             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12780             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12781             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12782             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12783             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12784      "miss:\t" %}
12785 
12786   opcode(0x1); // Force a XOR of EDI
12787   ins_encode( enc_PartialSubtypeCheck() );
12788   ins_pipe( pipe_slow );
12789 %}
12790 
12791 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12792   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12793   effect( KILL rcx, KILL result );
12794 
12795   ins_cost(1000);
12796   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12797             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12798             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12799             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12800             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12801             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12802      "miss:\t" %}
12803 
12804   opcode(0x0);  // No need to XOR EDI
12805   ins_encode( enc_PartialSubtypeCheck() );
12806   ins_pipe( pipe_slow );
12807 %}
12808 
12809 // ============================================================================
12810 // Branch Instructions -- short offset versions
12811 //
12812 // These instructions are used to replace jumps of a long offset (the default
12813 // match) with jumps of a shorter offset.  These instructions are all tagged
12814 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12815 // match rules in general matching.  Instead, the ADLC generates a conversion
12816 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether a
// branch fits the short form using the is_short_branch_offset() predicate in
// the machine-specific code section of the file.
12820 
12821 // Jump Direct - Label defines a relative address from JMP+1
12822 instruct jmpDir_short(label labl) %{
12823   match(Goto);
12824   effect(USE labl);
12825 
12826   ins_cost(300);
12827   format %{ "JMP,s  $labl" %}
12828   size(2);
12829   ins_encode %{
12830     Label* L = $labl$$label;
12831     __ jmpb(*L);
12832   %}
12833   ins_pipe( pipe_jmp );
12834   ins_short_branch(1);
12835 %}
12836 
12837 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12838 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12839   match(If cop cr);
12840   effect(USE labl);
12841 
12842   ins_cost(300);
12843   format %{ "J$cop,s  $labl" %}
12844   size(2);
12845   ins_encode %{
12846     Label* L = $labl$$label;
12847     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12848   %}
12849   ins_pipe( pipe_jcc );
12850   ins_short_branch(1);
12851 %}
12852 
12853 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12854 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12855   match(CountedLoopEnd cop cr);
12856   effect(USE labl);
12857 
12858   ins_cost(300);
12859   format %{ "J$cop,s  $labl\t# Loop end" %}
12860   size(2);
12861   ins_encode %{
12862     Label* L = $labl$$label;
12863     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12864   %}
12865   ins_pipe( pipe_jcc );
12866   ins_short_branch(1);
12867 %}
12868 
12869 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12870 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12871   match(CountedLoopEnd cop cmp);
12872   effect(USE labl);
12873 
12874   ins_cost(300);
12875   format %{ "J$cop,us $labl\t# Loop end" %}
12876   size(2);
12877   ins_encode %{
12878     Label* L = $labl$$label;
12879     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12880   %}
12881   ins_pipe( pipe_jcc );
12882   ins_short_branch(1);
12883 %}
12884 
12885 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12886   match(CountedLoopEnd cop cmp);
12887   effect(USE labl);
12888 
12889   ins_cost(300);
12890   format %{ "J$cop,us $labl\t# Loop end" %}
12891   size(2);
12892   ins_encode %{
12893     Label* L = $labl$$label;
12894     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12895   %}
12896   ins_pipe( pipe_jcc );
12897   ins_short_branch(1);
12898 %}
12899 
12900 // Jump Direct Conditional - using unsigned comparison
12901 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12902   match(If cop cmp);
12903   effect(USE labl);
12904 
12905   ins_cost(300);
12906   format %{ "J$cop,us $labl" %}
12907   size(2);
12908   ins_encode %{
12909     Label* L = $labl$$label;
12910     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12911   %}
12912   ins_pipe( pipe_jcc );
12913   ins_short_branch(1);
12914 %}
12915 
12916 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12917   match(If cop cmp);
12918   effect(USE labl);
12919 
12920   ins_cost(300);
12921   format %{ "J$cop,us $labl" %}
12922   size(2);
12923   ins_encode %{
12924     Label* L = $labl$$label;
12925     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12926   %}
12927   ins_pipe( pipe_jcc );
12928   ins_short_branch(1);
12929 %}
12930 
12931 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12932   match(If cop cmp);
12933   effect(USE labl);
12934 
12935   ins_cost(300);
12936   format %{ $$template
12937     if ($cop$$cmpcode == Assembler::notEqual) {
12938       $$emit$$"JP,u,s   $labl\n\t"
12939       $$emit$$"J$cop,u,s   $labl"
12940     } else {
12941       $$emit$$"JP,u,s   done\n\t"
12942       $$emit$$"J$cop,u,s  $labl\n\t"
12943       $$emit$$"done:"
12944     }
12945   %}
12946   size(4);
12947   ins_encode %{
12948     Label* l = $labl$$label;
12949     if ($cop$$cmpcode == Assembler::notEqual) {
12950       __ jccb(Assembler::parity, *l);
12951       __ jccb(Assembler::notEqual, *l);
12952     } else if ($cop$$cmpcode == Assembler::equal) {
12953       Label done;
12954       __ jccb(Assembler::parity, done);
12955       __ jccb(Assembler::equal, *l);
12956       __ bind(done);
12957     } else {
12958        ShouldNotReachHere();
12959     }
12960   %}
12961   ins_pipe(pipe_jcc);
12962   ins_short_branch(1);
12963 %}
12964 
12965 // ============================================================================
12966 // Long Compare
12967 //
12968 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12969 // is tricky.  The flavor of compare used depends on whether we are testing
12970 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12971 // The GE test is the negated LT test.  The LE test can be had by commuting
12972 // the operands (yielding a GE test) and then negating; negate again for the
12973 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12974 // NE test is negated from that.
12975 
12976 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12977 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12978 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12979 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12980 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12981 // foo match ends up with the wrong leaf.  One fix is to not match both
12982 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12983 // both forms beat the trinary form of long-compare and both are very useful
12984 // on Intel which has so few registers.
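//
// As a rough sketch of the flag tricks used below (illustrative only):
//
//   // LT/GE against zero: only the sign bit of the high word matters.
//   bool lt0(jint hi, jint lo)  { return hi < 0; }
//
//   // EQ/NE against zero: a long is zero iff (lo | hi) == 0.
//   bool eq0(jint hi, jint lo)  { return (lo | hi) == 0; }
//
//   // LT/GE between registers: CMP on the low words followed by SBB on the
//   // high words leaves the sign/overflow flags of the full 64-bit
//   // difference, which is all that a signed jl/jge consumes.  ZF is NOT
//   // valid for the full width, hence the separate EQ/NE flavors.
//
// LE and GT are obtained by commuting the operands of the LT/GE forms and
// using the commuted condition, as the _LEGT instructs below do.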
12985 
12986 // Manifest a CmpL result in an integer register.  Very painful.
12987 // This is the test to avoid.
12988 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12989   match(Set dst (CmpL3 src1 src2));
12990   effect( KILL flags );
12991   ins_cost(1000);
12992   format %{ "XOR    $dst,$dst\n\t"
12993             "CMP    $src1.hi,$src2.hi\n\t"
12994             "JLT,s  m_one\n\t"
12995             "JGT,s  p_one\n\t"
12996             "CMP    $src1.lo,$src2.lo\n\t"
12997             "JB,s   m_one\n\t"
12998             "JEQ,s  done\n"
12999     "p_one:\tINC    $dst\n\t"
13000             "JMP,s  done\n"
13001     "m_one:\tDEC    $dst\n"
13002      "done:" %}
13003   ins_encode %{
13004     Label p_one, m_one, done;
13005     __ xorptr($dst$$Register, $dst$$Register);
13006     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13007     __ jccb(Assembler::less,    m_one);
13008     __ jccb(Assembler::greater, p_one);
13009     __ cmpl($src1$$Register, $src2$$Register);
13010     __ jccb(Assembler::below,   m_one);
13011     __ jccb(Assembler::equal,   done);
13012     __ bind(p_one);
13013     __ incrementl($dst$$Register);
13014     __ jmpb(done);
13015     __ bind(m_one);
13016     __ decrementl($dst$$Register);
13017     __ bind(done);
13018   %}
13019   ins_pipe( pipe_slow );
13020 %}
13021 
13022 //======
13023 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13024 // compares.  Can be used for LE or GT compares by reversing arguments.
13025 // NOT GOOD FOR EQ/NE tests.
13026 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13027   match( Set flags (CmpL src zero ));
13028   ins_cost(100);
13029   format %{ "TEST   $src.hi,$src.hi" %}
13030   opcode(0x85);
13031   ins_encode( OpcP, RegReg_Hi2( src, src ) );
13032   ins_pipe( ialu_cr_reg_reg );
13033 %}
13034 
13035 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13036 // compares.  Can be used for LE or GT compares by reversing arguments.
13037 // NOT GOOD FOR EQ/NE tests.
13038 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13039   match( Set flags (CmpL src1 src2 ));
13040   effect( TEMP tmp );
13041   ins_cost(300);
13042   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13043             "MOV    $tmp,$src1.hi\n\t"
13044             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13045   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13046   ins_pipe( ialu_cr_reg_reg );
13047 %}
13048 
// Long compares reg < zero/reg OR reg >= zero/reg.
13050 // Just a wrapper for a normal branch, plus the predicate test.
13051 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13052   match(If cmp flags);
13053   effect(USE labl);
13054   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13055   expand %{
13056     jmpCon(cmp,flags,labl);    // JLT or JGE...
13057   %}
13058 %}
13059 
13060 //======
13061 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13062 // compares.  Can be used for LE or GT compares by reversing arguments.
13063 // NOT GOOD FOR EQ/NE tests.
13064 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13065   match(Set flags (CmpUL src zero));
13066   ins_cost(100);
13067   format %{ "TEST   $src.hi,$src.hi" %}
13068   opcode(0x85);
13069   ins_encode(OpcP, RegReg_Hi2(src, src));
13070   ins_pipe(ialu_cr_reg_reg);
13071 %}
13072 
13073 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13074 // compares.  Can be used for LE or GT compares by reversing arguments.
13075 // NOT GOOD FOR EQ/NE tests.
13076 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13077   match(Set flags (CmpUL src1 src2));
13078   effect(TEMP tmp);
13079   ins_cost(300);
13080   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13081             "MOV    $tmp,$src1.hi\n\t"
13082             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13083   ins_encode(long_cmp_flags2(src1, src2, tmp));
13084   ins_pipe(ialu_cr_reg_reg);
13085 %}
13086 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13088 // Just a wrapper for a normal branch, plus the predicate test.
13089 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13090   match(If cmp flags);
13091   effect(USE labl);
13092   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13093   expand %{
13094     jmpCon(cmp, flags, labl);    // JLT or JGE...
13095   %}
13096 %}
13097 
13098 // Compare 2 longs and CMOVE longs.
13099 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13100   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13101   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13102   ins_cost(400);
13103   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13104             "CMOV$cmp $dst.hi,$src.hi" %}
13105   opcode(0x0F,0x40);
13106   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13107   ins_pipe( pipe_cmov_reg_long );
13108 %}
13109 
13110 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13111   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13112   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13113   ins_cost(500);
13114   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13115             "CMOV$cmp $dst.hi,$src.hi" %}
13116   opcode(0x0F,0x40);
13117   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13118   ins_pipe( pipe_cmov_reg_long );
13119 %}
13120 
13121 // Compare 2 longs and CMOVE ints.
13122 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13123   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13124   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13125   ins_cost(200);
13126   format %{ "CMOV$cmp $dst,$src" %}
13127   opcode(0x0F,0x40);
13128   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13129   ins_pipe( pipe_cmov_reg );
13130 %}
13131 
13132 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13133   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13134   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13135   ins_cost(250);
13136   format %{ "CMOV$cmp $dst,$src" %}
13137   opcode(0x0F,0x40);
13138   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13139   ins_pipe( pipe_cmov_mem );
13140 %}
13141 
// Compare 2 longs and CMOVE ptrs.
13143 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13144   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13145   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13146   ins_cost(200);
13147   format %{ "CMOV$cmp $dst,$src" %}
13148   opcode(0x0F,0x40);
13149   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13150   ins_pipe( pipe_cmov_reg );
13151 %}
13152 
13153 // Compare 2 longs and CMOVE doubles
13154 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13156   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13157   ins_cost(200);
13158   expand %{
13159     fcmovDPR_regS(cmp,flags,dst,src);
13160   %}
13161 %}
13162 
13163 // Compare 2 longs and CMOVE doubles
13164 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13166   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13167   ins_cost(200);
13168   expand %{
13169     fcmovD_regS(cmp,flags,dst,src);
13170   %}
13171 %}
13172 
13173 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13175   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13176   ins_cost(200);
13177   expand %{
13178     fcmovFPR_regS(cmp,flags,dst,src);
13179   %}
13180 %}
13181 
13182 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
13184   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13185   ins_cost(200);
13186   expand %{
13187     fcmovF_regS(cmp,flags,dst,src);
13188   %}
13189 %}
13190 
13191 //======
13192 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13193 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13194   match( Set flags (CmpL src zero ));
13195   effect(TEMP tmp);
13196   ins_cost(200);
13197   format %{ "MOV    $tmp,$src.lo\n\t"
13198             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13199   ins_encode( long_cmp_flags0( src, tmp ) );
13200   ins_pipe( ialu_reg_reg_long );
13201 %}
13202 
13203 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13204 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13205   match( Set flags (CmpL src1 src2 ));
13206   ins_cost(200+300);
13207   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13208             "JNE,s  skip\n\t"
13209             "CMP    $src1.hi,$src2.hi\n\t"
13210      "skip:\t" %}
13211   ins_encode( long_cmp_flags1( src1, src2 ) );
13212   ins_pipe( ialu_cr_reg_reg );
13213 %}
13214 
13215 // Long compare reg == zero/reg OR reg != zero/reg
13216 // Just a wrapper for a normal branch, plus the predicate test.
13217 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13218   match(If cmp flags);
13219   effect(USE labl);
13220   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13221   expand %{
13222     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13223   %}
13224 %}
13225 
13226 //======
13227 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13228 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13229   match(Set flags (CmpUL src zero));
13230   effect(TEMP tmp);
13231   ins_cost(200);
13232   format %{ "MOV    $tmp,$src.lo\n\t"
13233             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13234   ins_encode(long_cmp_flags0(src, tmp));
13235   ins_pipe(ialu_reg_reg_long);
13236 %}
13237 
13238 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13239 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13240   match(Set flags (CmpUL src1 src2));
13241   ins_cost(200+300);
13242   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13243             "JNE,s  skip\n\t"
13244             "CMP    $src1.hi,$src2.hi\n\t"
13245      "skip:\t" %}
13246   ins_encode(long_cmp_flags1(src1, src2));
13247   ins_pipe(ialu_cr_reg_reg);
13248 %}
13249 
13250 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13251 // Just a wrapper for a normal branch, plus the predicate test.
13252 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13253   match(If cmp flags);
13254   effect(USE labl);
13255   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13256   expand %{
13257     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13258   %}
13259 %}
13260 
13261 // Compare 2 longs and CMOVE longs.
13262 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13263   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13264   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13265   ins_cost(400);
13266   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13267             "CMOV$cmp $dst.hi,$src.hi" %}
13268   opcode(0x0F,0x40);
13269   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13270   ins_pipe( pipe_cmov_reg_long );
13271 %}
13272 
13273 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13274   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13275   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13276   ins_cost(500);
13277   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13278             "CMOV$cmp $dst.hi,$src.hi" %}
13279   opcode(0x0F,0x40);
13280   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13281   ins_pipe( pipe_cmov_reg_long );
13282 %}
13283 
13284 // Compare 2 longs and CMOVE ints.
13285 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13286   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13287   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13288   ins_cost(200);
13289   format %{ "CMOV$cmp $dst,$src" %}
13290   opcode(0x0F,0x40);
13291   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13292   ins_pipe( pipe_cmov_reg );
13293 %}
13294 
13295 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13296   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13297   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13298   ins_cost(250);
13299   format %{ "CMOV$cmp $dst,$src" %}
13300   opcode(0x0F,0x40);
13301   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13302   ins_pipe( pipe_cmov_mem );
13303 %}
13304 
// Compare 2 longs and CMOVE ptrs.
13306 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13307   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13308   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13309   ins_cost(200);
13310   format %{ "CMOV$cmp $dst,$src" %}
13311   opcode(0x0F,0x40);
13312   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13313   ins_pipe( pipe_cmov_reg );
13314 %}
13315 
13316 // Compare 2 longs and CMOVE doubles
13317 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13319   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13320   ins_cost(200);
13321   expand %{
13322     fcmovDPR_regS(cmp,flags,dst,src);
13323   %}
13324 %}
13325 
13326 // Compare 2 longs and CMOVE doubles
13327 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13329   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13330   ins_cost(200);
13331   expand %{
13332     fcmovD_regS(cmp,flags,dst,src);
13333   %}
13334 %}
13335 
13336 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13338   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13339   ins_cost(200);
13340   expand %{
13341     fcmovFPR_regS(cmp,flags,dst,src);
13342   %}
13343 %}
13344 
13345 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
13347   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13348   ins_cost(200);
13349   expand %{
13350     fcmovF_regS(cmp,flags,dst,src);
13351   %}
13352 %}
13353 
13354 //======
13355 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13356 // Same as cmpL_reg_flags_LEGT except must negate src
13357 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13358   match( Set flags (CmpL src zero ));
13359   effect( TEMP tmp );
13360   ins_cost(300);
13361   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13362             "CMP    $tmp,$src.lo\n\t"
13363             "SBB    $tmp,$src.hi\n\t" %}
13364   ins_encode( long_cmp_flags3(src, tmp) );
13365   ins_pipe( ialu_reg_reg_long );
13366 %}
13367 
13368 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13369 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13370 // requires a commuted test to get the same result.
13371 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13372   match( Set flags (CmpL src1 src2 ));
13373   effect( TEMP tmp );
13374   ins_cost(300);
13375   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13376             "MOV    $tmp,$src2.hi\n\t"
13377             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13378   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13379   ins_pipe( ialu_cr_reg_reg );
13380 %}
13381 
// Long compares reg < zero/reg OR reg >= zero/reg.
13383 // Just a wrapper for a normal branch, plus the predicate test
13384 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13385   match(If cmp flags);
13386   effect(USE labl);
13387   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13388   ins_cost(300);
13389   expand %{
13390     jmpCon(cmp,flags,labl);    // JGT or JLE...
13391   %}
13392 %}
13393 
13394 //======
13395 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13396 // Same as cmpUL_reg_flags_LEGT except must negate src
13397 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13398   match(Set flags (CmpUL src zero));
13399   effect(TEMP tmp);
13400   ins_cost(300);
13401   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13402             "CMP    $tmp,$src.lo\n\t"
13403             "SBB    $tmp,$src.hi\n\t" %}
13404   ins_encode(long_cmp_flags3(src, tmp));
13405   ins_pipe(ialu_reg_reg_long);
13406 %}
13407 
13408 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13409 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13410 // requires a commuted test to get the same result.
13411 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13412   match(Set flags (CmpUL src1 src2));
13413   effect(TEMP tmp);
13414   ins_cost(300);
13415   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13416             "MOV    $tmp,$src2.hi\n\t"
13417             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13418   ins_encode(long_cmp_flags2( src2, src1, tmp));
13419   ins_pipe(ialu_cr_reg_reg);
13420 %}
13421 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13423 // Just a wrapper for a normal branch, plus the predicate test
13424 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13425   match(If cmp flags);
13426   effect(USE labl);
13427   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13428   ins_cost(300);
13429   expand %{
13430     jmpCon(cmp, flags, labl);    // JGT or JLE...
13431   %}
13432 %}
13433 
13434 // Compare 2 longs and CMOVE longs.
13435 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13436   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13437   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13438   ins_cost(400);
13439   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13440             "CMOV$cmp $dst.hi,$src.hi" %}
13441   opcode(0x0F,0x40);
13442   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13443   ins_pipe( pipe_cmov_reg_long );
13444 %}
13445 
13446 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13447   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13448   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13449   ins_cost(500);
13450   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13451             "CMOV$cmp $dst.hi,$src.hi+4" %}
13452   opcode(0x0F,0x40);
13453   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13454   ins_pipe( pipe_cmov_reg_long );
13455 %}
13456 
13457 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13458   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13459   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13460   ins_cost(400);
13461   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13462             "CMOV$cmp $dst.hi,$src.hi" %}
13463   opcode(0x0F,0x40);
13464   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13465   ins_pipe( pipe_cmov_reg_long );
13466 %}
13467 
13468 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13469   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13470   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13471   ins_cost(500);
13472   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13473             "CMOV$cmp $dst.hi,$src.hi+4" %}
13474   opcode(0x0F,0x40);
13475   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13476   ins_pipe( pipe_cmov_reg_long );
13477 %}
13478 
13479 // Compare 2 longs and CMOVE ints.
13480 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13481   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13482   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13483   ins_cost(200);
13484   format %{ "CMOV$cmp $dst,$src" %}
13485   opcode(0x0F,0x40);
13486   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13487   ins_pipe( pipe_cmov_reg );
13488 %}
13489 
13490 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13491   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13492   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13493   ins_cost(250);
13494   format %{ "CMOV$cmp $dst,$src" %}
13495   opcode(0x0F,0x40);
13496   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13497   ins_pipe( pipe_cmov_mem );
13498 %}
13499 
13500 // Compare 2 longs and CMOVE ptrs.
13501 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13502   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13503   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13504   ins_cost(200);
13505   format %{ "CMOV$cmp $dst,$src" %}
13506   opcode(0x0F,0x40);
13507   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13508   ins_pipe( pipe_cmov_reg );
13509 %}
13510 
13511 // Compare 2 longs and CMOVE doubles
13512 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13514   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13515   ins_cost(200);
13516   expand %{
13517     fcmovDPR_regS(cmp,flags,dst,src);
13518   %}
13519 %}
13520 
13521 // Compare 2 longs and CMOVE doubles
13522 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13524   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13525   ins_cost(200);
13526   expand %{
13527     fcmovD_regS(cmp,flags,dst,src);
13528   %}
13529 %}
13530 
13531 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13533   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13534   ins_cost(200);
13535   expand %{
13536     fcmovFPR_regS(cmp,flags,dst,src);
13537   %}
13538 %}
13539 
13540 
13541 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13543   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13544   ins_cost(200);
13545   expand %{
13546     fcmovF_regS(cmp,flags,dst,src);
13547   %}
13548 %}
13549 
13550 
13551 // ============================================================================
13552 // Procedure Call/Return Instructions
13553 // Call Java Static Instruction
13554 // Note: If this code changes, the corresponding ret_addr_offset() and
13555 //       compute_padding() functions will have to be adjusted.
13556 instruct CallStaticJavaDirect(method meth) %{
13557   match(CallStaticJava);
13558   effect(USE meth);
13559 
13560   ins_cost(300);
13561   format %{ "CALL,static " %}
13562   opcode(0xE8); /* E8 cd */
13563   ins_encode( pre_call_resets,
13564               Java_Static_Call( meth ),
13565               call_epilog,
13566               post_call_FPU );
13567   ins_pipe( pipe_slow );
13568   ins_alignment(4);
13569 %}
13570 
13571 // Call Java Dynamic Instruction
13572 // Note: If this code changes, the corresponding ret_addr_offset() and
13573 //       compute_padding() functions will have to be adjusted.
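// The MOV below pre-loads EAX with a placeholder cached-oop value for the
// inline cache; the inline-cache machinery later patches it together with
// the call target.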
13574 instruct CallDynamicJavaDirect(method meth) %{
13575   match(CallDynamicJava);
13576   effect(USE meth);
13577 
13578   ins_cost(300);
13579   format %{ "MOV    EAX,(oop)-1\n\t"
13580             "CALL,dynamic" %}
13581   opcode(0xE8); /* E8 cd */
13582   ins_encode( pre_call_resets,
13583               Java_Dynamic_Call( meth ),
13584               call_epilog,
13585               post_call_FPU );
13586   ins_pipe( pipe_slow );
13587   ins_alignment(4);
13588 %}
13589 
13590 // Call Runtime Instruction
13591 instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
13593   effect(USE meth);
13594 
13595   ins_cost(300);
13596   format %{ "CALL,runtime " %}
13597   opcode(0xE8); /* E8 cd */
13598   // Use FFREEs to clear entries in float stack
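  // The 32-bit C calling convention requires the x87 register stack to be
  // empty across the call (apart from a floating-point return value), hence
  // clearing it here before transferring to the runtime.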
13599   ins_encode( pre_call_resets,
13600               FFree_Float_Stack_All,
13601               Java_To_Runtime( meth ),
13602               post_call_FPU );
13603   ins_pipe( pipe_slow );
13604 %}
13605 
13606 // Call runtime without safepoint
13607 instruct CallLeafDirect(method meth) %{
13608   match(CallLeaf);
13609   effect(USE meth);
13610 
13611   ins_cost(300);
13612   format %{ "CALL_LEAF,runtime " %}
13613   opcode(0xE8); /* E8 cd */
13614   ins_encode( pre_call_resets,
13615               FFree_Float_Stack_All,
13616               Java_To_Runtime( meth ),
13617               Verify_FPU_For_Leaf, post_call_FPU );
13618   ins_pipe( pipe_slow );
13619 %}
13620 
13621 instruct CallLeafNoFPDirect(method meth) %{
13622   match(CallLeafNoFP);
13623   effect(USE meth);
13624 
13625   ins_cost(300);
13626   format %{ "CALL_LEAF_NOFP,runtime " %}
13627   opcode(0xE8); /* E8 cd */
13628   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13629   ins_pipe( pipe_slow );
13630 %}
13631 
13632 
13633 // Return Instruction
13634 // Remove the return address & jump to it.
13635 instruct Ret() %{
13636   match(Return);
13637   format %{ "RET" %}
13638   opcode(0xC3);
13639   ins_encode(OpcP);
13640   ins_pipe( pipe_jmp );
13641 %}
13642 
13643 // Tail Call; Jump from runtime stub to Java code.
13644 // Also known as an 'interprocedural jump'.
13645 // Target of jump will eventually return to caller.
13646 // TailJump below removes the return address.
13647 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13648   match(TailCall jump_target method_ptr);
13649   ins_cost(300);
13650   format %{ "JMP    $jump_target \t# EBX holds method" %}
13651   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13652   ins_encode( OpcP, RegOpc(jump_target) );
13653   ins_pipe( pipe_jmp );
13654 %}
13655 
13656 
13657 // Tail Jump; remove the return address; jump to target.
13658 // TailCall above leaves the return address around.
13659 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13660   match( TailJump jump_target ex_oop );
13661   ins_cost(300);
13662   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13663             "JMP    $jump_target " %}
13664   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13665   ins_encode( enc_pop_rdx,
13666               OpcP, RegOpc(jump_target) );
13667   ins_pipe( pipe_jmp );
13668 %}
13669 
// Create exception oop: created by stack-crawling runtime code.
// The created exception is now available to this handler and is set up
// just prior to jumping to this handler.  No code emitted.
13673 instruct CreateException( eAXRegP ex_oop )
13674 %{
13675   match(Set ex_oop (CreateEx));
13676 
13677   size(0);
13678   // use the following format syntax
13679   format %{ "# exception oop is in EAX; no code emitted" %}
13680   ins_encode();
13681   ins_pipe( empty );
13682 %}
13683 
13684 
13685 // Rethrow exception:
13686 // The exception oop will come in the first argument position.
13687 // Then JUMP (not call) to the rethrow stub code.
13688 instruct RethrowException()
13689 %{
13690   match(Rethrow);
13691 
13692   // use the following format syntax
13693   format %{ "JMP    rethrow_stub" %}
13694   ins_encode(enc_rethrow);
13695   ins_pipe( pipe_jmp );
13696 %}
13697 
13698 // inlined locking and unlocking
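// Both FastLock and FastUnlock produce a condition-code result (the
// eFlagsReg operand); the generated code branches on it to choose between
// the inline fast path and the slow-path runtime call.  The heavy lifting
// is done by the fast_lock()/fast_unlock() macro-assembler routines invoked
// below.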
13699 
13700 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
13701   predicate(Compile::current()->use_rtm());
13702   match(Set cr (FastLock object box));
13703   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
13704   ins_cost(300);
13705   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13706   ins_encode %{
13707     __ get_thread($thread$$Register);
13708     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13709                  $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
13710                  _counters, _rtm_counters, _stack_rtm_counters,
13711                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13712                  true, ra_->C->profile_rtm());
13713   %}
13714   ins_pipe(pipe_slow);
13715 %}
13716 
13717 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
13718   predicate(!Compile::current()->use_rtm());
13719   match(Set cr (FastLock object box));
13720   effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
13721   ins_cost(300);
13722   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13723   ins_encode %{
13724     __ get_thread($thread$$Register);
13725     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13726                  $scr$$Register, noreg, noreg, $thread$$Register, NULL, NULL, NULL, NULL, false, false);
13727   %}
13728   ins_pipe(pipe_slow);
13729 %}
13730 
13731 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13732   match(Set cr (FastUnlock object box));
13733   effect(TEMP tmp, USE_KILL box);
13734   ins_cost(300);
13735   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13736   ins_encode %{
13737     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13738   %}
13739   ins_pipe(pipe_slow);
13740 %}
13741 
13742 
13743 
13744 // ============================================================================
13745 // Safepoint Instruction
13746 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13747   match(SafePoint poll);
13748   effect(KILL cr, USE poll);
13749 
13750   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13751   ins_cost(125);
13752   // EBP would need size(3)
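  // "TEST EAX,[reg]" is 0x85 /r: two bytes when the base register needs no
  // displacement byte.  An EBP base cannot be encoded without a zero disp8
  // (hence eRegP_no_EBP and the size(3) remark above).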
13753   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13754   ins_encode %{
13755     __ relocate(relocInfo::poll_type);
13756     address pre_pc = __ pc();
13757     __ testl(rax, Address($poll$$Register, 0));
13758     address post_pc = __ pc();
13759     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13760   %}
13761   ins_pipe(ialu_reg_mem);
13762 %}
13763 
13764 
13765 // ============================================================================
13766 // This name is KNOWN by the ADLC and cannot be changed.
13767 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13768 // for this guy.
13769 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13770   match(Set dst (ThreadLocal));
13771   effect(DEF dst, KILL cr);
13772 
13773   format %{ "MOV    $dst, Thread::current()" %}
13774   ins_encode %{
13775     Register dstReg = as_Register($dst$$reg);
13776     __ get_thread(dstReg);
13777   %}
13778   ins_pipe( ialu_reg_fat );
13779 %}
13780 
13781 
13782 
13783 //----------PEEPHOLE RULES-----------------------------------------------------
13784 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
13786 //
13787 // peepmatch ( root_instr_name [preceding_instruction]* );
13788 //
13789 // peepconstraint %{
13790 // (instruction_number.operand_name relational_op instruction_number.operand_name
13791 //  [, ...] );
13792 // // instruction numbers are zero-based using left to right order in peepmatch
13793 //
13794 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13795 // // provide an instruction_number.operand_name for each operand that appears
13796 // // in the replacement instruction's match rule
13797 //
13798 // ---------VM FLAGS---------------------------------------------------------
13799 //
13800 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13801 //
13802 // Each peephole rule is given an identifying number starting with zero and
13803 // increasing by one in the order seen by the parser.  An individual peephole
13804 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13805 // on the command-line.
13806 //
13807 // ---------CURRENT LIMITATIONS----------------------------------------------
13808 //
13809 // Only match adjacent instructions in same basic block
13810 // Only equality constraints
13811 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13812 // Only one replacement instruction
13813 //
13814 // ---------EXAMPLE----------------------------------------------------------
13815 //
13816 // // pertinent parts of existing instructions in architecture description
13817 // instruct movI(rRegI dst, rRegI src) %{
13818 //   match(Set dst (CopyI src));
13819 // %}
13820 //
13821 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13822 //   match(Set dst (AddI dst src));
13823 //   effect(KILL cr);
13824 // %}
13825 //
13826 // // Change (inc mov) to lea
13827 // peephole %{
//   // increment preceded by register-register move
13829 //   peepmatch ( incI_eReg movI );
13830 //   // require that the destination register of the increment
13831 //   // match the destination register of the move
13832 //   peepconstraint ( 0.dst == 1.dst );
13833 //   // construct a replacement instruction that sets
13834 //   // the destination to ( move's source register + one )
13835 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13836 // %}
13837 //
13838 // Implementation no longer uses movX instructions since
13839 // machine-independent system no longer uses CopyX nodes.
13840 //
13841 // peephole %{
13842 //   peepmatch ( incI_eReg movI );
13843 //   peepconstraint ( 0.dst == 1.dst );
13844 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13845 // %}
13846 //
13847 // peephole %{
13848 //   peepmatch ( decI_eReg movI );
13849 //   peepconstraint ( 0.dst == 1.dst );
13850 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13851 // %}
13852 //
13853 // peephole %{
13854 //   peepmatch ( addI_eReg_imm movI );
13855 //   peepconstraint ( 0.dst == 1.dst );
13856 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13857 // %}
13858 //
13859 // peephole %{
13860 //   peepmatch ( addP_eReg_imm movP );
13861 //   peepconstraint ( 0.dst == 1.dst );
13862 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13863 // %}
13864 
13865 // // Change load of spilled value to only a spill
13866 // instruct storeI(memory mem, rRegI src) %{
13867 //   match(Set mem (StoreI mem src));
13868 // %}
13869 //
13870 // instruct loadI(rRegI dst, memory mem) %{
13871 //   match(Set dst (LoadI mem));
13872 // %}
13873 //
13874 peephole %{
13875   peepmatch ( loadI storeI );
13876   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13877   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13878 %}
13879 
13880 //----------SMARTSPILL RULES---------------------------------------------------
13881 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.