1 //
    2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
    3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4 //
    5 // This code is free software; you can redistribute it and/or modify it
    6 // under the terms of the GNU General Public License version 2 only, as
    7 // published by the Free Software Foundation.
    8 //
    9 // This code is distributed in the hope that it will be useful, but WITHOUT
   10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   12 // version 2 for more details (a copy is included in the LICENSE file that
   13 // accompanied this code).
   14 //
   15 // You should have received a copy of the GNU General Public License version
   16 // 2 along with this work; if not, write to the Free Software Foundation,
   17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   18 //
   19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   20 // or visit www.oracle.com if you need additional information or have any
   21 // questions.
   22 //
   23 //
   24 
   25 // X86 Architecture Description File
   26 
   27 //----------REGISTER DEFINITION BLOCK------------------------------------------
   28 // This information is used by the matcher and the register allocator to
   29 // describe individual registers and classes of registers within the target
   30 // architecture.
   31 
   32 register %{
   33 //----------Architecture Description Register Definitions----------------------
   34 // General Registers
   35 // "reg_def"  name ( register save type, C convention save type,
   36 //                   ideal register type, encoding );
   37 // Register Save Types:
   38 //
   39 // NS  = No-Save:       The register allocator assumes that these registers
   40 //                      can be used without saving upon entry to the method, &
   41 //                      that they do not need to be saved at call sites.
   42 //
   43 // SOC = Save-On-Call:  The register allocator assumes that these registers
   44 //                      can be used without saving upon entry to the method,
   45 //                      but that they must be saved at call sites.
   46 //
   47 // SOE = Save-On-Entry: The register allocator assumes that these registers
   48 //                      must be saved before using them upon entry to the
   49 //                      method, but they do not need to be saved at call
   50 //                      sites.
   51 //
   52 // AS  = Always-Save:   The register allocator assumes that these registers
   53 //                      must be saved before using them upon entry to the
   54 //                      method, & that they must be saved at call sites.
   55 //
   56 // Ideal Register Type is used to determine how to save & restore a
   57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
   58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
   59 //
   60 // The encoding number is the actual bit-pattern placed into the opcodes.
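//
// For example, in the reg_def lines below EAX has encoding 0 and ESP has
// encoding 4; these are the 3-bit register numbers that end up in the
// reg and r/m fields of the ModRM and SIB bytes emitted later in this file.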
   61 
   62 // General Registers
    63 // EBX, ESI, and EDI were previously save-on-entry for java code, then
    64 // SOE was turned off in java code due to the frequent use of uncommon-traps.
    65 // Now that the allocator is better, ESI and EDI are SOE registers again.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
   76 
    77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
    78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
    81 // Here is the trick: FPR1 is really st(0), except in the midst of emitting
    82 // assembly for a machnode. During the emission the fpu stack is pushed,
    83 // making FPR1 == st(1) temporarily. However, at any safepoint the stack
    84 // will not have this extra element, so FPR1 == st(0) from the oopMap
    85 // viewpoint. This same quirk in the numbering forces the instruction
    86 // encoding to play games with the register encode to correct for the
    87 // 0/1 issue. See MachSpillCopyNode::implementation, where it does
    88 // flt->flt moves, for an example.
    89 //
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
  104 //
  105 // Empty fill registers, which are never used, but supply alignment to xmm regs
  106 //
  107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
  108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
  109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
  110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
  111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
  112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
  113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
  114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
  115 
  116 // Specify priority of register selection within phases of register
  117 // allocation.  Highest priority is first.  A useful heuristic is to
  118 // give registers a low priority when they are required by machine
  119 // instructions, like EAX and EDX.  Registers which are used as
  120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  121 // For the Intel integer registers, the equivalent Long pairs are
  122 // EDX:EAX, EBX:ECX, and EDI:EBP.
  123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  126                     FPR6L, FPR6H, FPR7L, FPR7H,
  127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
  128 
  129 
  130 //----------Architecture Description Register Classes--------------------------
  131 // Several register classes are automatically defined based upon information in
  132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
  135 //
  136 // Class for no registers (empty set).
  137 reg_class no_reg();
  138 
  139 // Class for all registers
  140 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  141 // Class for all registers (excluding EBP)
  142 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
  143 // Dynamic register class that selects at runtime between register classes
   144 // any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
   145 // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
  146 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
  147 
  148 // Class for general registers
  149 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  150 // Class for general registers (excluding EBP).
  151 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  152 // Used also if the PreserveFramePointer flag is true.
  153 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  154 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
  155 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
  156 
  157 // Class of "X" registers
  158 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  159 
  160 // Class of registers that can appear in an address with no offset.
  161 // EBP and ESP require an extra instruction byte for zero offset.
  162 // Used in fast-unlock
  163 reg_class p_reg(EDX, EDI, ESI, EBX);
  164 
  165 // Class for general registers excluding ECX
  166 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  167 // Class for general registers excluding ECX (and EBP)
  168 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  169 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
  170 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  171 
  172 // Class for general registers excluding EAX
  173 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  174 
  175 // Class for general registers excluding EAX and EBX.
  176 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  177 // Class for general registers excluding EAX and EBX (and EBP)
  178 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  179 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
  180 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
  181 
  182 // Class of EAX (for multiply and divide operations)
  183 reg_class eax_reg(EAX);
  184 
  185 // Class of EBX (for atomic add)
  186 reg_class ebx_reg(EBX);
  187 
  188 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  189 reg_class ecx_reg(ECX);
  190 
  191 // Class of EDX (for multiply and divide operations)
  192 reg_class edx_reg(EDX);
  193 
  194 // Class of EDI (for synchronization)
  195 reg_class edi_reg(EDI);
  196 
  197 // Class of ESI (for synchronization)
  198 reg_class esi_reg(ESI);
  199 
  200 // Singleton class for stack pointer
  201 reg_class sp_reg(ESP);
  202 
  203 // Singleton class for instruction pointer
  204 // reg_class ip_reg(EIP);
  205 
  206 // Class of integer register pairs
  207 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
   208 // Class of integer register pairs (excluding EBP and EDI).
  209 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  210 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
  211 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  212 
  213 // Class of integer register pairs that aligns with calling convention
  214 reg_class eadx_reg( EAX,EDX );
  215 reg_class ebcx_reg( ECX,EBX );
  216 reg_class ebpd_reg( EBP,EDI );
  217 
  218 // Not AX or DX, used in divides
  219 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  220 // Not AX or DX (and neither EBP), used in divides
  221 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  222 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
  223 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
  224 
  225 // Floating point registers.  Notice FPR0 is not a choice.
   226 // FPR0 is never allocated; we use clever encodings to fake
   227 // 2-address instructions out of Intel's FP stack.
  228 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  229 
  230 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  231                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  232                       FPR7L,FPR7H );
  233 
  234 reg_class fp_flt_reg0( FPR1L );
  235 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  236 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  237 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  238                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
  239 
  240 %}
  241 
  242 
  243 //----------SOURCE BLOCK-------------------------------------------------------
  244 // This is a block of C++ code which provides values, functions, and
  245 // definitions necessary in the rest of the architecture description
  246 source_hpp %{
  247 // Must be visible to the DFA in dfa_x86_32.cpp
  248 extern bool is_operand_hi32_zero(Node* n);
  249 %}
  250 
  251 source %{
  252 #define   RELOC_IMM32    Assembler::imm_operand
  253 #define   RELOC_DISP32   Assembler::disp32_operand
  254 
  255 #define __ _masm.
  256 
  257 // How to find the high register of a Long pair, given the low register
  258 #define   HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
  259 #define   HIGH_FROM_LOW_ENC(x) ((x)+2)
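// For illustration: the long pairs used in this file are EDX:EAX, EBX:ECX and
// EDI:EBP, and in each pair the high register's encoding is the low register's
// encoding plus 2 (EAX=0/EDX=2, ECX=1/EBX=3, EBP=5/EDI=7), which is why
// HIGH_FROM_LOW simply adds 2.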
  260 
  261 // These masks are used to provide 128-bit aligned bitmasks to the XMM
  262 // instructions, to allow sign-masking or sign-bit flipping.  They allow
  263 // fast versions of NegF/NegD and AbsF/AbsD.
  264 
  265 void reg_mask_init() {}
  266 
   267 // Note: 'double' and 'long long' have 32-bit alignment on x86.
  268 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
   269   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
   270   // for the 128-bit operands of SSE instructions.
  271   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
   272   // Store the value to a 128-bit operand.
  273   operand[0] = lo;
  274   operand[1] = hi;
  275   return operand;
  276 }
  277 
   278 // Buffer for the 128-bit masks used by SSE instructions.
  279 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
  280 
  281 // Static initialization during VM startup.
  282 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  283 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  284 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  285 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
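// For illustration: fp_signmask_pool has room for five 16-byte slots but only
// four masks are stored, at indices 2, 4, 6 and 8.  Whatever the (8-byte)
// alignment of the array base, rounding those addresses down with &~0xF in
// double_quadword() yields four distinct 16-byte aligned slots that all stay
// inside the pool -- that is what the extra 128 bits of "alignment" slack in
// the declaration above pays for.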
  286 
  287 // Offset hacking within calls.
  288 static int pre_call_resets_size() {
  289   int size = 0;
  290   Compile* C = Compile::current();
  291   if (C->in_24_bit_fp_mode()) {
  292     size += 6; // fldcw
  293   }
  294   if (VM_Version::supports_vzeroupper()) {
  295     size += 3; // vzeroupper
  296   }
  297   return size;
  298 }
  299 
   300 // !!!!! Special hack to get all types of calls to specify the byte offset
  301 //       from the start of the call to the point where the return address
  302 //       will point.
  303 int MachCallStaticJavaNode::ret_addr_offset() {
  304   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
  305 }
  306 
  307 int MachCallDynamicJavaNode::ret_addr_offset() {
  308   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
  309 }
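// For illustration (assuming the usual IA-32 encodings): a direct call is E8
// plus a 32-bit displacement, i.e. 5 bytes, which is where the 5 above comes
// from; the dynamic call is additionally preceded by a 5-byte MOV that loads
// the inline-cache data, giving the 10 above.  Any fldcw or vzeroupper emitted
// before the call is accounted for separately by pre_call_resets_size().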
  310 
  311 static int sizeof_FFree_Float_Stack_All = -1;
  312 
  313 int MachCallRuntimeNode::ret_addr_offset() {
  314   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  315   return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
  316 }
  317 
  318 //
  319 // Compute padding required for nodes which need alignment
  320 //
  321 
  322 // The address of the call instruction needs to be 4-byte aligned to
  323 // ensure that it does not span a cache line so that it can be patched.
  324 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  325   current_offset += pre_call_resets_size();  // skip fldcw, if any
  326   current_offset += 1;      // skip call opcode byte
  327   return align_up(current_offset, alignment_required()) - current_offset;
  328 }
  329 
  330 // The address of the call instruction needs to be 4-byte aligned to
  331 // ensure that it does not span a cache line so that it can be patched.
  332 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  333   current_offset += pre_call_resets_size();  // skip fldcw, if any
  334   current_offset += 5;      // skip MOV instruction
  335   current_offset += 1;      // skip call opcode byte
  336   return align_up(current_offset, alignment_required()) - current_offset;
  337 }
  338 
  339 // EMIT_RM()
  340 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  341   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  342   cbuf.insts()->emit_int8(c);
  343 }
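// For illustration: emit_rm() packs a ModRM (or SIB) byte as
// (mod << 6) | (reg << 3) | (r/m).  For instance, emit_rm(cbuf, 0x3, 0x00, ESP_enc)
// produces 0xC4, the ModRM byte of the "ADD ESP, imm" emitted in the epilog
// below (mod=11 register-direct, /0 opcode extension, r/m=ESP).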
  344 
  345 // EMIT_CC()
  346 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  347   unsigned char c = (unsigned char)( f1 | f2 );
  348   cbuf.insts()->emit_int8(c);
  349 }
  350 
  351 // EMIT_OPCODE()
  352 void emit_opcode(CodeBuffer &cbuf, int code) {
  353   cbuf.insts()->emit_int8((unsigned char) code);
  354 }
  355 
  356 // EMIT_OPCODE() w/ relocation information
  357 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  358   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  359   emit_opcode(cbuf, code);
  360 }
  361 
  362 // EMIT_D8()
  363 void emit_d8(CodeBuffer &cbuf, int d8) {
  364   cbuf.insts()->emit_int8((unsigned char) d8);
  365 }
  366 
  367 // EMIT_D16()
  368 void emit_d16(CodeBuffer &cbuf, int d16) {
  369   cbuf.insts()->emit_int16(d16);
  370 }
  371 
  372 // EMIT_D32()
  373 void emit_d32(CodeBuffer &cbuf, int d32) {
  374   cbuf.insts()->emit_int32(d32);
  375 }
  376 
  377 // emit 32 bit value and construct relocation entry from relocInfo::relocType
  378 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
  379         int format) {
  380   cbuf.relocate(cbuf.insts_mark(), reloc, format);
  381   cbuf.insts()->emit_int32(d32);
  382 }
  383 
  384 // emit 32 bit value and construct relocation entry from RelocationHolder
  385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
  386         int format) {
  387 #ifdef ASSERT
  388   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
  389     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  390   }
  391 #endif
  392   cbuf.relocate(cbuf.insts_mark(), rspec, format);
  393   cbuf.insts()->emit_int32(d32);
  394 }
  395 
  396 // Access stack slot for load or store
  397 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  398   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  399   if( -128 <= disp && disp <= 127 ) {
  400     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
  401     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  402     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  403   } else {
  404     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
  405     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  406     emit_d32(cbuf, disp);     // Displacement  // R/M byte
  407   }
  408 }
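// For illustration (hypothetical call): store_to_stackslot(cbuf, 0xDB, 0x0, 8)
// would emit DB 44 24 08, i.e. FILD dword ptr [ESP+8]: opcode DB, ModRM 0x44
// (mod=01, /0, r/m=ESP so a SIB byte follows), SIB 0x24 (no index, base=ESP),
// and the 8-bit displacement 8.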
  409 
   410 // Emit the ModRM/SIB/displacement bytes of a register-memory operand (emit_reg_mem).
  411 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  412   // There is no index & no scale, use form without SIB byte
  413   if ((index == 0x4) &&
  414       (scale == 0) && (base != ESP_enc)) {
  415     // If no displacement, mode is 0x0; unless base is [EBP]
  416     if ( (displace == 0) && (base != EBP_enc) ) {
  417       emit_rm(cbuf, 0x0, reg_encoding, base);
  418     }
  419     else {                    // If 8-bit displacement, mode 0x1
  420       if ((displace >= -128) && (displace <= 127)
  421           && (disp_reloc == relocInfo::none) ) {
  422         emit_rm(cbuf, 0x1, reg_encoding, base);
  423         emit_d8(cbuf, displace);
  424       }
  425       else {                  // If 32-bit displacement
  426         if (base == -1) { // Special flag for absolute address
  427           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  428           // (manual lies; no SIB needed here)
  429           if ( disp_reloc != relocInfo::none ) {
  430             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  431           } else {
  432             emit_d32      (cbuf, displace);
  433           }
  434         }
  435         else {                // Normal base + offset
  436           emit_rm(cbuf, 0x2, reg_encoding, base);
  437           if ( disp_reloc != relocInfo::none ) {
  438             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  439           } else {
  440             emit_d32      (cbuf, displace);
  441           }
  442         }
  443       }
  444     }
  445   }
  446   else {                      // Else, encode with the SIB byte
  447     // If no displacement, mode is 0x0; unless base is [EBP]
  448     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  449       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  450       emit_rm(cbuf, scale, index, base);
  451     }
  452     else {                    // If 8-bit displacement, mode 0x1
  453       if ((displace >= -128) && (displace <= 127)
  454           && (disp_reloc == relocInfo::none) ) {
  455         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  456         emit_rm(cbuf, scale, index, base);
  457         emit_d8(cbuf, displace);
  458       }
  459       else {                  // If 32-bit displacement
  460         if (base == 0x04 ) {
  461           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  462           emit_rm(cbuf, scale, index, 0x04);
  463         } else {
  464           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  465           emit_rm(cbuf, scale, index, base);
  466         }
  467         if ( disp_reloc != relocInfo::none ) {
  468           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  469         } else {
  470           emit_d32      (cbuf, displace);
  471         }
  472       }
  473     }
  474   }
  475 }
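// A note on the special cases above: on IA-32, mod=00 with an EBP base (r/m or
// SIB base = 101) means "disp32, no base", so a zero displacement off EBP must
// still be encoded with mod=01 and an explicit disp8 of 0 -- hence the
// "base != EBP_enc" checks.  Likewise, base == -1 flags an absolute address,
// encoded as mod=00, r/m=101 with a 32-bit displacement.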
  476 
  477 
  478 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  479   if( dst_encoding == src_encoding ) {
  480     // reg-reg copy, use an empty encoding
  481   } else {
  482     emit_opcode( cbuf, 0x8B );
  483     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  484   }
  485 }
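// For illustration: encode_Copy(cbuf, EAX_enc, ECX_enc) emits 8B C1, i.e.
// MOV EAX, ECX (opcode 8B /r with mod=11, reg=EAX, r/m=ECX), while a copy to
// the same register emits nothing at all.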
  486 
  487 void emit_cmpfp_fixup(MacroAssembler& _masm) {
  488   Label exit;
  489   __ jccb(Assembler::noParity, exit);
  490   __ pushf();
  491   //
  492   // comiss/ucomiss instructions set ZF,PF,CF flags and
  493   // zero OF,AF,SF for NaN values.
  494   // Fixup flags by zeroing ZF,PF so that compare of NaN
  495   // values returns 'less than' result (CF is set).
  496   // Leave the rest of flags unchanged.
  497   //
  498   //    7 6 5 4 3 2 1 0
  499   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  500   //    0 0 1 0 1 0 1 1   (0x2B)
  501   //
  502   __ andl(Address(rsp, 0), 0xffffff2b);
  503   __ popf();
  504   __ bind(exit);
  505 }
  506 
  507 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  508   Label done;
  509   __ movl(dst, -1);
  510   __ jcc(Assembler::parity, done);
  511   __ jcc(Assembler::below, done);
  512   __ setb(Assembler::notEqual, dst);
  513   __ movzbl(dst, dst);
  514   __ bind(done);
  515 }
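// The net effect of emit_cmpfp3(): dst is left at -1 when the comparison was
// unordered (parity set) or 'below', becomes 0 when the operands compared
// equal, and 1 otherwise -- the usual three-way -1/0/+1 compare result.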
  516 
  517 
  518 //=============================================================================
  519 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
  520 
  521 int ConstantTable::calculate_table_base_offset() const {
  522   return 0;  // absolute addressing, no offset
  523 }
  524 
  525 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
  526 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  527   ShouldNotReachHere();
  528 }
  529 
  530 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  531   // Empty encoding
  532 }
  533 
  534 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  535   return 0;
  536 }
  537 
  538 #ifndef PRODUCT
  539 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  540   st->print("# MachConstantBaseNode (empty encoding)");
  541 }
  542 #endif
  543 
  544 
  545 //=============================================================================
  546 #ifndef PRODUCT
  547 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  548   Compile* C = ra_->C;
  549 
  550   int framesize = C->output()->frame_size_in_bytes();
  551   int bangsize = C->output()->bang_size_in_bytes();
  552   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  553   // Remove wordSize for return addr which is already pushed.
  554   framesize -= wordSize;
  555 
  556   if (C->output()->need_stack_bang(bangsize)) {
  557     framesize -= wordSize;
  558     st->print("# stack bang (%d bytes)", bangsize);
  559     st->print("\n\t");
  560     st->print("PUSH   EBP\t# Save EBP");
  561     if (PreserveFramePointer) {
  562       st->print("\n\t");
  563       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  564     }
  565     if (framesize) {
  566       st->print("\n\t");
  567       st->print("SUB    ESP, #%d\t# Create frame",framesize);
  568     }
  569   } else {
  570     st->print("SUB    ESP, #%d\t# Create frame",framesize);
  571     st->print("\n\t");
  572     framesize -= wordSize;
  573     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
  574     if (PreserveFramePointer) {
  575       st->print("\n\t");
  576       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
  577       if (framesize > 0) {
  578         st->print("\n\t");
  579         st->print("ADD    EBP, #%d", framesize);
  580       }
  581     }
  582   }
  583 
  584   if (VerifyStackAtCalls) {
  585     st->print("\n\t");
  586     framesize -= wordSize;
  587     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  588   }
  589 
  590   if( C->in_24_bit_fp_mode() ) {
  591     st->print("\n\t");
  592     st->print("FLDCW  \t# load 24 bit fpu control word");
  593   }
  594   if (UseSSE >= 2 && VerifyFPU) {
  595     st->print("\n\t");
  596     st->print("# verify FPU stack (must be clean on entry)");
  597   }
  598 
  599 #ifdef ASSERT
  600   if (VerifyStackAtCalls) {
  601     st->print("\n\t");
  602     st->print("# stack alignment check");
  603   }
  604 #endif
  605   st->cr();
  606 }
  607 #endif
  608 
  609 
  610 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  611   Compile* C = ra_->C;
  612   C2_MacroAssembler _masm(&cbuf);
  613 
  614   __ verified_entry(C);
  615 
  616   C->output()->set_frame_complete(cbuf.insts_size());
  617 
  618   if (C->has_mach_constant_base_node()) {
   619     // NOTE: We set the table base offset here because code that uses the
   620     // constant table might be emitted before MachConstantBaseNode.
  621     ConstantTable& constant_table = C->output()->constant_table();
  622     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  623   }
  624 }
  625 
  626 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  627   return MachNode::size(ra_); // too many variables; just compute it the hard way
  628 }
  629 
  630 int MachPrologNode::reloc() const {
  631   return 0; // a large enough number
  632 }
  633 
  634 //=============================================================================
  635 #ifndef PRODUCT
  636 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  637   Compile *C = ra_->C;
  638   int framesize = C->output()->frame_size_in_bytes();
  639   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
   640   // Remove two words for the return address and EBP.
  641   framesize -= 2*wordSize;
  642 
  643   if (C->max_vector_size() > 16) {
  644     st->print("VZEROUPPER");
  645     st->cr(); st->print("\t");
  646   }
  647   if (C->in_24_bit_fp_mode()) {
  648     st->print("FLDCW  standard control word");
  649     st->cr(); st->print("\t");
  650   }
  651   if (framesize) {
  652     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
  653     st->cr(); st->print("\t");
  654   }
  655   st->print_cr("POPL   EBP"); st->print("\t");
  656   if (do_polling() && C->is_method_compilation()) {
  657     st->print("CMPL    rsp, poll_offset[thread]  \n\t"
  658               "JA      #safepoint_stub\t"
  659               "# Safepoint: poll for GC");
  660   }
  661 }
  662 #endif
  663 
  664 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  665   Compile *C = ra_->C;
  666   MacroAssembler _masm(&cbuf);
  667 
  668   if (C->max_vector_size() > 16) {
  669     // Clear upper bits of YMM registers when current compiled code uses
  670     // wide vectors to avoid AVX <-> SSE transition penalty during call.
  671     _masm.vzeroupper();
  672   }
  673   // If method set FPU control word, restore to standard control word
  674   if (C->in_24_bit_fp_mode()) {
  675     _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  676   }
  677 
  678   int framesize = C->output()->frame_size_in_bytes();
  679   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
   680   // Remove two words for the return address and EBP.
  681   framesize -= 2*wordSize;
  682 
  683   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
  684 
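  // For illustration: 83 /0 takes a sign-extended 8-bit immediate while 81 /0
  // takes a full 32-bit immediate, so frames smaller than 128 bytes get the
  // 3-byte-shorter encoding below.  The 0x58 | EBP_enc byte further down is
  // the one-byte POP EBP.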
  685   if (framesize >= 128) {
  686     emit_opcode(cbuf, 0x81); // add  SP, #framesize
  687     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  688     emit_d32(cbuf, framesize);
  689   } else if (framesize) {
  690     emit_opcode(cbuf, 0x83); // add  SP, #framesize
  691     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  692     emit_d8(cbuf, framesize);
  693   }
  694 
  695   emit_opcode(cbuf, 0x58 | EBP_enc);
  696 
  697   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
  698     __ reserved_stack_check();
  699   }
  700 
  701   if (do_polling() && C->is_method_compilation()) {
  702     Register thread = as_Register(EBX_enc);
  703     MacroAssembler masm(&cbuf);
  704     __ get_thread(thread);
  705     Label dummy_label;
  706     Label* code_stub = &dummy_label;
  707     if (!C->output()->in_scratch_emit_size()) {
  708       C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
  709       C->output()->add_stub(stub);
  710       code_stub = &stub->entry();
  711     }
  712     __ relocate(relocInfo::poll_return_type);
  713     __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  714   }
  715 }
  716 
  717 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  718   return MachNode::size(ra_); // too many variables; just compute it
  719                               // the hard way
  720 }
  721 
  722 int MachEpilogNode::reloc() const {
  723   return 0; // a large enough number
  724 }
  725 
  726 const Pipeline * MachEpilogNode::pipeline() const {
  727   return MachNode::pipeline_class();
  728 }
  729 
  730 //=============================================================================
  731 
  732 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
  733 static enum RC rc_class( OptoReg::Name reg ) {
  734 
  735   if( !OptoReg::is_valid(reg)  ) return rc_bad;
  736   if (OptoReg::is_stack(reg)) return rc_stack;
  737 
  738   VMReg r = OptoReg::as_VMReg(reg);
  739   if (r->is_Register()) return rc_int;
  740   if (r->is_FloatRegister()) {
  741     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
  742     return rc_float;
  743   }
  744   if (r->is_KRegister()) return rc_kreg;
  745   assert(r->is_XMMRegister(), "must be");
  746   return rc_xmm;
  747 }
  748 
  749 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
  750                         int opcode, const char *op_str, int size, outputStream* st ) {
  751   if( cbuf ) {
  752     emit_opcode  (*cbuf, opcode );
  753     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
  754 #ifndef PRODUCT
  755   } else if( !do_size ) {
  756     if( size != 0 ) st->print("\n\t");
  757     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
  758       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
  759       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
  760     } else { // FLD, FST, PUSH, POP
  761       st->print("%s [ESP + #%d]",op_str,offset);
  762     }
  763 #endif
  764   }
  765   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  766   return size+3+offset_size;
  767 }
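// Size accounting for impl_helper(): the 3 is opcode + ModRM + SIB (an
// ESP-based address always needs a SIB byte), and offset_size adds 0, 1 or 4
// bytes for the displacement, matching what encode_RegMem() emits above.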
  768 
  769 // Helper for XMM registers.  Extra opcode bits, limited syntax.
  770 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
  771                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  772   int in_size_in_bits = Assembler::EVEX_32bit;
  773   int evex_encoding = 0;
  774   if (reg_lo+1 == reg_hi) {
  775     in_size_in_bits = Assembler::EVEX_64bit;
  776     evex_encoding = Assembler::VEX_W;
  777   }
  778   if (cbuf) {
  779     MacroAssembler _masm(cbuf);
   780     // EVEX spills remain EVEX: compressed displacement is better than AVX for spill mem operations,
   781     //                          since it maps more cases to a single-byte displacement.
  782     _masm.set_managed();
  783     if (reg_lo+1 == reg_hi) { // double move?
  784       if (is_load) {
  785         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  786       } else {
  787         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  788       }
  789     } else {
  790       if (is_load) {
  791         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
  792       } else {
  793         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
  794       }
  795     }
  796 #ifndef PRODUCT
  797   } else if (!do_size) {
  798     if (size != 0) st->print("\n\t");
  799     if (reg_lo+1 == reg_hi) { // double move?
  800       if (is_load) st->print("%s %s,[ESP + #%d]",
  801                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
  802                               Matcher::regName[reg_lo], offset);
  803       else         st->print("MOVSD  [ESP + #%d],%s",
  804                               offset, Matcher::regName[reg_lo]);
  805     } else {
  806       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
  807                               Matcher::regName[reg_lo], offset);
  808       else         st->print("MOVSS  [ESP + #%d],%s",
  809                               offset, Matcher::regName[reg_lo]);
  810     }
  811 #endif
  812   }
  813   bool is_single_byte = false;
  814   if ((UseAVX > 2) && (offset != 0)) {
  815     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  816   }
  817   int offset_size = 0;
  818   if (UseAVX > 2 ) {
  819     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  820   } else {
  821     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  822   }
  823   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  824   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  825   return size+5+offset_size;
  826 }
  827 
  828 
  829 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  830                             int src_hi, int dst_hi, int size, outputStream* st ) {
  831   if (cbuf) {
  832     MacroAssembler _masm(cbuf);
   833     // EVEX spills remain EVEX: the logic for mixing full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  834     _masm.set_managed();
  835     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  836       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  837                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  838     } else {
  839       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  840                 as_XMMRegister(Matcher::_regEncode[src_lo]));
  841     }
  842 #ifndef PRODUCT
  843   } else if (!do_size) {
  844     if (size != 0) st->print("\n\t");
  845     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
  846       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
  847         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  848       } else {
  849         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  850       }
  851     } else {
  852       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
  853         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  854       } else {
  855         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
  856       }
  857     }
  858 #endif
  859   }
  860   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  861   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  862   int sz = (UseAVX > 2) ? 6 : 4;
  863   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
  864       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  865   return size + sz;
  866 }
  867 
  868 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  869                             int src_hi, int dst_hi, int size, outputStream* st ) {
  870   // 32-bit
  871   if (cbuf) {
  872     MacroAssembler _masm(cbuf);
   873     // EVEX spills remain EVEX: the logic for mixing full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  874     _masm.set_managed();
  875     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
  876              as_Register(Matcher::_regEncode[src_lo]));
  877 #ifndef PRODUCT
  878   } else if (!do_size) {
  879     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  880 #endif
  881   }
  882   return (UseAVX> 2) ? 6 : 4;
  883 }
  884 
  885 
  886 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
  887                                  int src_hi, int dst_hi, int size, outputStream* st ) {
  888   // 32-bit
  889   if (cbuf) {
  890     MacroAssembler _masm(cbuf);
   891     // EVEX spills remain EVEX: the logic for mixing full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
  892     _masm.set_managed();
  893     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
  894              as_XMMRegister(Matcher::_regEncode[src_lo]));
  895 #ifndef PRODUCT
  896   } else if (!do_size) {
  897     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
  898 #endif
  899   }
  900   return (UseAVX> 2) ? 6 : 4;
  901 }
  902 
  903 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  904   if( cbuf ) {
  905     emit_opcode(*cbuf, 0x8B );
  906     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
  907 #ifndef PRODUCT
  908   } else if( !do_size ) {
  909     if( size != 0 ) st->print("\n\t");
  910     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
  911 #endif
  912   }
  913   return size+2;
  914 }
  915 
  916 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
  917                                  int offset, int size, outputStream* st ) {
  918   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
  919     if( cbuf ) {
  920       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
  921       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
  922 #ifndef PRODUCT
  923     } else if( !do_size ) {
  924       if( size != 0 ) st->print("\n\t");
  925       st->print("FLD    %s",Matcher::regName[src_lo]);
  926 #endif
  927     }
  928     size += 2;
  929   }
  930 
  931   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  932   const char *op_str;
  933   int op;
  934   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
  935     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
  936     op = 0xDD;
  937   } else {                   // 32-bit store
  938     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
  939     op = 0xD9;
  940     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  941   }
  942 
  943   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
  944 }
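// A note on the st_op trick above: EBX_num and EDX_num are only borrowed for
// their machine encodings (3 and 2), which land in the /reg opcode-extension
// field of the store -- /3 selects FSTP (store and pop) and /2 selects FST
// (store, no pop) for both the DD (64-bit) and D9 (32-bit) forms.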
  945 
   946 // The next two methods are shared by the 32- and 64-bit VMs. They are defined in x86.ad.
  947 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
  948                           int src_hi, int dst_hi, uint ireg, outputStream* st);
  949 
  950 void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
  951                             int stack_offset, int reg, uint ireg, outputStream* st);
  952 
  953 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
  954                                      int dst_offset, uint ireg, outputStream* st) {
  955   if (cbuf) {
  956     MacroAssembler _masm(cbuf);
  957     switch (ireg) {
  958     case Op_VecS:
  959       __ pushl(Address(rsp, src_offset));
  960       __ popl (Address(rsp, dst_offset));
  961       break;
  962     case Op_VecD:
  963       __ pushl(Address(rsp, src_offset));
  964       __ popl (Address(rsp, dst_offset));
  965       __ pushl(Address(rsp, src_offset+4));
  966       __ popl (Address(rsp, dst_offset+4));
  967       break;
  968     case Op_VecX:
  969       __ movdqu(Address(rsp, -16), xmm0);
  970       __ movdqu(xmm0, Address(rsp, src_offset));
  971       __ movdqu(Address(rsp, dst_offset), xmm0);
  972       __ movdqu(xmm0, Address(rsp, -16));
  973       break;
  974     case Op_VecY:
  975       __ vmovdqu(Address(rsp, -32), xmm0);
  976       __ vmovdqu(xmm0, Address(rsp, src_offset));
  977       __ vmovdqu(Address(rsp, dst_offset), xmm0);
  978       __ vmovdqu(xmm0, Address(rsp, -32));
  979       break;
  980     case Op_VecZ:
  981       __ evmovdquq(Address(rsp, -64), xmm0, 2);
  982       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
  983       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
  984       __ evmovdquq(xmm0, Address(rsp, -64), 2);
  985       break;
  986     default:
  987       ShouldNotReachHere();
  988     }
  989 #ifndef PRODUCT
  990   } else {
  991     switch (ireg) {
  992     case Op_VecS:
  993       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
  994                 "popl    [rsp + #%d]",
  995                 src_offset, dst_offset);
  996       break;
  997     case Op_VecD:
  998       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
  999                 "popq    [rsp + #%d]\n\t"
 1000                 "pushl   [rsp + #%d]\n\t"
 1001                 "popq    [rsp + #%d]",
 1002                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 1003       break;
 1004      case Op_VecX:
 1005       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 1006                 "movdqu  xmm0, [rsp + #%d]\n\t"
 1007                 "movdqu  [rsp + #%d], xmm0\n\t"
 1008                 "movdqu  xmm0, [rsp - #16]",
 1009                 src_offset, dst_offset);
 1010       break;
 1011     case Op_VecY:
 1012       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
 1013                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1014                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1015                 "vmovdqu xmm0, [rsp - #32]",
 1016                 src_offset, dst_offset);
 1017       break;
 1018     case Op_VecZ:
 1019       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
 1020                 "vmovdqu xmm0, [rsp + #%d]\n\t"
 1021                 "vmovdqu [rsp + #%d], xmm0\n\t"
 1022                 "vmovdqu xmm0, [rsp - #64]",
 1023                 src_offset, dst_offset);
 1024       break;
 1025     default:
 1026       ShouldNotReachHere();
 1027     }
 1028 #endif
 1029   }
 1030 }
 1031 
 1032 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 1033   // Get registers to move
 1034   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 1035   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 1036   OptoReg::Name dst_second = ra_->get_reg_second(this );
 1037   OptoReg::Name dst_first = ra_->get_reg_first(this );
 1038 
 1039   enum RC src_second_rc = rc_class(src_second);
 1040   enum RC src_first_rc = rc_class(src_first);
 1041   enum RC dst_second_rc = rc_class(dst_second);
 1042   enum RC dst_first_rc = rc_class(dst_first);
 1043 
 1044   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 1045 
 1046   // Generate spill code!
 1047   int size = 0;
 1048 
 1049   if( src_first == dst_first && src_second == dst_second )
 1050     return size;            // Self copy, no move
 1051 
 1052   if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
 1053     uint ireg = ideal_reg();
 1054     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
 1055     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
 1056     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
 1057     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1058       // mem -> mem
 1059       int src_offset = ra_->reg2offset(src_first);
 1060       int dst_offset = ra_->reg2offset(dst_first);
 1061       vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
 1062     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1063       vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
 1064     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1065       int stack_offset = ra_->reg2offset(dst_first);
 1066       vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
 1067     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1068       int stack_offset = ra_->reg2offset(src_first);
 1069       vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
 1070     } else {
 1071       ShouldNotReachHere();
 1072     }
 1073     return 0;
 1074   }
 1075 
 1076   // --------------------------------------
 1077   // Check for mem-mem move.  push/pop to move.
 1078   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 1079     if( src_second == dst_first ) { // overlapping stack copy ranges
 1080       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 1081       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1082       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1083       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 1084     }
 1085     // move low bits
 1086     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 1087     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 1088     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 1089       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 1090       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 1091     }
 1092     return size;
 1093   }
 1094 
 1095   // --------------------------------------
 1096   // Check for integer reg-reg copy
 1097   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 1098     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 1099 
 1100   // Check for integer store
 1101   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 1102     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 1103 
 1104   // Check for integer load
 1105   if( src_first_rc == rc_stack && dst_first_rc == rc_int )
 1106     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 1107 
 1108   // Check for integer reg-xmm reg copy
 1109   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 1110     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1111             "no 64 bit integer-float reg moves" );
 1112     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1113   }
 1114   // --------------------------------------
 1115   // Check for float reg-reg copy
 1116   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 1117     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1118             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 1119     if( cbuf ) {
 1120 
 1121       // Note the mucking with the register encode to compensate for the 0/1
 1122       // indexing issue mentioned in a comment in the reg_def sections
 1123       // for FPR registers many lines above here.
 1124 
 1125       if( src_first != FPR1L_num ) {
 1126         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 1127         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 1128         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1129         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1130      } else {
 1131         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 1132         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 1133      }
 1134 #ifndef PRODUCT
 1135     } else if( !do_size ) {
 1136       if( size != 0 ) st->print("\n\t");
 1137       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
 1138       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
 1139 #endif
 1140     }
 1141     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
 1142   }
 1143 
 1144   // Check for float store
 1145   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
 1146     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
 1147   }
 1148 
 1149   // Check for float load
 1150   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
 1151     int offset = ra_->reg2offset(src_first);
 1152     const char *op_str;
 1153     int op;
 1154     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
 1155       op_str = "FLD_D";
 1156       op = 0xDD;
 1157     } else {                   // 32-bit load
 1158       op_str = "FLD_S";
 1159       op = 0xD9;
 1160       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
 1161     }
 1162     if( cbuf ) {
 1163       emit_opcode  (*cbuf, op );
 1164       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
 1165       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 1166       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 1167 #ifndef PRODUCT
 1168     } else if( !do_size ) {
 1169       if( size != 0 ) st->print("\n\t");
 1170       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
 1171 #endif
 1172     }
 1173     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 1174     return size + 3+offset_size+2;
 1175   }
 1176 
 1177   // Check for xmm reg-reg copy
 1178   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
 1179     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 1180             (src_first+1 == src_second && dst_first+1 == dst_second),
 1181             "no non-adjacent float-moves" );
 1182     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1183   }
 1184 
 1185   // Check for xmm reg-integer reg copy
 1186   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
 1187     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 1188             "no 64 bit float-integer reg moves" );
 1189     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 1190   }
 1191 
 1192   // Check for xmm store
 1193   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
 1194     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
 1195   }
 1196 
 1197   // Check for float xmm load
 1198   if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
 1199     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
 1200   }
 1201 
 1202   // Copy from float reg to xmm reg
 1203   if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
 1204     // copy to the top of stack from floating point reg
 1205     // and use LEA to preserve flags
 1206     if( cbuf ) {
 1207       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
 1208       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1209       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1210       emit_d8(*cbuf,0xF8);
 1211 #ifndef PRODUCT
 1212     } else if( !do_size ) {
 1213       if( size != 0 ) st->print("\n\t");
 1214       st->print("LEA    ESP,[ESP-8]");
 1215 #endif
 1216     }
 1217     size += 4;
 1218 
 1219     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
 1220 
 1221     // Copy from the temp memory to the xmm reg.
 1222     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
 1223 
 1224     if( cbuf ) {
 1225       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
 1226       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
 1227       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
 1228       emit_d8(*cbuf,0x08);
 1229 #ifndef PRODUCT
 1230     } else if( !do_size ) {
 1231       if( size != 0 ) st->print("\n\t");
 1232       st->print("LEA    ESP,[ESP+8]");
 1233 #endif
 1234     }
 1235     size += 4;
 1236     return size;
 1237   }
 1238 
 1239   // AVX-512 opmask specific spilling.
 1240   if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
 1241     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1242     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1243     MacroAssembler _masm(cbuf);
 1244     int offset = ra_->reg2offset(src_first);
 1245     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 1246     return 0;
 1247   }
 1248 
 1249   if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
 1250     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1251     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1252     MacroAssembler _masm(cbuf);
 1253     int offset = ra_->reg2offset(dst_first);
 1254     __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
 1255     return 0;
 1256   }
 1257 
 1258   if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
 1259     Unimplemented();
 1260     return 0;
 1261   }
 1262 
 1263   if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
 1264     Unimplemented();
 1265     return 0;
 1266   }
 1267 
 1268   if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
 1269     assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
 1270     assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
 1271     MacroAssembler _masm(cbuf);
 1272     __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
 1273     return 0;
 1274   }
 1275 
 1276   assert( size > 0, "missed a case" );
 1277 
 1278   // --------------------------------------------------------------------
 1279   // Check for second bits still needing moving.
 1280   if( src_second == dst_second )
 1281     return size;               // Self copy; no move
 1282   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
 1283 
 1284   // Check for second word int-int move
 1285   if( src_second_rc == rc_int && dst_second_rc == rc_int )
 1286     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
 1287 
 1288   // Check for second word integer store
 1289   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
 1290     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
 1291 
 1292   // Check for second word integer load
 1293   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
 1294     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
 1295 
 1296   Unimplemented();
 1297   return 0; // Mute compiler
 1298 }
 1299 
 1300 #ifndef PRODUCT
 1301 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
 1302   implementation( NULL, ra_, false, st );
 1303 }
 1304 #endif
 1305 
 1306 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1307   implementation( &cbuf, ra_, false, NULL );
 1308 }
 1309 
 1310 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
 1311   return MachNode::size(ra_);
 1312 }
 1313 
 1314 
 1315 //=============================================================================
 1316 #ifndef PRODUCT
 1317 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1318   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1319   int reg = ra_->get_reg_first(this);
 1320   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
 1321 }
 1322 #endif
 1323 
 1324 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1325   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1326   int reg = ra_->get_encode(this);
 1327   if( offset >= 128 ) {
 1328     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1329     emit_rm(cbuf, 0x2, reg, 0x04);
 1330     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1331     emit_d32(cbuf, offset);
 1332   }
 1333   else {
 1334     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
 1335     emit_rm(cbuf, 0x1, reg, 0x04);
 1336     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
 1337     emit_d8(cbuf, offset);
 1338   }
 1339 }
 1340 
 1341 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
 1342   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
 1343   if( offset >= 128 ) {
 1344     return 7;
 1345   }
 1346   else {
 1347     return 4;
 1348   }
 1349 }
 1350 
 1351 //=============================================================================
 1352 #ifndef PRODUCT
 1353 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 1354   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
 1355   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
 1356   st->print_cr("\tNOP");
 1357   st->print_cr("\tNOP");
 1358   if( !OptoBreakpoint )
 1359     st->print_cr("\tNOP");
 1360 }
 1361 #endif
 1362 
 1363 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 1364   MacroAssembler masm(&cbuf);
 1365 #ifdef ASSERT
 1366   uint insts_size = cbuf.insts_size();
 1367 #endif
 1368   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
 1369   masm.jump_cc(Assembler::notEqual,
 1370                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that the verified entry point is
     properly aligned for patching by NativeJump::patch_verified_entry() */
 1373   int nops_cnt = 2;
 1374   if( !OptoBreakpoint ) // Leave space for int3
 1375      nops_cnt += 1;
 1376   masm.nop(nops_cnt);
 1377 
 1378   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
 1379 }
 1380 
 1381 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
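  // Informal size accounting: CMP EAX,[ECX+klass_offset] is 3 bytes
  // (3B /r with an 8-bit displacement), the JNE to the ic-miss stub uses the
  // 6-byte 0F 85 rel32 form, and the trailing NOPs contribute 2 or 3 bytes,
  // giving 11 with OptoBreakpoint and 12 without it.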
 1382   return OptoBreakpoint ? 11 : 12;
 1383 }
 1384 
 1385 
 1386 //=============================================================================
 1387 
 1388 // Vector calling convention not supported.
 1389 const bool Matcher::supports_vector_calling_convention() {
 1390   return false;
 1391 }
 1392 
 1393 OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
 1394   Unimplemented();
 1395   return OptoRegPair(0, 0);
 1396 }
 1397 
 1398 // Is this branch offset short enough that a short branch can be used?
 1399 //
 1400 // NOTE: If the platform does not provide any short branch variants, then
 1401 //       this method should return false for offset 0.
 1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
 1403   // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
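  // Worked example (illustrative): for a 2-byte short branch (br_size == 2),
  // a target 100 bytes past the branch arrives here as offset == 100 and
  // becomes a displacement of 100 - 2 = 98, well inside the signed 8-bit
  // range checked below.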
 1406   offset -= br_size;
 1407 
 1408   // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly smaller
 1410   if (rule == jmpConUCF2_rule)
 1411     return (-126 <= offset && offset <= 125);
 1412   return (-128 <= offset && offset <= 127);
 1413 }
 1414 
 1415 // Return whether or not this register is ever used as an argument.  This
 1416 // function is used on startup to build the trampoline stubs in generateOptoStub.
 1417 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
 1419 bool Matcher::can_be_java_arg( int reg ) {
 1420   if(  reg == ECX_num   || reg == EDX_num   ) return true;
 1421   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
 1422   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
 1423   return false;
 1424 }
 1425 
 1426 bool Matcher::is_spillable_arg( int reg ) {
 1427   return can_be_java_arg(reg);
 1428 }
 1429 
 1430 uint Matcher::int_pressure_limit()
 1431 {
 1432   return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
 1433 }
 1434 
 1435 uint Matcher::float_pressure_limit()
 1436 {
 1437   return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
 1438 }
 1439 
 1440 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses a multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded because negating it does not
  // yield a correct positive 32-bit value).
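  // Examples (illustrative): divisor == 10 or divisor == -7 passes the check,
  // while divisor == 0x100000000LL does not fit in 32 bits and
  // divisor == min_jint is explicitly excluded; even a fitting divisor only
  // takes this path when VM_Version::has_fast_idiv() reports true.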
 1446   return VM_Version::has_fast_idiv() &&
 1447          (divisor == (int)divisor && divisor != min_jint);
 1448 }
 1449 
 1450 // Register for DIVI projection of divmodI
 1451 RegMask Matcher::divI_proj_mask() {
 1452   return EAX_REG_mask();
 1453 }
 1454 
 1455 // Register for MODI projection of divmodI
 1456 RegMask Matcher::modI_proj_mask() {
 1457   return EDX_REG_mask();
 1458 }
 1459 
 1460 // Register for DIVL projection of divmodL
 1461 RegMask Matcher::divL_proj_mask() {
 1462   ShouldNotReachHere();
 1463   return RegMask();
 1464 }
 1465 
 1466 // Register for MODL projection of divmodL
 1467 RegMask Matcher::modL_proj_mask() {
 1468   ShouldNotReachHere();
 1469   return RegMask();
 1470 }
 1471 
 1472 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
 1473   return NO_REG_mask();
 1474 }
 1475 
// Returns true if the high 32 bits of the value are known to be zero.
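// Examples (illustrative): an AndL whose constant input is 0x00000000FFFFFFFFLL
// qualifies, as does a ConL such as 42L; an AndL masked with 0x1FFFFFFFFLL or a
// plain AddL node does not, since nothing here proves its upper half is zero.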
 1477 bool is_operand_hi32_zero(Node* n) {
 1478   int opc = n->Opcode();
 1479   if (opc == Op_AndL) {
 1480     Node* o2 = n->in(2);
 1481     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1482       return true;
 1483     }
 1484   }
 1485   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
 1486     return true;
 1487   }
 1488   return false;
 1489 }
 1490 
 1491 %}
 1492 
 1493 //----------ENCODING BLOCK-----------------------------------------------------
 1494 // This block specifies the encoding classes used by the compiler to output
 1495 // byte streams.  Encoding classes generate functions which are called by
 1496 // Machine Instruction Nodes in order to generate the bit encoding of the
 1497 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently four supported interfaces,
 1499 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
 1500 // operand to generate a function which returns its register number when
 1501 // queried.   CONST_INTER causes an operand to generate a function which
 1502 // returns the value of the constant when queried.  MEMORY_INTER causes an
 1503 // operand to generate four functions which return the Base Register, the
 1504 // Index Register, the Scale Value, and the Offset Value of the operand when
 1505 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
 1507 // associated with each basic boolean condition for a conditional instruction.
 1508 // Instructions specify two basic values for encoding.  They use the
 1509 // ins_encode keyword to specify their encoding class (which must be one of
 1510 // the class names specified in the encoding block), and they use the
 1511 // opcode keyword to specify, in order, their primary, secondary, and
 1512 // tertiary opcode.  Only the opcode sections which a particular instruction
 1513 // needs for encoding need to be specified.
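//
// As an illustration only (a hypothetical rule, not one defined at this point;
// size, format, ins_pipe and so on are omitted for brevity), an integer add
// might pair the two keywords like
//
//   instruct addI_example(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     opcode(0x03);                          // primary opcode: ADD r32,r/m32
//     ins_encode( OpcP, RegReg( dst, src) );
//   %}
//
// so OpcP emits the primary opcode byte and RegReg emits the mod/rm byte.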
 1514 encode %{
 1515   // Build emit functions for each basic byte or larger field in the intel
 1516   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
 1517   // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In the future, we can generalize this by
 1519   // adding a syntax that specifies the sizes of fields in an order,
 1520   // so that the adlc can build the emit functions automagically
 1521 
 1522   // Emit primary opcode
 1523   enc_class OpcP %{
 1524     emit_opcode(cbuf, $primary);
 1525   %}
 1526 
 1527   // Emit secondary opcode
 1528   enc_class OpcS %{
 1529     emit_opcode(cbuf, $secondary);
 1530   %}
 1531 
 1532   // Emit opcode directly
 1533   enc_class Opcode(immI d8) %{
 1534     emit_opcode(cbuf, $d8$$constant);
 1535   %}
 1536 
 1537   enc_class SizePrefix %{
 1538     emit_opcode(cbuf,0x66);
 1539   %}
 1540 
 1541   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1542     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1543   %}
 1544 
 1545   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
 1546     emit_opcode(cbuf,$opcode$$constant);
 1547     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1548   %}
 1549 
 1550   enc_class mov_r32_imm0( rRegI dst ) %{
 1551     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
 1552     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
 1553   %}
 1554 
 1555   enc_class cdq_enc %{
 1556     // Full implementation of Java idiv and irem; checks for
 1557     // special case as described in JVM spec., p.243 & p.271.
 1558     //
 1559     //         normal case                           special case
 1560     //
    // input : rax: dividend                          min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
    //
    //  Code sequence:
 1568     //
 1569     //  81 F8 00 00 00 80    cmp         rax,80000000h
 1570     //  0F 85 0B 00 00 00    jne         normal_case
 1571     //  33 D2                xor         rdx,edx
 1572     //  83 F9 FF             cmp         rcx,0FFh
 1573     //  0F 84 03 00 00 00    je          done
 1574     //                  normal_case:
 1575     //  99                   cdq
 1576     //  F7 F9                idiv        rax,ecx
 1577     //                  done:
 1578     //
 1579     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
 1580     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
 1581     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
 1582     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
 1583     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
 1584     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
 1585     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
 1586     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
 1587     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
 1588     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
 1589     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
 1590     // normal_case:
 1591     emit_opcode(cbuf,0x99);                                         // cdq
 1592     // idiv (note: must be emitted by the user of this rule)
 1593     // normal:
 1594   %}
 1595 
 1596   // Dense encoding for older common ops
 1597   enc_class Opc_plus(immI opcode, rRegI reg) %{
 1598     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
 1599   %}
 1600 
 1601 
  // Opcode enc_class for 8/32-bit immediate instructions with sign extension
 1603   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
 1604     // Check for 8-bit immediate, and set sign extend bit in opcode
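    // Informal example: for the group-1 immediate ALU ops the 32-bit form is
    // 0x81 /digit imm32 and the sign-extended 8-bit form is 0x83 /digit imm8,
    // so OR-ing in 0x02 below is exactly what turns 0x81 into 0x83 when the
    // constant fits in a signed byte.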
 1605     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1606       emit_opcode(cbuf, $primary | 0x02);
 1607     }
 1608     else {                          // If 32-bit immediate
 1609       emit_opcode(cbuf, $primary);
 1610     }
 1611   %}
 1612 
 1613   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
 1614     // Emit primary opcode and set sign-extend bit
 1615     // Check for 8-bit immediate, and set sign extend bit in opcode
 1616     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
 1618     else {                          // If 32-bit immediate
 1619       emit_opcode(cbuf, $primary);
 1620     }
 1621     // Emit r/m byte with secondary opcode, after primary opcode.
 1622     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1623   %}
 1624 
 1625   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
 1626     // Check for 8-bit immediate, and set sign extend bit in opcode
 1627     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
 1628       $$$emit8$imm$$constant;
 1629     }
 1630     else {                          // If 32-bit immediate
 1631       // Output immediate
 1632       $$$emit32$imm$$constant;
 1633     }
 1634   %}
 1635 
 1636   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
 1637     // Emit primary opcode and set sign-extend bit
 1638     // Check for 8-bit immediate, and set sign extend bit in opcode
 1639     int con = (int)$imm$$constant; // Throw away top bits
 1640     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1641     // Emit r/m byte with secondary opcode, after primary opcode.
 1642     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1643     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1644     else                               emit_d32(cbuf,con);
 1645   %}
 1646 
 1647   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
 1648     // Emit primary opcode and set sign-extend bit
 1649     // Check for 8-bit immediate, and set sign extend bit in opcode
 1650     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
 1651     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
 1652     // Emit r/m byte with tertiary opcode, after primary opcode.
 1653     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
 1654     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
 1655     else                               emit_d32(cbuf,con);
 1656   %}
 1657 
 1658   enc_class OpcSReg (rRegI dst) %{    // BSWAP
 1659     emit_cc(cbuf, $secondary, $dst$$reg );
 1660   %}
 1661 
 1662   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
 1663     int destlo = $dst$$reg;
 1664     int desthi = HIGH_FROM_LOW_ENC(destlo);
 1665     // bswap lo
 1666     emit_opcode(cbuf, 0x0F);
 1667     emit_cc(cbuf, 0xC8, destlo);
 1668     // bswap hi
 1669     emit_opcode(cbuf, 0x0F);
 1670     emit_cc(cbuf, 0xC8, desthi);
 1671     // xchg lo and hi
 1672     emit_opcode(cbuf, 0x87);
 1673     emit_rm(cbuf, 0x3, destlo, desthi);
 1674   %}
 1675 
 1676   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
 1677     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
 1678   %}
 1679 
 1680   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
 1681     $$$emit8$primary;
 1682     emit_cc(cbuf, $secondary, $cop$$cmpcode);
 1683   %}
 1684 
 1685   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
 1686     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
 1687     emit_d8(cbuf, op >> 8 );
 1688     emit_d8(cbuf, op & 255);
 1689   %}
 1690 
 1691   // emulate a CMOV with a conditional branch around a MOV
 1692   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
 1693     // Invert sense of branch from sense of CMOV
 1694     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
 1695     emit_d8( cbuf, $brOffs$$constant );
 1696   %}
 1697 
 1698   enc_class enc_PartialSubtypeCheck( ) %{
 1699     Register Redi = as_Register(EDI_enc); // result register
 1700     Register Reax = as_Register(EAX_enc); // super class
 1701     Register Recx = as_Register(ECX_enc); // killed
 1702     Register Resi = as_Register(ESI_enc); // sub class
 1703     Label miss;
 1704 
 1705     MacroAssembler _masm(&cbuf);
 1706     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
 1707                                      NULL, &miss,
 1708                                      /*set_cond_codes:*/ true);
 1709     if ($primary) {
 1710       __ xorptr(Redi, Redi);
 1711     }
 1712     __ bind(miss);
 1713   %}
 1714 
 1715   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
 1716     MacroAssembler masm(&cbuf);
 1717     int start = masm.offset();
 1718     if (UseSSE >= 2) {
 1719       if (VerifyFPU) {
 1720         masm.verify_FPU(0, "must be empty in SSE2+ mode");
 1721       }
 1722     } else {
 1723       // External c_calling_convention expects the FPU stack to be 'clean'.
 1724       // Compiled code leaves it dirty.  Do cleanup now.
 1725       masm.empty_FPU_stack();
 1726     }
 1727     if (sizeof_FFree_Float_Stack_All == -1) {
 1728       sizeof_FFree_Float_Stack_All = masm.offset() - start;
 1729     } else {
 1730       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
 1731     }
 1732   %}
 1733 
 1734   enc_class Verify_FPU_For_Leaf %{
 1735     if( VerifyFPU ) {
 1736       MacroAssembler masm(&cbuf);
 1737       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
 1738     }
 1739   %}
 1740 
 1741   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
 1742     // This is the instruction starting address for relocation info.
 1743     MacroAssembler _masm(&cbuf);
 1744     cbuf.set_insts_mark();
 1745     $$$emit8$primary;
 1746     // CALL directly to the runtime
 1747     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1748                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1749     __ post_call_nop();
 1750 
 1751     if (UseSSE >= 2) {
 1752       MacroAssembler _masm(&cbuf);
 1753       BasicType rt = tf()->return_type();
 1754 
 1755       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
 1756         // A C runtime call where the return value is unused.  In SSE2+
 1757         // mode the result needs to be removed from the FPU stack.  It's
 1758         // likely that this function call could be removed by the
 1759         // optimizer if the C function is a pure function.
 1760         __ ffree(0);
 1761       } else if (rt == T_FLOAT) {
 1762         __ lea(rsp, Address(rsp, -4));
 1763         __ fstp_s(Address(rsp, 0));
 1764         __ movflt(xmm0, Address(rsp, 0));
 1765         __ lea(rsp, Address(rsp,  4));
 1766       } else if (rt == T_DOUBLE) {
 1767         __ lea(rsp, Address(rsp, -8));
 1768         __ fstp_d(Address(rsp, 0));
 1769         __ movdbl(xmm0, Address(rsp, 0));
 1770         __ lea(rsp, Address(rsp,  8));
 1771       }
 1772     }
 1773   %}
 1774 
 1775   enc_class pre_call_resets %{
 1776     // If method sets FPU control word restore it here
 1777     debug_only(int off0 = cbuf.insts_size());
 1778     if (ra_->C->in_24_bit_fp_mode()) {
 1779       MacroAssembler _masm(&cbuf);
 1780       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
 1781     }
 1782     // Clear upper bits of YMM registers when current compiled code uses
 1783     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 1784     MacroAssembler _masm(&cbuf);
 1785     __ vzeroupper();
 1786     debug_only(int off1 = cbuf.insts_size());
 1787     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
 1788   %}
 1789 
 1790   enc_class post_call_FPU %{
 1791     // If method sets FPU control word do it here also
 1792     if (Compile::current()->in_24_bit_fp_mode()) {
 1793       MacroAssembler masm(&cbuf);
 1794       masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
 1795     }
 1796   %}
 1797 
 1798   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
 1799     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
 1800     // who we intended to call.
 1801     MacroAssembler _masm(&cbuf);
 1802     cbuf.set_insts_mark();
 1803     $$$emit8$primary;
 1804 
 1805     if (!_method) {
 1806       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1807                      runtime_call_Relocation::spec(),
 1808                      RELOC_IMM32);
 1809       __ post_call_nop();
 1810     } else {
 1811       int method_index = resolved_method_index(cbuf);
 1812       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
 1813                                                   : static_call_Relocation::spec(method_index);
 1814       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
 1815                      rspec, RELOC_DISP32);
 1816       __ post_call_nop();
 1817       address mark = cbuf.insts_mark();
 1818       if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
 1819         // Calls of the same statically bound method can share
 1820         // a stub to the interpreter.
 1821         cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
 1822       } else {
 1823         // Emit stubs for static call.
 1824         address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
 1825         if (stub == NULL) {
 1826           ciEnv::current()->record_failure("CodeCache is full");
 1827           return;
 1828         }
 1829       }
 1830     }
 1831   %}
 1832 
 1833   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
 1834     MacroAssembler _masm(&cbuf);
 1835     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
 1836     __ post_call_nop();
 1837   %}
 1838 
 1839   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
 1840     int disp = in_bytes(Method::from_compiled_offset());
 1841     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
 1842 
 1843     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
 1844     MacroAssembler _masm(&cbuf);
 1845     cbuf.set_insts_mark();
 1846     $$$emit8$primary;
 1847     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
 1848     emit_d8(cbuf, disp);             // Displacement
 1849     __ post_call_nop();
 1850   %}
 1851 
 1852 //   Following encoding is no longer used, but may be restored if calling
 1853 //   convention changes significantly.
 1854 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
 1855 //
 1856 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
 1857 //     // int ic_reg     = Matcher::inline_cache_reg();
 1858 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
 1859 //     // int imo_reg    = Matcher::interpreter_method_reg();
 1860 //     // int imo_encode = Matcher::_regEncode[imo_reg];
 1861 //
 1862 //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
 1863 //     // // so we load it immediately before the call
 1864 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
 1865 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
 1866 //
 1867 //     // xor rbp,ebp
 1868 //     emit_opcode(cbuf, 0x33);
 1869 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
 1870 //
 1871 //     // CALL to interpreter.
 1872 //     cbuf.set_insts_mark();
 1873 //     $$$emit8$primary;
 1874 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
 1875 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 1876 //   %}
 1877 
 1878   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
 1879     $$$emit8$primary;
 1880     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 1881     $$$emit8$shift$$constant;
 1882   %}
 1883 
 1884   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
 1885     // Load immediate does not have a zero or sign extended version
 1886     // for 8-bit immediates
 1887     emit_opcode(cbuf, 0xB8 + $dst$$reg);
 1888     $$$emit32$src$$constant;
 1889   %}
 1890 
 1891   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
 1892     // Load immediate does not have a zero or sign extended version
 1893     // for 8-bit immediates
 1894     emit_opcode(cbuf, $primary + $dst$$reg);
 1895     $$$emit32$src$$constant;
 1896   %}
 1897 
 1898   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
 1899     // Load immediate does not have a zero or sign extended version
 1900     // for 8-bit immediates
 1901     int dst_enc = $dst$$reg;
 1902     int src_con = $src$$constant & 0x0FFFFFFFFL;
 1903     if (src_con == 0) {
 1904       // xor dst, dst
 1905       emit_opcode(cbuf, 0x33);
 1906       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1907     } else {
 1908       emit_opcode(cbuf, $primary + dst_enc);
 1909       emit_d32(cbuf, src_con);
 1910     }
 1911   %}
 1912 
 1913   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
 1914     // Load immediate does not have a zero or sign extended version
 1915     // for 8-bit immediates
 1916     int dst_enc = $dst$$reg + 2;
 1917     int src_con = ((julong)($src$$constant)) >> 32;
 1918     if (src_con == 0) {
 1919       // xor dst, dst
 1920       emit_opcode(cbuf, 0x33);
 1921       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
 1922     } else {
 1923       emit_opcode(cbuf, $primary + dst_enc);
 1924       emit_d32(cbuf, src_con);
 1925     }
 1926   %}
 1927 
 1928 
 1929   // Encode a reg-reg copy.  If it is useless, then empty encoding.
 1930   enc_class enc_Copy( rRegI dst, rRegI src ) %{
 1931     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1932   %}
 1933 
 1934   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
 1935     encode_Copy( cbuf, $dst$$reg, $src$$reg );
 1936   %}
 1937 
 1938   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
 1939     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1940   %}
 1941 
 1942   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
 1943     $$$emit8$primary;
 1944     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1945   %}
 1946 
 1947   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
 1948     $$$emit8$secondary;
 1949     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1950   %}
 1951 
 1952   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1953     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 1954   %}
 1955 
 1956   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
 1957     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
 1958   %}
 1959 
 1960   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
 1961     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 1962   %}
 1963 
 1964   enc_class Con32 (immI src) %{    // Con32(storeImmI)
 1965     // Output immediate
 1966     $$$emit32$src$$constant;
 1967   %}
 1968 
 1969   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
 1970     // Output Float immediate bits
 1971     jfloat jf = $src$$constant;
 1972     int    jf_as_bits = jint_cast( jf );
 1973     emit_d32(cbuf, jf_as_bits);
 1974   %}
 1975 
 1976   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
 1977     // Output Float immediate bits
 1978     jfloat jf = $src$$constant;
 1979     int    jf_as_bits = jint_cast( jf );
 1980     emit_d32(cbuf, jf_as_bits);
 1981   %}
 1982 
 1983   enc_class Con16 (immI src) %{    // Con16(storeImmI)
 1984     // Output immediate
 1985     $$$emit16$src$$constant;
 1986   %}
 1987 
 1988   enc_class Con_d32(immI src) %{
 1989     emit_d32(cbuf,$src$$constant);
 1990   %}
 1991 
 1992   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
 1993     // Output immediate memory reference
 1994     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
 1995     emit_d32(cbuf, 0x00);
 1996   %}
 1997 
 1998   enc_class lock_prefix( ) %{
 1999     emit_opcode(cbuf,0xF0);         // [Lock]
 2000   %}
 2001 
 2002   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high-order word of the new value to store, but
  //       our register encoding uses rbx.
 2007   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
 2008 
 2009     // XCHG  rbx,ecx
 2010     emit_opcode(cbuf,0x87);
 2011     emit_opcode(cbuf,0xD9);
 2012     // [Lock]
 2013     emit_opcode(cbuf,0xF0);
 2014     // CMPXCHG8 [Eptr]
 2015     emit_opcode(cbuf,0x0F);
 2016     emit_opcode(cbuf,0xC7);
 2017     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2018     // XCHG  rbx,ecx
 2019     emit_opcode(cbuf,0x87);
 2020     emit_opcode(cbuf,0xD9);
 2021   %}
 2022 
 2023   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
 2024     // [Lock]
 2025     emit_opcode(cbuf,0xF0);
 2026 
 2027     // CMPXCHG [Eptr]
 2028     emit_opcode(cbuf,0x0F);
 2029     emit_opcode(cbuf,0xB1);
 2030     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2031   %}
 2032 
 2033   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
 2034     // [Lock]
 2035     emit_opcode(cbuf,0xF0);
 2036 
 2037     // CMPXCHGB [Eptr]
 2038     emit_opcode(cbuf,0x0F);
 2039     emit_opcode(cbuf,0xB0);
 2040     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2041   %}
 2042 
 2043   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
 2044     // [Lock]
 2045     emit_opcode(cbuf,0xF0);
 2046 
 2047     // 16-bit mode
 2048     emit_opcode(cbuf, 0x66);
 2049 
 2050     // CMPXCHGW [Eptr]
 2051     emit_opcode(cbuf,0x0F);
 2052     emit_opcode(cbuf,0xB1);
 2053     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
 2054   %}
 2055 
 2056   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
 2057     int res_encoding = $res$$reg;
 2058 
 2059     // MOV  res,0
 2060     emit_opcode( cbuf, 0xB8 + res_encoding);
 2061     emit_d32( cbuf, 0 );
 2062     // JNE,s  fail
 2063     emit_opcode(cbuf,0x75);
 2064     emit_d8(cbuf, 5 );
 2065     // MOV  res,1
 2066     emit_opcode( cbuf, 0xB8 + res_encoding);
 2067     emit_d32( cbuf, 1 );
 2068     // fail:
 2069   %}
 2070 
 2071   enc_class set_instruction_start( ) %{
 2072     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
 2073   %}
 2074 
 2075   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
 2076     int reg_encoding = $ereg$$reg;
 2077     int base  = $mem$$base;
 2078     int index = $mem$$index;
 2079     int scale = $mem$$scale;
 2080     int displace = $mem$$disp;
 2081     relocInfo::relocType disp_reloc = $mem->disp_reloc();
 2082     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2083   %}
 2084 
 2085   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
 2086     int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
 2087     int base  = $mem$$base;
 2088     int index = $mem$$index;
 2089     int scale = $mem$$scale;
 2090     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
 2091     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
 2092     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
 2093   %}
 2094 
 2095   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
 2096     int r1, r2;
 2097     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2098     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2099     emit_opcode(cbuf,0x0F);
 2100     emit_opcode(cbuf,$tertiary);
 2101     emit_rm(cbuf, 0x3, r1, r2);
 2102     emit_d8(cbuf,$cnt$$constant);
 2103     emit_d8(cbuf,$primary);
 2104     emit_rm(cbuf, 0x3, $secondary, r1);
 2105     emit_d8(cbuf,$cnt$$constant);
 2106   %}
 2107 
 2108   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
 2109     emit_opcode( cbuf, 0x8B ); // Move
 2110     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
 2111     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2112       emit_d8(cbuf,$primary);
 2113       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
 2114       emit_d8(cbuf,$cnt$$constant-32);
 2115     }
 2116     emit_d8(cbuf,$primary);
 2117     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
 2118     emit_d8(cbuf,31);
 2119   %}
 2120 
 2121   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
 2122     int r1, r2;
 2123     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2124     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
 2125 
 2126     emit_opcode( cbuf, 0x8B ); // Move r1,r2
 2127     emit_rm(cbuf, 0x3, r1, r2);
 2128     if( $cnt$$constant > 32 ) { // Shift, if not by zero
 2129       emit_opcode(cbuf,$primary);
 2130       emit_rm(cbuf, 0x3, $secondary, r1);
 2131       emit_d8(cbuf,$cnt$$constant-32);
 2132     }
 2133     emit_opcode(cbuf,0x33);  // XOR r2,r2
 2134     emit_rm(cbuf, 0x3, r2, r2);
 2135   %}
 2136 
 2137   // Clone of RegMem but accepts an extra parameter to access each
 2138   // half of a double in memory; it never needs relocation info.
 2139   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
 2140     emit_opcode(cbuf,$opcode$$constant);
 2141     int reg_encoding = $rm_reg$$reg;
 2142     int base     = $mem$$base;
 2143     int index    = $mem$$index;
 2144     int scale    = $mem$$scale;
 2145     int displace = $mem$$disp + $disp_for_half$$constant;
 2146     relocInfo::relocType disp_reloc = relocInfo::none;
 2147     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2148   %}
 2149 
 2150   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
 2151   //
 2152   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
 2153   // and it never needs relocation information.
 2154   // Frequently used to move data between FPU's Stack Top and memory.
 2155   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
 2156     int rm_byte_opcode = $rm_opcode$$constant;
 2157     int base     = $mem$$base;
 2158     int index    = $mem$$index;
 2159     int scale    = $mem$$scale;
 2160     int displace = $mem$$disp;
 2161     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
 2162     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
 2163   %}
 2164 
 2165   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
 2166     int rm_byte_opcode = $rm_opcode$$constant;
 2167     int base     = $mem$$base;
 2168     int index    = $mem$$index;
 2169     int scale    = $mem$$scale;
 2170     int displace = $mem$$disp;
 2171     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2172     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 2173   %}
 2174 
 2175   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
 2176     int reg_encoding = $dst$$reg;
 2177     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
 2178     int index        = 0x04;            // 0x04 indicates no index
 2179     int scale        = 0x00;            // 0x00 indicates no scale
 2180     int displace     = $src1$$constant; // 0x00 indicates no displacement
 2181     relocInfo::relocType disp_reloc = relocInfo::none;
 2182     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2183   %}
 2184 
 2185   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
 2186     // Compare dst,src
 2187     emit_opcode(cbuf,0x3B);
 2188     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jump around the move if dst < src (dst already holds the min)
 2190     emit_opcode(cbuf,0x7C);
 2191     emit_d8(cbuf,2);
 2192     // move dst,src
 2193     emit_opcode(cbuf,0x8B);
 2194     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2195   %}
 2196 
 2197   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
 2198     // Compare dst,src
 2199     emit_opcode(cbuf,0x3B);
 2200     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jump around the move if dst > src (dst already holds the max)
 2202     emit_opcode(cbuf,0x7F);
 2203     emit_d8(cbuf,2);
 2204     // move dst,src
 2205     emit_opcode(cbuf,0x8B);
 2206     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
 2207   %}
 2208 
 2209   enc_class enc_FPR_store(memory mem, regDPR src) %{
 2210     // If src is FPR1, we can just FST to store it.
 2211     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
 2212     int reg_encoding = 0x2; // Just store
 2213     int base  = $mem$$base;
 2214     int index = $mem$$index;
 2215     int scale = $mem$$scale;
 2216     int displace = $mem$$disp;
 2217     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 2218     if( $src$$reg != FPR1L_enc ) {
 2219       reg_encoding = 0x3;  // Store & pop
 2220       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
 2221       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2222     }
 2223     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
 2224     emit_opcode(cbuf,$primary);
 2225     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
 2226   %}
 2227 
 2228   enc_class neg_reg(rRegI dst) %{
 2229     // NEG $dst
 2230     emit_opcode(cbuf,0xF7);
 2231     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
 2232   %}
 2233 
 2234   enc_class setLT_reg(eCXRegI dst) %{
 2235     // SETLT $dst
 2236     emit_opcode(cbuf,0x0F);
 2237     emit_opcode(cbuf,0x9C);
 2238     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
 2239   %}
 2240 
 2241   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
 2242     int tmpReg = $tmp$$reg;
 2243 
 2244     // SUB $p,$q
 2245     emit_opcode(cbuf,0x2B);
 2246     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
 2247     // SBB $tmp,$tmp
 2248     emit_opcode(cbuf,0x1B);
 2249     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
 2250     // AND $tmp,$y
 2251     emit_opcode(cbuf,0x23);
 2252     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
 2253     // ADD $p,$tmp
 2254     emit_opcode(cbuf,0x03);
 2255     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
 2256   %}
 2257 
 2258   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
 2259     // TEST shift,32
 2260     emit_opcode(cbuf,0xF7);
 2261     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2262     emit_d32(cbuf,0x20);
 2263     // JEQ,s small
 2264     emit_opcode(cbuf, 0x74);
 2265     emit_d8(cbuf, 0x04);
 2266     // MOV    $dst.hi,$dst.lo
 2267     emit_opcode( cbuf, 0x8B );
 2268     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2269     // CLR    $dst.lo
 2270     emit_opcode(cbuf, 0x33);
 2271     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
 2272 // small:
 2273     // SHLD   $dst.hi,$dst.lo,$shift
 2274     emit_opcode(cbuf,0x0F);
 2275     emit_opcode(cbuf,0xA5);
 2276     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
    // SHL    $dst.lo,$shift
 2278     emit_opcode(cbuf,0xD3);
 2279     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
 2280   %}
 2281 
 2282   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
 2283     // TEST shift,32
 2284     emit_opcode(cbuf,0xF7);
 2285     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2286     emit_d32(cbuf,0x20);
 2287     // JEQ,s small
 2288     emit_opcode(cbuf, 0x74);
 2289     emit_d8(cbuf, 0x04);
 2290     // MOV    $dst.lo,$dst.hi
 2291     emit_opcode( cbuf, 0x8B );
 2292     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2293     // CLR    $dst.hi
 2294     emit_opcode(cbuf, 0x33);
 2295     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
 2296 // small:
 2297     // SHRD   $dst.lo,$dst.hi,$shift
 2298     emit_opcode(cbuf,0x0F);
 2299     emit_opcode(cbuf,0xAD);
 2300     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
 2302     emit_opcode(cbuf,0xD3);
 2303     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
 2304   %}
 2305 
 2306   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
 2307     // TEST shift,32
 2308     emit_opcode(cbuf,0xF7);
 2309     emit_rm(cbuf, 0x3, 0, ECX_enc);
 2310     emit_d32(cbuf,0x20);
 2311     // JEQ,s small
 2312     emit_opcode(cbuf, 0x74);
 2313     emit_d8(cbuf, 0x05);
 2314     // MOV    $dst.lo,$dst.hi
 2315     emit_opcode( cbuf, 0x8B );
 2316     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2317     // SAR    $dst.hi,31
 2318     emit_opcode(cbuf, 0xC1);
 2319     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2320     emit_d8(cbuf, 0x1F );
 2321 // small:
 2322     // SHRD   $dst.lo,$dst.hi,$shift
 2323     emit_opcode(cbuf,0x0F);
 2324     emit_opcode(cbuf,0xAD);
 2325     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
 2327     emit_opcode(cbuf,0xD3);
 2328     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
 2329   %}
 2330 
 2331 
 2332   // ----------------- Encodings for floating point unit -----------------
 2333   // May leave result in FPU-TOS or FPU reg depending on opcodes
 2334   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
 2335     $$$emit8$primary;
 2336     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
 2337   %}
 2338 
 2339   // Pop argument in FPR0 with FSTP ST(0)
 2340   enc_class PopFPU() %{
 2341     emit_opcode( cbuf, 0xDD );
 2342     emit_d8( cbuf, 0xD8 );
 2343   %}
 2344 
  // !!!!! equivalent to Pop_Reg_FPR
 2346   enc_class Pop_Reg_DPR( regDPR dst ) %{
 2347     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2348     emit_d8( cbuf, 0xD8+$dst$$reg );
 2349   %}
 2350 
 2351   enc_class Push_Reg_DPR( regDPR dst ) %{
 2352     emit_opcode( cbuf, 0xD9 );
 2353     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
 2354   %}
 2355 
 2356   enc_class strictfp_bias1( regDPR dst ) %{
 2357     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2358     emit_opcode( cbuf, 0x2D );
 2359     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
 2360     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2361     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2362   %}
 2363 
 2364   enc_class strictfp_bias2( regDPR dst ) %{
 2365     emit_opcode( cbuf, 0xDB );           // FLD m80real
 2366     emit_opcode( cbuf, 0x2D );
 2367     emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
 2368     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
 2369     emit_opcode( cbuf, 0xC8+$dst$$reg );
 2370   %}
 2371 
 2372   // Special case for moving an integer register to a stack slot.
 2373   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2374     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
 2375   %}
 2376 
 2377   // Special case for moving a register to a stack slot.
 2378   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
 2379     // Opcode already emitted
 2380     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
 2381     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
 2382     emit_d32(cbuf, $dst$$disp);   // Displacement
 2383   %}
 2384 
 2385   // Push the integer in stackSlot 'src' onto FP-stack
 2386   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
 2387     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
 2388   %}
 2389 
 2390   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
 2391   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
 2392     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
 2393   %}
 2394 
  // Same as Pop_Mem_FPR except for opcode
 2396   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
 2397   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
 2398     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
 2399   %}
 2400 
 2401   enc_class Pop_Reg_FPR( regFPR dst ) %{
 2402     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
 2403     emit_d8( cbuf, 0xD8+$dst$$reg );
 2404   %}
 2405 
 2406   enc_class Push_Reg_FPR( regFPR dst ) %{
 2407     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
 2408     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2409   %}
 2410 
 2411   // Push FPU's float to a stack-slot, and pop FPU-stack
 2412   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
 2413     int pop = 0x02;
 2414     if ($src$$reg != FPR1L_enc) {
 2415       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2416       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2417       pop = 0x03;
 2418     }
 2419     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
 2420   %}
 2421 
 2422   // Push FPU's double to a stack-slot, and pop FPU-stack
 2423   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
 2424     int pop = 0x02;
 2425     if ($src$$reg != FPR1L_enc) {
 2426       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
 2427       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2428       pop = 0x03;
 2429     }
 2430     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
 2431   %}
 2432 
 2433   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
 2434   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
 2435     int pop = 0xD0 - 1; // -1 since we skip FLD
 2436     if ($src$$reg != FPR1L_enc) {
 2437       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
 2438       emit_d8( cbuf, 0xC0-1+$src$$reg );
 2439       pop = 0xD8;
 2440     }
 2441     emit_opcode( cbuf, 0xDD );
 2442     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
 2443   %}
 2444 
 2445 
 2446   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
 2447     // load dst in FPR0
 2448     emit_opcode( cbuf, 0xD9 );
 2449     emit_d8( cbuf, 0xC0-1+$dst$$reg );
 2450     if ($src$$reg != FPR1L_enc) {
 2451       // fincstp
 2452       emit_opcode (cbuf, 0xD9);
 2453       emit_opcode (cbuf, 0xF7);
 2454       // swap src with FPR1:
 2455       // FXCH FPR1 with src
 2456       emit_opcode(cbuf, 0xD9);
 2457       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2458       // fdecstp
 2459       emit_opcode (cbuf, 0xD9);
 2460       emit_opcode (cbuf, 0xF6);
 2461     }
 2462   %}
 2463 
 2464   enc_class Push_ModD_encoding(regD src0, regD src1) %{
 2465     MacroAssembler _masm(&cbuf);
 2466     __ subptr(rsp, 8);
 2467     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
 2468     __ fld_d(Address(rsp, 0));
 2469     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
 2470     __ fld_d(Address(rsp, 0));
 2471   %}
 2472 
 2473   enc_class Push_ModF_encoding(regF src0, regF src1) %{
 2474     MacroAssembler _masm(&cbuf);
 2475     __ subptr(rsp, 4);
 2476     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
 2477     __ fld_s(Address(rsp, 0));
 2478     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
 2479     __ fld_s(Address(rsp, 0));
 2480   %}
 2481 
 2482   enc_class Push_ResultD(regD dst) %{
 2483     MacroAssembler _masm(&cbuf);
 2484     __ fstp_d(Address(rsp, 0));
 2485     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
 2486     __ addptr(rsp, 8);
 2487   %}
 2488 
 2489   enc_class Push_ResultF(regF dst, immI d8) %{
 2490     MacroAssembler _masm(&cbuf);
 2491     __ fstp_s(Address(rsp, 0));
 2492     __ movflt($dst$$XMMRegister, Address(rsp, 0));
 2493     __ addptr(rsp, $d8$$constant);
 2494   %}
 2495 
 2496   enc_class Push_SrcD(regD src) %{
 2497     MacroAssembler _masm(&cbuf);
 2498     __ subptr(rsp, 8);
 2499     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2500     __ fld_d(Address(rsp, 0));
 2501   %}
 2502 
 2503   enc_class push_stack_temp_qword() %{
 2504     MacroAssembler _masm(&cbuf);
 2505     __ subptr(rsp, 8);
 2506   %}
 2507 
 2508   enc_class pop_stack_temp_qword() %{
 2509     MacroAssembler _masm(&cbuf);
 2510     __ addptr(rsp, 8);
 2511   %}
 2512 
 2513   enc_class push_xmm_to_fpr1(regD src) %{
 2514     MacroAssembler _masm(&cbuf);
 2515     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
 2516     __ fld_d(Address(rsp, 0));
 2517   %}
 2518 
 2519   enc_class Push_Result_Mod_DPR( regDPR src) %{
 2520     if ($src$$reg != FPR1L_enc) {
 2521       // fincstp
 2522       emit_opcode (cbuf, 0xD9);
 2523       emit_opcode (cbuf, 0xF7);
 2524       // FXCH FPR1 with src
 2525       emit_opcode(cbuf, 0xD9);
 2526       emit_d8(cbuf, 0xC8-1+$src$$reg );
 2527       // fdecstp
 2528       emit_opcode (cbuf, 0xD9);
 2529       emit_opcode (cbuf, 0xF6);
 2530     }
 2531     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
 2532     // // FSTP   FPR$dst$$reg
 2533     // emit_opcode( cbuf, 0xDD );
 2534     // emit_d8( cbuf, 0xD8+$dst$$reg );
 2535   %}
 2536 
 2537   enc_class fnstsw_sahf_skip_parity() %{
 2538     // fnstsw ax
 2539     emit_opcode( cbuf, 0xDF );
 2540     emit_opcode( cbuf, 0xE0 );
 2541     // sahf
 2542     emit_opcode( cbuf, 0x9E );
 2543     // jnp  ::skip
 2544     emit_opcode( cbuf, 0x7B );
 2545     emit_opcode( cbuf, 0x05 );
 2546   %}
 2547 
 2548   enc_class emitModDPR() %{
 2549     // fprem must be iterative
 2550     // :: loop
 2551     // fprem
 2552     emit_opcode( cbuf, 0xD9 );
 2553     emit_opcode( cbuf, 0xF8 );
 2554     // wait
 2555     emit_opcode( cbuf, 0x9b );
 2556     // fnstsw ax
 2557     emit_opcode( cbuf, 0xDF );
 2558     emit_opcode( cbuf, 0xE0 );
 2559     // sahf
 2560     emit_opcode( cbuf, 0x9E );
 2561     // jp  ::loop
 2562     emit_opcode( cbuf, 0x0F );
 2563     emit_opcode( cbuf, 0x8A );
 2564     emit_opcode( cbuf, 0xF4 );
 2565     emit_opcode( cbuf, 0xFF );
 2566     emit_opcode( cbuf, 0xFF );
 2567     emit_opcode( cbuf, 0xFF );
 2568   %}
 2569 
 2570   enc_class fpu_flags() %{
 2571     // fnstsw_ax
 2572     emit_opcode( cbuf, 0xDF);
 2573     emit_opcode( cbuf, 0xE0);
 2574     // test ax,0x0400
 2575     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
 2576     emit_opcode( cbuf, 0xA9 );
 2577     emit_d16   ( cbuf, 0x0400 );
 2578     // // // This sequence works, but stalls for 12-16 cycles on PPro
 2579     // // test rax,0x0400
 2580     // emit_opcode( cbuf, 0xA9 );
 2581     // emit_d32   ( cbuf, 0x00000400 );
 2582     //
 2583     // jz exit (no unordered comparison)
 2584     emit_opcode( cbuf, 0x74 );
 2585     emit_d8    ( cbuf, 0x02 );
 2586     // mov ah,1 - treat as LT case (set carry flag)
 2587     emit_opcode( cbuf, 0xB4 );
 2588     emit_d8    ( cbuf, 0x01 );
 2589     // sahf
 2590     emit_opcode( cbuf, 0x9E);
 2591   %}
 2592 
 2593   enc_class cmpF_P6_fixup() %{
 2594     // Fixup the integer flags in case comparison involved a NaN
 2595     //
 2596     // JNP exit (no unordered comparison, P-flag is set by NaN)
 2597     emit_opcode( cbuf, 0x7B );
 2598     emit_d8    ( cbuf, 0x03 );
 2599     // MOV AH,1 - treat as LT case (set carry flag)
 2600     emit_opcode( cbuf, 0xB4 );
 2601     emit_d8    ( cbuf, 0x01 );
 2602     // SAHF
 2603     emit_opcode( cbuf, 0x9E);
 2604     // NOP     // target for branch to avoid branch to branch
 2605     emit_opcode( cbuf, 0x90);
 2606   %}
 2607 
 2608 //     fnstsw_ax();
 2609 //     sahf();
 2610 //     movl(dst, nan_result);
 2611 //     jcc(Assembler::parity, exit);
 2612 //     movl(dst, less_result);
 2613 //     jcc(Assembler::below, exit);
 2614 //     movl(dst, equal_result);
 2615 //     jcc(Assembler::equal, exit);
 2616 //     movl(dst, greater_result);
 2617 
 2618 // less_result     =  1;
 2619 // greater_result  = -1;
 2620 // equal_result    = 0;
 2621 // nan_result      = -1;
 2622 
 2623   enc_class CmpF_Result(rRegI dst) %{
 2624     // fnstsw_ax();
 2625     emit_opcode( cbuf, 0xDF);
 2626     emit_opcode( cbuf, 0xE0);
 2627     // sahf
 2628     emit_opcode( cbuf, 0x9E);
 2629     // movl(dst, nan_result);
 2630     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2631     emit_d32( cbuf, -1 );
 2632     // jcc(Assembler::parity, exit);
 2633     emit_opcode( cbuf, 0x7A );
 2634     emit_d8    ( cbuf, 0x13 );
 2635     // movl(dst, less_result);
 2636     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2637     emit_d32( cbuf, -1 );
 2638     // jcc(Assembler::below, exit);
 2639     emit_opcode( cbuf, 0x72 );
 2640     emit_d8    ( cbuf, 0x0C );
 2641     // movl(dst, equal_result);
 2642     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2643     emit_d32( cbuf, 0 );
 2644     // jcc(Assembler::equal, exit);
 2645     emit_opcode( cbuf, 0x74 );
 2646     emit_d8    ( cbuf, 0x05 );
 2647     // movl(dst, greater_result);
 2648     emit_opcode( cbuf, 0xB8 + $dst$$reg);
 2649     emit_d32( cbuf, 1 );
 2650   %}
 2651 
 2652 
 2653   // Compare the longs and set flags
 2654   // BROKEN!  Do Not use as-is
 2655   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
 2656     // CMP    $src1.hi,$src2.hi
 2657     emit_opcode( cbuf, 0x3B );
 2658     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2659     // JNE,s  done
 2660     emit_opcode(cbuf,0x75);
 2661     emit_d8(cbuf, 2 );
 2662     // CMP    $src1.lo,$src2.lo
 2663     emit_opcode( cbuf, 0x3B );
 2664     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2665 // done:
 2666   %}
 2667 
 2668   enc_class convert_int_long( regL dst, rRegI src ) %{
 2669     // mov $dst.lo,$src
 2670     int dst_encoding = $dst$$reg;
 2671     int src_encoding = $src$$reg;
 2672     encode_Copy( cbuf, dst_encoding  , src_encoding );
 2673     // mov $dst.hi,$src
 2674     encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
 2675     // sar $dst.hi,31
 2676     emit_opcode( cbuf, 0xC1 );
 2677     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
 2678     emit_d8(cbuf, 0x1F );
 2679   %}
 2680 
 2681   enc_class convert_long_double( eRegL src ) %{
 2682     // push $src.hi
 2683     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2684     // push $src.lo
 2685     emit_opcode(cbuf, 0x50+$src$$reg  );
 2686     // fild 64-bits at [SP]
 2687     emit_opcode(cbuf,0xdf);
 2688     emit_d8(cbuf, 0x6C);
 2689     emit_d8(cbuf, 0x24);
 2690     emit_d8(cbuf, 0x00);
 2691     // pop stack
 2692     emit_opcode(cbuf, 0x83); // add  SP, #8
 2693     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2694     emit_d8(cbuf, 0x8);
 2695   %}
 2696 
 2697   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
 2698     // IMUL   EDX:EAX,$src1
 2699     emit_opcode( cbuf, 0xF7 );
 2700     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
 2701     // SAR    EDX,$cnt-32
 2702     int shift_count = ((int)$cnt$$constant) - 32;
 2703     if (shift_count > 0) {
 2704       emit_opcode(cbuf, 0xC1);
 2705       emit_rm(cbuf, 0x3, 7, $dst$$reg );
 2706       emit_d8(cbuf, shift_count);
 2707     }
 2708   %}
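
        // Illustrative sketch (assumed operand assignment, not emitted literally):
        // for an ideal expression like (int)(((long)x * (long)C) >> 35), with x
        // already in EAX, $src1 == ECX holding C and $dst == EDX, this encodes
        //   F7 E9      imul ecx        ; EDX:EAX = EAX * ECX (64-bit signed product)
        //   C1 FA 03   sar  edx, 3     ; 35 - 32; EDX already holds bits 63..32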
 2709 
 2710   // this version doesn't have add sp, 8
 2711   enc_class convert_long_double2( eRegL src ) %{
 2712     // push $src.hi
 2713     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
 2714     // push $src.lo
 2715     emit_opcode(cbuf, 0x50+$src$$reg  );
 2716     // fild 64-bits at [SP]
 2717     emit_opcode(cbuf,0xdf);
 2718     emit_d8(cbuf, 0x6C);
 2719     emit_d8(cbuf, 0x24);
 2720     emit_d8(cbuf, 0x00);
 2721   %}
 2722 
 2723   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
 2724     // Basic idea: long = (long)int * (long)int
 2725     // IMUL EDX:EAX, src
 2726     emit_opcode( cbuf, 0xF7 );
 2727     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
 2728   %}
 2729 
 2730   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
 2731     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 2732     // MUL EDX:EAX, src
 2733     emit_opcode( cbuf, 0xF7 );
 2734     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
 2735   %}
 2736 
 2737   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
 2738     // Basic idea: lo(result) = lo(x_lo * y_lo)
 2739     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 2740     // MOV    $tmp,$src.lo
 2741     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
 2742     // IMUL   $tmp,EDX
 2743     emit_opcode( cbuf, 0x0F );
 2744     emit_opcode( cbuf, 0xAF );
 2745     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2746     // MOV    EDX,$src.hi
 2747     encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
 2748     // IMUL   EDX,EAX
 2749     emit_opcode( cbuf, 0x0F );
 2750     emit_opcode( cbuf, 0xAF );
 2751     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
 2752     // ADD    $tmp,EDX
 2753     emit_opcode( cbuf, 0x03 );
 2754     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2755     // MUL   EDX:EAX,$src.lo
 2756     emit_opcode( cbuf, 0xF7 );
 2757     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
 2758     // ADD    EDX,ESI
 2759     emit_opcode( cbuf, 0x03 );
 2760     emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
 2761   %}
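
        // Illustrative C sketch of the decomposition above (assumed names, not
        // emitted code); only the low 32 bits of the cross products can reach
        // the high word of the 64-bit result:
        //   uint64_t p   = (uint64_t)x_lo * y_lo;           // the unsigned MUL
        //   uint32_t hi  = (uint32_t)(p >> 32)
        //                + x_hi * y_lo + x_lo * y_hi;        // the two IMULs + ADDs
        //   uint64_t res = ((uint64_t)hi << 32) | (uint32_t)p;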
 2762 
 2763   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
 2764     // Basic idea: lo(result) = lo(src * y_lo)
 2765     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
 2766     // IMUL   $tmp,EDX,$src
 2767     emit_opcode( cbuf, 0x6B );
 2768     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
 2769     emit_d8( cbuf, (int)$src$$constant );
 2770     // MOV    EDX,$src
 2771     emit_opcode(cbuf, 0xB8 + EDX_enc);
 2772     emit_d32( cbuf, (int)$src$$constant );
 2773     // MUL   EDX:EAX,EDX
 2774     emit_opcode( cbuf, 0xF7 );
 2775     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
 2776     // ADD    EDX,ESI
 2777     emit_opcode( cbuf, 0x03 );
 2778     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
 2779   %}
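
        // Because the constant is in 0..127 its high word is zero, so one of the
        // two cross products from the general long_multiply case above vanishes.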
 2780 
 2781   enc_class long_div( eRegL src1, eRegL src2 ) %{
 2782     // PUSH src1.hi
 2783     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2784     // PUSH src1.lo
 2785     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2786     // PUSH src2.hi
 2787     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2788     // PUSH src2.lo
 2789     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2790     // CALL directly to the runtime
 2791     MacroAssembler _masm(&cbuf);
 2792     cbuf.set_insts_mark();
 2793     emit_opcode(cbuf,0xE8);       // Call into runtime
 2794     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2795     __ post_call_nop();
 2796     // Restore stack
 2797     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2798     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2799     emit_d8(cbuf, 4*4);
 2800   %}
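
        // Sketch of the resulting call sequence: the four pushes lay out src2 and
        // src1 as two 64-bit C arguments, SharedRuntime::ldiv returns its 64-bit
        // result in EDX:EAX, and the trailing ADD ESP,16 pops the four argument
        // words (caller-cleanup C calling convention).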
 2801 
 2802   enc_class long_mod( eRegL src1, eRegL src2 ) %{
 2803     // PUSH src1.hi
 2804     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
 2805     // PUSH src1.lo
 2806     emit_opcode(cbuf,               0x50+$src1$$reg  );
 2807     // PUSH src2.hi
 2808     emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
 2809     // PUSH src2.lo
 2810     emit_opcode(cbuf,               0x50+$src2$$reg  );
 2811     // CALL directly to the runtime
 2812     MacroAssembler _masm(&cbuf);
 2813     cbuf.set_insts_mark();
 2814     emit_opcode(cbuf,0xE8);       // Call into runtime
 2815     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2816     __ post_call_nop();
 2817     // Restore stack
 2818     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 2819     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 2820     emit_d8(cbuf, 4*4);
 2821   %}
 2822 
 2823   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
 2824     // MOV   $tmp,$src.lo
 2825     emit_opcode(cbuf, 0x8B);
 2826     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
 2827     // OR    $tmp,$src.hi
 2828     emit_opcode(cbuf, 0x0B);
 2829     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
 2830   %}
 2831 
 2832   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
 2833     // CMP    $src1.lo,$src2.lo
 2834     emit_opcode( cbuf, 0x3B );
 2835     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2836     // JNE,s  skip
 2837     emit_cc(cbuf, 0x70, 0x5);
 2838     emit_d8(cbuf,2);
 2839     // CMP    $src1.hi,$src2.hi
 2840     emit_opcode( cbuf, 0x3B );
 2841     emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
 2842   %}
 2843 
 2844   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
 2845     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
 2846     emit_opcode( cbuf, 0x3B );
 2847     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
 2848     // MOV    $tmp,$src1.hi
 2849     emit_opcode( cbuf, 0x8B );
 2850     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
 2851     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
 2852     emit_opcode( cbuf, 0x1B );
 2853     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
 2854   %}
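
        // Illustrative reasoning: after the low-word CMP leaves its borrow in CF,
        // the SBB computes tmp = src1.hi - src2.hi - CF, so SF and OF end up
        // exactly as a full 64-bit subtraction src1 - src2 would leave them and
        // the signed LT/GE conditions can be tested directly.  ZF of the SBB
        // alone does not cover 64-bit equality, which long_cmp_flags1 handles.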
 2855 
 2856   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
 2857     // XOR    $tmp,$tmp
 2858     emit_opcode(cbuf,0x33);  // XOR
 2859     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
 2860     // CMP    $tmp,$src.lo
 2861     emit_opcode( cbuf, 0x3B );
 2862     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
 2863     // SBB    $tmp,$src.hi
 2864     emit_opcode( cbuf, 0x1B );
 2865     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
 2866   %}
 2867 
 2868  // Sniff, sniff... smells like Gnu Superoptimizer
 2869   enc_class neg_long( eRegL dst ) %{
 2870     emit_opcode(cbuf,0xF7);    // NEG hi
 2871     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2872     emit_opcode(cbuf,0xF7);    // NEG lo
 2873     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
 2874     emit_opcode(cbuf,0x83);    // SBB hi,0
 2875     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
 2876     emit_d8    (cbuf,0 );
 2877   %}
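
        // Why this works (illustrative): -(hi:lo) == (-hi - borrow):(-lo), where
        // the borrow is 1 exactly when lo != 0.  NEG lo leaves that condition in
        // CF, and SBB hi,0 applies it.  E.g. for the value 1:
        //   hi:lo = 00000000:00000001
        //   NEG hi -> 00000000,  NEG lo -> FFFFFFFF (CF=1),  SBB hi,0 -> FFFFFFFF
        // giving 0xFFFFFFFFFFFFFFFF == -1.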
 2878 
 2879   enc_class enc_pop_rdx() %{
 2880     emit_opcode(cbuf,0x5A);
 2881   %}
 2882 
 2883   enc_class enc_rethrow() %{
 2884     MacroAssembler _masm(&cbuf);
 2885     cbuf.set_insts_mark();
 2886     emit_opcode(cbuf, 0xE9);        // jmp    entry
 2887     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
 2888                    runtime_call_Relocation::spec(), RELOC_IMM32 );
 2889     __ post_call_nop();
 2890   %}
 2891 
 2892 
 2893   // Convert a double to an int.  Java semantics require we do complex
 2894   // manglelations in the corner cases.  So we set the rounding mode to
 2895   // 'zero', store the darned double down as an int, and reset the
 2896   // rounding mode to 'nearest'.  On overflow or NaN the hardware stores
 2897   // the integer-indefinite value (0x80000000); the code below checks for it and falls back to a runtime stub.
 2898   enc_class DPR2I_encoding( regDPR src ) %{
 2899     // Flip to round-to-zero mode.  We attempted to allow invalid-op
 2900     // exceptions here, so that a NAN or other corner-case value will
 2901     // throw an exception (but normal values get converted at full speed).
 2902     // However, I2C adapters and other float-stack manglers leave pending
 2903     // invalid-op exceptions hanging.  We would have to clear them before
 2904     // enabling them and that is more expensive than just testing for the
 2905     // invalid value Intel stores down in the corner cases.
 2906     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2907     emit_opcode(cbuf,0x2D);
 2908     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2909     // Allocate a word
 2910     emit_opcode(cbuf,0x83);            // SUB ESP,4
 2911     emit_opcode(cbuf,0xEC);
 2912     emit_d8(cbuf,0x04);
 2913     // Encoding assumes a double has been pushed into FPR0.
 2914     // Store down the double as an int, popping the FPU stack
 2915     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
 2916     emit_opcode(cbuf,0x1C);
 2917     emit_d8(cbuf,0x24);
 2918     // Restore the rounding mode; mask the exception
 2919     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2920     emit_opcode(cbuf,0x2D);
 2921     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2922         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2923         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2924 
 2925     // Load the converted int; adjust CPU stack
 2926     emit_opcode(cbuf,0x58);       // POP EAX
 2927     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
 2928     emit_d32   (cbuf,0x80000000); //         0x80000000
 2929     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2930     emit_d8    (cbuf,0x07);       // Size of slow_call
 2931     // Push src onto stack slow-path
 2932     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2933     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2934     // CALL directly to the runtime
 2935     MacroAssembler _masm(&cbuf);
 2936     cbuf.set_insts_mark();
 2937     emit_opcode(cbuf,0xE8);       // Call into runtime
 2938     emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2939     __ post_call_nop();
 2940     // Carry on here...
 2941   %}
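
        // Rough shape of the code emitted above (illustrative; the d32 operands
        // carry the actual control-word and stub addresses):
        //   fldcw  [addr_fpu_cntrl_wrd_trunc]    ; round toward zero
        //   sub    esp, 4
        //   fistp  dword [esp]                   ; store ST(0) as int32, pop FPU
        //   fldcw  [addr_fpu_cntrl_wrd_std/_24]  ; restore rounding mode
        //   pop    eax
        //   cmp    eax, 0x80000000               ; integer-indefinite => overflow/NaN
        //   jne    done                          ; normal values are finished
        //   fld    st(i)                         ; put the operand back on the FPU stack
        //   call   StubRoutines::x86::d2i_wrapper()
        // done: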
 2942 
 2943   enc_class DPR2L_encoding( regDPR src ) %{
 2944     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
 2945     emit_opcode(cbuf,0x2D);
 2946     emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
 2947     // Allocate two words (8 bytes)
 2948     emit_opcode(cbuf,0x83);            // SUB ESP,8
 2949     emit_opcode(cbuf,0xEC);
 2950     emit_d8(cbuf,0x08);
 2951     // Encoding assumes a double has been pushed into FPR0.
 2952     // Store down the double as a long, popping the FPU stack
 2953     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
 2954     emit_opcode(cbuf,0x3C);
 2955     emit_d8(cbuf,0x24);
 2956     // Restore the rounding mode; mask the exception
 2957     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
 2958     emit_opcode(cbuf,0x2D);
 2959     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
 2960         ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
 2961         : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());
 2962 
 2963     // Load the converted long; adjust CPU stack
 2964     emit_opcode(cbuf,0x58);       // POP EAX
 2965     emit_opcode(cbuf,0x5A);       // POP EDX
 2966     emit_opcode(cbuf,0x81);       // CMP EDX,imm
 2967     emit_d8    (cbuf,0xFA);       // rdx
 2968     emit_d32   (cbuf,0x80000000); //         0x80000000
 2969     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2970     emit_d8    (cbuf,0x07+4);     // Size of slow_call
 2971     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
 2972     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
 2973     emit_opcode(cbuf,0x75);       // JNE around_slow_call
 2974     emit_d8    (cbuf,0x07);       // Size of slow_call
 2975     // Push src onto stack slow-path
 2976     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
 2977     emit_d8    (cbuf,0xC0-1+$src$$reg );
 2978     // CALL directly to the runtime
 2979     MacroAssembler _masm(&cbuf);
 2980     cbuf.set_insts_mark();
 2981     emit_opcode(cbuf,0xE8);       // Call into runtime
 2982     emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
 2983     __ post_call_nop();
 2984     // Carry on here...
 2985   %}
 2986 
 2987   enc_class FMul_ST_reg( eRegFPR src1 ) %{
 2988     // Operand was loaded from memory into fp ST (stack top)
 2989     // FMUL   ST,$src  /* D8 C8+i */
 2990     emit_opcode(cbuf, 0xD8);
 2991     emit_opcode(cbuf, 0xC8 + $src1$$reg);
 2992   %}
 2993 
 2994   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
 2995     // FADD   ST,src2  /* D8 C0+i */
 2996     emit_opcode(cbuf, 0xD8);
 2997     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 2998     //could use FADDP  src2,fpST  /* DE C0+i */
 2999   %}
 3000 
 3001   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
 3002     // FADDP  src2,ST  /* DE C0+i */
 3003     emit_opcode(cbuf, 0xDE);
 3004     emit_opcode(cbuf, 0xC0 + $src2$$reg);
 3005   %}
 3006 
 3007   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
 3008     // Operand has been loaded into fp ST (stack top)
 3009       // FSUB   ST,$src1
 3010       emit_opcode(cbuf, 0xD8);
 3011       emit_opcode(cbuf, 0xE0 + $src1$$reg);
 3012 
 3013       // FDIV
 3014       emit_opcode(cbuf, 0xD8);
 3015       emit_opcode(cbuf, 0xF0 + $src2$$reg);
 3016   %}
 3017 
 3018   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
 3019     // Operand was loaded from memory into fp ST (stack top)
 3020     // FADD   ST,$src  /* D8 C0+i */
 3021     emit_opcode(cbuf, 0xD8);
 3022     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3023 
 3024     // FMUL  ST,src2  /* D8 C8+i */
 3025     emit_opcode(cbuf, 0xD8);
 3026     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3027   %}
 3028 
 3029 
 3030   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
 3031     // Operand was loaded from memory into fp ST (stack top)
 3032     // FADD   ST,$src  /* D8 C0+i */
 3033     emit_opcode(cbuf, 0xD8);
 3034     emit_opcode(cbuf, 0xC0 + $src1$$reg);
 3035 
 3036     // FMULP  src2,ST  /* DE C8+i */
 3037     emit_opcode(cbuf, 0xDE);
 3038     emit_opcode(cbuf, 0xC8 + $src2$$reg);
 3039   %}
 3040 
 3041   // Atomically load the volatile long
 3042   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
 3043     emit_opcode(cbuf,0xDF);
 3044     int rm_byte_opcode = 0x05;
 3045     int base     = $mem$$base;
 3046     int index    = $mem$$index;
 3047     int scale    = $mem$$scale;
 3048     int displace = $mem$$disp;
 3049     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3050     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3051     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
 3052   %}
 3053 
 3054   // Volatile Store Long.  Must be atomic, so move it into
 3055   // the FP TOS and then do a 64-bit FIST.  Has to probe the
 3056   // target address before the store (for null-ptr checks)
 3057   // so the memory operand is used twice in the encoding.
 3058   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
 3059     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
 3060     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
 3061     emit_opcode(cbuf,0xDF);
 3062     int rm_byte_opcode = 0x07;
 3063     int base     = $mem$$base;
 3064     int index    = $mem$$index;
 3065     int scale    = $mem$$scale;
 3066     int displace = $mem$$disp;
 3067     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
 3068     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
 3069   %}
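
        // Both volatile-long encodings above lean on the x87 FILD/FISTP qword
        // forms moving all 64 bits in a single memory access, which is what makes
        // the volatile jlong load/store atomic on 32-bit x86 without a locked
        // CMPXCHG8B sequence.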
 3070 
 3071 %}
 3072 
 3073 
 3074 //----------FRAME--------------------------------------------------------------
 3075 // Definition of frame structure and management information.
 3076 //
 3077 //  S T A C K   L A Y O U T    Allocators stack-slot number
 3078 //                             |   (to get allocators register number
 3079 //  G  Owned by    |        |  v    add OptoReg::stack0())
 3080 //  r   CALLER     |        |
 3081 //  o     |        +--------+      pad to even-align allocators stack-slot
 3082 //  w     V        |  pad0  |        numbers; owned by CALLER
 3083 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
 3084 //  h     ^        |   in   |  5
 3085 //        |        |  args  |  4   Holes in incoming args owned by SELF
 3086 //  |     |        |        |  3
 3087 //  |     |        +--------+
 3088 //  V     |        | old out|      Empty on Intel, window on Sparc
 3089 //        |    old |preserve|      Must be even aligned.
 3090 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
 3091 //        |        |   in   |  3   area for Intel ret address
 3092 //     Owned by    |preserve|      Empty on Sparc.
 3093 //       SELF      +--------+
 3094 //        |        |  pad2  |  2   pad to align old SP
 3095 //        |        +--------+  1
 3096 //        |        | locks  |  0
 3097 //        |        +--------+----> OptoReg::stack0(), even aligned
 3098 //        |        |  pad1  | 11   pad to align new SP
 3099 //        |        +--------+
 3100 //        |        |        | 10
 3101 //        |        | spills |  9   spills
 3102 //        V        |        |  8   (pad0 slot for callee)
 3103 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
 3104 //        ^        |  out   |  7
 3105 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
 3106 //     Owned by    +--------+
 3107 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
 3108 //        |    new |preserve|      Must be even-aligned.
 3109 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
 3110 //        |        |        |
 3111 //
 3112 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
 3113 //         known from SELF's arguments and the Java calling convention.
 3114 //         Region 6-7 is determined per call site.
 3115 // Note 2: If the calling convention leaves holes in the incoming argument
 3116 //         area, those holes are owned by SELF.  Holes in the outgoing area
 3117 //         are owned by the CALLEE.  Holes should not be necessary in the
 3118 //         incoming area, as the Java calling convention is completely under
 3119 //         the control of the AD file.  Doubles can be sorted and packed to
 3120 //         avoid holes.  Holes in the outgoing arguments may be necessary for
 3121 //         varargs C calling conventions.
 3122 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
 3123 //         even aligned with pad0 as needed.
 3124 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
 3125 //         region 6-11 is even aligned; it may be padded out more so that
 3126 //         the region from SP to FP meets the minimum stack alignment.
 3127 
 3128 frame %{
 3129   // This register defines part of the calling convention
 3130   // between compiled code and the interpreter.
 3131   inline_cache_reg(EAX);                // Inline Cache Register
 3132 
 3133   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
 3134   cisc_spilling_operand_name(indOffset32);
 3135 
 3136   // Number of stack slots consumed by locking an object
 3137   sync_stack_slots(1);
 3138 
 3139   // Compiled code's Frame Pointer
 3140   frame_pointer(ESP);
 3141   // Interpreter stores its frame pointer in a register which is
 3142   // stored to the stack by I2CAdaptors.
 3143   // I2CAdaptors convert from interpreted java to compiled java.
 3144   interpreter_frame_pointer(EBP);
 3145 
 3146   // Stack alignment requirement
 3147   // Alignment size in bytes (128-bit -> 16 bytes)
 3148   stack_alignment(StackAlignmentInBytes);
 3149 
 3150   // Number of outgoing stack slots killed above the out_preserve_stack_slots
 3151   // for calls to C.  Supports the var-args backing area for register parms.
 3152   varargs_C_out_slots_killed(0);
 3153 
 3154   // The after-PROLOG location of the return address.  Location of
 3155   // return address specifies a type (REG or STACK) and a number
 3156   // representing the register number (i.e. - use a register name) or
 3157   // stack slot.
 3158   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
 3159   // Otherwise, it is above the locks and verification slot and alignment word
 3160   return_addr(STACK - 1 +
 3161               align_up((Compile::current()->in_preserve_stack_slots() +
 3162                         Compile::current()->fixed_slots()),
 3163                        stack_alignment_in_slots()));
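
        // Worked example (hypothetical numbers): with 4 preserve slots, 1 fixed
        // slot and a 4-slot (16-byte) stack alignment this evaluates to
        // STACK - 1 + align_up(5, 4) = STACK + 7.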
 3164 
 3165   // Location of C & interpreter return values
 3166   c_return_value %{
 3167     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3168     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3169     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3170 
 3171     // in SSE2+ mode we want to keep the FPU stack clean so pretend
 3172     // that C functions return float and double results in XMM0.
 3173     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3174       return OptoRegPair(XMM0b_num,XMM0_num);
 3175     if( ideal_reg == Op_RegF && UseSSE>=2 )
 3176       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3177 
 3178     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3179   %}
 3180 
 3181   // Location of return values
 3182   return_value %{
 3183     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
 3184     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
 3185     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
 3186     if( ideal_reg == Op_RegD && UseSSE>=2 )
 3187       return OptoRegPair(XMM0b_num,XMM0_num);
 3188     if( ideal_reg == Op_RegF && UseSSE>=1 )
 3189       return OptoRegPair(OptoReg::Bad,XMM0_num);
 3190     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
 3191   %}
 3192 
 3193 %}
 3194 
 3195 //----------ATTRIBUTES---------------------------------------------------------
 3196 //----------Operand Attributes-------------------------------------------------
 3197 op_attrib op_cost(0);        // Required cost attribute
 3198 
 3199 //----------Instruction Attributes---------------------------------------------
 3200 ins_attrib ins_cost(100);       // Required cost attribute
 3201 ins_attrib ins_size(8);         // Required size attribute (in bits)
 3202 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
 3203                                 // non-matching short branch variant of some
 3204                                 // long branch?
 3205 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
 3206                                 // specifies the alignment that some part of the instruction (not
 3207                                 // necessarily the start) requires.  If > 1, a compute_padding()
 3208                                 // function must be provided for the instruction
 3209 
 3210 //----------OPERANDS-----------------------------------------------------------
 3211 // Operand definitions must precede instruction definitions for correct parsing
 3212 // in the ADLC because operands constitute user defined types which are used in
 3213 // instruction definitions.
 3214 
 3215 //----------Simple Operands----------------------------------------------------
 3216 // Immediate Operands
 3217 // Integer Immediate
 3218 operand immI() %{
 3219   match(ConI);
 3220 
 3221   op_cost(10);
 3222   format %{ %}
 3223   interface(CONST_INTER);
 3224 %}
 3225 
 3226 // Constant for test vs zero
 3227 operand immI_0() %{
 3228   predicate(n->get_int() == 0);
 3229   match(ConI);
 3230 
 3231   op_cost(0);
 3232   format %{ %}
 3233   interface(CONST_INTER);
 3234 %}
 3235 
 3236 // Constant for increment
 3237 operand immI_1() %{
 3238   predicate(n->get_int() == 1);
 3239   match(ConI);
 3240 
 3241   op_cost(0);
 3242   format %{ %}
 3243   interface(CONST_INTER);
 3244 %}
 3245 
 3246 // Constant for decrement
 3247 operand immI_M1() %{
 3248   predicate(n->get_int() == -1);
 3249   match(ConI);
 3250 
 3251   op_cost(0);
 3252   format %{ %}
 3253   interface(CONST_INTER);
 3254 %}
 3255 
 3256 // Valid scale values for addressing modes
 3257 operand immI2() %{
 3258   predicate(0 <= n->get_int() && (n->get_int() <= 3));
 3259   match(ConI);
 3260 
 3261   format %{ %}
 3262   interface(CONST_INTER);
 3263 %}
 3264 
 3265 operand immI8() %{
 3266   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
 3267   match(ConI);
 3268 
 3269   op_cost(5);
 3270   format %{ %}
 3271   interface(CONST_INTER);
 3272 %}
 3273 
 3274 operand immU8() %{
 3275   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
 3276   match(ConI);
 3277 
 3278   op_cost(5);
 3279   format %{ %}
 3280   interface(CONST_INTER);
 3281 %}
 3282 
 3283 operand immI16() %{
 3284   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
 3285   match(ConI);
 3286 
 3287   op_cost(10);
 3288   format %{ %}
 3289   interface(CONST_INTER);
 3290 %}
 3291 
 3292 // Int Immediate non-negative
 3293 operand immU31()
 3294 %{
 3295   predicate(n->get_int() >= 0);
 3296   match(ConI);
 3297 
 3298   op_cost(0);
 3299   format %{ %}
 3300   interface(CONST_INTER);
 3301 %}
 3302 
 3303 // Constant for long shifts
 3304 operand immI_32() %{
 3305   predicate( n->get_int() == 32 );
 3306   match(ConI);
 3307 
 3308   op_cost(0);
 3309   format %{ %}
 3310   interface(CONST_INTER);
 3311 %}
 3312 
 3313 operand immI_1_31() %{
 3314   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
 3315   match(ConI);
 3316 
 3317   op_cost(0);
 3318   format %{ %}
 3319   interface(CONST_INTER);
 3320 %}
 3321 
 3322 operand immI_32_63() %{
 3323   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
 3324   match(ConI);
 3325   op_cost(0);
 3326 
 3327   format %{ %}
 3328   interface(CONST_INTER);
 3329 %}
 3330 
 3331 operand immI_2() %{
 3332   predicate( n->get_int() == 2 );
 3333   match(ConI);
 3334 
 3335   op_cost(0);
 3336   format %{ %}
 3337   interface(CONST_INTER);
 3338 %}
 3339 
 3340 operand immI_3() %{
 3341   predicate( n->get_int() == 3 );
 3342   match(ConI);
 3343 
 3344   op_cost(0);
 3345   format %{ %}
 3346   interface(CONST_INTER);
 3347 %}
 3348 
 3349 operand immI_4()
 3350 %{
 3351   predicate(n->get_int() == 4);
 3352   match(ConI);
 3353 
 3354   op_cost(0);
 3355   format %{ %}
 3356   interface(CONST_INTER);
 3357 %}
 3358 
 3359 operand immI_8()
 3360 %{
 3361   predicate(n->get_int() == 8);
 3362   match(ConI);
 3363 
 3364   op_cost(0);
 3365   format %{ %}
 3366   interface(CONST_INTER);
 3367 %}
 3368 
 3369 // Pointer Immediate
 3370 operand immP() %{
 3371   match(ConP);
 3372 
 3373   op_cost(10);
 3374   format %{ %}
 3375   interface(CONST_INTER);
 3376 %}
 3377 
 3378 // NULL Pointer Immediate
 3379 operand immP0() %{
 3380   predicate( n->get_ptr() == 0 );
 3381   match(ConP);
 3382   op_cost(0);
 3383 
 3384   format %{ %}
 3385   interface(CONST_INTER);
 3386 %}
 3387 
 3388 // Long Immediate
 3389 operand immL() %{
 3390   match(ConL);
 3391 
 3392   op_cost(20);
 3393   format %{ %}
 3394   interface(CONST_INTER);
 3395 %}
 3396 
 3397 // Long Immediate zero
 3398 operand immL0() %{
 3399   predicate( n->get_long() == 0L );
 3400   match(ConL);
 3401   op_cost(0);
 3402 
 3403   format %{ %}
 3404   interface(CONST_INTER);
 3405 %}
 3406 
 3407 // Long Immediate -1
 3408 operand immL_M1() %{
 3409   predicate( n->get_long() == -1L );
 3410   match(ConL);
 3411   op_cost(0);
 3412 
 3413   format %{ %}
 3414   interface(CONST_INTER);
 3415 %}
 3416 
 3417 // Long immediate from 0 to 127.
 3418 // Used for a shorter form of long mul by 10.
 3419 operand immL_127() %{
 3420   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
 3421   match(ConL);
 3422   op_cost(0);
 3423 
 3424   format %{ %}
 3425   interface(CONST_INTER);
 3426 %}
 3427 
 3428 // Long Immediate: low 32-bit mask
 3429 operand immL_32bits() %{
 3430   predicate(n->get_long() == 0xFFFFFFFFL);
 3431   match(ConL);
 3432   op_cost(0);
 3433 
 3434   format %{ %}
 3435   interface(CONST_INTER);
 3436 %}
 3437 
 3438 // Long Immediate: value fits in a signed 32-bit immediate
 3439 operand immL32() %{
 3440   predicate(n->get_long() == (int)(n->get_long()));
 3441   match(ConL);
 3442   op_cost(20);
 3443 
 3444   format %{ %}
 3445   interface(CONST_INTER);
 3446 %}
 3447 
 3448 // Double Immediate zero
 3449 operand immDPR0() %{
 3450   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3451   // bug that generates code such that NaNs compare equal to 0.0
 3452   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
 3453   match(ConD);
 3454 
 3455   op_cost(5);
 3456   format %{ %}
 3457   interface(CONST_INTER);
 3458 %}
 3459 
 3460 // Double Immediate one
 3461 operand immDPR1() %{
 3462   predicate( UseSSE<=1 && n->getd() == 1.0 );
 3463   match(ConD);
 3464 
 3465   op_cost(5);
 3466   format %{ %}
 3467   interface(CONST_INTER);
 3468 %}
 3469 
 3470 // Double Immediate
 3471 operand immDPR() %{
 3472   predicate(UseSSE<=1);
 3473   match(ConD);
 3474 
 3475   op_cost(5);
 3476   format %{ %}
 3477   interface(CONST_INTER);
 3478 %}
 3479 
 3480 operand immD() %{
 3481   predicate(UseSSE>=2);
 3482   match(ConD);
 3483 
 3484   op_cost(5);
 3485   format %{ %}
 3486   interface(CONST_INTER);
 3487 %}
 3488 
 3489 // Double Immediate zero
 3490 operand immD0() %{
 3491   // Do additional (and counter-intuitive) test against NaN to work around VC++
 3492   // bug that generates code such that NaNs compare equal to 0.0 AND do not
 3493   // compare equal to -0.0.
 3494   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
 3495   match(ConD);
 3496 
 3497   format %{ %}
 3498   interface(CONST_INTER);
 3499 %}
 3500 
 3501 // Float Immediate zero
 3502 operand immFPR0() %{
 3503   predicate(UseSSE == 0 && n->getf() == 0.0F);
 3504   match(ConF);
 3505 
 3506   op_cost(5);
 3507   format %{ %}
 3508   interface(CONST_INTER);
 3509 %}
 3510 
 3511 // Float Immediate one
 3512 operand immFPR1() %{
 3513   predicate(UseSSE == 0 && n->getf() == 1.0F);
 3514   match(ConF);
 3515 
 3516   op_cost(5);
 3517   format %{ %}
 3518   interface(CONST_INTER);
 3519 %}
 3520 
 3521 // Float Immediate
 3522 operand immFPR() %{
 3523   predicate( UseSSE == 0 );
 3524   match(ConF);
 3525 
 3526   op_cost(5);
 3527   format %{ %}
 3528   interface(CONST_INTER);
 3529 %}
 3530 
 3531 // Float Immediate
 3532 operand immF() %{
 3533   predicate(UseSSE >= 1);
 3534   match(ConF);
 3535 
 3536   op_cost(5);
 3537   format %{ %}
 3538   interface(CONST_INTER);
 3539 %}
 3540 
 3541 // Float Immediate zero.  Zero and not -0.0
 3542 operand immF0() %{
 3543   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
 3544   match(ConF);
 3545 
 3546   op_cost(5);
 3547   format %{ %}
 3548   interface(CONST_INTER);
 3549 %}
 3550 
 3551 // Immediates for special shifts (sign extend)
 3552 
 3553 // Constants for sign-extension shifts
 3554 operand immI_16() %{
 3555   predicate( n->get_int() == 16 );
 3556   match(ConI);
 3557 
 3558   format %{ %}
 3559   interface(CONST_INTER);
 3560 %}
 3561 
 3562 operand immI_24() %{
 3563   predicate( n->get_int() == 24 );
 3564   match(ConI);
 3565 
 3566   format %{ %}
 3567   interface(CONST_INTER);
 3568 %}
 3569 
 3570 // Constant for byte-wide masking
 3571 operand immI_255() %{
 3572   predicate( n->get_int() == 255 );
 3573   match(ConI);
 3574 
 3575   format %{ %}
 3576   interface(CONST_INTER);
 3577 %}
 3578 
 3579 // Constant for short-wide masking
 3580 operand immI_65535() %{
 3581   predicate(n->get_int() == 65535);
 3582   match(ConI);
 3583 
 3584   format %{ %}
 3585   interface(CONST_INTER);
 3586 %}
 3587 
 3588 operand kReg()
 3589 %{
 3590   constraint(ALLOC_IN_RC(vectmask_reg));
 3591   match(RegVectMask);
 3592   format %{%}
 3593   interface(REG_INTER);
 3594 %}
 3595 
 3596 operand kReg_K1()
 3597 %{
 3598   constraint(ALLOC_IN_RC(vectmask_reg_K1));
 3599   match(RegVectMask);
 3600   format %{%}
 3601   interface(REG_INTER);
 3602 %}
 3603 
 3604 operand kReg_K2()
 3605 %{
 3606   constraint(ALLOC_IN_RC(vectmask_reg_K2));
 3607   match(RegVectMask);
 3608   format %{%}
 3609   interface(REG_INTER);
 3610 %}
 3611 
 3612 // Special Registers
 3613 operand kReg_K3()
 3614 %{
 3615   constraint(ALLOC_IN_RC(vectmask_reg_K3));
 3616   match(RegVectMask);
 3617   format %{%}
 3618   interface(REG_INTER);
 3619 %}
 3620 
 3621 operand kReg_K4()
 3622 %{
 3623   constraint(ALLOC_IN_RC(vectmask_reg_K4));
 3624   match(RegVectMask);
 3625   format %{%}
 3626   interface(REG_INTER);
 3627 %}
 3628 
 3629 operand kReg_K5()
 3630 %{
 3631   constraint(ALLOC_IN_RC(vectmask_reg_K5));
 3632   match(RegVectMask);
 3633   format %{%}
 3634   interface(REG_INTER);
 3635 %}
 3636 
 3637 operand kReg_K6()
 3638 %{
 3639   constraint(ALLOC_IN_RC(vectmask_reg_K6));
 3640   match(RegVectMask);
 3641   format %{%}
 3642   interface(REG_INTER);
 3643 %}
 3644 
 3645 // Special Registers
 3646 operand kReg_K7()
 3647 %{
 3648   constraint(ALLOC_IN_RC(vectmask_reg_K7));
 3649   match(RegVectMask);
 3650   format %{%}
 3651   interface(REG_INTER);
 3652 %}
 3653 
 3654 // Register Operands
 3655 // Integer Register
 3656 operand rRegI() %{
 3657   constraint(ALLOC_IN_RC(int_reg));
 3658   match(RegI);
 3659   match(xRegI);
 3660   match(eAXRegI);
 3661   match(eBXRegI);
 3662   match(eCXRegI);
 3663   match(eDXRegI);
 3664   match(eDIRegI);
 3665   match(eSIRegI);
 3666 
 3667   format %{ %}
 3668   interface(REG_INTER);
 3669 %}
 3670 
 3671 // Subset of Integer Register
 3672 operand xRegI(rRegI reg) %{
 3673   constraint(ALLOC_IN_RC(int_x_reg));
 3674   match(reg);
 3675   match(eAXRegI);
 3676   match(eBXRegI);
 3677   match(eCXRegI);
 3678   match(eDXRegI);
 3679 
 3680   format %{ %}
 3681   interface(REG_INTER);
 3682 %}
 3683 
 3684 // Special Registers
 3685 operand eAXRegI(xRegI reg) %{
 3686   constraint(ALLOC_IN_RC(eax_reg));
 3687   match(reg);
 3688   match(rRegI);
 3689 
 3690   format %{ "EAX" %}
 3691   interface(REG_INTER);
 3692 %}
 3693 
 3694 // Special Registers
 3695 operand eBXRegI(xRegI reg) %{
 3696   constraint(ALLOC_IN_RC(ebx_reg));
 3697   match(reg);
 3698   match(rRegI);
 3699 
 3700   format %{ "EBX" %}
 3701   interface(REG_INTER);
 3702 %}
 3703 
 3704 operand eCXRegI(xRegI reg) %{
 3705   constraint(ALLOC_IN_RC(ecx_reg));
 3706   match(reg);
 3707   match(rRegI);
 3708 
 3709   format %{ "ECX" %}
 3710   interface(REG_INTER);
 3711 %}
 3712 
 3713 operand eDXRegI(xRegI reg) %{
 3714   constraint(ALLOC_IN_RC(edx_reg));
 3715   match(reg);
 3716   match(rRegI);
 3717 
 3718   format %{ "EDX" %}
 3719   interface(REG_INTER);
 3720 %}
 3721 
 3722 operand eDIRegI(xRegI reg) %{
 3723   constraint(ALLOC_IN_RC(edi_reg));
 3724   match(reg);
 3725   match(rRegI);
 3726 
 3727   format %{ "EDI" %}
 3728   interface(REG_INTER);
 3729 %}
 3730 
 3731 operand naxRegI() %{
 3732   constraint(ALLOC_IN_RC(nax_reg));
 3733   match(RegI);
 3734   match(eCXRegI);
 3735   match(eDXRegI);
 3736   match(eSIRegI);
 3737   match(eDIRegI);
 3738 
 3739   format %{ %}
 3740   interface(REG_INTER);
 3741 %}
 3742 
 3743 operand nadxRegI() %{
 3744   constraint(ALLOC_IN_RC(nadx_reg));
 3745   match(RegI);
 3746   match(eBXRegI);
 3747   match(eCXRegI);
 3748   match(eSIRegI);
 3749   match(eDIRegI);
 3750 
 3751   format %{ %}
 3752   interface(REG_INTER);
 3753 %}
 3754 
 3755 operand ncxRegI() %{
 3756   constraint(ALLOC_IN_RC(ncx_reg));
 3757   match(RegI);
 3758   match(eAXRegI);
 3759   match(eDXRegI);
 3760   match(eSIRegI);
 3761   match(eDIRegI);
 3762 
 3763   format %{ %}
 3764   interface(REG_INTER);
 3765 %}
 3766 
 3767 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
 3768 // //
 3769 operand eSIRegI(xRegI reg) %{
 3770    constraint(ALLOC_IN_RC(esi_reg));
 3771    match(reg);
 3772    match(rRegI);
 3773 
 3774    format %{ "ESI" %}
 3775    interface(REG_INTER);
 3776 %}
 3777 
 3778 // Pointer Register
 3779 operand anyRegP() %{
 3780   constraint(ALLOC_IN_RC(any_reg));
 3781   match(RegP);
 3782   match(eAXRegP);
 3783   match(eBXRegP);
 3784   match(eCXRegP);
 3785   match(eDIRegP);
 3786   match(eRegP);
 3787 
 3788   format %{ %}
 3789   interface(REG_INTER);
 3790 %}
 3791 
 3792 operand eRegP() %{
 3793   constraint(ALLOC_IN_RC(int_reg));
 3794   match(RegP);
 3795   match(eAXRegP);
 3796   match(eBXRegP);
 3797   match(eCXRegP);
 3798   match(eDIRegP);
 3799 
 3800   format %{ %}
 3801   interface(REG_INTER);
 3802 %}
 3803 
 3804 operand rRegP() %{
 3805   constraint(ALLOC_IN_RC(int_reg));
 3806   match(RegP);
 3807   match(eAXRegP);
 3808   match(eBXRegP);
 3809   match(eCXRegP);
 3810   match(eDIRegP);
 3811 
 3812   format %{ %}
 3813   interface(REG_INTER);
 3814 %}
 3815 
 3816 // On windows95, EBP is not safe to use for implicit null tests.
 3817 operand eRegP_no_EBP() %{
 3818   constraint(ALLOC_IN_RC(int_reg_no_ebp));
 3819   match(RegP);
 3820   match(eAXRegP);
 3821   match(eBXRegP);
 3822   match(eCXRegP);
 3823   match(eDIRegP);
 3824 
 3825   op_cost(100);
 3826   format %{ %}
 3827   interface(REG_INTER);
 3828 %}
 3829 
 3830 operand naxRegP() %{
 3831   constraint(ALLOC_IN_RC(nax_reg));
 3832   match(RegP);
 3833   match(eBXRegP);
 3834   match(eDXRegP);
 3835   match(eCXRegP);
 3836   match(eSIRegP);
 3837   match(eDIRegP);
 3838 
 3839   format %{ %}
 3840   interface(REG_INTER);
 3841 %}
 3842 
 3843 operand nabxRegP() %{
 3844   constraint(ALLOC_IN_RC(nabx_reg));
 3845   match(RegP);
 3846   match(eCXRegP);
 3847   match(eDXRegP);
 3848   match(eSIRegP);
 3849   match(eDIRegP);
 3850 
 3851   format %{ %}
 3852   interface(REG_INTER);
 3853 %}
 3854 
 3855 operand pRegP() %{
 3856   constraint(ALLOC_IN_RC(p_reg));
 3857   match(RegP);
 3858   match(eBXRegP);
 3859   match(eDXRegP);
 3860   match(eSIRegP);
 3861   match(eDIRegP);
 3862 
 3863   format %{ %}
 3864   interface(REG_INTER);
 3865 %}
 3866 
 3867 // Special Registers
 3868 // Return a pointer value
 3869 operand eAXRegP(eRegP reg) %{
 3870   constraint(ALLOC_IN_RC(eax_reg));
 3871   match(reg);
 3872   format %{ "EAX" %}
 3873   interface(REG_INTER);
 3874 %}
 3875 
 3876 // Used in AtomicAdd
 3877 operand eBXRegP(eRegP reg) %{
 3878   constraint(ALLOC_IN_RC(ebx_reg));
 3879   match(reg);
 3880   format %{ "EBX" %}
 3881   interface(REG_INTER);
 3882 %}
 3883 
 3884 // Tail-call (interprocedural jump) to interpreter
 3885 operand eCXRegP(eRegP reg) %{
 3886   constraint(ALLOC_IN_RC(ecx_reg));
 3887   match(reg);
 3888   format %{ "ECX" %}
 3889   interface(REG_INTER);
 3890 %}
 3891 
 3892 operand eDXRegP(eRegP reg) %{
 3893   constraint(ALLOC_IN_RC(edx_reg));
 3894   match(reg);
 3895   format %{ "EDX" %}
 3896   interface(REG_INTER);
 3897 %}
 3898 
 3899 operand eSIRegP(eRegP reg) %{
 3900   constraint(ALLOC_IN_RC(esi_reg));
 3901   match(reg);
 3902   format %{ "ESI" %}
 3903   interface(REG_INTER);
 3904 %}
 3905 
 3906 // Used in rep stosw
 3907 operand eDIRegP(eRegP reg) %{
 3908   constraint(ALLOC_IN_RC(edi_reg));
 3909   match(reg);
 3910   format %{ "EDI" %}
 3911   interface(REG_INTER);
 3912 %}
 3913 
 3914 operand eRegL() %{
 3915   constraint(ALLOC_IN_RC(long_reg));
 3916   match(RegL);
 3917   match(eADXRegL);
 3918 
 3919   format %{ %}
 3920   interface(REG_INTER);
 3921 %}
 3922 
 3923 operand eADXRegL( eRegL reg ) %{
 3924   constraint(ALLOC_IN_RC(eadx_reg));
 3925   match(reg);
 3926 
 3927   format %{ "EDX:EAX" %}
 3928   interface(REG_INTER);
 3929 %}
 3930 
 3931 operand eBCXRegL( eRegL reg ) %{
 3932   constraint(ALLOC_IN_RC(ebcx_reg));
 3933   match(reg);
 3934 
 3935   format %{ "EBX:ECX" %}
 3936   interface(REG_INTER);
 3937 %}
 3938 
 3939 operand eBDPRegL( eRegL reg ) %{
 3940   constraint(ALLOC_IN_RC(ebpd_reg));
 3941   match(reg);
 3942 
 3943   format %{ "EBP:EDI" %}
 3944   interface(REG_INTER);
 3945 %}
 3946 // Special case for integer high multiply
 3947 operand eADXRegL_low_only() %{
 3948   constraint(ALLOC_IN_RC(eadx_reg));
 3949   match(RegL);
 3950 
 3951   format %{ "EAX" %}
 3952   interface(REG_INTER);
 3953 %}
 3954 
 3955 // Flags register, used as output of compare instructions
 3956 operand rFlagsReg() %{
 3957   constraint(ALLOC_IN_RC(int_flags));
 3958   match(RegFlags);
 3959 
 3960   format %{ "EFLAGS" %}
 3961   interface(REG_INTER);
 3962 %}
 3963 
 3964 // Flags register, used as output of compare instructions
 3965 operand eFlagsReg() %{
 3966   constraint(ALLOC_IN_RC(int_flags));
 3967   match(RegFlags);
 3968 
 3969   format %{ "EFLAGS" %}
 3970   interface(REG_INTER);
 3971 %}
 3972 
 3973 // Flags register, used as output of FLOATING POINT compare instructions
 3974 operand eFlagsRegU() %{
 3975   constraint(ALLOC_IN_RC(int_flags));
 3976   match(RegFlags);
 3977 
 3978   format %{ "EFLAGS_U" %}
 3979   interface(REG_INTER);
 3980 %}
 3981 
 3982 operand eFlagsRegUCF() %{
 3983   constraint(ALLOC_IN_RC(int_flags));
 3984   match(RegFlags);
 3985   predicate(false);
 3986 
 3987   format %{ "EFLAGS_U_CF" %}
 3988   interface(REG_INTER);
 3989 %}
 3990 
 3991 // Condition Code Register used by long compare
 3992 operand flagsReg_long_LTGE() %{
 3993   constraint(ALLOC_IN_RC(int_flags));
 3994   match(RegFlags);
 3995   format %{ "FLAGS_LTGE" %}
 3996   interface(REG_INTER);
 3997 %}
 3998 operand flagsReg_long_EQNE() %{
 3999   constraint(ALLOC_IN_RC(int_flags));
 4000   match(RegFlags);
 4001   format %{ "FLAGS_EQNE" %}
 4002   interface(REG_INTER);
 4003 %}
 4004 operand flagsReg_long_LEGT() %{
 4005   constraint(ALLOC_IN_RC(int_flags));
 4006   match(RegFlags);
 4007   format %{ "FLAGS_LEGT" %}
 4008   interface(REG_INTER);
 4009 %}
 4010 
 4011 // Condition Code Register used by unsigned long compare
 4012 operand flagsReg_ulong_LTGE() %{
 4013   constraint(ALLOC_IN_RC(int_flags));
 4014   match(RegFlags);
 4015   format %{ "FLAGS_U_LTGE" %}
 4016   interface(REG_INTER);
 4017 %}
 4018 operand flagsReg_ulong_EQNE() %{
 4019   constraint(ALLOC_IN_RC(int_flags));
 4020   match(RegFlags);
 4021   format %{ "FLAGS_U_EQNE" %}
 4022   interface(REG_INTER);
 4023 %}
 4024 operand flagsReg_ulong_LEGT() %{
 4025   constraint(ALLOC_IN_RC(int_flags));
 4026   match(RegFlags);
 4027   format %{ "FLAGS_U_LEGT" %}
 4028   interface(REG_INTER);
 4029 %}
 4030 
 4031 // Float register operands
 4032 operand regDPR() %{
 4033   predicate( UseSSE < 2 );
 4034   constraint(ALLOC_IN_RC(fp_dbl_reg));
 4035   match(RegD);
 4036   match(regDPR1);
 4037   match(regDPR2);
 4038   format %{ %}
 4039   interface(REG_INTER);
 4040 %}
 4041 
 4042 operand regDPR1(regDPR reg) %{
 4043   predicate( UseSSE < 2 );
 4044   constraint(ALLOC_IN_RC(fp_dbl_reg0));
 4045   match(reg);
 4046   format %{ "FPR1" %}
 4047   interface(REG_INTER);
 4048 %}
 4049 
 4050 operand regDPR2(regDPR reg) %{
 4051   predicate( UseSSE < 2 );
 4052   constraint(ALLOC_IN_RC(fp_dbl_reg1));
 4053   match(reg);
 4054   format %{ "FPR2" %}
 4055   interface(REG_INTER);
 4056 %}
 4057 
 4058 operand regnotDPR1(regDPR reg) %{
 4059   predicate( UseSSE < 2 );
 4060   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
 4061   match(reg);
 4062   format %{ %}
 4063   interface(REG_INTER);
 4064 %}
 4065 
 4066 // Float register operands
 4067 operand regFPR() %{
 4068   predicate( UseSSE < 2 );
 4069   constraint(ALLOC_IN_RC(fp_flt_reg));
 4070   match(RegF);
 4071   match(regFPR1);
 4072   format %{ %}
 4073   interface(REG_INTER);
 4074 %}
 4075 
 4076 // Float register operands
 4077 operand regFPR1(regFPR reg) %{
 4078   predicate( UseSSE < 2 );
 4079   constraint(ALLOC_IN_RC(fp_flt_reg0));
 4080   match(reg);
 4081   format %{ "FPR1" %}
 4082   interface(REG_INTER);
 4083 %}
 4084 
 4085 // XMM Float register operands
 4086 operand regF() %{
 4087   predicate( UseSSE>=1 );
 4088   constraint(ALLOC_IN_RC(float_reg_legacy));
 4089   match(RegF);
 4090   format %{ %}
 4091   interface(REG_INTER);
 4092 %}
 4093 
 4094 operand legRegF() %{
 4095   predicate( UseSSE>=1 );
 4096   constraint(ALLOC_IN_RC(float_reg_legacy));
 4097   match(RegF);
 4098   format %{ %}
 4099   interface(REG_INTER);
 4100 %}
 4101 
 4102 // Float register operands
 4103 operand vlRegF() %{
 4104    constraint(ALLOC_IN_RC(float_reg_vl));
 4105    match(RegF);
 4106 
 4107    format %{ %}
 4108    interface(REG_INTER);
 4109 %}
 4110 
 4111 // XMM Double register operands
 4112 operand regD() %{
 4113   predicate( UseSSE>=2 );
 4114   constraint(ALLOC_IN_RC(double_reg_legacy));
 4115   match(RegD);
 4116   format %{ %}
 4117   interface(REG_INTER);
 4118 %}
 4119 
 4120 // Double register operands
 4121 operand legRegD() %{
 4122   predicate( UseSSE>=2 );
 4123   constraint(ALLOC_IN_RC(double_reg_legacy));
 4124   match(RegD);
 4125   format %{ %}
 4126   interface(REG_INTER);
 4127 %}
 4128 
 4129 operand vlRegD() %{
 4130    constraint(ALLOC_IN_RC(double_reg_vl));
 4131    match(RegD);
 4132 
 4133    format %{ %}
 4134    interface(REG_INTER);
 4135 %}
 4136 
 4137 //----------Memory Operands----------------------------------------------------
 4138 // Direct Memory Operand
 4139 operand direct(immP addr) %{
 4140   match(addr);
 4141 
 4142   format %{ "[$addr]" %}
 4143   interface(MEMORY_INTER) %{
 4144     base(0xFFFFFFFF);
 4145     index(0x4);
 4146     scale(0x0);
 4147     disp($addr);
 4148   %}
 4149 %}
 4150 
 4151 // Indirect Memory Operand
 4152 operand indirect(eRegP reg) %{
 4153   constraint(ALLOC_IN_RC(int_reg));
 4154   match(reg);
 4155 
 4156   format %{ "[$reg]" %}
 4157   interface(MEMORY_INTER) %{
 4158     base($reg);
 4159     index(0x4);
 4160     scale(0x0);
 4161     disp(0x0);
 4162   %}
 4163 %}
 4164 
 4165 // Indirect Memory Plus Short Offset Operand
 4166 operand indOffset8(eRegP reg, immI8 off) %{
 4167   match(AddP reg off);
 4168 
 4169   format %{ "[$reg + $off]" %}
 4170   interface(MEMORY_INTER) %{
 4171     base($reg);
 4172     index(0x4);
 4173     scale(0x0);
 4174     disp($off);
 4175   %}
 4176 %}
 4177 
 4178 // Indirect Memory Plus Long Offset Operand
 4179 operand indOffset32(eRegP reg, immI off) %{
 4180   match(AddP reg off);
 4181 
 4182   format %{ "[$reg + $off]" %}
 4183   interface(MEMORY_INTER) %{
 4184     base($reg);
 4185     index(0x4);
 4186     scale(0x0);
 4187     disp($off);
 4188   %}
 4189 %}
 4190 
 4191 // Indirect Memory Plus Long Offset Operand
 4192 operand indOffset32X(rRegI reg, immP off) %{
 4193   match(AddP off reg);
 4194 
 4195   format %{ "[$reg + $off]" %}
 4196   interface(MEMORY_INTER) %{
 4197     base($reg);
 4198     index(0x4);
 4199     scale(0x0);
 4200     disp($off);
 4201   %}
 4202 %}
 4203 
 4204 // Indirect Memory Plus Index Register Plus Offset Operand
 4205 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
 4206   match(AddP (AddP reg ireg) off);
 4207 
 4208   op_cost(10);
 4209   format %{"[$reg + $off + $ireg]" %}
 4210   interface(MEMORY_INTER) %{
 4211     base($reg);
 4212     index($ireg);
 4213     scale(0x0);
 4214     disp($off);
 4215   %}
 4216 %}
 4217 
 4218 // Indirect Memory Plus Index Register Plus Offset Operand
 4219 operand indIndex(eRegP reg, rRegI ireg) %{
 4220   match(AddP reg ireg);
 4221 
 4222   op_cost(10);
 4223   format %{"[$reg + $ireg]" %}
 4224   interface(MEMORY_INTER) %{
 4225     base($reg);
 4226     index($ireg);
 4227     scale(0x0);
 4228     disp(0x0);
 4229   %}
 4230 %}
 4231 
 4232 // // -------------------------------------------------------------------------
 4233 // // 486 architecture doesn't support "scale * index + offset" without a base
 4234 // // -------------------------------------------------------------------------
 4235 // // Scaled Memory Operands
 4236 // // Indirect Memory Times Scale Plus Offset Operand
 4237 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 4238 //   match(AddP off (LShiftI ireg scale));
 4239 //
 4240 //   op_cost(10);
 4241 //   format %{"[$off + $ireg << $scale]" %}
 4242 //   interface(MEMORY_INTER) %{
 4243 //     base(0x4);
 4244 //     index($ireg);
 4245 //     scale($scale);
 4246 //     disp($off);
 4247 //   %}
 4248 // %}
 4249 
 4250 // Indirect Memory Times Scale Plus Index Register
 4251 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
 4252   match(AddP reg (LShiftI ireg scale));
 4253 
 4254   op_cost(10);
 4255   format %{"[$reg + $ireg << $scale]" %}
 4256   interface(MEMORY_INTER) %{
 4257     base($reg);
 4258     index($ireg);
 4259     scale($scale);
 4260     disp(0x0);
 4261   %}
 4262 %}
 4263 
 4264 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
 4265 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
 4266   match(AddP (AddP reg (LShiftI ireg scale)) off);
 4267 
 4268   op_cost(10);
 4269   format %{"[$reg + $off + $ireg << $scale]" %}
 4270   interface(MEMORY_INTER) %{
 4271     base($reg);
 4272     index($ireg);
 4273     scale($scale);
 4274     disp($off);
 4275   %}
 4276 %}
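
      // Example (illustrative): an int-array element address of the form
      //   array_oop + <base offset> + (index << 2)
      // matches this operand with $reg == the array oop, $ireg == the index,
      // $scale == 2 and $off == the (hypothetical) array base offset.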
 4277 
 4278 //----------Load Long Memory Operands------------------------------------------
 4279 // The load-long idiom will use its address expression again after loading
 4280 // the first word of the long.  If the load-long destination overlaps with
 4281 // registers used in the addressing expression, the 2nd half will be loaded
 4282 // from a clobbered address.  Fix this by requiring that load-long use
 4283 // address registers that do not overlap with the load-long target.
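      // Example of the hazard (illustrative): loading a long at [EAX+8] into
      // EDX:EAX would fetch the low word into EAX and then form the high-word
      // address from the clobbered EAX.  Forcing the address into ESI, which is
      // never allocated as half of a long register pair here, sidesteps that.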
 4284 
 4285 // load-long support
 4286 operand load_long_RegP() %{
 4287   constraint(ALLOC_IN_RC(esi_reg));
 4288   match(RegP);
 4289   match(eSIRegP);
 4290   op_cost(100);
 4291   format %{  %}
 4292   interface(REG_INTER);
 4293 %}
 4294 
 4295 // Indirect Memory Operand Long
 4296 operand load_long_indirect(load_long_RegP reg) %{
 4297   constraint(ALLOC_IN_RC(esi_reg));
 4298   match(reg);
 4299 
 4300   format %{ "[$reg]" %}
 4301   interface(MEMORY_INTER) %{
 4302     base($reg);
 4303     index(0x4);
 4304     scale(0x0);
 4305     disp(0x0);
 4306   %}
 4307 %}
 4308 
 4309 // Indirect Memory Plus Long Offset Operand
 4310 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
 4311   match(AddP reg off);
 4312 
 4313   format %{ "[$reg + $off]" %}
 4314   interface(MEMORY_INTER) %{
 4315     base($reg);
 4316     index(0x4);
 4317     scale(0x0);
 4318     disp($off);
 4319   %}
 4320 %}
 4321 
 4322 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
 4323 
 4324 
 4325 //----------Special Memory Operands--------------------------------------------
 4326 // Stack Slot Operand - This operand is used for loading and storing temporary
 4327 //                      values on the stack where a match requires a value to
 4328 //                      flow through memory.
 4329 operand stackSlotP(sRegP reg) %{
 4330   constraint(ALLOC_IN_RC(stack_slots));
 4331   // No match rule because this operand is only generated in matching
 4332   format %{ "[$reg]" %}
 4333   interface(MEMORY_INTER) %{
 4334     base(0x4);   // ESP
 4335     index(0x4);  // No Index
 4336     scale(0x0);  // No Scale
 4337     disp($reg);  // Stack Offset
 4338   %}
 4339 %}
 4340 
 4341 operand stackSlotI(sRegI reg) %{
 4342   constraint(ALLOC_IN_RC(stack_slots));
 4343   // No match rule because this operand is only generated in matching
 4344   format %{ "[$reg]" %}
 4345   interface(MEMORY_INTER) %{
 4346     base(0x4);   // ESP
 4347     index(0x4);  // No Index
 4348     scale(0x0);  // No Scale
 4349     disp($reg);  // Stack Offset
 4350   %}
 4351 %}
 4352 
 4353 operand stackSlotF(sRegF reg) %{
 4354   constraint(ALLOC_IN_RC(stack_slots));
 4355   // No match rule because this operand is only generated in matching
 4356   format %{ "[$reg]" %}
 4357   interface(MEMORY_INTER) %{
 4358     base(0x4);   // ESP
 4359     index(0x4);  // No Index
 4360     scale(0x0);  // No Scale
 4361     disp($reg);  // Stack Offset
 4362   %}
 4363 %}
 4364 
 4365 operand stackSlotD(sRegD reg) %{
 4366   constraint(ALLOC_IN_RC(stack_slots));
 4367   // No match rule because this operand is only generated in matching
 4368   format %{ "[$reg]" %}
 4369   interface(MEMORY_INTER) %{
 4370     base(0x4);   // ESP
 4371     index(0x4);  // No Index
 4372     scale(0x0);  // No Scale
 4373     disp($reg);  // Stack Offset
 4374   %}
 4375 %}
 4376 
 4377 operand stackSlotL(sRegL reg) %{
 4378   constraint(ALLOC_IN_RC(stack_slots));
 4379   // No match rule because this operand is only generated in matching
 4380   format %{ "[$reg]" %}
 4381   interface(MEMORY_INTER) %{
 4382     base(0x4);   // ESP
 4383     index(0x4);  // No Index
 4384     scale(0x0);  // No Scale
 4385     disp($reg);  // Stack Offset
 4386   %}
 4387 %}
 4388 
 4389 //----------Conditional Branch Operands----------------------------------------
 4390 // Comparison Op  - This is the operation of the comparison, and is limited to
 4391 //                  the following set of codes:
 4392 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
 4393 //
 4394 // Other attributes of the comparison, such as unsignedness, are specified
 4395 // by the comparison instruction that sets a condition code flags register.
 4396 // That result is represented by a flags operand whose subtype is appropriate
 4397 // to the unsignedness (etc.) of the comparison.
 4398 //
 4399 // Later, the instruction which matches both the Comparison Op (a Bool) and
 4400 // the flags (produced by the Cmp) specifies the coding of the comparison op
 4401 // by matching a specific subtype of Bool operand below, such as cmpOpU.
 4402 
 4403 // Comparison Code
 4404 operand cmpOp() %{
 4405   match(Bool);
 4406 
 4407   format %{ "" %}
 4408   interface(COND_INTER) %{
 4409     equal(0x4, "e");
 4410     not_equal(0x5, "ne");
 4411     less(0xC, "l");
 4412     greater_equal(0xD, "ge");
 4413     less_equal(0xE, "le");
 4414     greater(0xF, "g");
 4415     overflow(0x0, "o");
 4416     no_overflow(0x1, "no");
 4417   %}
 4418 %}
 4419 
 4420 // Comparison Code, unsigned compare.  Used by FP also, with
 4421 // C2 (unordered) turned into GT or LT already.  The other bits
 4422 // C0 and C3 are turned into Carry & Zero flags.
 4423 operand cmpOpU() %{
 4424   match(Bool);
 4425 
 4426   format %{ "" %}
 4427   interface(COND_INTER) %{
 4428     equal(0x4, "e");
 4429     not_equal(0x5, "ne");
 4430     less(0x2, "b");
 4431     greater_equal(0x3, "nb");
 4432     less_equal(0x6, "be");
 4433     greater(0x7, "nbe");
 4434     overflow(0x0, "o");
 4435     no_overflow(0x1, "no");
 4436   %}
 4437 %}
 4438 
 4439 // Floating comparisons that don't require any fixup for the unordered case
 4440 operand cmpOpUCF() %{
 4441   match(Bool);
 4442   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
 4443             n->as_Bool()->_test._test == BoolTest::ge ||
 4444             n->as_Bool()->_test._test == BoolTest::le ||
 4445             n->as_Bool()->_test._test == BoolTest::gt);
 4446   format %{ "" %}
 4447   interface(COND_INTER) %{
 4448     equal(0x4, "e");
 4449     not_equal(0x5, "ne");
 4450     less(0x2, "b");
 4451     greater_equal(0x3, "nb");
 4452     less_equal(0x6, "be");
 4453     greater(0x7, "nbe");
 4454     overflow(0x0, "o");
 4455     no_overflow(0x1, "no");
 4456   %}
 4457 %}
 4458 
 4459 
 4460 // Floating comparisons that can be fixed up with extra conditional jumps
 4461 operand cmpOpUCF2() %{
 4462   match(Bool);
 4463   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
 4464             n->as_Bool()->_test._test == BoolTest::eq);
 4465   format %{ "" %}
 4466   interface(COND_INTER) %{
 4467     equal(0x4, "e");
 4468     not_equal(0x5, "ne");
 4469     less(0x2, "b");
 4470     greater_equal(0x3, "nb");
 4471     less_equal(0x6, "be");
 4472     greater(0x7, "nbe");
 4473     overflow(0x0, "o");
 4474     no_overflow(0x1, "no");
 4475   %}
 4476 %}
 4477 
 4478 // Comparison Code for FP conditional move
 4479 operand cmpOp_fcmov() %{
 4480   match(Bool);
 4481 
 4482   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
 4483             n->as_Bool()->_test._test != BoolTest::no_overflow);
 4484   format %{ "" %}
 4485   interface(COND_INTER) %{
 4486     equal        (0x0C8);
 4487     not_equal    (0x1C8);
 4488     less         (0x0C0);
 4489     greater_equal(0x1C0);
 4490     less_equal   (0x0D0);
 4491     greater      (0x1D0);
 4492     overflow(0x0, "o"); // not really supported by the instruction
 4493     no_overflow(0x1, "no"); // not really supported by the instruction
 4494   %}
 4495 %}
 4496 
 4497 // Comparison Code used in long compares
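// When the matcher commutes the operands of the compare, each condition must
// be replaced by its operand-swapped counterpart (x < y  <=>  y > x), so
// "less" is encoded as "g", "greater_equal" as "le", and so on.  The unsigned
// variant cmpOpU_commute below applies the same swap to b/nb/be/nbe.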
 4498 operand cmpOp_commute() %{
 4499   match(Bool);
 4500 
 4501   format %{ "" %}
 4502   interface(COND_INTER) %{
 4503     equal(0x4, "e");
 4504     not_equal(0x5, "ne");
 4505     less(0xF, "g");
 4506     greater_equal(0xE, "le");
 4507     less_equal(0xD, "ge");
 4508     greater(0xC, "l");
 4509     overflow(0x0, "o");
 4510     no_overflow(0x1, "no");
 4511   %}
 4512 %}
 4513 
 4514 // Comparison Code used in unsigned long compares
 4515 operand cmpOpU_commute() %{
 4516   match(Bool);
 4517 
 4518   format %{ "" %}
 4519   interface(COND_INTER) %{
 4520     equal(0x4, "e");
 4521     not_equal(0x5, "ne");
 4522     less(0x7, "nbe");
 4523     greater_equal(0x6, "be");
 4524     less_equal(0x3, "nb");
 4525     greater(0x2, "b");
 4526     overflow(0x0, "o");
 4527     no_overflow(0x1, "no");
 4528   %}
 4529 %}
 4530 
 4531 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
 4533 // instruction definitions by not requiring the AD writer to specify separate
 4534 // instructions for every form of operand when the instruction accepts
 4535 // multiple operand types with the same basic encoding and format.  The classic
 4536 // case of this is memory operands.
 4537 
 4538 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
 4539                indIndex, indIndexScale, indIndexScaleOffset);
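
// As an illustrative usage sketch (not an active definition): because an
// instruct can take the "memory" opclass as an operand, one definition such
// as loadI further below covers every addressing form listed above, e.g.
//
//   instruct exampleLoadI(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));
//     format %{ "MOV    $dst,$mem" %}
//     ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
//     ins_pipe( ialu_reg_mem );
//   %}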
 4540 
 4541 // Long memory operations are encoded in 2 instructions and a +4 offset.
 4542 // This means some kind of offset is always required and you cannot use
// an oop as the offset (as would be done when addressing static globals).
 4544 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
 4545                     indIndex, indIndexScale, indIndexScaleOffset);
 4546 
 4547 
 4548 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
 4550 pipeline %{
 4551 
 4552 //----------ATTRIBUTES---------------------------------------------------------
 4553 attributes %{
  variable_size_instructions;        // Variable-size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
 4557   instruction_fetch_unit_size = 16;  // The processor fetches one line
 4558   instruction_fetch_units = 1;       // of 16 bytes
 4559 
 4560   // List of nop instructions
 4561   nops( MachNop );
 4562 %}
 4563 
 4564 //----------RESOURCES----------------------------------------------------------
 4565 // Resources are the functional units available to the machine
 4566 
 4567 // Generic P2/P3 pipeline
 4568 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
 4569 // 3 instructions decoded per cycle.
 4570 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU ops; only ALU0 handles mul/div instructions.
 4572 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
 4573            MS0, MS1, MEM = MS0 | MS1,
 4574            BR, FPU,
 4575            ALU0, ALU1, ALU = ALU0 | ALU1 );
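
// A pipe_class stage line such as "DECODE : S0" may be satisfied by any one of
// the OR'd units (D0, D1 or D2), whereas naming "D0 : S0" demands the big
// decoder specifically; the same applies to MEM vs. MS0/MS1 and ALU vs.
// ALU0/ALU1.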
 4576 
 4577 //----------PIPELINE DESCRIPTION-----------------------------------------------
 4578 // Pipeline Description specifies the stages in the machine's pipeline
 4579 
 4580 // Generic P2/P3 pipeline
 4581 pipe_desc(S0, S1, S2, S3, S4, S5);
 4582 
 4583 //----------PIPELINE CLASSES---------------------------------------------------
 4584 // Pipeline Classes describe the stages in which input and output are
 4585 // referenced by the hardware pipeline.
 4586 
 4587 // Naming convention: ialu or fpu
 4588 // Then: _reg
 4589 // Then: _reg if there is a 2nd register
 4590 // Then: _long if it's a pair of instructions implementing a long
 4591 // Then: _fat if it requires the big decoder
 4592 //   Or: _mem if it requires the big decoder and a memory unit.
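//
// For example, "ialu_reg_mem" below is an integer ALU operation with a
// register destination and a memory source, so it claims the big decoder (D0)
// and a memory unit in addition to an ALU.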
 4593 
 4594 // Integer ALU reg operation
 4595 pipe_class ialu_reg(rRegI dst) %{
 4596     single_instruction;
 4597     dst    : S4(write);
 4598     dst    : S3(read);
 4599     DECODE : S0;        // any decoder
 4600     ALU    : S3;        // any alu
 4601 %}
 4602 
 4603 // Long ALU reg operation
 4604 pipe_class ialu_reg_long(eRegL dst) %{
 4605     instruction_count(2);
 4606     dst    : S4(write);
 4607     dst    : S3(read);
 4608     DECODE : S0(2);     // any 2 decoders
 4609     ALU    : S3(2);     // both alus
 4610 %}
 4611 
 4612 // Integer ALU reg operation using big decoder
 4613 pipe_class ialu_reg_fat(rRegI dst) %{
 4614     single_instruction;
 4615     dst    : S4(write);
 4616     dst    : S3(read);
 4617     D0     : S0;        // big decoder only
 4618     ALU    : S3;        // any alu
 4619 %}
 4620 
 4621 // Long ALU reg operation using big decoder
 4622 pipe_class ialu_reg_long_fat(eRegL dst) %{
 4623     instruction_count(2);
 4624     dst    : S4(write);
 4625     dst    : S3(read);
 4626     D0     : S0(2);     // big decoder only; twice
 4627     ALU    : S3(2);     // any 2 alus
 4628 %}
 4629 
 4630 // Integer ALU reg-reg operation
 4631 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
 4632     single_instruction;
 4633     dst    : S4(write);
 4634     src    : S3(read);
 4635     DECODE : S0;        // any decoder
 4636     ALU    : S3;        // any alu
 4637 %}
 4638 
 4639 // Long ALU reg-reg operation
 4640 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
 4641     instruction_count(2);
 4642     dst    : S4(write);
 4643     src    : S3(read);
 4644     DECODE : S0(2);     // any 2 decoders
 4645     ALU    : S3(2);     // both alus
 4646 %}
 4647 
// Integer ALU reg-reg operation using big decoder
 4649 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
 4650     single_instruction;
 4651     dst    : S4(write);
 4652     src    : S3(read);
 4653     D0     : S0;        // big decoder only
 4654     ALU    : S3;        // any alu
 4655 %}
 4656 
// Long ALU reg-reg operation using big decoder
 4658 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
 4659     instruction_count(2);
 4660     dst    : S4(write);
 4661     src    : S3(read);
 4662     D0     : S0(2);     // big decoder only; twice
 4663     ALU    : S3(2);     // both alus
 4664 %}
 4665 
 4666 // Integer ALU reg-mem operation
 4667 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
 4668     single_instruction;
 4669     dst    : S5(write);
 4670     mem    : S3(read);
 4671     D0     : S0;        // big decoder only
 4672     ALU    : S4;        // any alu
 4673     MEM    : S3;        // any mem
 4674 %}
 4675 
 4676 // Long ALU reg-mem operation
 4677 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
 4678     instruction_count(2);
 4679     dst    : S5(write);
 4680     mem    : S3(read);
 4681     D0     : S0(2);     // big decoder only; twice
 4682     ALU    : S4(2);     // any 2 alus
 4683     MEM    : S3(2);     // both mems
 4684 %}
 4685 
 4686 // Integer mem operation (prefetch)
 4687 pipe_class ialu_mem(memory mem)
 4688 %{
 4689     single_instruction;
 4690     mem    : S3(read);
 4691     D0     : S0;        // big decoder only
 4692     MEM    : S3;        // any mem
 4693 %}
 4694 
 4695 // Integer Store to Memory
 4696 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
 4697     single_instruction;
 4698     mem    : S3(read);
 4699     src    : S5(read);
 4700     D0     : S0;        // big decoder only
 4701     ALU    : S4;        // any alu
 4702     MEM    : S3;
 4703 %}
 4704 
 4705 // Long Store to Memory
 4706 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
 4707     instruction_count(2);
 4708     mem    : S3(read);
 4709     src    : S5(read);
 4710     D0     : S0(2);     // big decoder only; twice
 4711     ALU    : S4(2);     // any 2 alus
 4712     MEM    : S3(2);     // Both mems
 4713 %}
 4714 
// Integer Store immediate to Memory
 4716 pipe_class ialu_mem_imm(memory mem) %{
 4717     single_instruction;
 4718     mem    : S3(read);
 4719     D0     : S0;        // big decoder only
 4720     ALU    : S4;        // any alu
 4721     MEM    : S3;
 4722 %}
 4723 
 4724 // Integer ALU0 reg-reg operation
 4725 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
 4726     single_instruction;
 4727     dst    : S4(write);
 4728     src    : S3(read);
 4729     D0     : S0;        // Big decoder only
 4730     ALU0   : S3;        // only alu0
 4731 %}
 4732 
 4733 // Integer ALU0 reg-mem operation
 4734 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
 4735     single_instruction;
 4736     dst    : S5(write);
 4737     mem    : S3(read);
 4738     D0     : S0;        // big decoder only
 4739     ALU0   : S4;        // ALU0 only
 4740     MEM    : S3;        // any mem
 4741 %}
 4742 
 4743 // Integer ALU reg-reg operation
 4744 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
 4745     single_instruction;
 4746     cr     : S4(write);
 4747     src1   : S3(read);
 4748     src2   : S3(read);
 4749     DECODE : S0;        // any decoder
 4750     ALU    : S3;        // any alu
 4751 %}
 4752 
 4753 // Integer ALU reg-imm operation
 4754 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
 4755     single_instruction;
 4756     cr     : S4(write);
 4757     src1   : S3(read);
 4758     DECODE : S0;        // any decoder
 4759     ALU    : S3;        // any alu
 4760 %}
 4761 
 4762 // Integer ALU reg-mem operation
 4763 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
 4764     single_instruction;
 4765     cr     : S4(write);
 4766     src1   : S3(read);
 4767     src2   : S3(read);
 4768     D0     : S0;        // big decoder only
 4769     ALU    : S4;        // any alu
 4770     MEM    : S3;
 4771 %}
 4772 
 4773 // Conditional move reg-reg
 4774 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
 4775     instruction_count(4);
 4776     y      : S4(read);
 4777     q      : S3(read);
 4778     p      : S3(read);
    DECODE : S0(4);     // any 4 decoders
 4780 %}
 4781 
 4782 // Conditional move reg-reg
 4783 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
 4784     single_instruction;
 4785     dst    : S4(write);
 4786     src    : S3(read);
 4787     cr     : S3(read);
 4788     DECODE : S0;        // any decoder
 4789 %}
 4790 
 4791 // Conditional move reg-mem
 4792 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
 4793     single_instruction;
 4794     dst    : S4(write);
 4795     src    : S3(read);
 4796     cr     : S3(read);
 4797     DECODE : S0;        // any decoder
 4798     MEM    : S3;
 4799 %}
 4800 
 4801 // Conditional move reg-reg long
 4802 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
 4803     single_instruction;
 4804     dst    : S4(write);
 4805     src    : S3(read);
 4806     cr     : S3(read);
 4807     DECODE : S0(2);     // any 2 decoders
 4808 %}
 4809 
 4810 // Conditional move double reg-reg
 4811 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
 4812     single_instruction;
 4813     dst    : S4(write);
 4814     src    : S3(read);
 4815     cr     : S3(read);
 4816     DECODE : S0;        // any decoder
 4817 %}
 4818 
 4819 // Float reg-reg operation
 4820 pipe_class fpu_reg(regDPR dst) %{
 4821     instruction_count(2);
 4822     dst    : S3(read);
 4823     DECODE : S0(2);     // any 2 decoders
 4824     FPU    : S3;
 4825 %}
 4826 
 4827 // Float reg-reg operation
 4828 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
 4829     instruction_count(2);
 4830     dst    : S4(write);
 4831     src    : S3(read);
 4832     DECODE : S0(2);     // any 2 decoders
 4833     FPU    : S3;
 4834 %}
 4835 
 4836 // Float reg-reg operation
 4837 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
 4838     instruction_count(3);
 4839     dst    : S4(write);
 4840     src1   : S3(read);
 4841     src2   : S3(read);
 4842     DECODE : S0(3);     // any 3 decoders
 4843     FPU    : S3(2);
 4844 %}
 4845 
 4846 // Float reg-reg operation
 4847 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
 4848     instruction_count(4);
 4849     dst    : S4(write);
 4850     src1   : S3(read);
 4851     src2   : S3(read);
 4852     src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
 4854     FPU    : S3(2);
 4855 %}
 4856 
// Float reg-mem-reg-reg operation
 4858 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
 4859     instruction_count(4);
 4860     dst    : S4(write);
 4861     src1   : S3(read);
 4862     src2   : S3(read);
 4863     src3   : S3(read);
 4864     DECODE : S1(3);     // any 3 decoders
 4865     D0     : S0;        // Big decoder only
 4866     FPU    : S3(2);
 4867     MEM    : S3;
 4868 %}
 4869 
 4870 // Float reg-mem operation
 4871 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
 4872     instruction_count(2);
 4873     dst    : S5(write);
 4874     mem    : S3(read);
 4875     D0     : S0;        // big decoder only
 4876     DECODE : S1;        // any decoder for FPU POP
 4877     FPU    : S4;
 4878     MEM    : S3;        // any mem
 4879 %}
 4880 
 4881 // Float reg-mem operation
 4882 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
 4883     instruction_count(3);
 4884     dst    : S5(write);
 4885     src1   : S3(read);
 4886     mem    : S3(read);
 4887     D0     : S0;        // big decoder only
 4888     DECODE : S1(2);     // any decoder for FPU POP
 4889     FPU    : S4;
 4890     MEM    : S3;        // any mem
 4891 %}
 4892 
 4893 // Float mem-reg operation
 4894 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
 4895     instruction_count(2);
 4896     src    : S5(read);
 4897     mem    : S3(read);
 4898     DECODE : S0;        // any decoder for FPU PUSH
 4899     D0     : S1;        // big decoder only
 4900     FPU    : S4;
 4901     MEM    : S3;        // any mem
 4902 %}
 4903 
 4904 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
 4905     instruction_count(3);
 4906     src1   : S3(read);
 4907     src2   : S3(read);
 4908     mem    : S3(read);
 4909     DECODE : S0(2);     // any decoder for FPU PUSH
 4910     D0     : S1;        // big decoder only
 4911     FPU    : S4;
 4912     MEM    : S3;        // any mem
 4913 %}
 4914 
 4915 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
 4916     instruction_count(3);
 4917     src1   : S3(read);
 4918     src2   : S3(read);
 4919     mem    : S4(read);
 4920     DECODE : S0;        // any decoder for FPU PUSH
 4921     D0     : S0(2);     // big decoder only
 4922     FPU    : S4;
 4923     MEM    : S3(2);     // any mem
 4924 %}
 4925 
 4926 pipe_class fpu_mem_mem(memory dst, memory src1) %{
 4927     instruction_count(2);
 4928     src1   : S3(read);
 4929     dst    : S4(read);
 4930     D0     : S0(2);     // big decoder only
 4931     MEM    : S3(2);     // any mem
 4932 %}
 4933 
 4934 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
 4935     instruction_count(3);
 4936     src1   : S3(read);
 4937     src2   : S3(read);
 4938     dst    : S4(read);
 4939     D0     : S0(3);     // big decoder only
 4940     FPU    : S4;
 4941     MEM    : S3(3);     // any mem
 4942 %}
 4943 
 4944 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
 4945     instruction_count(3);
 4946     src1   : S4(read);
 4947     mem    : S4(read);
 4948     DECODE : S0;        // any decoder for FPU PUSH
 4949     D0     : S0(2);     // big decoder only
 4950     FPU    : S4;
 4951     MEM    : S3(2);     // any mem
 4952 %}
 4953 
 4954 // Float load constant
 4955 pipe_class fpu_reg_con(regDPR dst) %{
 4956     instruction_count(2);
 4957     dst    : S5(write);
 4958     D0     : S0;        // big decoder only for the load
 4959     DECODE : S1;        // any decoder for FPU POP
 4960     FPU    : S4;
 4961     MEM    : S3;        // any mem
 4962 %}
 4963 
 4964 // Float load constant
 4965 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
 4966     instruction_count(3);
 4967     dst    : S5(write);
 4968     src    : S3(read);
 4969     D0     : S0;        // big decoder only for the load
 4970     DECODE : S1(2);     // any decoder for FPU POP
 4971     FPU    : S4;
 4972     MEM    : S3;        // any mem
 4973 %}
 4974 
 4975 // UnConditional branch
 4976 pipe_class pipe_jmp( label labl ) %{
 4977     single_instruction;
 4978     BR   : S3;
 4979 %}
 4980 
 4981 // Conditional branch
 4982 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
 4983     single_instruction;
 4984     cr    : S1(read);
 4985     BR    : S3;
 4986 %}
 4987 
 4988 // Allocation idiom
 4989 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
 4990     instruction_count(1); force_serialization;
 4991     fixed_latency(6);
 4992     heap_ptr : S3(read);
 4993     DECODE   : S0(3);
 4994     D0       : S2;
 4995     MEM      : S3;
 4996     ALU      : S3(2);
 4997     dst      : S5(write);
 4998     BR       : S5;
 4999 %}
 5000 
 5001 // Generic big/slow expanded idiom
 5002 pipe_class pipe_slow(  ) %{
 5003     instruction_count(10); multiple_bundles; force_serialization;
 5004     fixed_latency(100);
 5005     D0  : S0(2);
 5006     MEM : S3(2);
 5007 %}
 5008 
 5009 // The real do-nothing guy
 5010 pipe_class empty( ) %{
 5011     instruction_count(0);
 5012 %}
 5013 
 5014 // Define the class for the Nop node
 5015 define %{
 5016    MachNop = empty;
 5017 %}
 5018 
 5019 %}
 5020 
 5021 //----------INSTRUCTIONS-------------------------------------------------------
 5022 //
 5023 // match      -- States which machine-independent subtree may be replaced
 5024 //               by this instruction.
 5025 // ins_cost   -- The estimated cost of this instruction is used by instruction
 5026 //               selection to identify a minimum cost tree of machine
 5027 //               instructions that matches a tree of machine-independent
 5028 //               instructions.
 5029 // format     -- A string providing the disassembly for this instruction.
 5030 //               The value of an instruction's operand may be inserted
 5031 //               by referring to it with a '$' prefix.
 5032 // opcode     -- Three instruction opcodes may be provided.  These are referred
 5033 //               to within an encode class as $primary, $secondary, and $tertiary
 5034 //               respectively.  The primary opcode is commonly used to
 5035 //               indicate the type of machine instruction, while secondary
 5036 //               and tertiary are often used for prefix options or addressing
 5037 //               modes.
 5038 // ins_encode -- A list of encode classes with parameters. The encode class
 5039 //               name must have been defined in an 'enc_class' specification
 5040 //               in the encode section of the architecture description.
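
// A minimal illustrative skeleton (not an active definition) showing how these
// attributes fit together; compare with the real definitions that follow:
//
//   instruct exampleAddI(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));    // ideal subtree this instruction replaces
//     effect(KILL cr);                  // the ALU op clobbers the flags register
//     ins_cost(150);                    // relative cost used by instruction selection
//     format %{ "ADD    $dst,$src" %}   // disassembly string
//     opcode(0x03);                     // $primary opcode
//     ins_encode( OpcP, RegReg(dst,src) );
//     ins_pipe( ialu_reg_reg );
//   %}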
 5041 
 5042 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
 5043 // Load Float
 5044 instruct MoveF2LEG(legRegF dst, regF src) %{
 5045   match(Set dst src);
 5046   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5047   ins_encode %{
 5048     ShouldNotReachHere();
 5049   %}
 5050   ins_pipe( fpu_reg_reg );
 5051 %}
 5052 
 5053 // Load Float
 5054 instruct MoveLEG2F(regF dst, legRegF src) %{
 5055   match(Set dst src);
 5056   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
 5057   ins_encode %{
 5058     ShouldNotReachHere();
 5059   %}
 5060   ins_pipe( fpu_reg_reg );
 5061 %}
 5062 
 5063 // Load Float
 5064 instruct MoveF2VL(vlRegF dst, regF src) %{
 5065   match(Set dst src);
 5066   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5067   ins_encode %{
 5068     ShouldNotReachHere();
 5069   %}
 5070   ins_pipe( fpu_reg_reg );
 5071 %}
 5072 
 5073 // Load Float
 5074 instruct MoveVL2F(regF dst, vlRegF src) %{
 5075   match(Set dst src);
 5076   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
 5077   ins_encode %{
 5078     ShouldNotReachHere();
 5079   %}
 5080   ins_pipe( fpu_reg_reg );
 5081 %}
 5082 
 5083 
 5084 
 5085 // Load Double
 5086 instruct MoveD2LEG(legRegD dst, regD src) %{
 5087   match(Set dst src);
 5088   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5089   ins_encode %{
 5090     ShouldNotReachHere();
 5091   %}
 5092   ins_pipe( fpu_reg_reg );
 5093 %}
 5094 
 5095 // Load Double
 5096 instruct MoveLEG2D(regD dst, legRegD src) %{
 5097   match(Set dst src);
 5098   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
 5099   ins_encode %{
 5100     ShouldNotReachHere();
 5101   %}
 5102   ins_pipe( fpu_reg_reg );
 5103 %}
 5104 
 5105 // Load Double
 5106 instruct MoveD2VL(vlRegD dst, regD src) %{
 5107   match(Set dst src);
 5108   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5109   ins_encode %{
 5110     ShouldNotReachHere();
 5111   %}
 5112   ins_pipe( fpu_reg_reg );
 5113 %}
 5114 
 5115 // Load Double
 5116 instruct MoveVL2D(regD dst, vlRegD src) %{
 5117   match(Set dst src);
 5118   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
 5119   ins_encode %{
 5120     ShouldNotReachHere();
 5121   %}
 5122   ins_pipe( fpu_reg_reg );
 5123 %}
 5124 
 5125 //----------BSWAP-Instruction--------------------------------------------------
 5126 instruct bytes_reverse_int(rRegI dst) %{
 5127   match(Set dst (ReverseBytesI dst));
 5128 
 5129   format %{ "BSWAP  $dst" %}
 5130   opcode(0x0F, 0xC8);
 5131   ins_encode( OpcP, OpcSReg(dst) );
 5132   ins_pipe( ialu_reg );
 5133 %}
 5134 
 5135 instruct bytes_reverse_long(eRegL dst) %{
 5136   match(Set dst (ReverseBytesL dst));
 5137 
 5138   format %{ "BSWAP  $dst.lo\n\t"
 5139             "BSWAP  $dst.hi\n\t"
 5140             "XCHG   $dst.lo $dst.hi" %}
 5141 
 5142   ins_cost(125);
 5143   ins_encode( bswap_long_bytes(dst) );
 5144   ins_pipe( ialu_reg_reg);
 5145 %}
 5146 
 5147 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
 5148   match(Set dst (ReverseBytesUS dst));
 5149   effect(KILL cr);
 5150 
 5151   format %{ "BSWAP  $dst\n\t"
 5152             "SHR    $dst,16\n\t" %}
 5153   ins_encode %{
 5154     __ bswapl($dst$$Register);
 5155     __ shrl($dst$$Register, 16);
 5156   %}
 5157   ins_pipe( ialu_reg );
 5158 %}
 5159 
 5160 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
 5161   match(Set dst (ReverseBytesS dst));
 5162   effect(KILL cr);
 5163 
 5164   format %{ "BSWAP  $dst\n\t"
 5165             "SAR    $dst,16\n\t" %}
 5166   ins_encode %{
 5167     __ bswapl($dst$$Register);
 5168     __ sarl($dst$$Register, 16);
 5169   %}
 5170   ins_pipe( ialu_reg );
 5171 %}
 5172 
 5173 
 5174 //---------- Zeros Count Instructions ------------------------------------------
 5175 
 5176 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5177   predicate(UseCountLeadingZerosInstruction);
 5178   match(Set dst (CountLeadingZerosI src));
 5179   effect(KILL cr);
 5180 
 5181   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
 5182   ins_encode %{
 5183     __ lzcntl($dst$$Register, $src$$Register);
 5184   %}
 5185   ins_pipe(ialu_reg);
 5186 %}
 5187 
 5188 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
 5189   predicate(!UseCountLeadingZerosInstruction);
 5190   match(Set dst (CountLeadingZerosI src));
 5191   effect(KILL cr);
 5192 
 5193   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
 5194             "JNZ    skip\n\t"
 5195             "MOV    $dst, -1\n"
 5196       "skip:\n\t"
 5197             "NEG    $dst\n\t"
 5198             "ADD    $dst, 31" %}
 5199   ins_encode %{
 5200     Register Rdst = $dst$$Register;
 5201     Register Rsrc = $src$$Register;
 5202     Label skip;
 5203     __ bsrl(Rdst, Rsrc);
 5204     __ jccb(Assembler::notZero, skip);
 5205     __ movl(Rdst, -1);
 5206     __ bind(skip);
 5207     __ negl(Rdst);
 5208     __ addl(Rdst, BitsPerInt - 1);
 5209   %}
 5210   ins_pipe(ialu_reg);
 5211 %}
 5212 
 5213 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5214   predicate(UseCountLeadingZerosInstruction);
 5215   match(Set dst (CountLeadingZerosL src));
 5216   effect(TEMP dst, KILL cr);
 5217 
 5218   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
 5219             "JNC    done\n\t"
 5220             "LZCNT  $dst, $src.lo\n\t"
 5221             "ADD    $dst, 32\n"
 5222       "done:" %}
 5223   ins_encode %{
 5224     Register Rdst = $dst$$Register;
 5225     Register Rsrc = $src$$Register;
 5226     Label done;
 5227     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5228     __ jccb(Assembler::carryClear, done);
 5229     __ lzcntl(Rdst, Rsrc);
 5230     __ addl(Rdst, BitsPerInt);
 5231     __ bind(done);
 5232   %}
 5233   ins_pipe(ialu_reg);
 5234 %}
 5235 
 5236 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
 5237   predicate(!UseCountLeadingZerosInstruction);
 5238   match(Set dst (CountLeadingZerosL src));
 5239   effect(TEMP dst, KILL cr);
 5240 
 5241   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
 5242             "JZ     msw_is_zero\n\t"
 5243             "ADD    $dst, 32\n\t"
 5244             "JMP    not_zero\n"
 5245       "msw_is_zero:\n\t"
 5246             "BSR    $dst, $src.lo\n\t"
 5247             "JNZ    not_zero\n\t"
 5248             "MOV    $dst, -1\n"
 5249       "not_zero:\n\t"
 5250             "NEG    $dst\n\t"
 5251             "ADD    $dst, 63\n" %}
 5252  ins_encode %{
 5253     Register Rdst = $dst$$Register;
 5254     Register Rsrc = $src$$Register;
 5255     Label msw_is_zero;
 5256     Label not_zero;
 5257     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
 5258     __ jccb(Assembler::zero, msw_is_zero);
 5259     __ addl(Rdst, BitsPerInt);
 5260     __ jmpb(not_zero);
 5261     __ bind(msw_is_zero);
 5262     __ bsrl(Rdst, Rsrc);
 5263     __ jccb(Assembler::notZero, not_zero);
 5264     __ movl(Rdst, -1);
 5265     __ bind(not_zero);
 5266     __ negl(Rdst);
 5267     __ addl(Rdst, BitsPerLong - 1);
 5268   %}
 5269   ins_pipe(ialu_reg);
 5270 %}
 5271 
 5272 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5273   predicate(UseCountTrailingZerosInstruction);
 5274   match(Set dst (CountTrailingZerosI src));
 5275   effect(KILL cr);
 5276 
 5277   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
 5278   ins_encode %{
 5279     __ tzcntl($dst$$Register, $src$$Register);
 5280   %}
 5281   ins_pipe(ialu_reg);
 5282 %}
 5283 
 5284 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
 5285   predicate(!UseCountTrailingZerosInstruction);
 5286   match(Set dst (CountTrailingZerosI src));
 5287   effect(KILL cr);
 5288 
 5289   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
 5290             "JNZ    done\n\t"
 5291             "MOV    $dst, 32\n"
 5292       "done:" %}
 5293   ins_encode %{
 5294     Register Rdst = $dst$$Register;
 5295     Label done;
 5296     __ bsfl(Rdst, $src$$Register);
 5297     __ jccb(Assembler::notZero, done);
 5298     __ movl(Rdst, BitsPerInt);
 5299     __ bind(done);
 5300   %}
 5301   ins_pipe(ialu_reg);
 5302 %}
 5303 
 5304 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
 5305   predicate(UseCountTrailingZerosInstruction);
 5306   match(Set dst (CountTrailingZerosL src));
 5307   effect(TEMP dst, KILL cr);
 5308 
 5309   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
 5310             "JNC    done\n\t"
 5311             "TZCNT  $dst, $src.hi\n\t"
 5312             "ADD    $dst, 32\n"
 5313             "done:" %}
 5314   ins_encode %{
 5315     Register Rdst = $dst$$Register;
 5316     Register Rsrc = $src$$Register;
 5317     Label done;
 5318     __ tzcntl(Rdst, Rsrc);
 5319     __ jccb(Assembler::carryClear, done);
 5320     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
 5321     __ addl(Rdst, BitsPerInt);
 5322     __ bind(done);
 5323   %}
 5324   ins_pipe(ialu_reg);
 5325 %}
 5326 
 5327 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
 5328   predicate(!UseCountTrailingZerosInstruction);
 5329   match(Set dst (CountTrailingZerosL src));
 5330   effect(TEMP dst, KILL cr);
 5331 
 5332   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
 5333             "JNZ    done\n\t"
 5334             "BSF    $dst, $src.hi\n\t"
 5335             "JNZ    msw_not_zero\n\t"
 5336             "MOV    $dst, 32\n"
 5337       "msw_not_zero:\n\t"
 5338             "ADD    $dst, 32\n"
 5339       "done:" %}
 5340   ins_encode %{
 5341     Register Rdst = $dst$$Register;
 5342     Register Rsrc = $src$$Register;
 5343     Label msw_not_zero;
 5344     Label done;
 5345     __ bsfl(Rdst, Rsrc);
 5346     __ jccb(Assembler::notZero, done);
 5347     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
 5348     __ jccb(Assembler::notZero, msw_not_zero);
 5349     __ movl(Rdst, BitsPerInt);
 5350     __ bind(msw_not_zero);
 5351     __ addl(Rdst, BitsPerInt);
 5352     __ bind(done);
 5353   %}
 5354   ins_pipe(ialu_reg);
 5355 %}
 5356 
 5357 
 5358 //---------- Population Count Instructions -------------------------------------
 5359 
 5360 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
 5361   predicate(UsePopCountInstruction);
 5362   match(Set dst (PopCountI src));
 5363   effect(KILL cr);
 5364 
 5365   format %{ "POPCNT $dst, $src" %}
 5366   ins_encode %{
 5367     __ popcntl($dst$$Register, $src$$Register);
 5368   %}
 5369   ins_pipe(ialu_reg);
 5370 %}
 5371 
 5372 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
 5373   predicate(UsePopCountInstruction);
 5374   match(Set dst (PopCountI (LoadI mem)));
 5375   effect(KILL cr);
 5376 
 5377   format %{ "POPCNT $dst, $mem" %}
 5378   ins_encode %{
 5379     __ popcntl($dst$$Register, $mem$$Address);
 5380   %}
 5381   ins_pipe(ialu_reg);
 5382 %}
 5383 
 5384 // Note: Long.bitCount(long) returns an int.
 5385 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 5386   predicate(UsePopCountInstruction);
 5387   match(Set dst (PopCountL src));
 5388   effect(KILL cr, TEMP tmp, TEMP dst);
 5389 
 5390   format %{ "POPCNT $dst, $src.lo\n\t"
 5391             "POPCNT $tmp, $src.hi\n\t"
 5392             "ADD    $dst, $tmp" %}
 5393   ins_encode %{
 5394     __ popcntl($dst$$Register, $src$$Register);
 5395     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 5396     __ addl($dst$$Register, $tmp$$Register);
 5397   %}
 5398   ins_pipe(ialu_reg);
 5399 %}
 5400 
 5401 // Note: Long.bitCount(long) returns an int.
 5402 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
 5403   predicate(UsePopCountInstruction);
 5404   match(Set dst (PopCountL (LoadL mem)));
 5405   effect(KILL cr, TEMP tmp, TEMP dst);
 5406 
 5407   format %{ "POPCNT $dst, $mem\n\t"
 5408             "POPCNT $tmp, $mem+4\n\t"
 5409             "ADD    $dst, $tmp" %}
 5410   ins_encode %{
 5411     //__ popcntl($dst$$Register, $mem$$Address$$first);
 5412     //__ popcntl($tmp$$Register, $mem$$Address$$second);
 5413     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
 5414     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
 5415     __ addl($dst$$Register, $tmp$$Register);
 5416   %}
 5417   ins_pipe(ialu_reg);
 5418 %}
 5419 
 5420 
 5421 //----------Load/Store/Move Instructions---------------------------------------
 5422 //----------Load Instructions--------------------------------------------------
 5423 // Load Byte (8bit signed)
 5424 instruct loadB(xRegI dst, memory mem) %{
 5425   match(Set dst (LoadB mem));
 5426 
 5427   ins_cost(125);
 5428   format %{ "MOVSX8 $dst,$mem\t# byte" %}
 5429 
 5430   ins_encode %{
 5431     __ movsbl($dst$$Register, $mem$$Address);
 5432   %}
 5433 
 5434   ins_pipe(ialu_reg_mem);
 5435 %}
 5436 
 5437 // Load Byte (8bit signed) into Long Register
 5438 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5439   match(Set dst (ConvI2L (LoadB mem)));
 5440   effect(KILL cr);
 5441 
 5442   ins_cost(375);
 5443   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
 5444             "MOV    $dst.hi,$dst.lo\n\t"
 5445             "SAR    $dst.hi,7" %}
 5446 
 5447   ins_encode %{
 5448     __ movsbl($dst$$Register, $mem$$Address);
 5449     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
 5451   %}
 5452 
 5453   ins_pipe(ialu_reg_mem);
 5454 %}
 5455 
 5456 // Load Unsigned Byte (8bit UNsigned)
 5457 instruct loadUB(xRegI dst, memory mem) %{
 5458   match(Set dst (LoadUB mem));
 5459 
 5460   ins_cost(125);
 5461   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
 5462 
 5463   ins_encode %{
 5464     __ movzbl($dst$$Register, $mem$$Address);
 5465   %}
 5466 
 5467   ins_pipe(ialu_reg_mem);
 5468 %}
 5469 
 5470 // Load Unsigned Byte (8 bit UNsigned) into Long Register
 5471 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5472   match(Set dst (ConvI2L (LoadUB mem)));
 5473   effect(KILL cr);
 5474 
 5475   ins_cost(250);
 5476   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
 5477             "XOR    $dst.hi,$dst.hi" %}
 5478 
 5479   ins_encode %{
 5480     Register Rdst = $dst$$Register;
 5481     __ movzbl(Rdst, $mem$$Address);
 5482     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5483   %}
 5484 
 5485   ins_pipe(ialu_reg_mem);
 5486 %}
 5487 
 5488 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
 5489 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5490   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
 5491   effect(KILL cr);
 5492 
 5493   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
 5494             "XOR    $dst.hi,$dst.hi\n\t"
 5495             "AND    $dst.lo,right_n_bits($mask, 8)" %}
 5496   ins_encode %{
 5497     Register Rdst = $dst$$Register;
 5498     __ movzbl(Rdst, $mem$$Address);
 5499     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5500     __ andl(Rdst, $mask$$constant & right_n_bits(8));
 5501   %}
 5502   ins_pipe(ialu_reg_mem);
 5503 %}
 5504 
 5505 // Load Short (16bit signed)
 5506 instruct loadS(rRegI dst, memory mem) %{
 5507   match(Set dst (LoadS mem));
 5508 
 5509   ins_cost(125);
 5510   format %{ "MOVSX  $dst,$mem\t# short" %}
 5511 
 5512   ins_encode %{
 5513     __ movswl($dst$$Register, $mem$$Address);
 5514   %}
 5515 
 5516   ins_pipe(ialu_reg_mem);
 5517 %}
 5518 
 5519 // Load Short (16 bit signed) to Byte (8 bit signed)
 5520 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5521   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 5522 
 5523   ins_cost(125);
 5524   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
 5525   ins_encode %{
 5526     __ movsbl($dst$$Register, $mem$$Address);
 5527   %}
 5528   ins_pipe(ialu_reg_mem);
 5529 %}
 5530 
 5531 // Load Short (16bit signed) into Long Register
 5532 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5533   match(Set dst (ConvI2L (LoadS mem)));
 5534   effect(KILL cr);
 5535 
 5536   ins_cost(375);
 5537   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
 5538             "MOV    $dst.hi,$dst.lo\n\t"
 5539             "SAR    $dst.hi,15" %}
 5540 
 5541   ins_encode %{
 5542     __ movswl($dst$$Register, $mem$$Address);
 5543     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
 5545   %}
 5546 
 5547   ins_pipe(ialu_reg_mem);
 5548 %}
 5549 
 5550 // Load Unsigned Short/Char (16bit unsigned)
 5551 instruct loadUS(rRegI dst, memory mem) %{
 5552   match(Set dst (LoadUS mem));
 5553 
 5554   ins_cost(125);
 5555   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
 5556 
 5557   ins_encode %{
 5558     __ movzwl($dst$$Register, $mem$$Address);
 5559   %}
 5560 
 5561   ins_pipe(ialu_reg_mem);
 5562 %}
 5563 
 5564 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
 5565 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5566   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 5567 
 5568   ins_cost(125);
 5569   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
 5570   ins_encode %{
 5571     __ movsbl($dst$$Register, $mem$$Address);
 5572   %}
 5573   ins_pipe(ialu_reg_mem);
 5574 %}
 5575 
 5576 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
 5577 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5578   match(Set dst (ConvI2L (LoadUS mem)));
 5579   effect(KILL cr);
 5580 
 5581   ins_cost(250);
 5582   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
 5583             "XOR    $dst.hi,$dst.hi" %}
 5584 
 5585   ins_encode %{
 5586     __ movzwl($dst$$Register, $mem$$Address);
 5587     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5588   %}
 5589 
 5590   ins_pipe(ialu_reg_mem);
 5591 %}
 5592 
 5593 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
 5594 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5595   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5596   effect(KILL cr);
 5597 
 5598   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
 5599             "XOR    $dst.hi,$dst.hi" %}
 5600   ins_encode %{
 5601     Register Rdst = $dst$$Register;
 5602     __ movzbl(Rdst, $mem$$Address);
 5603     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5604   %}
 5605   ins_pipe(ialu_reg_mem);
 5606 %}
 5607 
 5608 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
 5609 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
 5610   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
 5611   effect(KILL cr);
 5612 
 5613   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
 5614             "XOR    $dst.hi,$dst.hi\n\t"
 5615             "AND    $dst.lo,right_n_bits($mask, 16)" %}
 5616   ins_encode %{
 5617     Register Rdst = $dst$$Register;
 5618     __ movzwl(Rdst, $mem$$Address);
 5619     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5620     __ andl(Rdst, $mask$$constant & right_n_bits(16));
 5621   %}
 5622   ins_pipe(ialu_reg_mem);
 5623 %}
 5624 
 5625 // Load Integer
 5626 instruct loadI(rRegI dst, memory mem) %{
 5627   match(Set dst (LoadI mem));
 5628 
 5629   ins_cost(125);
 5630   format %{ "MOV    $dst,$mem\t# int" %}
 5631 
 5632   ins_encode %{
 5633     __ movl($dst$$Register, $mem$$Address);
 5634   %}
 5635 
 5636   ins_pipe(ialu_reg_mem);
 5637 %}
 5638 
 5639 // Load Integer (32 bit signed) to Byte (8 bit signed)
 5640 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
 5641   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 5642 
 5643   ins_cost(125);
 5644   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
 5645   ins_encode %{
 5646     __ movsbl($dst$$Register, $mem$$Address);
 5647   %}
 5648   ins_pipe(ialu_reg_mem);
 5649 %}
 5650 
 5651 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
 5652 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
 5653   match(Set dst (AndI (LoadI mem) mask));
 5654 
 5655   ins_cost(125);
 5656   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
 5657   ins_encode %{
 5658     __ movzbl($dst$$Register, $mem$$Address);
 5659   %}
 5660   ins_pipe(ialu_reg_mem);
 5661 %}
 5662 
 5663 // Load Integer (32 bit signed) to Short (16 bit signed)
 5664 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
 5665   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 5666 
 5667   ins_cost(125);
 5668   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
 5669   ins_encode %{
 5670     __ movswl($dst$$Register, $mem$$Address);
 5671   %}
 5672   ins_pipe(ialu_reg_mem);
 5673 %}
 5674 
 5675 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
 5676 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
 5677   match(Set dst (AndI (LoadI mem) mask));
 5678 
 5679   ins_cost(125);
 5680   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
 5681   ins_encode %{
 5682     __ movzwl($dst$$Register, $mem$$Address);
 5683   %}
 5684   ins_pipe(ialu_reg_mem);
 5685 %}
 5686 
 5687 // Load Integer into Long Register
 5688 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
 5689   match(Set dst (ConvI2L (LoadI mem)));
 5690   effect(KILL cr);
 5691 
 5692   ins_cost(375);
 5693   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
 5694             "MOV    $dst.hi,$dst.lo\n\t"
 5695             "SAR    $dst.hi,31" %}
 5696 
 5697   ins_encode %{
 5698     __ movl($dst$$Register, $mem$$Address);
 5699     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
 5700     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
 5701   %}
 5702 
 5703   ins_pipe(ialu_reg_mem);
 5704 %}
 5705 
 5706 // Load Integer with mask 0xFF into Long Register
 5707 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
 5708   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5709   effect(KILL cr);
 5710 
 5711   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
 5712             "XOR    $dst.hi,$dst.hi" %}
 5713   ins_encode %{
 5714     Register Rdst = $dst$$Register;
 5715     __ movzbl(Rdst, $mem$$Address);
 5716     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5717   %}
 5718   ins_pipe(ialu_reg_mem);
 5719 %}
 5720 
 5721 // Load Integer with mask 0xFFFF into Long Register
 5722 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
 5723   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5724   effect(KILL cr);
 5725 
 5726   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
 5727             "XOR    $dst.hi,$dst.hi" %}
 5728   ins_encode %{
 5729     Register Rdst = $dst$$Register;
 5730     __ movzwl(Rdst, $mem$$Address);
 5731     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5732   %}
 5733   ins_pipe(ialu_reg_mem);
 5734 %}
 5735 
 5736 // Load Integer with 31-bit mask into Long Register
 5737 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
 5738   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
 5739   effect(KILL cr);
 5740 
 5741   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
 5742             "XOR    $dst.hi,$dst.hi\n\t"
 5743             "AND    $dst.lo,$mask" %}
 5744   ins_encode %{
 5745     Register Rdst = $dst$$Register;
 5746     __ movl(Rdst, $mem$$Address);
 5747     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
 5748     __ andl(Rdst, $mask$$constant);
 5749   %}
 5750   ins_pipe(ialu_reg_mem);
 5751 %}
 5752 
 5753 // Load Unsigned Integer into Long Register
 5754 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
 5755   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
 5756   effect(KILL cr);
 5757 
 5758   ins_cost(250);
 5759   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
 5760             "XOR    $dst.hi,$dst.hi" %}
 5761 
 5762   ins_encode %{
 5763     __ movl($dst$$Register, $mem$$Address);
 5764     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
 5765   %}
 5766 
 5767   ins_pipe(ialu_reg_mem);
 5768 %}
 5769 
 5770 // Load Long.  Cannot clobber address while loading, so restrict address
 5771 // register to ESI
 5772 instruct loadL(eRegL dst, load_long_memory mem) %{
 5773   predicate(!((LoadLNode*)n)->require_atomic_access());
 5774   match(Set dst (LoadL mem));
 5775 
 5776   ins_cost(250);
 5777   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
 5778             "MOV    $dst.hi,$mem+4" %}
 5779 
 5780   ins_encode %{
 5781     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
 5782     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
 5783     __ movl($dst$$Register, Amemlo);
 5784     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
 5785   %}
 5786 
 5787   ins_pipe(ialu_reg_long_mem);
 5788 %}
 5789 
 5790 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
 5791 // then store it down to the stack and reload on the int
 5792 // side.
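// (A single x87 FILD/FISTP pair moves all 8 bytes in one memory access, which
// the two 32-bit MOVs used by loadL above cannot guarantee to be atomic.)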
 5793 instruct loadL_volatile(stackSlotL dst, memory mem) %{
 5794   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
 5795   match(Set dst (LoadL mem));
 5796 
 5797   ins_cost(200);
 5798   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
 5799             "FISTp  $dst" %}
 5800   ins_encode(enc_loadL_volatile(mem,dst));
 5801   ins_pipe( fpu_reg_mem );
 5802 %}
 5803 
 5804 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
 5805   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5806   match(Set dst (LoadL mem));
 5807   effect(TEMP tmp);
 5808   ins_cost(180);
 5809   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5810             "MOVSD  $dst,$tmp" %}
 5811   ins_encode %{
 5812     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5813     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
 5814   %}
 5815   ins_pipe( pipe_slow );
 5816 %}
 5817 
 5818 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
 5819   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
 5820   match(Set dst (LoadL mem));
 5821   effect(TEMP tmp);
 5822   ins_cost(160);
 5823   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
 5824             "MOVD   $dst.lo,$tmp\n\t"
 5825             "PSRLQ  $tmp,32\n\t"
 5826             "MOVD   $dst.hi,$tmp" %}
 5827   ins_encode %{
 5828     __ movdbl($tmp$$XMMRegister, $mem$$Address);
 5829     __ movdl($dst$$Register, $tmp$$XMMRegister);
 5830     __ psrlq($tmp$$XMMRegister, 32);
 5831     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
 5832   %}
 5833   ins_pipe( pipe_slow );
 5834 %}
 5835 
 5836 // Load Range
 5837 instruct loadRange(rRegI dst, memory mem) %{
 5838   match(Set dst (LoadRange mem));
 5839 
 5840   ins_cost(125);
 5841   format %{ "MOV    $dst,$mem" %}
 5842   opcode(0x8B);
 5843   ins_encode( OpcP, RegMem(dst,mem));
 5844   ins_pipe( ialu_reg_mem );
 5845 %}
 5846 
 5847 
 5848 // Load Pointer
 5849 instruct loadP(eRegP dst, memory mem) %{
 5850   match(Set dst (LoadP mem));
 5851 
 5852   ins_cost(125);
 5853   format %{ "MOV    $dst,$mem" %}
 5854   opcode(0x8B);
 5855   ins_encode( OpcP, RegMem(dst,mem));
 5856   ins_pipe( ialu_reg_mem );
 5857 %}
 5858 
 5859 // Load Klass Pointer
 5860 instruct loadKlass(eRegP dst, memory mem) %{
 5861   match(Set dst (LoadKlass mem));
 5862 
 5863   ins_cost(125);
 5864   format %{ "MOV    $dst,$mem" %}
 5865   opcode(0x8B);
 5866   ins_encode( OpcP, RegMem(dst,mem));
 5867   ins_pipe( ialu_reg_mem );
 5868 %}
 5869 
 5870 // Load Double
 5871 instruct loadDPR(regDPR dst, memory mem) %{
 5872   predicate(UseSSE<=1);
 5873   match(Set dst (LoadD mem));
 5874 
 5875   ins_cost(150);
 5876   format %{ "FLD_D  ST,$mem\n\t"
 5877             "FSTP   $dst" %}
 5878   opcode(0xDD);               /* DD /0 */
 5879   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5880               Pop_Reg_DPR(dst) );
 5881   ins_pipe( fpu_reg_mem );
 5882 %}
 5883 
 5884 // Load Double to XMM
 5885 instruct loadD(regD dst, memory mem) %{
 5886   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
 5887   match(Set dst (LoadD mem));
 5888   ins_cost(145);
 5889   format %{ "MOVSD  $dst,$mem" %}
 5890   ins_encode %{
 5891     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5892   %}
 5893   ins_pipe( pipe_slow );
 5894 %}
 5895 
 5896 instruct loadD_partial(regD dst, memory mem) %{
 5897   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
 5898   match(Set dst (LoadD mem));
 5899   ins_cost(145);
 5900   format %{ "MOVLPD $dst,$mem" %}
 5901   ins_encode %{
 5902     __ movdbl ($dst$$XMMRegister, $mem$$Address);
 5903   %}
 5904   ins_pipe( pipe_slow );
 5905 %}
 5906 
 5907 // Load to XMM register (single-precision floating point)
 5908 // MOVSS instruction
 5909 instruct loadF(regF dst, memory mem) %{
 5910   predicate(UseSSE>=1);
 5911   match(Set dst (LoadF mem));
 5912   ins_cost(145);
 5913   format %{ "MOVSS  $dst,$mem" %}
 5914   ins_encode %{
 5915     __ movflt ($dst$$XMMRegister, $mem$$Address);
 5916   %}
 5917   ins_pipe( pipe_slow );
 5918 %}
 5919 
 5920 // Load Float
 5921 instruct loadFPR(regFPR dst, memory mem) %{
 5922   predicate(UseSSE==0);
 5923   match(Set dst (LoadF mem));
 5924 
 5925   ins_cost(150);
 5926   format %{ "FLD_S  ST,$mem\n\t"
 5927             "FSTP   $dst" %}
 5928   opcode(0xD9);               /* D9 /0 */
 5929   ins_encode( OpcP, RMopc_Mem(0x00,mem),
 5930               Pop_Reg_FPR(dst) );
 5931   ins_pipe( fpu_reg_mem );
 5932 %}
 5933 
 5934 // Load Effective Address
 5935 instruct leaP8(eRegP dst, indOffset8 mem) %{
 5936   match(Set dst mem);
 5937 
 5938   ins_cost(110);
 5939   format %{ "LEA    $dst,$mem" %}
 5940   opcode(0x8D);
 5941   ins_encode( OpcP, RegMem(dst,mem));
 5942   ins_pipe( ialu_reg_reg_fat );
 5943 %}
 5944 
 5945 instruct leaP32(eRegP dst, indOffset32 mem) %{
 5946   match(Set dst mem);
 5947 
 5948   ins_cost(110);
 5949   format %{ "LEA    $dst,$mem" %}
 5950   opcode(0x8D);
 5951   ins_encode( OpcP, RegMem(dst,mem));
 5952   ins_pipe( ialu_reg_reg_fat );
 5953 %}
 5954 
 5955 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
 5956   match(Set dst mem);
 5957 
 5958   ins_cost(110);
 5959   format %{ "LEA    $dst,$mem" %}
 5960   opcode(0x8D);
 5961   ins_encode( OpcP, RegMem(dst,mem));
 5962   ins_pipe( ialu_reg_reg_fat );
 5963 %}
 5964 
 5965 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
 5966   match(Set dst mem);
 5967 
 5968   ins_cost(110);
 5969   format %{ "LEA    $dst,$mem" %}
 5970   opcode(0x8D);
 5971   ins_encode( OpcP, RegMem(dst,mem));
 5972   ins_pipe( ialu_reg_reg_fat );
 5973 %}
 5974 
 5975 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
 5976   match(Set dst mem);
 5977 
 5978   ins_cost(110);
 5979   format %{ "LEA    $dst,$mem" %}
 5980   opcode(0x8D);
 5981   ins_encode( OpcP, RegMem(dst,mem));
 5982   ins_pipe( ialu_reg_reg_fat );
 5983 %}
 5984 
 5985 // Load Constant
 5986 instruct loadConI(rRegI dst, immI src) %{
 5987   match(Set dst src);
 5988 
 5989   format %{ "MOV    $dst,$src" %}
 5990   ins_encode( LdImmI(dst, src) );
 5991   ins_pipe( ialu_reg_fat );
 5992 %}
 5993 
 5994 // Load Constant zero
 5995 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
 5996   match(Set dst src);
 5997   effect(KILL cr);
 5998 
 5999   ins_cost(50);
 6000   format %{ "XOR    $dst,$dst" %}
  opcode(0x33);  /* XOR r32,r/m32 */
 6002   ins_encode( OpcP, RegReg( dst, dst ) );
 6003   ins_pipe( ialu_reg );
 6004 %}
 6005 
 6006 instruct loadConP(eRegP dst, immP src) %{
 6007   match(Set dst src);
 6008 
 6009   format %{ "MOV    $dst,$src" %}
 6010   opcode(0xB8);  /* + rd */
 6011   ins_encode( LdImmP(dst, src) );
 6012   ins_pipe( ialu_reg_fat );
 6013 %}
 6014 
 6015 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
 6016   match(Set dst src);
 6017   effect(KILL cr);
 6018   ins_cost(200);
 6019   format %{ "MOV    $dst.lo,$src.lo\n\t"
 6020             "MOV    $dst.hi,$src.hi" %}
 6021   opcode(0xB8);
 6022   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
 6023   ins_pipe( ialu_reg_long_fat );
 6024 %}
 6025 
 6026 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
 6027   match(Set dst src);
 6028   effect(KILL cr);
 6029   ins_cost(150);
 6030   format %{ "XOR    $dst.lo,$dst.lo\n\t"
 6031             "XOR    $dst.hi,$dst.hi" %}
 6032   opcode(0x33,0x33);
 6033   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
 6034   ins_pipe( ialu_reg_long );
 6035 %}
 6036 
 6037 // The instruction usage is guarded by predicate in operand immFPR().
 6038 instruct loadConFPR(regFPR dst, immFPR con) %{
 6039   match(Set dst con);
 6040   ins_cost(125);
 6041   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
 6042             "FSTP   $dst" %}
 6043   ins_encode %{
 6044     __ fld_s($constantaddress($con));
 6045     __ fstp_d($dst$$reg);
 6046   %}
 6047   ins_pipe(fpu_reg_con);
 6048 %}
 6049 
 6050 // The instruction usage is guarded by predicate in operand immFPR0().
 6051 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
 6052   match(Set dst con);
 6053   ins_cost(125);
 6054   format %{ "FLDZ   ST\n\t"
 6055             "FSTP   $dst" %}
 6056   ins_encode %{
 6057     __ fldz();
 6058     __ fstp_d($dst$$reg);
 6059   %}
 6060   ins_pipe(fpu_reg_con);
 6061 %}
 6062 
 6063 // The instruction usage is guarded by predicate in operand immFPR1().
 6064 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
 6065   match(Set dst con);
 6066   ins_cost(125);
 6067   format %{ "FLD1   ST\n\t"
 6068             "FSTP   $dst" %}
 6069   ins_encode %{
 6070     __ fld1();
 6071     __ fstp_d($dst$$reg);
 6072   %}
 6073   ins_pipe(fpu_reg_con);
 6074 %}
 6075 
 6076 // The instruction usage is guarded by predicate in operand immF().
 6077 instruct loadConF(regF dst, immF con) %{
 6078   match(Set dst con);
 6079   ins_cost(125);
 6080   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
 6081   ins_encode %{
 6082     __ movflt($dst$$XMMRegister, $constantaddress($con));
 6083   %}
 6084   ins_pipe(pipe_slow);
 6085 %}
 6086 
 6087 // The instruction usage is guarded by predicate in operand immF0().
 6088 instruct loadConF0(regF dst, immF0 src) %{
 6089   match(Set dst src);
 6090   ins_cost(100);
 6091   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
 6092   ins_encode %{
 6093     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
 6094   %}
 6095   ins_pipe(pipe_slow);
 6096 %}
 6097 
 6098 // The instruction usage is guarded by predicate in operand immDPR().
 6099 instruct loadConDPR(regDPR dst, immDPR con) %{
 6100   match(Set dst con);
 6101   ins_cost(125);
 6102 
 6103   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
 6104             "FSTP   $dst" %}
 6105   ins_encode %{
 6106     __ fld_d($constantaddress($con));
 6107     __ fstp_d($dst$$reg);
 6108   %}
 6109   ins_pipe(fpu_reg_con);
 6110 %}
 6111 
 6112 // The instruction usage is guarded by predicate in operand immDPR0().
 6113 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
 6114   match(Set dst con);
 6115   ins_cost(125);
 6116 
 6117   format %{ "FLDZ   ST\n\t"
 6118             "FSTP   $dst" %}
 6119   ins_encode %{
 6120     __ fldz();
 6121     __ fstp_d($dst$$reg);
 6122   %}
 6123   ins_pipe(fpu_reg_con);
 6124 %}
 6125 
 6126 // The instruction usage is guarded by predicate in operand immDPR1().
 6127 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
 6128   match(Set dst con);
 6129   ins_cost(125);
 6130 
 6131   format %{ "FLD1   ST\n\t"
 6132             "FSTP   $dst" %}
 6133   ins_encode %{
 6134     __ fld1();
 6135     __ fstp_d($dst$$reg);
 6136   %}
 6137   ins_pipe(fpu_reg_con);
 6138 %}
 6139 
 6140 // The instruction usage is guarded by predicate in operand immD().
 6141 instruct loadConD(regD dst, immD con) %{
 6142   match(Set dst con);
 6143   ins_cost(125);
 6144   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
 6145   ins_encode %{
 6146     __ movdbl($dst$$XMMRegister, $constantaddress($con));
 6147   %}
 6148   ins_pipe(pipe_slow);
 6149 %}
 6150 
 6151 // The instruction usage is guarded by predicate in operand immD0().
 6152 instruct loadConD0(regD dst, immD0 src) %{
 6153   match(Set dst src);
 6154   ins_cost(100);
 6155   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
 6156   ins_encode %{
    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
 6158   %}
 6159   ins_pipe( pipe_slow );
 6160 %}
 6161 
 6162 // Load Stack Slot
 6163 instruct loadSSI(rRegI dst, stackSlotI src) %{
 6164   match(Set dst src);
 6165   ins_cost(125);
 6166 
 6167   format %{ "MOV    $dst,$src" %}
 6168   opcode(0x8B);
 6169   ins_encode( OpcP, RegMem(dst,src));
 6170   ins_pipe( ialu_reg_mem );
 6171 %}
 6172 
 6173 instruct loadSSL(eRegL dst, stackSlotL src) %{
 6174   match(Set dst src);
 6175 
 6176   ins_cost(200);
 6177   format %{ "MOV    $dst,$src.lo\n\t"
 6178             "MOV    $dst+4,$src.hi" %}
 6179   opcode(0x8B, 0x8B);
 6180   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
 6181   ins_pipe( ialu_mem_long_reg );
 6182 %}
 6183 
 6184 // Load Stack Slot
 6185 instruct loadSSP(eRegP dst, stackSlotP src) %{
 6186   match(Set dst src);
 6187   ins_cost(125);
 6188 
 6189   format %{ "MOV    $dst,$src" %}
 6190   opcode(0x8B);
 6191   ins_encode( OpcP, RegMem(dst,src));
 6192   ins_pipe( ialu_reg_mem );
 6193 %}
 6194 
 6195 // Load Stack Slot
 6196 instruct loadSSF(regFPR dst, stackSlotF src) %{
 6197   match(Set dst src);
 6198   ins_cost(125);
 6199 
 6200   format %{ "FLD_S  $src\n\t"
 6201             "FSTP   $dst" %}
 6202   opcode(0xD9);               /* D9 /0, FLD m32real */
 6203   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6204               Pop_Reg_FPR(dst) );
 6205   ins_pipe( fpu_reg_mem );
 6206 %}
 6207 
 6208 // Load Stack Slot
 6209 instruct loadSSD(regDPR dst, stackSlotD src) %{
 6210   match(Set dst src);
 6211   ins_cost(125);
 6212 
 6213   format %{ "FLD_D  $src\n\t"
 6214             "FSTP   $dst" %}
 6215   opcode(0xDD);               /* DD /0, FLD m64real */
 6216   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
 6217               Pop_Reg_DPR(dst) );
 6218   ins_pipe( fpu_reg_mem );
 6219 %}
 6220 
 6221 // Prefetch instructions for allocation.
 6222 // Must be safe to execute with invalid address (cannot fault).
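// Roughly, the AllocatePrefetchInstr flag selects the flavor emitted below:
//   0 -> PREFETCHNTA, 1 -> PREFETCHT0, 2 -> PREFETCHT2 (all require UseSSE >= 1),
//   3 -> PREFETCHW; with UseSSE == 0 (and flag != 3) no prefetch is emitted at all.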
 6223 
 6224 instruct prefetchAlloc0( memory mem ) %{
 6225   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
 6226   match(PrefetchAllocation mem);
 6227   ins_cost(0);
 6228   size(0);
 6229   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
 6230   ins_encode();
 6231   ins_pipe(empty);
 6232 %}
 6233 
 6234 instruct prefetchAlloc( memory mem ) %{
 6235   predicate(AllocatePrefetchInstr==3);
 6236   match( PrefetchAllocation mem );
 6237   ins_cost(100);
 6238 
 6239   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
 6240   ins_encode %{
 6241     __ prefetchw($mem$$Address);
 6242   %}
 6243   ins_pipe(ialu_mem);
 6244 %}
 6245 
 6246 instruct prefetchAllocNTA( memory mem ) %{
 6247   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
 6248   match(PrefetchAllocation mem);
 6249   ins_cost(100);
 6250 
 6251   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
 6252   ins_encode %{
 6253     __ prefetchnta($mem$$Address);
 6254   %}
 6255   ins_pipe(ialu_mem);
 6256 %}
 6257 
 6258 instruct prefetchAllocT0( memory mem ) %{
 6259   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
 6260   match(PrefetchAllocation mem);
 6261   ins_cost(100);
 6262 
 6263   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
 6264   ins_encode %{
 6265     __ prefetcht0($mem$$Address);
 6266   %}
 6267   ins_pipe(ialu_mem);
 6268 %}
 6269 
 6270 instruct prefetchAllocT2( memory mem ) %{
 6271   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
 6272   match(PrefetchAllocation mem);
 6273   ins_cost(100);
 6274 
 6275   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
 6276   ins_encode %{
 6277     __ prefetcht2($mem$$Address);
 6278   %}
 6279   ins_pipe(ialu_mem);
 6280 %}
 6281 
 6282 //----------Store Instructions-------------------------------------------------
 6283 
 6284 // Store Byte
 6285 instruct storeB(memory mem, xRegI src) %{
 6286   match(Set mem (StoreB mem src));
 6287 
 6288   ins_cost(125);
 6289   format %{ "MOV8   $mem,$src" %}
 6290   opcode(0x88);
 6291   ins_encode( OpcP, RegMem( src, mem ) );
 6292   ins_pipe( ialu_mem_reg );
 6293 %}
 6294 
 6295 // Store Char/Short
 6296 instruct storeC(memory mem, rRegI src) %{
 6297   match(Set mem (StoreC mem src));
 6298 
 6299   ins_cost(125);
 6300   format %{ "MOV16  $mem,$src" %}
 6301   opcode(0x89, 0x66);
 6302   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
 6303   ins_pipe( ialu_mem_reg );
 6304 %}
 6305 
 6306 // Store Integer
 6307 instruct storeI(memory mem, rRegI src) %{
 6308   match(Set mem (StoreI mem src));
 6309 
 6310   ins_cost(125);
 6311   format %{ "MOV    $mem,$src" %}
 6312   opcode(0x89);
 6313   ins_encode( OpcP, RegMem( src, mem ) );
 6314   ins_pipe( ialu_mem_reg );
 6315 %}
 6316 
 6317 // Store Long
 6318 instruct storeL(long_memory mem, eRegL src) %{
 6319   predicate(!((StoreLNode*)n)->require_atomic_access());
 6320   match(Set mem (StoreL mem src));
 6321 
 6322   ins_cost(200);
 6323   format %{ "MOV    $mem,$src.lo\n\t"
 6324             "MOV    $mem+4,$src.hi" %}
 6325   opcode(0x89, 0x89);
 6326   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
 6327   ins_pipe( ialu_mem_long_reg );
 6328 %}
 6329 
 6330 // Store Long to Integer
 6331 instruct storeL2I(memory mem, eRegL src) %{
 6332   match(Set mem (StoreI mem (ConvL2I src)));
 6333 
 6334   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
 6335   ins_encode %{
 6336     __ movl($mem$$Address, $src$$Register);
 6337   %}
 6338   ins_pipe(ialu_mem_reg);
 6339 %}
 6340 
 6341 // Volatile Store Long.  Must be atomic, so move it into
 6342 // the FP TOS and then do a 64-bit FIST.  Has to probe the
 6343 // target address before the store (for null-ptr checks)
 6344 // so the memory operand is used twice in the encoding.
 6345 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
 6346   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
 6347   match(Set mem (StoreL mem src));
 6348   effect( KILL cr );
 6349   ins_cost(400);
 6350   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6351             "FILD   $src\n\t"
 6352             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
 6353   opcode(0x3B);
 6354   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
 6355   ins_pipe( fpu_reg_mem );
 6356 %}
 6357 
 6358 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
 6359   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6360   match(Set mem (StoreL mem src));
 6361   effect( TEMP tmp, KILL cr );
 6362   ins_cost(380);
 6363   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6364             "MOVSD  $tmp,$src\n\t"
 6365             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6366   ins_encode %{
 6367     __ cmpl(rax, $mem$$Address);
 6368     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
 6369     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6370   %}
 6371   ins_pipe( pipe_slow );
 6372 %}
 6373 
 6374 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
 6375   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
 6376   match(Set mem (StoreL mem src));
 6377   effect( TEMP tmp2 , TEMP tmp, KILL cr );
 6378   ins_cost(360);
 6379   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
 6380             "MOVD   $tmp,$src.lo\n\t"
 6381             "MOVD   $tmp2,$src.hi\n\t"
 6382             "PUNPCKLDQ $tmp,$tmp2\n\t"
 6383             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
 6384   ins_encode %{
 6385     __ cmpl(rax, $mem$$Address);
 6386     __ movdl($tmp$$XMMRegister, $src$$Register);
 6387     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
 6388     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
 6389     __ movdbl($mem$$Address, $tmp$$XMMRegister);
 6390   %}
 6391   ins_pipe( pipe_slow );
 6392 %}
 6393 
 6394 // Store Pointer; for storing unknown oops and raw pointers
 6395 instruct storeP(memory mem, anyRegP src) %{
 6396   match(Set mem (StoreP mem src));
 6397 
 6398   ins_cost(125);
 6399   format %{ "MOV    $mem,$src" %}
 6400   opcode(0x89);
 6401   ins_encode( OpcP, RegMem( src, mem ) );
 6402   ins_pipe( ialu_mem_reg );
 6403 %}
 6404 
 6405 // Store Integer Immediate
 6406 instruct storeImmI(memory mem, immI src) %{
 6407   match(Set mem (StoreI mem src));
 6408 
 6409   ins_cost(150);
 6410   format %{ "MOV    $mem,$src" %}
 6411   opcode(0xC7);               /* C7 /0 */
 6412   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6413   ins_pipe( ialu_mem_imm );
 6414 %}
 6415 
 6416 // Store Short/Char Immediate
 6417 instruct storeImmI16(memory mem, immI16 src) %{
 6418   predicate(UseStoreImmI16);
 6419   match(Set mem (StoreC mem src));
 6420 
 6421   ins_cost(150);
 6422   format %{ "MOV16  $mem,$src" %}
 6423   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
 6424   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
 6425   ins_pipe( ialu_mem_imm );
 6426 %}
 6427 
 6428 // Store Pointer Immediate; null pointers or constant oops that do not
 6429 // need card-mark barriers.
 6430 instruct storeImmP(memory mem, immP src) %{
 6431   match(Set mem (StoreP mem src));
 6432 
 6433   ins_cost(150);
 6434   format %{ "MOV    $mem,$src" %}
 6435   opcode(0xC7);               /* C7 /0 */
 6436   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
 6437   ins_pipe( ialu_mem_imm );
 6438 %}
 6439 
 6440 // Store Byte Immediate
 6441 instruct storeImmB(memory mem, immI8 src) %{
 6442   match(Set mem (StoreB mem src));
 6443 
 6444   ins_cost(150);
 6445   format %{ "MOV8   $mem,$src" %}
 6446   opcode(0xC6);               /* C6 /0 */
 6447   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6448   ins_pipe( ialu_mem_imm );
 6449 %}
 6450 
 6451 // Store CMS card-mark Immediate
 6452 instruct storeImmCM(memory mem, immI8 src) %{
 6453   match(Set mem (StoreCM mem src));
 6454 
 6455   ins_cost(150);
 6456   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
 6457   opcode(0xC6);               /* C6 /0 */
 6458   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
 6459   ins_pipe( ialu_mem_imm );
 6460 %}
 6461 
 6462 // Store Double
 6463 instruct storeDPR( memory mem, regDPR1 src) %{
 6464   predicate(UseSSE<=1);
 6465   match(Set mem (StoreD mem src));
 6466 
 6467   ins_cost(100);
 6468   format %{ "FST_D  $mem,$src" %}
 6469   opcode(0xDD);       /* DD /2 */
 6470   ins_encode( enc_FPR_store(mem,src) );
 6471   ins_pipe( fpu_mem_reg );
 6472 %}
 6473 
 6474 // Store double does rounding on x86
 6475 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
 6476   predicate(UseSSE<=1);
 6477   match(Set mem (StoreD mem (RoundDouble src)));
 6478 
 6479   ins_cost(100);
 6480   format %{ "FST_D  $mem,$src\t# round" %}
 6481   opcode(0xDD);       /* DD /2 */
 6482   ins_encode( enc_FPR_store(mem,src) );
 6483   ins_pipe( fpu_mem_reg );
 6484 %}
 6485 
// Store XMM register to memory (double-precision floating point)
 6487 // MOVSD instruction
 6488 instruct storeD(memory mem, regD src) %{
 6489   predicate(UseSSE>=2);
 6490   match(Set mem (StoreD mem src));
 6491   ins_cost(95);
 6492   format %{ "MOVSD  $mem,$src" %}
 6493   ins_encode %{
 6494     __ movdbl($mem$$Address, $src$$XMMRegister);
 6495   %}
 6496   ins_pipe( pipe_slow );
 6497 %}
 6498 
 6499 // Store XMM register to memory (single-precision floating point)
 6500 // MOVSS instruction
 6501 instruct storeF(memory mem, regF src) %{
 6502   predicate(UseSSE>=1);
 6503   match(Set mem (StoreF mem src));
 6504   ins_cost(95);
 6505   format %{ "MOVSS  $mem,$src" %}
 6506   ins_encode %{
 6507     __ movflt($mem$$Address, $src$$XMMRegister);
 6508   %}
 6509   ins_pipe( pipe_slow );
 6510 %}
 6511 
 6512 
 6513 // Store Float
 6514 instruct storeFPR( memory mem, regFPR1 src) %{
 6515   predicate(UseSSE==0);
 6516   match(Set mem (StoreF mem src));
 6517 
 6518   ins_cost(100);
 6519   format %{ "FST_S  $mem,$src" %}
 6520   opcode(0xD9);       /* D9 /2 */
 6521   ins_encode( enc_FPR_store(mem,src) );
 6522   ins_pipe( fpu_mem_reg );
 6523 %}
 6524 
 6525 // Store Float does rounding on x86
 6526 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
 6527   predicate(UseSSE==0);
 6528   match(Set mem (StoreF mem (RoundFloat src)));
 6529 
 6530   ins_cost(100);
 6531   format %{ "FST_S  $mem,$src\t# round" %}
 6532   opcode(0xD9);       /* D9 /2 */
 6533   ins_encode( enc_FPR_store(mem,src) );
 6534   ins_pipe( fpu_mem_reg );
 6535 %}
 6536 
// Store Float from a double; the D2F conversion rounds on x86
 6538 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
 6539   predicate(UseSSE<=1);
 6540   match(Set mem (StoreF mem (ConvD2F src)));
 6541 
 6542   ins_cost(100);
 6543   format %{ "FST_S  $mem,$src\t# D-round" %}
 6544   opcode(0xD9);       /* D9 /2 */
 6545   ins_encode( enc_FPR_store(mem,src) );
 6546   ins_pipe( fpu_mem_reg );
 6547 %}
 6548 
// Store immediate Float value (faster than storing from an FPU register)
 6550 // The instruction usage is guarded by predicate in operand immFPR().
 6551 instruct storeFPR_imm( memory mem, immFPR src) %{
 6552   match(Set mem (StoreF mem src));
 6553 
 6554   ins_cost(50);
 6555   format %{ "MOV    $mem,$src\t# store float" %}
 6556   opcode(0xC7);               /* C7 /0 */
 6557   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
 6558   ins_pipe( ialu_mem_imm );
 6559 %}
 6560 
// Store immediate Float value (faster than storing from an XMM register)
 6562 // The instruction usage is guarded by predicate in operand immF().
 6563 instruct storeF_imm( memory mem, immF src) %{
 6564   match(Set mem (StoreF mem src));
 6565 
 6566   ins_cost(50);
 6567   format %{ "MOV    $mem,$src\t# store float" %}
 6568   opcode(0xC7);               /* C7 /0 */
 6569   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
 6570   ins_pipe( ialu_mem_imm );
 6571 %}
 6572 
 6573 // Store Integer to stack slot
 6574 instruct storeSSI(stackSlotI dst, rRegI src) %{
 6575   match(Set dst src);
 6576 
 6577   ins_cost(100);
 6578   format %{ "MOV    $dst,$src" %}
 6579   opcode(0x89);
 6580   ins_encode( OpcPRegSS( dst, src ) );
 6581   ins_pipe( ialu_mem_reg );
 6582 %}
 6583 
 6584 // Store Integer to stack slot
 6585 instruct storeSSP(stackSlotP dst, eRegP src) %{
 6586   match(Set dst src);
 6587 
 6588   ins_cost(100);
 6589   format %{ "MOV    $dst,$src" %}
 6590   opcode(0x89);
 6591   ins_encode( OpcPRegSS( dst, src ) );
 6592   ins_pipe( ialu_mem_reg );
 6593 %}
 6594 
 6595 // Store Long to stack slot
 6596 instruct storeSSL(stackSlotL dst, eRegL src) %{
 6597   match(Set dst src);
 6598 
 6599   ins_cost(200);
 6600   format %{ "MOV    $dst,$src.lo\n\t"
 6601             "MOV    $dst+4,$src.hi" %}
 6602   opcode(0x89, 0x89);
 6603   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
 6604   ins_pipe( ialu_mem_long_reg );
 6605 %}
 6606 
 6607 //----------MemBar Instructions-----------------------------------------------
 6608 // Memory barrier flavors
 6609 
 6610 instruct membar_acquire() %{
 6611   match(MemBarAcquire);
 6612   match(LoadFence);
 6613   ins_cost(400);
 6614 
 6615   size(0);
 6616   format %{ "MEMBAR-acquire ! (empty encoding)" %}
 6617   ins_encode();
 6618   ins_pipe(empty);
 6619 %}
 6620 
 6621 instruct membar_acquire_lock() %{
 6622   match(MemBarAcquireLock);
 6623   ins_cost(0);
 6624 
 6625   size(0);
 6626   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
 6627   ins_encode( );
 6628   ins_pipe(empty);
 6629 %}
 6630 
 6631 instruct membar_release() %{
 6632   match(MemBarRelease);
 6633   match(StoreFence);
 6634   ins_cost(400);
 6635 
 6636   size(0);
 6637   format %{ "MEMBAR-release ! (empty encoding)" %}
 6638   ins_encode( );
 6639   ins_pipe(empty);
 6640 %}
 6641 
 6642 instruct membar_release_lock() %{
 6643   match(MemBarReleaseLock);
 6644   ins_cost(0);
 6645 
 6646   size(0);
 6647   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
 6648   ins_encode( );
 6649   ins_pipe(empty);
 6650 %}
 6651 
 6652 instruct membar_volatile(eFlagsReg cr) %{
 6653   match(MemBarVolatile);
 6654   effect(KILL cr);
 6655   ins_cost(400);
 6656 
 6657   format %{
 6658     $$template
 6659     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
 6660   %}
 6661   ins_encode %{
 6662     __ membar(Assembler::StoreLoad);
 6663   %}
 6664   ins_pipe(pipe_slow);
 6665 %}
 6666 
 6667 instruct unnecessary_membar_volatile() %{
 6668   match(MemBarVolatile);
 6669   predicate(Matcher::post_store_load_barrier(n));
 6670   ins_cost(0);
 6671 
 6672   size(0);
 6673   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
 6674   ins_encode( );
 6675   ins_pipe(empty);
 6676 %}
 6677 
 6678 instruct membar_storestore() %{
 6679   match(MemBarStoreStore);
 6680   match(StoreStoreFence);
 6681   ins_cost(0);
 6682 
 6683   size(0);
 6684   format %{ "MEMBAR-storestore (empty encoding)" %}
 6685   ins_encode( );
 6686   ins_pipe(empty);
 6687 %}
 6688 
 6689 //----------Move Instructions--------------------------------------------------
 6690 instruct castX2P(eAXRegP dst, eAXRegI src) %{
 6691   match(Set dst (CastX2P src));
 6692   format %{ "# X2P  $dst, $src" %}
 6693   ins_encode( /*empty encoding*/ );
 6694   ins_cost(0);
 6695   ins_pipe(empty);
 6696 %}
 6697 
 6698 instruct castP2X(rRegI dst, eRegP src ) %{
 6699   match(Set dst (CastP2X src));
 6700   ins_cost(50);
 6701   format %{ "MOV    $dst, $src\t# CastP2X" %}
 6702   ins_encode( enc_Copy( dst, src) );
 6703   ins_pipe( ialu_reg_reg );
 6704 %}
 6705 
 6706 //----------Conditional Move---------------------------------------------------
 6707 // Conditional move
 6708 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
 6709   predicate(!VM_Version::supports_cmov() );
 6710   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6711   ins_cost(200);
 6712   format %{ "J$cop,us skip\t# signed cmove\n\t"
 6713             "MOV    $dst,$src\n"
 6714       "skip:" %}
 6715   ins_encode %{
 6716     Label Lskip;
 6717     // Invert sense of branch from sense of CMOV
 6718     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6719     __ movl($dst$$Register, $src$$Register);
 6720     __ bind(Lskip);
 6721   %}
 6722   ins_pipe( pipe_cmov_reg );
 6723 %}
 6724 
 6725 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
 6726   predicate(!VM_Version::supports_cmov() );
 6727   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6728   ins_cost(200);
 6729   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
 6730             "MOV    $dst,$src\n"
 6731       "skip:" %}
 6732   ins_encode %{
 6733     Label Lskip;
 6734     // Invert sense of branch from sense of CMOV
 6735     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
 6736     __ movl($dst$$Register, $src$$Register);
 6737     __ bind(Lskip);
 6738   %}
 6739   ins_pipe( pipe_cmov_reg );
 6740 %}
 6741 
 6742 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
 6743   predicate(VM_Version::supports_cmov() );
 6744   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6745   ins_cost(200);
 6746   format %{ "CMOV$cop $dst,$src" %}
 6747   opcode(0x0F,0x40);
 6748   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6749   ins_pipe( pipe_cmov_reg );
 6750 %}
 6751 
 6752 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
 6753   predicate(VM_Version::supports_cmov() );
 6754   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6755   ins_cost(200);
 6756   format %{ "CMOV$cop $dst,$src" %}
 6757   opcode(0x0F,0x40);
 6758   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6759   ins_pipe( pipe_cmov_reg );
 6760 %}
 6761 
 6762 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
 6763   predicate(VM_Version::supports_cmov() );
 6764   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
 6765   ins_cost(200);
 6766   expand %{
 6767     cmovI_regU(cop, cr, dst, src);
 6768   %}
 6769 %}
 6770 
 6771 // Conditional move
 6772 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
 6773   predicate(VM_Version::supports_cmov() );
 6774   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6775   ins_cost(250);
 6776   format %{ "CMOV$cop $dst,$src" %}
 6777   opcode(0x0F,0x40);
 6778   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6779   ins_pipe( pipe_cmov_mem );
 6780 %}
 6781 
 6782 // Conditional move
 6783 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
 6784   predicate(VM_Version::supports_cmov() );
 6785   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6786   ins_cost(250);
 6787   format %{ "CMOV$cop $dst,$src" %}
 6788   opcode(0x0F,0x40);
 6789   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6790   ins_pipe( pipe_cmov_mem );
 6791 %}
 6792 
 6793 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
 6794   predicate(VM_Version::supports_cmov() );
 6795   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
 6796   ins_cost(250);
 6797   expand %{
 6798     cmovI_memU(cop, cr, dst, src);
 6799   %}
 6800 %}
 6801 
 6802 // Conditional move
 6803 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6804   predicate(VM_Version::supports_cmov() );
 6805   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6806   ins_cost(200);
 6807   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6808   opcode(0x0F,0x40);
 6809   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6810   ins_pipe( pipe_cmov_reg );
 6811 %}
 6812 
 6813 // Conditional move (non-P6 version)
// Note:  a CMoveP is generated for stubs and native wrappers
 6815 //        regardless of whether we are on a P6, so we
 6816 //        emulate a cmov here
 6817 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
 6818   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6819   ins_cost(300);
 6820   format %{ "Jn$cop   skip\n\t"
 6821           "MOV    $dst,$src\t# pointer\n"
 6822       "skip:" %}
 6823   opcode(0x8b);
 6824   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
 6825   ins_pipe( pipe_cmov_reg );
 6826 %}
 6827 
 6828 // Conditional move
 6829 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
 6830   predicate(VM_Version::supports_cmov() );
 6831   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6832   ins_cost(200);
 6833   format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6834   opcode(0x0F,0x40);
 6835   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
 6836   ins_pipe( pipe_cmov_reg );
 6837 %}
 6838 
 6839 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
 6840   predicate(VM_Version::supports_cmov() );
 6841   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
 6842   ins_cost(200);
 6843   expand %{
 6844     cmovP_regU(cop, cr, dst, src);
 6845   %}
 6846 %}
 6847 
 6848 // DISABLED: Requires the ADLC to emit a bottom_type call that
 6849 // correctly meets the two pointer arguments; one is an incoming
 6850 // register but the other is a memory operand.  ALSO appears to
 6851 // be buggy with implicit null checks.
 6852 //
 6853 //// Conditional move
 6854 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
 6855 //  predicate(VM_Version::supports_cmov() );
 6856 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6857 //  ins_cost(250);
 6858 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6859 //  opcode(0x0F,0x40);
 6860 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6861 //  ins_pipe( pipe_cmov_mem );
 6862 //%}
 6863 //
 6864 //// Conditional move
 6865 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
 6866 //  predicate(VM_Version::supports_cmov() );
 6867 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
 6868 //  ins_cost(250);
 6869 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
 6870 //  opcode(0x0F,0x40);
 6871 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
 6872 //  ins_pipe( pipe_cmov_mem );
 6873 //%}
 6874 
 6875 // Conditional move
 6876 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
 6877   predicate(UseSSE<=1);
 6878   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6879   ins_cost(200);
 6880   format %{ "FCMOV$cop $dst,$src\t# double" %}
 6881   opcode(0xDA);
 6882   ins_encode( enc_cmov_dpr(cop,src) );
 6883   ins_pipe( pipe_cmovDPR_reg );
 6884 %}
 6885 
 6886 // Conditional move
 6887 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
 6888   predicate(UseSSE==0);
 6889   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6890   ins_cost(200);
 6891   format %{ "FCMOV$cop $dst,$src\t# float" %}
 6892   opcode(0xDA);
 6893   ins_encode( enc_cmov_dpr(cop,src) );
 6894   ins_pipe( pipe_cmovDPR_reg );
 6895 %}
 6896 
 6897 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6898 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
 6899   predicate(UseSSE<=1);
 6900   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6901   ins_cost(200);
 6902   format %{ "Jn$cop   skip\n\t"
 6903             "MOV    $dst,$src\t# double\n"
 6904       "skip:" %}
  opcode(0xDD, 0x3);      /* DD D8+i or DD /3 */
 6906   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
 6907   ins_pipe( pipe_cmovDPR_reg );
 6908 %}
 6909 
 6910 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
 6911 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
 6912   predicate(UseSSE==0);
 6913   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6914   ins_cost(200);
 6915   format %{ "Jn$cop    skip\n\t"
 6916             "MOV    $dst,$src\t# float\n"
 6917       "skip:" %}
  opcode(0xDD, 0x3);      /* DD D8+i or DD /3 */
 6919   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
 6920   ins_pipe( pipe_cmovDPR_reg );
 6921 %}
 6922 
 6923 // No CMOVE with SSE/SSE2
 6924 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
 6925   predicate (UseSSE>=1);
 6926   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6927   ins_cost(200);
 6928   format %{ "Jn$cop   skip\n\t"
 6929             "MOVSS  $dst,$src\t# float\n"
 6930       "skip:" %}
 6931   ins_encode %{
 6932     Label skip;
 6933     // Invert sense of branch from sense of CMOV
 6934     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6935     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6936     __ bind(skip);
 6937   %}
 6938   ins_pipe( pipe_slow );
 6939 %}
 6940 
 6941 // No CMOVE with SSE/SSE2
 6942 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
 6943   predicate (UseSSE>=2);
 6944   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6945   ins_cost(200);
 6946   format %{ "Jn$cop   skip\n\t"
 6947             "MOVSD  $dst,$src\t# float\n"
 6948       "skip:" %}
 6949   ins_encode %{
 6950     Label skip;
 6951     // Invert sense of branch from sense of CMOV
 6952     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6953     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6954     __ bind(skip);
 6955   %}
 6956   ins_pipe( pipe_slow );
 6957 %}
 6958 
 6959 // unsigned version
 6960 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
 6961   predicate (UseSSE>=1);
 6962   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6963   ins_cost(200);
 6964   format %{ "Jn$cop   skip\n\t"
 6965             "MOVSS  $dst,$src\t# float\n"
 6966       "skip:" %}
 6967   ins_encode %{
 6968     Label skip;
 6969     // Invert sense of branch from sense of CMOV
 6970     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6971     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
 6972     __ bind(skip);
 6973   %}
 6974   ins_pipe( pipe_slow );
 6975 %}
 6976 
 6977 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
 6978   predicate (UseSSE>=1);
 6979   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
 6980   ins_cost(200);
 6981   expand %{
 6982     fcmovF_regU(cop, cr, dst, src);
 6983   %}
 6984 %}
 6985 
 6986 // unsigned version
 6987 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
 6988   predicate (UseSSE>=2);
 6989   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 6990   ins_cost(200);
 6991   format %{ "Jn$cop   skip\n\t"
 6992             "MOVSD  $dst,$src\t# float\n"
 6993       "skip:" %}
 6994   ins_encode %{
 6995     Label skip;
 6996     // Invert sense of branch from sense of CMOV
 6997     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
 6998     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
 6999     __ bind(skip);
 7000   %}
 7001   ins_pipe( pipe_slow );
 7002 %}
 7003 
 7004 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
 7005   predicate (UseSSE>=2);
 7006   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
 7007   ins_cost(200);
 7008   expand %{
 7009     fcmovD_regU(cop, cr, dst, src);
 7010   %}
 7011 %}
 7012 
 7013 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
 7014   predicate(VM_Version::supports_cmov() );
 7015   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7016   ins_cost(200);
 7017   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7018             "CMOV$cop $dst.hi,$src.hi" %}
 7019   opcode(0x0F,0x40);
 7020   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7021   ins_pipe( pipe_cmov_reg_long );
 7022 %}
 7023 
 7024 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
 7025   predicate(VM_Version::supports_cmov() );
 7026   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7027   ins_cost(200);
 7028   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
 7029             "CMOV$cop $dst.hi,$src.hi" %}
 7030   opcode(0x0F,0x40);
 7031   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
 7032   ins_pipe( pipe_cmov_reg_long );
 7033 %}
 7034 
 7035 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
 7036   predicate(VM_Version::supports_cmov() );
 7037   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
 7038   ins_cost(200);
 7039   expand %{
 7040     cmovL_regU(cop, cr, dst, src);
 7041   %}
 7042 %}
 7043 
 7044 //----------Arithmetic Instructions--------------------------------------------
 7045 //----------Addition Instructions----------------------------------------------
 7046 
 7047 // Integer Addition Instructions
 7048 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7049   match(Set dst (AddI dst src));
 7050   effect(KILL cr);
 7051 
 7052   size(2);
 7053   format %{ "ADD    $dst,$src" %}
 7054   opcode(0x03);
 7055   ins_encode( OpcP, RegReg( dst, src) );
 7056   ins_pipe( ialu_reg_reg );
 7057 %}
 7058 
 7059 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7060   match(Set dst (AddI dst src));
 7061   effect(KILL cr);
 7062 
 7063   format %{ "ADD    $dst,$src" %}
 7064   opcode(0x81, 0x00); /* /0 id */
 7065   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7066   ins_pipe( ialu_reg );
 7067 %}
 7068 
 7069 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
 7070   predicate(UseIncDec);
 7071   match(Set dst (AddI dst src));
 7072   effect(KILL cr);
 7073 
 7074   size(1);
 7075   format %{ "INC    $dst" %}
 7076   opcode(0x40); /*  */
 7077   ins_encode( Opc_plus( primary, dst ) );
 7078   ins_pipe( ialu_reg );
 7079 %}
 7080 
 7081 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
 7082   match(Set dst (AddI src0 src1));
 7083   ins_cost(110);
 7084 
 7085   format %{ "LEA    $dst,[$src0 + $src1]" %}
 7086   opcode(0x8D); /* 0x8D /r */
 7087   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7088   ins_pipe( ialu_reg_reg );
 7089 %}
 7090 
 7091 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
 7092   match(Set dst (AddP src0 src1));
 7093   ins_cost(110);
 7094 
 7095   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
 7096   opcode(0x8D); /* 0x8D /r */
 7097   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
 7098   ins_pipe( ialu_reg_reg );
 7099 %}
 7100 
 7101 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
 7102   predicate(UseIncDec);
 7103   match(Set dst (AddI dst src));
 7104   effect(KILL cr);
 7105 
 7106   size(1);
 7107   format %{ "DEC    $dst" %}
 7108   opcode(0x48); /*  */
 7109   ins_encode( Opc_plus( primary, dst ) );
 7110   ins_pipe( ialu_reg );
 7111 %}
 7112 
 7113 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
 7114   match(Set dst (AddP dst src));
 7115   effect(KILL cr);
 7116 
 7117   size(2);
 7118   format %{ "ADD    $dst,$src" %}
 7119   opcode(0x03);
 7120   ins_encode( OpcP, RegReg( dst, src) );
 7121   ins_pipe( ialu_reg_reg );
 7122 %}
 7123 
 7124 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
 7125   match(Set dst (AddP dst src));
 7126   effect(KILL cr);
 7127 
 7128   format %{ "ADD    $dst,$src" %}
 7129   opcode(0x81,0x00); /* Opcode 81 /0 id */
 7130   // ins_encode( RegImm( dst, src) );
 7131   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7132   ins_pipe( ialu_reg );
 7133 %}
 7134 
 7135 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7136   match(Set dst (AddI dst (LoadI src)));
 7137   effect(KILL cr);
 7138 
 7139   ins_cost(150);
 7140   format %{ "ADD    $dst,$src" %}
 7141   opcode(0x03);
 7142   ins_encode( OpcP, RegMem( dst, src) );
 7143   ins_pipe( ialu_reg_mem );
 7144 %}
 7145 
 7146 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7147   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7148   effect(KILL cr);
 7149 
 7150   ins_cost(150);
 7151   format %{ "ADD    $dst,$src" %}
 7152   opcode(0x01);  /* Opcode 01 /r */
 7153   ins_encode( OpcP, RegMem( src, dst ) );
 7154   ins_pipe( ialu_mem_reg );
 7155 %}
 7156 
 7157 // Add Memory with Immediate
 7158 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 7159   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7160   effect(KILL cr);
 7161 
 7162   ins_cost(125);
 7163   format %{ "ADD    $dst,$src" %}
 7164   opcode(0x81);               /* Opcode 81 /0 id */
 7165   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
 7166   ins_pipe( ialu_mem_imm );
 7167 %}
 7168 
 7169 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
 7170   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7171   effect(KILL cr);
 7172 
 7173   ins_cost(125);
 7174   format %{ "INC    $dst" %}
 7175   opcode(0xFF);               /* Opcode FF /0 */
 7176   ins_encode( OpcP, RMopc_Mem(0x00,dst));
 7177   ins_pipe( ialu_mem_imm );
 7178 %}
 7179 
 7180 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
 7181   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
 7182   effect(KILL cr);
 7183 
 7184   ins_cost(125);
 7185   format %{ "DEC    $dst" %}
 7186   opcode(0xFF);               /* Opcode FF /1 */
 7187   ins_encode( OpcP, RMopc_Mem(0x01,dst));
 7188   ins_pipe( ialu_mem_imm );
 7189 %}
 7190 
 7191 
 7192 instruct checkCastPP( eRegP dst ) %{
 7193   match(Set dst (CheckCastPP dst));
 7194 
 7195   size(0);
 7196   format %{ "#checkcastPP of $dst" %}
 7197   ins_encode( /*empty encoding*/ );
 7198   ins_pipe( empty );
 7199 %}
 7200 
 7201 instruct castPP( eRegP dst ) %{
 7202   match(Set dst (CastPP dst));
 7203   format %{ "#castPP of $dst" %}
 7204   ins_encode( /*empty encoding*/ );
 7205   ins_pipe( empty );
 7206 %}
 7207 
 7208 instruct castII( rRegI dst ) %{
 7209   match(Set dst (CastII dst));
 7210   format %{ "#castII of $dst" %}
 7211   ins_encode( /*empty encoding*/ );
 7212   ins_cost(0);
 7213   ins_pipe( empty );
 7214 %}
 7215 
 7216 instruct castLL( eRegL dst ) %{
 7217   match(Set dst (CastLL dst));
 7218   format %{ "#castLL of $dst" %}
 7219   ins_encode( /*empty encoding*/ );
 7220   ins_cost(0);
 7221   ins_pipe( empty );
 7222 %}
 7223 
 7224 instruct castFF( regF dst ) %{
 7225   predicate(UseSSE >= 1);
 7226   match(Set dst (CastFF dst));
 7227   format %{ "#castFF of $dst" %}
 7228   ins_encode( /*empty encoding*/ );
 7229   ins_cost(0);
 7230   ins_pipe( empty );
 7231 %}
 7232 
 7233 instruct castDD( regD dst ) %{
 7234   predicate(UseSSE >= 2);
 7235   match(Set dst (CastDD dst));
 7236   format %{ "#castDD of $dst" %}
 7237   ins_encode( /*empty encoding*/ );
 7238   ins_cost(0);
 7239   ins_pipe( empty );
 7240 %}
 7241 
 7242 instruct castFF_PR( regFPR dst ) %{
 7243   predicate(UseSSE < 1);
 7244   match(Set dst (CastFF dst));
 7245   format %{ "#castFF of $dst" %}
 7246   ins_encode( /*empty encoding*/ );
 7247   ins_cost(0);
 7248   ins_pipe( empty );
 7249 %}
 7250 
 7251 instruct castDD_PR( regDPR dst ) %{
 7252   predicate(UseSSE < 2);
 7253   match(Set dst (CastDD dst));
 7254   format %{ "#castDD of $dst" %}
 7255   ins_encode( /*empty encoding*/ );
 7256   ins_cost(0);
 7257   ins_pipe( empty );
 7258 %}
 7259 
// No flag versions for CompareAndSwap{P,I,L} because the matcher can't match them
 7261 
 7262 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7263   predicate(VM_Version::supports_cx8());
 7264   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
 7265   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
 7266   effect(KILL cr, KILL oldval);
 7267   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7268             "MOV    $res,0\n\t"
 7269             "JNE,s  fail\n\t"
 7270             "MOV    $res,1\n"
 7271           "fail:" %}
 7272   ins_encode( enc_cmpxchg8(mem_ptr),
 7273               enc_flags_ne_to_boolean(res) );
 7274   ins_pipe( pipe_cmpxchg );
 7275 %}
 7276 
 7277 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7278   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
 7279   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
 7280   effect(KILL cr, KILL oldval);
 7281   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7282             "MOV    $res,0\n\t"
 7283             "JNE,s  fail\n\t"
 7284             "MOV    $res,1\n"
 7285           "fail:" %}
 7286   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7287   ins_pipe( pipe_cmpxchg );
 7288 %}
 7289 
 7290 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7291   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
 7292   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
 7293   effect(KILL cr, KILL oldval);
 7294   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7295             "MOV    $res,0\n\t"
 7296             "JNE,s  fail\n\t"
 7297             "MOV    $res,1\n"
 7298           "fail:" %}
 7299   ins_encode( enc_cmpxchgb(mem_ptr),
 7300               enc_flags_ne_to_boolean(res) );
 7301   ins_pipe( pipe_cmpxchg );
 7302 %}
 7303 
 7304 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
 7305   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
 7306   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
 7307   effect(KILL cr, KILL oldval);
 7308   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7309             "MOV    $res,0\n\t"
 7310             "JNE,s  fail\n\t"
 7311             "MOV    $res,1\n"
 7312           "fail:" %}
 7313   ins_encode( enc_cmpxchgw(mem_ptr),
 7314               enc_flags_ne_to_boolean(res) );
 7315   ins_pipe( pipe_cmpxchg );
 7316 %}
 7317 
 7318 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7319   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
 7320   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
 7321   effect(KILL cr, KILL oldval);
 7322   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
 7323             "MOV    $res,0\n\t"
 7324             "JNE,s  fail\n\t"
 7325             "MOV    $res,1\n"
 7326           "fail:" %}
 7327   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
 7328   ins_pipe( pipe_cmpxchg );
 7329 %}
 7330 
 7331 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
 7332   predicate(VM_Version::supports_cx8());
 7333   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
 7334   effect(KILL cr);
 7335   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7336   ins_encode( enc_cmpxchg8(mem_ptr) );
 7337   ins_pipe( pipe_cmpxchg );
 7338 %}
 7339 
 7340 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
 7341   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
 7342   effect(KILL cr);
 7343   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7344   ins_encode( enc_cmpxchg(mem_ptr) );
 7345   ins_pipe( pipe_cmpxchg );
 7346 %}
 7347 
 7348 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7349   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
 7350   effect(KILL cr);
 7351   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7352   ins_encode( enc_cmpxchgb(mem_ptr) );
 7353   ins_pipe( pipe_cmpxchg );
 7354 %}
 7355 
 7356 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7357   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
 7358   effect(KILL cr);
 7359   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7360   ins_encode( enc_cmpxchgw(mem_ptr) );
 7361   ins_pipe( pipe_cmpxchg );
 7362 %}
 7363 
 7364 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
 7365   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
 7366   effect(KILL cr);
 7367   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
 7368   ins_encode( enc_cmpxchg(mem_ptr) );
 7369   ins_pipe( pipe_cmpxchg );
 7370 %}
 7371 
 7372 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7373   predicate(n->as_LoadStore()->result_not_used());
 7374   match(Set dummy (GetAndAddB mem add));
 7375   effect(KILL cr);
 7376   format %{ "ADDB  [$mem],$add" %}
 7377   ins_encode %{
 7378     __ lock();
 7379     __ addb($mem$$Address, $add$$constant);
 7380   %}
 7381   ins_pipe( pipe_cmpxchg );
 7382 %}
 7383 
 7384 // Important to match to xRegI: only 8-bit regs.
 7385 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
 7386   match(Set newval (GetAndAddB mem newval));
 7387   effect(KILL cr);
 7388   format %{ "XADDB  [$mem],$newval" %}
 7389   ins_encode %{
 7390     __ lock();
 7391     __ xaddb($mem$$Address, $newval$$Register);
 7392   %}
 7393   ins_pipe( pipe_cmpxchg );
 7394 %}
 7395 
 7396 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7397   predicate(n->as_LoadStore()->result_not_used());
 7398   match(Set dummy (GetAndAddS mem add));
 7399   effect(KILL cr);
 7400   format %{ "ADDS  [$mem],$add" %}
 7401   ins_encode %{
 7402     __ lock();
 7403     __ addw($mem$$Address, $add$$constant);
 7404   %}
 7405   ins_pipe( pipe_cmpxchg );
 7406 %}
 7407 
 7408 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
 7409   match(Set newval (GetAndAddS mem newval));
 7410   effect(KILL cr);
 7411   format %{ "XADDS  [$mem],$newval" %}
 7412   ins_encode %{
 7413     __ lock();
 7414     __ xaddw($mem$$Address, $newval$$Register);
 7415   %}
 7416   ins_pipe( pipe_cmpxchg );
 7417 %}
 7418 
 7419 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
 7420   predicate(n->as_LoadStore()->result_not_used());
 7421   match(Set dummy (GetAndAddI mem add));
 7422   effect(KILL cr);
 7423   format %{ "ADDL  [$mem],$add" %}
 7424   ins_encode %{
 7425     __ lock();
 7426     __ addl($mem$$Address, $add$$constant);
 7427   %}
 7428   ins_pipe( pipe_cmpxchg );
 7429 %}
 7430 
 7431 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
 7432   match(Set newval (GetAndAddI mem newval));
 7433   effect(KILL cr);
 7434   format %{ "XADDL  [$mem],$newval" %}
 7435   ins_encode %{
 7436     __ lock();
 7437     __ xaddl($mem$$Address, $newval$$Register);
 7438   %}
 7439   ins_pipe( pipe_cmpxchg );
 7440 %}
 7441 
 7442 // Important to match to xRegI: only 8-bit regs.
 7443 instruct xchgB( memory mem, xRegI newval) %{
 7444   match(Set newval (GetAndSetB mem newval));
 7445   format %{ "XCHGB  $newval,[$mem]" %}
 7446   ins_encode %{
 7447     __ xchgb($newval$$Register, $mem$$Address);
 7448   %}
 7449   ins_pipe( pipe_cmpxchg );
 7450 %}
 7451 
 7452 instruct xchgS( memory mem, rRegI newval) %{
 7453   match(Set newval (GetAndSetS mem newval));
 7454   format %{ "XCHGW  $newval,[$mem]" %}
 7455   ins_encode %{
 7456     __ xchgw($newval$$Register, $mem$$Address);
 7457   %}
 7458   ins_pipe( pipe_cmpxchg );
 7459 %}
 7460 
 7461 instruct xchgI( memory mem, rRegI newval) %{
 7462   match(Set newval (GetAndSetI mem newval));
 7463   format %{ "XCHGL  $newval,[$mem]" %}
 7464   ins_encode %{
 7465     __ xchgl($newval$$Register, $mem$$Address);
 7466   %}
 7467   ins_pipe( pipe_cmpxchg );
 7468 %}
 7469 
 7470 instruct xchgP( memory mem, pRegP newval) %{
 7471   match(Set newval (GetAndSetP mem newval));
 7472   format %{ "XCHGL  $newval,[$mem]" %}
 7473   ins_encode %{
 7474     __ xchgl($newval$$Register, $mem$$Address);
 7475   %}
 7476   ins_pipe( pipe_cmpxchg );
 7477 %}
 7478 
 7479 //----------Subtraction Instructions-------------------------------------------
 7480 
 7481 // Integer Subtraction Instructions
 7482 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7483   match(Set dst (SubI dst src));
 7484   effect(KILL cr);
 7485 
 7486   size(2);
 7487   format %{ "SUB    $dst,$src" %}
 7488   opcode(0x2B);
 7489   ins_encode( OpcP, RegReg( dst, src) );
 7490   ins_pipe( ialu_reg_reg );
 7491 %}
 7492 
 7493 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 7494   match(Set dst (SubI dst src));
 7495   effect(KILL cr);
 7496 
 7497   format %{ "SUB    $dst,$src" %}
 7498   opcode(0x81,0x05);  /* Opcode 81 /5 */
 7499   // ins_encode( RegImm( dst, src) );
 7500   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 7501   ins_pipe( ialu_reg );
 7502 %}
 7503 
 7504 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 7505   match(Set dst (SubI dst (LoadI src)));
 7506   effect(KILL cr);
 7507 
 7508   ins_cost(150);
 7509   format %{ "SUB    $dst,$src" %}
 7510   opcode(0x2B);
 7511   ins_encode( OpcP, RegMem( dst, src) );
 7512   ins_pipe( ialu_reg_mem );
 7513 %}
 7514 
 7515 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 7516   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
 7517   effect(KILL cr);
 7518 
 7519   ins_cost(150);
 7520   format %{ "SUB    $dst,$src" %}
 7521   opcode(0x29);  /* Opcode 29 /r */
 7522   ins_encode( OpcP, RegMem( src, dst ) );
 7523   ins_pipe( ialu_mem_reg );
 7524 %}
 7525 
 7526 // Subtract from a pointer
 7527 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
 7528   match(Set dst (AddP dst (SubI zero src)));
 7529   effect(KILL cr);
 7530 
 7531   size(2);
 7532   format %{ "SUB    $dst,$src" %}
 7533   opcode(0x2B);
 7534   ins_encode( OpcP, RegReg( dst, src) );
 7535   ins_pipe( ialu_reg_reg );
 7536 %}
 7537 
 7538 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 7539   match(Set dst (SubI zero dst));
 7540   effect(KILL cr);
 7541 
 7542   size(2);
 7543   format %{ "NEG    $dst" %}
 7544   opcode(0xF7,0x03);  // Opcode F7 /3
 7545   ins_encode( OpcP, RegOpc( dst ) );
 7546   ins_pipe( ialu_reg );
 7547 %}
 7548 
 7549 //----------Multiplication/Division Instructions-------------------------------
 7550 // Integer Multiplication Instructions
 7551 // Multiply Register
 7552 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 7553   match(Set dst (MulI dst src));
 7554   effect(KILL cr);
 7555 
 7556   size(3);
 7557   ins_cost(300);
 7558   format %{ "IMUL   $dst,$src" %}
 7559   opcode(0xAF, 0x0F);
 7560   ins_encode( OpcS, OpcP, RegReg( dst, src) );
 7561   ins_pipe( ialu_reg_reg_alu0 );
 7562 %}
 7563 
 7564 // Multiply 32-bit Immediate
 7565 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
 7566   match(Set dst (MulI src imm));
 7567   effect(KILL cr);
 7568 
 7569   ins_cost(300);
 7570   format %{ "IMUL   $dst,$src,$imm" %}
 7571   opcode(0x69);  /* 69 /r id */
 7572   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
 7573   ins_pipe( ialu_reg_reg_alu0 );
 7574 %}
 7575 
 7576 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
 7577   match(Set dst src);
 7578   effect(KILL cr);
 7579 
 7580   // Note that this is artificially increased to make it more expensive than loadConL
 7581   ins_cost(250);
 7582   format %{ "MOV    EAX,$src\t// low word only" %}
 7583   opcode(0xB8);
 7584   ins_encode( LdImmL_Lo(dst, src) );
 7585   ins_pipe( ialu_reg_fat );
 7586 %}
 7587 
 7588 // Multiply by 32-bit Immediate, taking the shifted high order results
 7589 //  (special case for shift by 32)
 7590 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
 7591   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7592   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7593              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7594              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7595   effect(USE src1, KILL cr);
 7596 
 7597   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7598   ins_cost(0*100 + 1*400 - 150);
 7599   format %{ "IMUL   EDX:EAX,$src1" %}
 7600   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7601   ins_pipe( pipe_slow );
 7602 %}
 7603 
 7604 // Multiply by 32-bit Immediate, taking the shifted high order results
 7605 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
 7606   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
 7607   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
 7608              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
 7609              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
 7610   effect(USE src1, KILL cr);
 7611 
 7612   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
 7613   ins_cost(1*100 + 1*400 - 150);
 7614   format %{ "IMUL   EDX:EAX,$src1\n\t"
 7615             "SAR    EDX,$cnt-32" %}
 7616   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
 7617   ins_pipe( pipe_slow );
 7618 %}
 7619 
 7620 // Multiply Memory 32-bit Immediate
 7621 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
 7622   match(Set dst (MulI (LoadI src) imm));
 7623   effect(KILL cr);
 7624 
 7625   ins_cost(300);
 7626   format %{ "IMUL   $dst,$src,$imm" %}
 7627   opcode(0x69);  /* 69 /r id */
 7628   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
 7629   ins_pipe( ialu_reg_mem_alu0 );
 7630 %}
 7631 
 7632 // Multiply Memory
 7633 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
 7634   match(Set dst (MulI dst (LoadI src)));
 7635   effect(KILL cr);
 7636 
 7637   ins_cost(350);
 7638   format %{ "IMUL   $dst,$src" %}
 7639   opcode(0xAF, 0x0F);
 7640   ins_encode( OpcS, OpcP, RegMem( dst, src) );
 7641   ins_pipe( ialu_reg_mem_alu0 );
 7642 %}
 7643 
 7644 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
 7645 %{
 7646   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
 7647   effect(KILL cr, KILL src2);
 7648 
 7649   expand %{ mulI_eReg(dst, src1, cr);
 7650            mulI_eReg(src2, src3, cr);
 7651            addI_eReg(dst, src2, cr); %}
 7652 %}
 7653 
 7654 // Multiply Register Int to Long
 7655 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
 7656   // Basic Idea: long = (long)int * (long)int
 7657   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
 7658   effect(DEF dst, USE src, USE src1, KILL flags);
 7659 
 7660   ins_cost(300);
 7661   format %{ "IMUL   $dst,$src1" %}
 7662 
 7663   ins_encode( long_int_multiply( dst, src1 ) );
 7664   ins_pipe( ialu_reg_reg_alu0 );
 7665 %}
 7666 
 7667 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
 7668   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
 7669   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
 7670   effect(KILL flags);
 7671 
 7672   ins_cost(300);
 7673   format %{ "MUL    $dst,$src1" %}
 7674 
 7675   ins_encode( long_uint_multiply(dst, src1) );
 7676   ins_pipe( ialu_reg_reg_alu0 );
 7677 %}
 7678 
 7679 // Multiply Register Long
 7680 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7681   match(Set dst (MulL dst src));
 7682   effect(KILL cr, TEMP tmp);
 7683   ins_cost(4*100+3*400);
 7684 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7685 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
 7686   format %{ "MOV    $tmp,$src.lo\n\t"
 7687             "IMUL   $tmp,EDX\n\t"
 7688             "MOV    EDX,$src.hi\n\t"
 7689             "IMUL   EDX,EAX\n\t"
 7690             "ADD    $tmp,EDX\n\t"
 7691             "MUL    EDX:EAX,$src.lo\n\t"
 7692             "ADD    EDX,$tmp" %}
 7693   ins_encode( long_multiply( dst, src, tmp ) );
 7694   ins_pipe( pipe_slow );
 7695 %}
 7696 
 7697 // Multiply Register Long where the left operand's high 32 bits are zero
 7698 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7699   predicate(is_operand_hi32_zero(n->in(1)));
 7700   match(Set dst (MulL dst src));
 7701   effect(KILL cr, TEMP tmp);
 7702   ins_cost(2*100+2*400);
 7703 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7704 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
 7705   format %{ "MOV    $tmp,$src.hi\n\t"
 7706             "IMUL   $tmp,EAX\n\t"
 7707             "MUL    EDX:EAX,$src.lo\n\t"
 7708             "ADD    EDX,$tmp" %}
 7709   ins_encode %{
 7710     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
 7711     __ imull($tmp$$Register, rax);
 7712     __ mull($src$$Register);
 7713     __ addl(rdx, $tmp$$Register);
 7714   %}
 7715   ins_pipe( pipe_slow );
 7716 %}
 7717 
 7718 // Multiply Register Long where the right operand's high 32 bits are zero
 7719 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
 7720   predicate(is_operand_hi32_zero(n->in(2)));
 7721   match(Set dst (MulL dst src));
 7722   effect(KILL cr, TEMP tmp);
 7723   ins_cost(2*100+2*400);
 7724 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7725 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
 7726   format %{ "MOV    $tmp,$src.lo\n\t"
 7727             "IMUL   $tmp,EDX\n\t"
 7728             "MUL    EDX:EAX,$src.lo\n\t"
 7729             "ADD    EDX,$tmp" %}
 7730   ins_encode %{
 7731     __ movl($tmp$$Register, $src$$Register);
 7732     __ imull($tmp$$Register, rdx);
 7733     __ mull($src$$Register);
 7734     __ addl(rdx, $tmp$$Register);
 7735   %}
 7736   ins_pipe( pipe_slow );
 7737 %}
 7738 
 7739 // Multiply Register Long where the left and the right operands' high 32 bits are zero
 7740 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
 7741   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
 7742   match(Set dst (MulL dst src));
 7743   effect(KILL cr);
 7744   ins_cost(1*400);
 7745 // Basic idea: lo(result) = lo(x_lo * y_lo)
 7746 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
 7747   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
 7748   ins_encode %{
 7749     __ mull($src$$Register);
 7750   %}
 7751   ins_pipe( pipe_slow );
 7752 %}
 7753 
 7754 // Multiply Register Long by small constant
 7755 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
 7756   match(Set dst (MulL dst src));
 7757   effect(KILL cr, TEMP tmp);
 7758   ins_cost(2*100+2*400);
 7759   size(12);
 7760 // Basic idea: lo(result) = lo(src * EAX)
 7761 //             hi(result) = hi(src * EAX) + lo(src * EDX)
 7762   format %{ "IMUL   $tmp,EDX,$src\n\t"
 7763             "MOV    EDX,$src\n\t"
 7764             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
 7765             "ADD    EDX,$tmp" %}
 7766   ins_encode( long_multiply_con( dst, src, tmp ) );
 7767   ins_pipe( pipe_slow );
 7768 %}
 7769 
 7770 // Integer DIV with Register
 7771 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7772   match(Set rax (DivI rax div));
 7773   effect(KILL rdx, KILL cr);
 7774   size(26);
 7775   ins_cost(30*100+10*100);
 7776   format %{ "CMP    EAX,0x80000000\n\t"
 7777             "JNE,s  normal\n\t"
 7778             "XOR    EDX,EDX\n\t"
 7779             "CMP    ECX,-1\n\t"
 7780             "JE,s   done\n"
 7781     "normal: CDQ\n\t"
 7782             "IDIV   $div\n\t"
 7783     "done:"        %}
 7784   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7785   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7786   ins_pipe( ialu_reg_reg_alu0 );
 7787 %}
 7788 
 7789 // Divide Register Long
 7790 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7791   match(Set dst (DivL src1 src2));
 7792   effect(CALL);
 7793   ins_cost(10000);
 7794   format %{ "PUSH   $src1.hi\n\t"
 7795             "PUSH   $src1.lo\n\t"
 7796             "PUSH   $src2.hi\n\t"
 7797             "PUSH   $src2.lo\n\t"
 7798             "CALL   SharedRuntime::ldiv\n\t"
 7799             "ADD    ESP,16" %}
 7800   ins_encode( long_div(src1,src2) );
 7801   ins_pipe( pipe_slow );
 7802 %}
 7803 
 7804 // Integer DIVMOD with Register, both quotient and mod results
 7805 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
 7806   match(DivModI rax div);
 7807   effect(KILL cr);
 7808   size(26);
 7809   ins_cost(30*100+10*100);
 7810   format %{ "CMP    EAX,0x80000000\n\t"
 7811             "JNE,s  normal\n\t"
 7812             "XOR    EDX,EDX\n\t"
 7813             "CMP    ECX,-1\n\t"
 7814             "JE,s   done\n"
 7815     "normal: CDQ\n\t"
 7816             "IDIV   $div\n\t"
 7817     "done:"        %}
 7818   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7819   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7820   ins_pipe( pipe_slow );
 7821 %}
 7822 
 7823 // Integer MOD with Register
 7824 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
 7825   match(Set rdx (ModI rax div));
 7826   effect(KILL rax, KILL cr);
 7827 
 7828   size(26);
 7829   ins_cost(300);
 7830   format %{ "CDQ\n\t"
 7831             "IDIV   $div" %}
 7832   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
 7833   ins_encode( cdq_enc, OpcP, RegOpc(div) );
 7834   ins_pipe( ialu_reg_reg_alu0 );
 7835 %}
 7836 
 7837 // Remainder Register Long
 7838 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
 7839   match(Set dst (ModL src1 src2));
 7840   effect(CALL);
 7841   ins_cost(10000);
 7842   format %{ "PUSH   $src1.hi\n\t"
 7843             "PUSH   $src1.lo\n\t"
 7844             "PUSH   $src2.hi\n\t"
 7845             "PUSH   $src2.lo\n\t"
 7846             "CALL   SharedRuntime::lrem\n\t"
 7847             "ADD    ESP,16" %}
 7848   ins_encode( long_mod(src1,src2) );
 7849   ins_pipe( pipe_slow );
 7850 %}
 7851 
 7852 // Divide Register Long (no special case since divisor != -1)
 7853 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7854   match(Set dst (DivL dst imm));
 7855   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7856   ins_cost(1000);
 7857   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
 7858             "XOR    $tmp2,$tmp2\n\t"
 7859             "CMP    $tmp,EDX\n\t"
 7860             "JA,s   fast\n\t"
 7861             "MOV    $tmp2,EAX\n\t"
 7862             "MOV    EAX,EDX\n\t"
 7863             "MOV    EDX,0\n\t"
 7864             "JLE,s  pos\n\t"
 7865             "LNEG   EAX : $tmp2\n\t"
 7866             "DIV    $tmp # unsigned division\n\t"
 7867             "XCHG   EAX,$tmp2\n\t"
 7868             "DIV    $tmp\n\t"
 7869             "LNEG   $tmp2 : EAX\n\t"
 7870             "JMP,s  done\n"
 7871     "pos:\n\t"
 7872             "DIV    $tmp\n\t"
 7873             "XCHG   EAX,$tmp2\n"
 7874     "fast:\n\t"
 7875             "DIV    $tmp\n"
 7876     "done:\n\t"
 7877             "MOV    EDX,$tmp2\n\t"
 7878             "NEG    EDX:EAX # if $imm < 0" %}
 7879   ins_encode %{
 7880     int con = (int)$imm$$constant;
 7881     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7882     int pcon = (con > 0) ? con : -con;
 7883     Label Lfast, Lpos, Ldone;
 7884 
 7885     __ movl($tmp$$Register, pcon);
 7886     __ xorl($tmp2$$Register,$tmp2$$Register);
 7887     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7888     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
 7889 
 7890     __ movl($tmp2$$Register, $dst$$Register); // save
 7891     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7892     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7893     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7894 
 7895     // Negative dividend.
 7896     // convert value to positive to use unsigned division
 7897     __ lneg($dst$$Register, $tmp2$$Register);
 7898     __ divl($tmp$$Register);
 7899     __ xchgl($dst$$Register, $tmp2$$Register);
 7900     __ divl($tmp$$Register);
 7901     // revert result back to negative
 7902     __ lneg($tmp2$$Register, $dst$$Register);
 7903     __ jmpb(Ldone);
 7904 
 7905     __ bind(Lpos);
 7906     __ divl($tmp$$Register); // Use unsigned division
 7907     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through for the final divide; tmp2 has the 32-bit hi result
 7909 
 7910     __ bind(Lfast);
 7911     // fast path: src is positive
 7912     __ divl($tmp$$Register); // Use unsigned division
 7913 
 7914     __ bind(Ldone);
 7915     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
 7916     if (con < 0) {
 7917       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
 7918     }
 7919   %}
 7920   ins_pipe( pipe_slow );
 7921 %}
 7922 
// Remainder Register Long (remainder fits into 32 bits)
 7924 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
 7925   match(Set dst (ModL dst imm));
 7926   effect( TEMP tmp, TEMP tmp2, KILL cr );
 7927   ins_cost(1000);
 7928   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
 7929             "CMP    $tmp,EDX\n\t"
 7930             "JA,s   fast\n\t"
 7931             "MOV    $tmp2,EAX\n\t"
 7932             "MOV    EAX,EDX\n\t"
 7933             "MOV    EDX,0\n\t"
 7934             "JLE,s  pos\n\t"
 7935             "LNEG   EAX : $tmp2\n\t"
 7936             "DIV    $tmp # unsigned division\n\t"
 7937             "MOV    EAX,$tmp2\n\t"
 7938             "DIV    $tmp\n\t"
 7939             "NEG    EDX\n\t"
 7940             "JMP,s  done\n"
 7941     "pos:\n\t"
 7942             "DIV    $tmp\n\t"
 7943             "MOV    EAX,$tmp2\n"
 7944     "fast:\n\t"
 7945             "DIV    $tmp\n"
 7946     "done:\n\t"
 7947             "MOV    EAX,EDX\n\t"
 7948             "SAR    EDX,31\n\t" %}
 7949   ins_encode %{
 7950     int con = (int)$imm$$constant;
 7951     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
 7952     int pcon = (con > 0) ? con : -con;
 7953     Label  Lfast, Lpos, Ldone;
 7954 
 7955     __ movl($tmp$$Register, pcon);
 7956     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
 7957     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
 7958 
 7959     __ movl($tmp2$$Register, $dst$$Register); // save
 7960     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7961     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
 7962     __ jccb(Assembler::lessEqual, Lpos); // result is positive
 7963 
 7964     // Negative dividend.
 7965     // convert value to positive to use unsigned division
 7966     __ lneg($dst$$Register, $tmp2$$Register);
 7967     __ divl($tmp$$Register);
 7968     __ movl($dst$$Register, $tmp2$$Register);
 7969     __ divl($tmp$$Register);
 7970     // revert remainder back to negative
 7971     __ negl(HIGH_FROM_LOW($dst$$Register));
 7972     __ jmpb(Ldone);
 7973 
 7974     __ bind(Lpos);
 7975     __ divl($tmp$$Register);
 7976     __ movl($dst$$Register, $tmp2$$Register);
 7977 
 7978     __ bind(Lfast);
 7979     // fast path: src is positive
 7980     __ divl($tmp$$Register);
 7981 
 7982     __ bind(Ldone);
 7983     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
 7984     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
 7985 
 7986   %}
 7987   ins_pipe( pipe_slow );
 7988 %}
 7989 
 7990 // Integer Shift Instructions
 7991 // Shift Left by one
 7992 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 7993   match(Set dst (LShiftI dst shift));
 7994   effect(KILL cr);
 7995 
 7996   size(2);
 7997   format %{ "SHL    $dst,$shift" %}
 7998   opcode(0xD1, 0x4);  /* D1 /4 */
 7999   ins_encode( OpcP, RegOpc( dst ) );
 8000   ins_pipe( ialu_reg );
 8001 %}
 8002 
 8003 // Shift Left by 8-bit immediate
 8004 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8005   match(Set dst (LShiftI dst shift));
 8006   effect(KILL cr);
 8007 
 8008   size(3);
 8009   format %{ "SHL    $dst,$shift" %}
 8010   opcode(0xC1, 0x4);  /* C1 /4 ib */
 8011   ins_encode( RegOpcImm( dst, shift) );
 8012   ins_pipe( ialu_reg );
 8013 %}
 8014 
 8015 // Shift Left by variable
 8016 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8017   match(Set dst (LShiftI dst shift));
 8018   effect(KILL cr);
 8019 
 8020   size(2);
 8021   format %{ "SHL    $dst,$shift" %}
 8022   opcode(0xD3, 0x4);  /* D3 /4 */
 8023   ins_encode( OpcP, RegOpc( dst ) );
 8024   ins_pipe( ialu_reg_reg );
 8025 %}
 8026 
 8027 // Arithmetic shift right by one
 8028 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8029   match(Set dst (RShiftI dst shift));
 8030   effect(KILL cr);
 8031 
 8032   size(2);
 8033   format %{ "SAR    $dst,$shift" %}
 8034   opcode(0xD1, 0x7);  /* D1 /7 */
 8035   ins_encode( OpcP, RegOpc( dst ) );
 8036   ins_pipe( ialu_reg );
 8037 %}
 8038 
 8039 // Arithmetic shift right by one
 8040 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
 8041   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8042   effect(KILL cr);
 8043   format %{ "SAR    $dst,$shift" %}
 8044   opcode(0xD1, 0x7);  /* D1 /7 */
 8045   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
 8046   ins_pipe( ialu_mem_imm );
 8047 %}
 8048 
 8049 // Arithmetic Shift Right by 8-bit immediate
 8050 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8051   match(Set dst (RShiftI dst shift));
 8052   effect(KILL cr);
 8053 
 8054   size(3);
 8055   format %{ "SAR    $dst,$shift" %}
 8056   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8057   ins_encode( RegOpcImm( dst, shift ) );
 8058   ins_pipe( ialu_mem_imm );
 8059 %}
 8060 
 8061 // Arithmetic Shift Right by 8-bit immediate
 8062 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
 8063   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
 8064   effect(KILL cr);
 8065 
 8066   format %{ "SAR    $dst,$shift" %}
 8067   opcode(0xC1, 0x7);  /* C1 /7 ib */
 8068   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
 8069   ins_pipe( ialu_mem_imm );
 8070 %}
 8071 
 8072 // Arithmetic Shift Right by variable
 8073 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8074   match(Set dst (RShiftI dst shift));
 8075   effect(KILL cr);
 8076 
 8077   size(2);
 8078   format %{ "SAR    $dst,$shift" %}
 8079   opcode(0xD3, 0x7);  /* D3 /7 */
 8080   ins_encode( OpcP, RegOpc( dst ) );
 8081   ins_pipe( ialu_reg_reg );
 8082 %}
 8083 
 8084 // Logical shift right by one
 8085 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8086   match(Set dst (URShiftI dst shift));
 8087   effect(KILL cr);
 8088 
 8089   size(2);
 8090   format %{ "SHR    $dst,$shift" %}
 8091   opcode(0xD1, 0x5);  /* D1 /5 */
 8092   ins_encode( OpcP, RegOpc( dst ) );
 8093   ins_pipe( ialu_reg );
 8094 %}
 8095 
 8096 // Logical Shift Right by 8-bit immediate
 8097 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8098   match(Set dst (URShiftI dst shift));
 8099   effect(KILL cr);
 8100 
 8101   size(3);
 8102   format %{ "SHR    $dst,$shift" %}
 8103   opcode(0xC1, 0x5);  /* C1 /5 ib */
 8104   ins_encode( RegOpcImm( dst, shift) );
 8105   ins_pipe( ialu_reg );
 8106 %}
 8107 
 8108 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
 8110 // This idiom is used by the compiler for the i2b bytecode.
 8111 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
 8112   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 8113 
 8114   size(3);
 8115   format %{ "MOVSX  $dst,$src :8" %}
 8116   ins_encode %{
 8117     __ movsbl($dst$$Register, $src$$Register);
 8118   %}
 8119   ins_pipe(ialu_reg_reg);
 8120 %}
 8121 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
 8124 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
 8125   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 8126 
 8127   size(3);
 8128   format %{ "MOVSX  $dst,$src :16" %}
 8129   ins_encode %{
 8130     __ movswl($dst$$Register, $src$$Register);
 8131   %}
 8132   ins_pipe(ialu_reg_reg);
 8133 %}
 8134 
 8135 
 8136 // Logical Shift Right by variable
 8137 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8138   match(Set dst (URShiftI dst shift));
 8139   effect(KILL cr);
 8140 
 8141   size(2);
 8142   format %{ "SHR    $dst,$shift" %}
 8143   opcode(0xD3, 0x5);  /* D3 /5 */
 8144   ins_encode( OpcP, RegOpc( dst ) );
 8145   ins_pipe( ialu_reg_reg );
 8146 %}
 8147 
 8148 
 8149 //----------Logical Instructions-----------------------------------------------
 8150 //----------Integer Logical Instructions---------------------------------------
 8151 // And Instructions
 8152 // And Register with Register
 8153 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8154   match(Set dst (AndI dst src));
 8155   effect(KILL cr);
 8156 
 8157   size(2);
 8158   format %{ "AND    $dst,$src" %}
 8159   opcode(0x23);
 8160   ins_encode( OpcP, RegReg( dst, src) );
 8161   ins_pipe( ialu_reg_reg );
 8162 %}
 8163 
 8164 // And Register with Immediate
 8165 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8166   match(Set dst (AndI dst src));
 8167   effect(KILL cr);
 8168 
 8169   format %{ "AND    $dst,$src" %}
 8170   opcode(0x81,0x04);  /* Opcode 81 /4 */
 8171   // ins_encode( RegImm( dst, src) );
 8172   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8173   ins_pipe( ialu_reg );
 8174 %}
 8175 
 8176 // And Register with Memory
 8177 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8178   match(Set dst (AndI dst (LoadI src)));
 8179   effect(KILL cr);
 8180 
 8181   ins_cost(150);
 8182   format %{ "AND    $dst,$src" %}
 8183   opcode(0x23);
 8184   ins_encode( OpcP, RegMem( dst, src) );
 8185   ins_pipe( ialu_reg_mem );
 8186 %}
 8187 
 8188 // And Memory with Register
 8189 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8190   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8191   effect(KILL cr);
 8192 
 8193   ins_cost(150);
 8194   format %{ "AND    $dst,$src" %}
 8195   opcode(0x21);  /* Opcode 21 /r */
 8196   ins_encode( OpcP, RegMem( src, dst ) );
 8197   ins_pipe( ialu_mem_reg );
 8198 %}
 8199 
 8200 // And Memory with Immediate
 8201 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8202   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
 8203   effect(KILL cr);
 8204 
 8205   ins_cost(125);
 8206   format %{ "AND    $dst,$src" %}
 8207   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
 8208   // ins_encode( MemImm( dst, src) );
 8209   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8210   ins_pipe( ialu_mem_imm );
 8211 %}
 8212 
 8213 // BMI1 instructions
 8214 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
 8215   match(Set dst (AndI (XorI src1 minus_1) src2));
 8216   predicate(UseBMI1Instructions);
 8217   effect(KILL cr);
 8218 
 8219   format %{ "ANDNL  $dst, $src1, $src2" %}
 8220 
 8221   ins_encode %{
 8222     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
 8223   %}
 8224   ins_pipe(ialu_reg);
 8225 %}
 8226 
 8227 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
 8228   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
 8229   predicate(UseBMI1Instructions);
 8230   effect(KILL cr);
 8231 
 8232   ins_cost(125);
 8233   format %{ "ANDNL  $dst, $src1, $src2" %}
 8234 
 8235   ins_encode %{
 8236     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
 8237   %}
 8238   ins_pipe(ialu_reg_mem);
 8239 %}
 8240 
 8241 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
 8242   match(Set dst (AndI (SubI imm_zero src) src));
 8243   predicate(UseBMI1Instructions);
 8244   effect(KILL cr);
 8245 
 8246   format %{ "BLSIL  $dst, $src" %}
 8247 
 8248   ins_encode %{
 8249     __ blsil($dst$$Register, $src$$Register);
 8250   %}
 8251   ins_pipe(ialu_reg);
 8252 %}
 8253 
 8254 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
 8255   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
 8256   predicate(UseBMI1Instructions);
 8257   effect(KILL cr);
 8258 
 8259   ins_cost(125);
 8260   format %{ "BLSIL  $dst, $src" %}
 8261 
 8262   ins_encode %{
 8263     __ blsil($dst$$Register, $src$$Address);
 8264   %}
 8265   ins_pipe(ialu_reg_mem);
 8266 %}
 8267 
 8268 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8269 %{
 8270   match(Set dst (XorI (AddI src minus_1) src));
 8271   predicate(UseBMI1Instructions);
 8272   effect(KILL cr);
 8273 
 8274   format %{ "BLSMSKL $dst, $src" %}
 8275 
 8276   ins_encode %{
 8277     __ blsmskl($dst$$Register, $src$$Register);
 8278   %}
 8279 
 8280   ins_pipe(ialu_reg);
 8281 %}
 8282 
 8283 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8284 %{
 8285   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
 8286   predicate(UseBMI1Instructions);
 8287   effect(KILL cr);
 8288 
 8289   ins_cost(125);
 8290   format %{ "BLSMSKL $dst, $src" %}
 8291 
 8292   ins_encode %{
 8293     __ blsmskl($dst$$Register, $src$$Address);
 8294   %}
 8295 
 8296   ins_pipe(ialu_reg_mem);
 8297 %}
 8298 
 8299 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
 8300 %{
 8301   match(Set dst (AndI (AddI src minus_1) src) );
 8302   predicate(UseBMI1Instructions);
 8303   effect(KILL cr);
 8304 
 8305   format %{ "BLSRL  $dst, $src" %}
 8306 
 8307   ins_encode %{
 8308     __ blsrl($dst$$Register, $src$$Register);
 8309   %}
 8310 
 8311   ins_pipe(ialu_reg);
 8312 %}
 8313 
 8314 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
 8315 %{
 8316   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
 8317   predicate(UseBMI1Instructions);
 8318   effect(KILL cr);
 8319 
 8320   ins_cost(125);
 8321   format %{ "BLSRL  $dst, $src" %}
 8322 
 8323   ins_encode %{
 8324     __ blsrl($dst$$Register, $src$$Address);
 8325   %}
 8326 
 8327   ins_pipe(ialu_reg_mem);
 8328 %}
 8329 
 8330 // Or Instructions
 8331 // Or Register with Register
 8332 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8333   match(Set dst (OrI dst src));
 8334   effect(KILL cr);
 8335 
 8336   size(2);
 8337   format %{ "OR     $dst,$src" %}
 8338   opcode(0x0B);
 8339   ins_encode( OpcP, RegReg( dst, src) );
 8340   ins_pipe( ialu_reg_reg );
 8341 %}
 8342 
 8343 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
 8344   match(Set dst (OrI dst (CastP2X src)));
 8345   effect(KILL cr);
 8346 
 8347   size(2);
 8348   format %{ "OR     $dst,$src" %}
 8349   opcode(0x0B);
 8350   ins_encode( OpcP, RegReg( dst, src) );
 8351   ins_pipe( ialu_reg_reg );
 8352 %}
 8353 
 8354 
 8355 // Or Register with Immediate
 8356 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8357   match(Set dst (OrI dst src));
 8358   effect(KILL cr);
 8359 
 8360   format %{ "OR     $dst,$src" %}
 8361   opcode(0x81,0x01);  /* Opcode 81 /1 id */
 8362   // ins_encode( RegImm( dst, src) );
 8363   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8364   ins_pipe( ialu_reg );
 8365 %}
 8366 
 8367 // Or Register with Memory
 8368 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8369   match(Set dst (OrI dst (LoadI src)));
 8370   effect(KILL cr);
 8371 
 8372   ins_cost(150);
 8373   format %{ "OR     $dst,$src" %}
 8374   opcode(0x0B);
 8375   ins_encode( OpcP, RegMem( dst, src) );
 8376   ins_pipe( ialu_reg_mem );
 8377 %}
 8378 
 8379 // Or Memory with Register
 8380 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8381   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8382   effect(KILL cr);
 8383 
 8384   ins_cost(150);
 8385   format %{ "OR     $dst,$src" %}
 8386   opcode(0x09);  /* Opcode 09 /r */
 8387   ins_encode( OpcP, RegMem( src, dst ) );
 8388   ins_pipe( ialu_mem_reg );
 8389 %}
 8390 
 8391 // Or Memory with Immediate
 8392 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8393   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
 8394   effect(KILL cr);
 8395 
 8396   ins_cost(125);
 8397   format %{ "OR     $dst,$src" %}
 8398   opcode(0x81,0x1);  /* Opcode 81 /1 id */
 8399   // ins_encode( MemImm( dst, src) );
 8400   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8401   ins_pipe( ialu_mem_imm );
 8402 %}
 8403 
 8404 // ROL/ROR
 8405 // ROL expand
 8406 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8407   effect(USE_DEF dst, USE shift, KILL cr);
 8408 
 8409   format %{ "ROL    $dst, $shift" %}
 8410   opcode(0xD1, 0x0); /* Opcode D1 /0 */
 8411   ins_encode( OpcP, RegOpc( dst ));
 8412   ins_pipe( ialu_reg );
 8413 %}
 8414 
 8415 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8416   effect(USE_DEF dst, USE shift, KILL cr);
 8417 
 8418   format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
 8420   ins_encode( RegOpcImm(dst, shift) );
 8421   ins_pipe(ialu_reg);
 8422 %}
 8423 
 8424 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
 8425   effect(USE_DEF dst, USE shift, KILL cr);
 8426 
 8427   format %{ "ROL    $dst, $shift" %}
 8428   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
 8429   ins_encode(OpcP, RegOpc(dst));
 8430   ins_pipe( ialu_reg_reg );
 8431 %}
 8432 // end of ROL expand
 8433 
 8434 // ROL 32bit by one once
 8435 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
 8436   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8437 
 8438   expand %{
 8439     rolI_eReg_imm1(dst, lshift, cr);
 8440   %}
 8441 %}
 8442 
 8443 // ROL 32bit var by imm8 once
 8444 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
 8445   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8446   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 8447 
 8448   expand %{
 8449     rolI_eReg_imm8(dst, lshift, cr);
 8450   %}
 8451 %}
 8452 
 8453 // ROL 32bit var by var once
 8454 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8455   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
 8456 
 8457   expand %{
 8458     rolI_eReg_CL(dst, shift, cr);
 8459   %}
 8460 %}
 8461 
 8462 // ROL 32bit var by var once
 8463 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8464   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
 8465 
 8466   expand %{
 8467     rolI_eReg_CL(dst, shift, cr);
 8468   %}
 8469 %}
 8470 
 8471 // ROR expand
 8472 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
 8473   effect(USE_DEF dst, USE shift, KILL cr);
 8474 
 8475   format %{ "ROR    $dst, $shift" %}
 8476   opcode(0xD1,0x1);  /* Opcode D1 /1 */
 8477   ins_encode( OpcP, RegOpc( dst ) );
 8478   ins_pipe( ialu_reg );
 8479 %}
 8480 
 8481 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
 8482   effect (USE_DEF dst, USE shift, KILL cr);
 8483 
 8484   format %{ "ROR    $dst, $shift" %}
 8485   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
 8486   ins_encode( RegOpcImm(dst, shift) );
 8487   ins_pipe( ialu_reg );
 8488 %}
 8489 
 8490 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
 8491   effect(USE_DEF dst, USE shift, KILL cr);
 8492 
 8493   format %{ "ROR    $dst, $shift" %}
 8494   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
 8495   ins_encode(OpcP, RegOpc(dst));
 8496   ins_pipe( ialu_reg_reg );
 8497 %}
 8498 // end of ROR expand
 8499 
 8500 // ROR right once
 8501 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
 8502   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8503 
 8504   expand %{
 8505     rorI_eReg_imm1(dst, rshift, cr);
 8506   %}
 8507 %}
 8508 
 8509 // ROR 32bit by immI8 once
 8510 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
 8511   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
 8512   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 8513 
 8514   expand %{
 8515     rorI_eReg_imm8(dst, rshift, cr);
 8516   %}
 8517 %}
 8518 
 8519 // ROR 32bit var by var once
 8520 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
 8521   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
 8522 
 8523   expand %{
 8524     rorI_eReg_CL(dst, shift, cr);
 8525   %}
 8526 %}
 8527 
 8528 // ROR 32bit var by var once
 8529 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
 8530   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
 8531 
 8532   expand %{
 8533     rorI_eReg_CL(dst, shift, cr);
 8534   %}
 8535 %}
 8536 
 8537 // Xor Instructions
 8538 // Xor Register with Register
 8539 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
 8540   match(Set dst (XorI dst src));
 8541   effect(KILL cr);
 8542 
 8543   size(2);
 8544   format %{ "XOR    $dst,$src" %}
 8545   opcode(0x33);
 8546   ins_encode( OpcP, RegReg( dst, src) );
 8547   ins_pipe( ialu_reg_reg );
 8548 %}
 8549 
 8550 // Xor Register with Immediate -1
 8551 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
 8552   match(Set dst (XorI dst imm));
 8553 
 8554   size(2);
 8555   format %{ "NOT    $dst" %}
 8556   ins_encode %{
 8557      __ notl($dst$$Register);
 8558   %}
 8559   ins_pipe( ialu_reg );
 8560 %}
 8561 
 8562 // Xor Register with Immediate
 8563 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
 8564   match(Set dst (XorI dst src));
 8565   effect(KILL cr);
 8566 
 8567   format %{ "XOR    $dst,$src" %}
 8568   opcode(0x81,0x06);  /* Opcode 81 /6 id */
 8569   // ins_encode( RegImm( dst, src) );
 8570   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
 8571   ins_pipe( ialu_reg );
 8572 %}
 8573 
 8574 // Xor Register with Memory
 8575 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
 8576   match(Set dst (XorI dst (LoadI src)));
 8577   effect(KILL cr);
 8578 
 8579   ins_cost(150);
 8580   format %{ "XOR    $dst,$src" %}
 8581   opcode(0x33);
 8582   ins_encode( OpcP, RegMem(dst, src) );
 8583   ins_pipe( ialu_reg_mem );
 8584 %}
 8585 
 8586 // Xor Memory with Register
 8587 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
 8588   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8589   effect(KILL cr);
 8590 
 8591   ins_cost(150);
 8592   format %{ "XOR    $dst,$src" %}
 8593   opcode(0x31);  /* Opcode 31 /r */
 8594   ins_encode( OpcP, RegMem( src, dst ) );
 8595   ins_pipe( ialu_mem_reg );
 8596 %}
 8597 
 8598 // Xor Memory with Immediate
 8599 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
 8600   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
 8601   effect(KILL cr);
 8602 
 8603   ins_cost(125);
 8604   format %{ "XOR    $dst,$src" %}
 8605   opcode(0x81,0x6);  /* Opcode 81 /6 id */
 8606   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
 8607   ins_pipe( ialu_mem_imm );
 8608 %}
 8609 
 8610 //----------Convert Int to Boolean---------------------------------------------
 8611 
 8612 instruct movI_nocopy(rRegI dst, rRegI src) %{
 8613   effect( DEF dst, USE src );
 8614   format %{ "MOV    $dst,$src" %}
 8615   ins_encode( enc_Copy( dst, src) );
 8616   ins_pipe( ialu_reg_reg );
 8617 %}
 8618 
 8619 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8620   effect( USE_DEF dst, USE src, KILL cr );
 8621 
 8622   size(4);
 8623   format %{ "NEG    $dst\n\t"
 8624             "ADC    $dst,$src" %}
 8625   ins_encode( neg_reg(dst),
 8626               OpcRegReg(0x13,dst,src) );
 8627   ins_pipe( ialu_reg_reg_long );
 8628 %}
 8629 
 8630 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
 8631   match(Set dst (Conv2B src));
 8632 
 8633   expand %{
 8634     movI_nocopy(dst,src);
 8635     ci2b(dst,src,cr);
 8636   %}
 8637 %}
 8638 
 8639 instruct movP_nocopy(rRegI dst, eRegP src) %{
 8640   effect( DEF dst, USE src );
 8641   format %{ "MOV    $dst,$src" %}
 8642   ins_encode( enc_Copy( dst, src) );
 8643   ins_pipe( ialu_reg_reg );
 8644 %}
 8645 
 8646 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8647   effect( USE_DEF dst, USE src, KILL cr );
 8648   format %{ "NEG    $dst\n\t"
 8649             "ADC    $dst,$src" %}
 8650   ins_encode( neg_reg(dst),
 8651               OpcRegReg(0x13,dst,src) );
 8652   ins_pipe( ialu_reg_reg_long );
 8653 %}
 8654 
 8655 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
 8656   match(Set dst (Conv2B src));
 8657 
 8658   expand %{
 8659     movP_nocopy(dst,src);
 8660     cp2b(dst,src,cr);
 8661   %}
 8662 %}
 8663 
 8664 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
 8665   match(Set dst (CmpLTMask p q));
 8666   effect(KILL cr);
 8667   ins_cost(400);
 8668 
  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
 8670   format %{ "XOR    $dst,$dst\n\t"
 8671             "CMP    $p,$q\n\t"
 8672             "SETlt  $dst\n\t"
 8673             "NEG    $dst" %}
 8674   ins_encode %{
 8675     Register Rp = $p$$Register;
 8676     Register Rq = $q$$Register;
 8677     Register Rd = $dst$$Register;
 8678     Label done;
 8679     __ xorl(Rd, Rd);
 8680     __ cmpl(Rp, Rq);
 8681     __ setb(Assembler::less, Rd);
 8682     __ negl(Rd);
 8683   %}
 8684 
 8685   ins_pipe(pipe_slow);
 8686 %}
 8687 
 8688 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
 8689   match(Set dst (CmpLTMask dst zero));
 8690   effect(DEF dst, KILL cr);
 8691   ins_cost(100);
 8692 
 8693   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
 8694   ins_encode %{
 8695   __ sarl($dst$$Register, 31);
 8696   %}
 8697   ins_pipe(ialu_reg);
 8698 %}
 8699 
 8700 /* better to save a register than avoid a branch */
 8701 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8702   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
 8703   effect(KILL cr);
 8704   ins_cost(400);
 8705   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
 8706             "JGE    done\n\t"
 8707             "ADD    $p,$y\n"
 8708             "done:  " %}
 8709   ins_encode %{
 8710     Register Rp = $p$$Register;
 8711     Register Rq = $q$$Register;
 8712     Register Ry = $y$$Register;
 8713     Label done;
 8714     __ subl(Rp, Rq);
 8715     __ jccb(Assembler::greaterEqual, done);
 8716     __ addl(Rp, Ry);
 8717     __ bind(done);
 8718   %}
 8719 
 8720   ins_pipe(pipe_cmplt);
 8721 %}
 8722 
 8723 /* better to save a register than avoid a branch */
 8724 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
 8725   match(Set y (AndI (CmpLTMask p q) y));
 8726   effect(KILL cr);
 8727 
 8728   ins_cost(300);
 8729 
 8730   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
 8731             "JLT      done\n\t"
 8732             "XORL     $y, $y\n"
 8733             "done:  " %}
 8734   ins_encode %{
 8735     Register Rp = $p$$Register;
 8736     Register Rq = $q$$Register;
 8737     Register Ry = $y$$Register;
 8738     Label done;
 8739     __ cmpl(Rp, Rq);
 8740     __ jccb(Assembler::less, done);
 8741     __ xorl(Ry, Ry);
 8742     __ bind(done);
 8743   %}
 8744 
 8745   ins_pipe(pipe_cmplt);
 8746 %}
 8747 
 8748 /* If I enable this, I encourage spilling in the inner loop of compress.
 8749 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
 8750   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
 8751 */
 8752 //----------Overflow Math Instructions-----------------------------------------
 8753 
 8754 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8755 %{
 8756   match(Set cr (OverflowAddI op1 op2));
 8757   effect(DEF cr, USE_KILL op1, USE op2);
 8758 
 8759   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8760 
 8761   ins_encode %{
 8762     __ addl($op1$$Register, $op2$$Register);
 8763   %}
 8764   ins_pipe(ialu_reg_reg);
 8765 %}
 8766 
 8767 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
 8768 %{
 8769   match(Set cr (OverflowAddI op1 op2));
 8770   effect(DEF cr, USE_KILL op1, USE op2);
 8771 
 8772   format %{ "ADD    $op1, $op2\t# overflow check int" %}
 8773 
 8774   ins_encode %{
 8775     __ addl($op1$$Register, $op2$$constant);
 8776   %}
 8777   ins_pipe(ialu_reg_reg);
 8778 %}
 8779 
 8780 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
 8781 %{
 8782   match(Set cr (OverflowSubI op1 op2));
 8783 
 8784   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8785   ins_encode %{
 8786     __ cmpl($op1$$Register, $op2$$Register);
 8787   %}
 8788   ins_pipe(ialu_reg_reg);
 8789 %}
 8790 
 8791 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
 8792 %{
 8793   match(Set cr (OverflowSubI op1 op2));
 8794 
 8795   format %{ "CMP    $op1, $op2\t# overflow check int" %}
 8796   ins_encode %{
 8797     __ cmpl($op1$$Register, $op2$$constant);
 8798   %}
 8799   ins_pipe(ialu_reg_reg);
 8800 %}
 8801 
 8802 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
 8803 %{
 8804   match(Set cr (OverflowSubI zero op2));
 8805   effect(DEF cr, USE_KILL op2);
 8806 
 8807   format %{ "NEG    $op2\t# overflow check int" %}
 8808   ins_encode %{
 8809     __ negl($op2$$Register);
 8810   %}
 8811   ins_pipe(ialu_reg_reg);
 8812 %}
 8813 
 8814 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
 8815 %{
 8816   match(Set cr (OverflowMulI op1 op2));
 8817   effect(DEF cr, USE_KILL op1, USE op2);
 8818 
 8819   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
 8820   ins_encode %{
 8821     __ imull($op1$$Register, $op2$$Register);
 8822   %}
 8823   ins_pipe(ialu_reg_reg_alu0);
 8824 %}
 8825 
 8826 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
 8827 %{
 8828   match(Set cr (OverflowMulI op1 op2));
 8829   effect(DEF cr, TEMP tmp, USE op1, USE op2);
 8830 
 8831   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
 8832   ins_encode %{
 8833     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
 8834   %}
 8835   ins_pipe(ialu_reg_reg_alu0);
 8836 %}
 8837 
 8838 // Integer Absolute Instructions
 8839 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
 8840 %{
 8841   match(Set dst (AbsI src));
 8842   effect(TEMP dst, TEMP tmp, KILL cr);
 8843   format %{ "movl $tmp, $src\n\t"
 8844             "sarl $tmp, 31\n\t"
 8845             "movl $dst, $src\n\t"
 8846             "xorl $dst, $tmp\n\t"
 8847             "subl $dst, $tmp\n"
 8848           %}
 8849   ins_encode %{
 8850     __ movl($tmp$$Register, $src$$Register);
 8851     __ sarl($tmp$$Register, 31);
 8852     __ movl($dst$$Register, $src$$Register);
 8853     __ xorl($dst$$Register, $tmp$$Register);
 8854     __ subl($dst$$Register, $tmp$$Register);
 8855   %}
 8856 
 8857   ins_pipe(ialu_reg_reg);
 8858 %}
 8859 
 8860 //----------Long Instructions------------------------------------------------
 8861 // Add Long Register with Register
 8862 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8863   match(Set dst (AddL dst src));
 8864   effect(KILL cr);
 8865   ins_cost(200);
 8866   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8867             "ADC    $dst.hi,$src.hi" %}
 8868   opcode(0x03, 0x13);
 8869   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8870   ins_pipe( ialu_reg_reg_long );
 8871 %}
 8872 
 8873 // Add Long Register with Immediate
 8874 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8875   match(Set dst (AddL dst src));
 8876   effect(KILL cr);
 8877   format %{ "ADD    $dst.lo,$src.lo\n\t"
 8878             "ADC    $dst.hi,$src.hi" %}
 8879   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
 8880   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8881   ins_pipe( ialu_reg_long );
 8882 %}
 8883 
 8884 // Add Long Register with Memory
 8885 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8886   match(Set dst (AddL dst (LoadL mem)));
 8887   effect(KILL cr);
 8888   ins_cost(125);
 8889   format %{ "ADD    $dst.lo,$mem\n\t"
 8890             "ADC    $dst.hi,$mem+4" %}
 8891   opcode(0x03, 0x13);
 8892   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8893   ins_pipe( ialu_reg_long_mem );
 8894 %}
 8895 
 8896 // Subtract Long Register with Register.
 8897 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8898   match(Set dst (SubL dst src));
 8899   effect(KILL cr);
 8900   ins_cost(200);
 8901   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8902             "SBB    $dst.hi,$src.hi" %}
 8903   opcode(0x2B, 0x1B);
 8904   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
 8905   ins_pipe( ialu_reg_reg_long );
 8906 %}
 8907 
 8908 // Subtract Long Register with Immediate
 8909 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8910   match(Set dst (SubL dst src));
 8911   effect(KILL cr);
 8912   format %{ "SUB    $dst.lo,$src.lo\n\t"
 8913             "SBB    $dst.hi,$src.hi" %}
 8914   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
 8915   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8916   ins_pipe( ialu_reg_long );
 8917 %}
 8918 
 8919 // Subtract Long Register with Memory
 8920 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8921   match(Set dst (SubL dst (LoadL mem)));
 8922   effect(KILL cr);
 8923   ins_cost(125);
 8924   format %{ "SUB    $dst.lo,$mem\n\t"
 8925             "SBB    $dst.hi,$mem+4" %}
 8926   opcode(0x2B, 0x1B);
 8927   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8928   ins_pipe( ialu_reg_long_mem );
 8929 %}
 8930 
 8931 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
 8932   match(Set dst (SubL zero dst));
 8933   effect(KILL cr);
 8934   ins_cost(300);
 8935   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
 8936   ins_encode( neg_long(dst) );
 8937   ins_pipe( ialu_reg_reg_long );
 8938 %}
 8939 
 8940 // And Long Register with Register
 8941 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 8942   match(Set dst (AndL dst src));
 8943   effect(KILL cr);
 8944   format %{ "AND    $dst.lo,$src.lo\n\t"
 8945             "AND    $dst.hi,$src.hi" %}
 8946   opcode(0x23,0x23);
 8947   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 8948   ins_pipe( ialu_reg_reg_long );
 8949 %}
 8950 
 8951 // And Long Register with Immediate
 8952 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 8953   match(Set dst (AndL dst src));
 8954   effect(KILL cr);
 8955   format %{ "AND    $dst.lo,$src.lo\n\t"
 8956             "AND    $dst.hi,$src.hi" %}
 8957   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
 8958   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 8959   ins_pipe( ialu_reg_long );
 8960 %}
 8961 
 8962 // And Long Register with Memory
 8963 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 8964   match(Set dst (AndL dst (LoadL mem)));
 8965   effect(KILL cr);
 8966   ins_cost(125);
 8967   format %{ "AND    $dst.lo,$mem\n\t"
 8968             "AND    $dst.hi,$mem+4" %}
 8969   opcode(0x23, 0x23);
 8970   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 8971   ins_pipe( ialu_reg_long_mem );
 8972 %}
 8973 
 8974 // BMI1 instructions
 8975 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
 8976   match(Set dst (AndL (XorL src1 minus_1) src2));
 8977   predicate(UseBMI1Instructions);
 8978   effect(KILL cr, TEMP dst);
 8979 
 8980   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
 8981             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
 8982          %}
 8983 
 8984   ins_encode %{
 8985     Register Rdst = $dst$$Register;
 8986     Register Rsrc1 = $src1$$Register;
 8987     Register Rsrc2 = $src2$$Register;
 8988     __ andnl(Rdst, Rsrc1, Rsrc2);
 8989     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
 8990   %}
 8991   ins_pipe(ialu_reg_reg_long);
 8992 %}
 8993 
 8994 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
 8995   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
 8996   predicate(UseBMI1Instructions);
 8997   effect(KILL cr, TEMP dst);
 8998 
 8999   ins_cost(125);
 9000   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
 9001             "ANDNL  $dst.hi, $src1.hi, $src2+4"
 9002          %}
 9003 
 9004   ins_encode %{
 9005     Register Rdst = $dst$$Register;
 9006     Register Rsrc1 = $src1$$Register;
 9007     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
 9008 
 9009     __ andnl(Rdst, Rsrc1, $src2$$Address);
 9010     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
 9011   %}
 9012   ins_pipe(ialu_reg_mem);
 9013 %}
 9014 
 9015 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
 9016   match(Set dst (AndL (SubL imm_zero src) src));
 9017   predicate(UseBMI1Instructions);
 9018   effect(KILL cr, TEMP dst);
 9019 
 9020   format %{ "MOVL   $dst.hi, 0\n\t"
 9021             "BLSIL  $dst.lo, $src.lo\n\t"
 9022             "JNZ    done\n\t"
 9023             "BLSIL  $dst.hi, $src.hi\n"
 9024             "done:"
 9025          %}
 9026 
 9027   ins_encode %{
 9028     Label done;
 9029     Register Rdst = $dst$$Register;
 9030     Register Rsrc = $src$$Register;
 9031     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9032     __ blsil(Rdst, Rsrc);
 9033     __ jccb(Assembler::notZero, done);
 9034     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9035     __ bind(done);
 9036   %}
 9037   ins_pipe(ialu_reg);
 9038 %}
 9039 
 9040 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
 9041   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
 9042   predicate(UseBMI1Instructions);
 9043   effect(KILL cr, TEMP dst);
 9044 
 9045   ins_cost(125);
 9046   format %{ "MOVL   $dst.hi, 0\n\t"
 9047             "BLSIL  $dst.lo, $src\n\t"
 9048             "JNZ    done\n\t"
 9049             "BLSIL  $dst.hi, $src+4\n"
 9050             "done:"
 9051          %}
 9052 
 9053   ins_encode %{
 9054     Label done;
 9055     Register Rdst = $dst$$Register;
 9056     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9057 
 9058     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9059     __ blsil(Rdst, $src$$Address);
 9060     __ jccb(Assembler::notZero, done);
 9061     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
 9062     __ bind(done);
 9063   %}
 9064   ins_pipe(ialu_reg_mem);
 9065 %}
 9066 
 9067 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9068 %{
 9069   match(Set dst (XorL (AddL src minus_1) src));
 9070   predicate(UseBMI1Instructions);
 9071   effect(KILL cr, TEMP dst);
 9072 
 9073   format %{ "MOVL    $dst.hi, 0\n\t"
 9074             "BLSMSKL $dst.lo, $src.lo\n\t"
 9075             "JNC     done\n\t"
 9076             "BLSMSKL $dst.hi, $src.hi\n"
 9077             "done:"
 9078          %}
 9079 
 9080   ins_encode %{
 9081     Label done;
 9082     Register Rdst = $dst$$Register;
 9083     Register Rsrc = $src$$Register;
 9084     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9085     __ blsmskl(Rdst, Rsrc);
 9086     __ jccb(Assembler::carryClear, done);
 9087     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9088     __ bind(done);
 9089   %}
 9090 
 9091   ins_pipe(ialu_reg);
 9092 %}
 9093 
 9094 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9095 %{
 9096   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
 9097   predicate(UseBMI1Instructions);
 9098   effect(KILL cr, TEMP dst);
 9099 
 9100   ins_cost(125);
 9101   format %{ "MOVL    $dst.hi, 0\n\t"
 9102             "BLSMSKL $dst.lo, $src\n\t"
 9103             "JNC     done\n\t"
 9104             "BLSMSKL $dst.hi, $src+4\n"
 9105             "done:"
 9106          %}
 9107 
 9108   ins_encode %{
 9109     Label done;
 9110     Register Rdst = $dst$$Register;
 9111     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9112 
 9113     __ movl(HIGH_FROM_LOW(Rdst), 0);
 9114     __ blsmskl(Rdst, $src$$Address);
 9115     __ jccb(Assembler::carryClear, done);
 9116     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
 9117     __ bind(done);
 9118   %}
 9119 
 9120   ins_pipe(ialu_reg_mem);
 9121 %}
 9122 
 9123 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
 9124 %{
 9125   match(Set dst (AndL (AddL src minus_1) src) );
 9126   predicate(UseBMI1Instructions);
 9127   effect(KILL cr, TEMP dst);
 9128 
 9129   format %{ "MOVL   $dst.hi, $src.hi\n\t"
 9130             "BLSRL  $dst.lo, $src.lo\n\t"
 9131             "JNC    done\n\t"
 9132             "BLSRL  $dst.hi, $src.hi\n"
 9133             "done:"
 9134   %}
 9135 
 9136   ins_encode %{
 9137     Label done;
 9138     Register Rdst = $dst$$Register;
 9139     Register Rsrc = $src$$Register;
 9140     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9141     __ blsrl(Rdst, Rsrc);
 9142     __ jccb(Assembler::carryClear, done);
 9143     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
 9144     __ bind(done);
 9145   %}
 9146 
 9147   ins_pipe(ialu_reg);
 9148 %}
 9149 
 9150 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
 9151 %{
 9152   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
 9153   predicate(UseBMI1Instructions);
 9154   effect(KILL cr, TEMP dst);
 9155 
 9156   ins_cost(125);
 9157   format %{ "MOVL   $dst.hi, $src+4\n\t"
 9158             "BLSRL  $dst.lo, $src\n\t"
 9159             "JNC    done\n\t"
 9160             "BLSRL  $dst.hi, $src+4\n"
 9161             "done:"
 9162   %}
 9163 
 9164   ins_encode %{
 9165     Label done;
 9166     Register Rdst = $dst$$Register;
 9167     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
 9168     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
 9169     __ blsrl(Rdst, $src$$Address);
 9170     __ jccb(Assembler::carryClear, done);
 9171     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
 9172     __ bind(done);
 9173   %}
 9174 
 9175   ins_pipe(ialu_reg_mem);
 9176 %}
 9177 
 9178 // Or Long Register with Register
 9179 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9180   match(Set dst (OrL dst src));
 9181   effect(KILL cr);
 9182   format %{ "OR     $dst.lo,$src.lo\n\t"
 9183             "OR     $dst.hi,$src.hi" %}
 9184   opcode(0x0B,0x0B);
 9185   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9186   ins_pipe( ialu_reg_reg_long );
 9187 %}
 9188 
 9189 // Or Long Register with Immediate
 9190 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9191   match(Set dst (OrL dst src));
 9192   effect(KILL cr);
 9193   format %{ "OR     $dst.lo,$src.lo\n\t"
 9194             "OR     $dst.hi,$src.hi" %}
 9195   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
 9196   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9197   ins_pipe( ialu_reg_long );
 9198 %}
 9199 
 9200 // Or Long Register with Memory
 9201 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9202   match(Set dst (OrL dst (LoadL mem)));
 9203   effect(KILL cr);
 9204   ins_cost(125);
 9205   format %{ "OR     $dst.lo,$mem\n\t"
 9206             "OR     $dst.hi,$mem+4" %}
 9207   opcode(0x0B,0x0B);
 9208   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9209   ins_pipe( ialu_reg_long_mem );
 9210 %}
 9211 
 9212 // Xor Long Register with Register
 9213 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
 9214   match(Set dst (XorL dst src));
 9215   effect(KILL cr);
 9216   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9217             "XOR    $dst.hi,$src.hi" %}
 9218   opcode(0x33,0x33);
 9219   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
 9220   ins_pipe( ialu_reg_reg_long );
 9221 %}
 9222 
 9223 // Xor Long Register with Immediate -1
 9224 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
 9225   match(Set dst (XorL dst imm));
 9226   format %{ "NOT    $dst.lo\n\t"
 9227             "NOT    $dst.hi" %}
 9228   ins_encode %{
 9229      __ notl($dst$$Register);
 9230      __ notl(HIGH_FROM_LOW($dst$$Register));
 9231   %}
 9232   ins_pipe( ialu_reg_long );
 9233 %}
 9234 
 9235 // Xor Long Register with Immediate
 9236 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
 9237   match(Set dst (XorL dst src));
 9238   effect(KILL cr);
 9239   format %{ "XOR    $dst.lo,$src.lo\n\t"
 9240             "XOR    $dst.hi,$src.hi" %}
 9241   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
 9242   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
 9243   ins_pipe( ialu_reg_long );
 9244 %}
 9245 
 9246 // Xor Long Register with Memory
 9247 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
 9248   match(Set dst (XorL dst (LoadL mem)));
 9249   effect(KILL cr);
 9250   ins_cost(125);
 9251   format %{ "XOR    $dst.lo,$mem\n\t"
 9252             "XOR    $dst.hi,$mem+4" %}
 9253   opcode(0x33,0x33);
 9254   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
 9255   ins_pipe( ialu_reg_long_mem );
 9256 %}
 9257 
 9258 // Shift Left Long by 1
 9259 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
 9260   predicate(UseNewLongLShift);
 9261   match(Set dst (LShiftL dst cnt));
 9262   effect(KILL cr);
 9263   ins_cost(100);
 9264   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9265             "ADC    $dst.hi,$dst.hi" %}
 9266   ins_encode %{
 9267     __ addl($dst$$Register,$dst$$Register);
 9268     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9269   %}
 9270   ins_pipe( ialu_reg_long );
 9271 %}
 9272 
 9273 // Shift Left Long by 2
 9274 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
 9275   predicate(UseNewLongLShift);
 9276   match(Set dst (LShiftL dst cnt));
 9277   effect(KILL cr);
 9278   ins_cost(100);
 9279   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9280             "ADC    $dst.hi,$dst.hi\n\t"
 9281             "ADD    $dst.lo,$dst.lo\n\t"
 9282             "ADC    $dst.hi,$dst.hi" %}
 9283   ins_encode %{
 9284     __ addl($dst$$Register,$dst$$Register);
 9285     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9286     __ addl($dst$$Register,$dst$$Register);
 9287     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9288   %}
 9289   ins_pipe( ialu_reg_long );
 9290 %}
 9291 
 9292 // Shift Left Long by 3
 9293 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
 9294   predicate(UseNewLongLShift);
 9295   match(Set dst (LShiftL dst cnt));
 9296   effect(KILL cr);
 9297   ins_cost(100);
 9298   format %{ "ADD    $dst.lo,$dst.lo\n\t"
 9299             "ADC    $dst.hi,$dst.hi\n\t"
 9300             "ADD    $dst.lo,$dst.lo\n\t"
 9301             "ADC    $dst.hi,$dst.hi\n\t"
 9302             "ADD    $dst.lo,$dst.lo\n\t"
 9303             "ADC    $dst.hi,$dst.hi" %}
 9304   ins_encode %{
 9305     __ addl($dst$$Register,$dst$$Register);
 9306     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9307     __ addl($dst$$Register,$dst$$Register);
 9308     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9309     __ addl($dst$$Register,$dst$$Register);
 9310     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
 9311   %}
 9312   ins_pipe( ialu_reg_long );
 9313 %}
 9314 
 9315 // Shift Left Long by 1-31
 9316 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9317   match(Set dst (LShiftL dst cnt));
 9318   effect(KILL cr);
 9319   ins_cost(200);
 9320   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
 9321             "SHL    $dst.lo,$cnt" %}
 9322   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
 9323   ins_encode( move_long_small_shift(dst,cnt) );
 9324   ins_pipe( ialu_reg_long );
 9325 %}
 9326 
 9327 // Shift Left Long by 32-63
 9328 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9329   match(Set dst (LShiftL dst cnt));
 9330   effect(KILL cr);
 9331   ins_cost(300);
 9332   format %{ "MOV    $dst.hi,$dst.lo\n"
 9333           "\tSHL    $dst.hi,$cnt-32\n"
 9334           "\tXOR    $dst.lo,$dst.lo" %}
 9335   opcode(0xC1, 0x4);  /* C1 /4 ib */
 9336   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9337   ins_pipe( ialu_reg_long );
 9338 %}
 9339 
 9340 // Shift Left Long by variable
 9341 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9342   match(Set dst (LShiftL dst shift));
 9343   effect(KILL cr);
 9344   ins_cost(500+200);
 9345   size(17);
 9346   format %{ "TEST   $shift,32\n\t"
 9347             "JEQ,s  small\n\t"
 9348             "MOV    $dst.hi,$dst.lo\n\t"
 9349             "XOR    $dst.lo,$dst.lo\n"
 9350     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
 9351             "SHL    $dst.lo,$shift" %}
 9352   ins_encode( shift_left_long( dst, shift ) );
 9353   ins_pipe( pipe_slow );
 9354 %}
 9355 
 9356 // Shift Right Long by 1-31
 9357 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9358   match(Set dst (URShiftL dst cnt));
 9359   effect(KILL cr);
 9360   ins_cost(200);
 9361   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9362             "SHR    $dst.hi,$cnt" %}
 9363   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
 9364   ins_encode( move_long_small_shift(dst,cnt) );
 9365   ins_pipe( ialu_reg_long );
 9366 %}
 9367 
 9368 // Shift Right Long by 32-63
 9369 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9370   match(Set dst (URShiftL dst cnt));
 9371   effect(KILL cr);
 9372   ins_cost(300);
 9373   format %{ "MOV    $dst.lo,$dst.hi\n"
 9374           "\tSHR    $dst.lo,$cnt-32\n"
 9375           "\tXOR    $dst.hi,$dst.hi" %}
 9376   opcode(0xC1, 0x5);  /* C1 /5 ib */
 9377   ins_encode( move_long_big_shift_clr(dst,cnt) );
 9378   ins_pipe( ialu_reg_long );
 9379 %}
 9380 
 9381 // Shift Right Long by variable
 9382 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9383   match(Set dst (URShiftL dst shift));
 9384   effect(KILL cr);
 9385   ins_cost(600);
 9386   size(17);
 9387   format %{ "TEST   $shift,32\n\t"
 9388             "JEQ,s  small\n\t"
 9389             "MOV    $dst.lo,$dst.hi\n\t"
 9390             "XOR    $dst.hi,$dst.hi\n"
 9391     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9392             "SHR    $dst.hi,$shift" %}
 9393   ins_encode( shift_right_long( dst, shift ) );
 9394   ins_pipe( pipe_slow );
 9395 %}
 9396 
 9397 // Shift Right Arithmetic Long by 1-31
 9398 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
 9399   match(Set dst (RShiftL dst cnt));
 9400   effect(KILL cr);
 9401   ins_cost(200);
 9402   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
 9403             "SAR    $dst.hi,$cnt" %}
 9404   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
 9405   ins_encode( move_long_small_shift(dst,cnt) );
 9406   ins_pipe( ialu_reg_long );
 9407 %}
 9408 
 9409 // Shift Right Arithmetic Long by 32-63
 9410 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
 9411   match(Set dst (RShiftL dst cnt));
 9412   effect(KILL cr);
 9413   ins_cost(300);
 9414   format %{ "MOV    $dst.lo,$dst.hi\n"
 9415           "\tSAR    $dst.lo,$cnt-32\n"
 9416           "\tSAR    $dst.hi,31" %}
 9417   opcode(0xC1, 0x7);  /* C1 /7 ib */
 9418   ins_encode( move_long_big_shift_sign(dst,cnt) );
 9419   ins_pipe( ialu_reg_long );
 9420 %}
 9421 
 9422 // Shift Right Arithmetic Long by variable
 9423 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
 9424   match(Set dst (RShiftL dst shift));
 9425   effect(KILL cr);
 9426   ins_cost(600);
 9427   size(18);
 9428   format %{ "TEST   $shift,32\n\t"
 9429             "JEQ,s  small\n\t"
 9430             "MOV    $dst.lo,$dst.hi\n\t"
 9431             "SAR    $dst.hi,31\n"
 9432     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
 9433             "SAR    $dst.hi,$shift" %}
 9434   ins_encode( shift_right_arith_long( dst, shift ) );
 9435   ins_pipe( pipe_slow );
 9436 %}
 9437 
 9438 
 9439 //----------Double Instructions------------------------------------------------
 9440 // Double Math
 9441 
 9442 // Compare & branch
 9443 
 9444 // P6 version of double compare, sets condition codes in EFLAGS
 9445 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9446   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9447   match(Set cr (CmpD src1 src2));
 9448   effect(KILL rax);
 9449   ins_cost(150);
 9450   format %{ "FLD    $src1\n\t"
 9451             "FUCOMIP ST,$src2  // P6 instruction\n\t"
 9452             "JNP    exit\n\t"
 9453             "MOV    ah,1       // saw a NaN, set CF\n\t"
 9454             "SAHF\n"
 9455      "exit:\tNOP               // avoid branch to branch" %}
 9456   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9457   ins_encode( Push_Reg_DPR(src1),
 9458               OpcP, RegOpc(src2),
 9459               cmpF_P6_fixup );
 9460   ins_pipe( pipe_slow );
 9461 %}
 9462 
 9463 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
 9464   predicate(VM_Version::supports_cmov() && UseSSE <=1);
 9465   match(Set cr (CmpD src1 src2));
 9466   ins_cost(150);
 9467   format %{ "FLD    $src1\n\t"
 9468             "FUCOMIP ST,$src2  // P6 instruction" %}
 9469   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
 9470   ins_encode( Push_Reg_DPR(src1),
 9471               OpcP, RegOpc(src2));
 9472   ins_pipe( pipe_slow );
 9473 %}
 9474 
 9475 // Compare & branch
 9476 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
 9477   predicate(UseSSE<=1);
 9478   match(Set cr (CmpD src1 src2));
 9479   effect(KILL rax);
 9480   ins_cost(200);
 9481   format %{ "FLD    $src1\n\t"
 9482             "FCOMp  $src2\n\t"
 9483             "FNSTSW AX\n\t"
 9484             "TEST   AX,0x400\n\t"
 9485             "JZ,s   flags\n\t"
 9486             "MOV    AH,1\t# unordered treat as LT\n"
 9487     "flags:\tSAHF" %}
 9488   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9489   ins_encode( Push_Reg_DPR(src1),
 9490               OpcP, RegOpc(src2),
 9491               fpu_flags);
 9492   ins_pipe( pipe_slow );
 9493 %}
 9494 
 9495 // Compare vs zero into -1,0,1
 9496 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
 9497   predicate(UseSSE<=1);
 9498   match(Set dst (CmpD3 src1 zero));
 9499   effect(KILL cr, KILL rax);
 9500   ins_cost(280);
 9501   format %{ "FTSTD  $dst,$src1" %}
 9502   opcode(0xE4, 0xD9);
 9503   ins_encode( Push_Reg_DPR(src1),
 9504               OpcS, OpcP, PopFPU,
 9505               CmpF_Result(dst));
 9506   ins_pipe( pipe_slow );
 9507 %}
 9508 
 9509 // Compare into -1,0,1
 9510 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
 9511   predicate(UseSSE<=1);
 9512   match(Set dst (CmpD3 src1 src2));
 9513   effect(KILL cr, KILL rax);
 9514   ins_cost(300);
 9515   format %{ "FCMPD  $dst,$src1,$src2" %}
 9516   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
 9517   ins_encode( Push_Reg_DPR(src1),
 9518               OpcP, RegOpc(src2),
 9519               CmpF_Result(dst));
 9520   ins_pipe( pipe_slow );
 9521 %}
 9522 
 9523 // double compare and set condition codes in EFLAGS by XMM regs
 9524 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
 9525   predicate(UseSSE>=2);
 9526   match(Set cr (CmpD src1 src2));
 9527   ins_cost(145);
 9528   format %{ "UCOMISD $src1,$src2\n\t"
 9529             "JNP,s   exit\n\t"
 9530             "PUSHF\t# saw NaN, set CF\n\t"
 9531             "AND     [rsp], #0xffffff2b\n\t"
 9532             "POPF\n"
 9533     "exit:" %}
 9534   ins_encode %{
 9535     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9536     emit_cmpfp_fixup(_masm);
 9537   %}
 9538   ins_pipe( pipe_slow );
 9539 %}
 9540 
 9541 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
 9542   predicate(UseSSE>=2);
 9543   match(Set cr (CmpD src1 src2));
 9544   ins_cost(100);
 9545   format %{ "UCOMISD $src1,$src2" %}
 9546   ins_encode %{
 9547     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9548   %}
 9549   ins_pipe( pipe_slow );
 9550 %}
 9551 
 9552 // double compare and set condition codes in EFLAGS by XMM regs
 9553 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
 9554   predicate(UseSSE>=2);
 9555   match(Set cr (CmpD src1 (LoadD src2)));
 9556   ins_cost(145);
 9557   format %{ "UCOMISD $src1,$src2\n\t"
 9558             "JNP,s   exit\n\t"
 9559             "PUSHF\t# saw NaN, set CF\n\t"
 9560             "AND     [rsp], #0xffffff2b\n\t"
 9561             "POPF\n"
 9562     "exit:" %}
 9563   ins_encode %{
 9564     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9565     emit_cmpfp_fixup(_masm);
 9566   %}
 9567   ins_pipe( pipe_slow );
 9568 %}
 9569 
 9570 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
 9571   predicate(UseSSE>=2);
 9572   match(Set cr (CmpD src1 (LoadD src2)));
 9573   ins_cost(100);
 9574   format %{ "UCOMISD $src1,$src2" %}
 9575   ins_encode %{
 9576     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9577   %}
 9578   ins_pipe( pipe_slow );
 9579 %}
 9580 
 9581 // Compare into -1,0,1 in XMM
 9582 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
 9583   predicate(UseSSE>=2);
 9584   match(Set dst (CmpD3 src1 src2));
 9585   effect(KILL cr);
 9586   ins_cost(255);
 9587   format %{ "UCOMISD $src1, $src2\n\t"
 9588             "MOV     $dst, #-1\n\t"
 9589             "JP,s    done\n\t"
 9590             "JB,s    done\n\t"
 9591             "SETNE   $dst\n\t"
 9592             "MOVZB   $dst, $dst\n"
 9593     "done:" %}
 9594   ins_encode %{
 9595     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
 9596     emit_cmpfp3(_masm, $dst$$Register);
 9597   %}
 9598   ins_pipe( pipe_slow );
 9599 %}
 9600 
 9601 // Compare into -1,0,1 in XMM and memory
 9602 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
 9603   predicate(UseSSE>=2);
 9604   match(Set dst (CmpD3 src1 (LoadD src2)));
 9605   effect(KILL cr);
 9606   ins_cost(275);
 9607   format %{ "UCOMISD $src1, $src2\n\t"
 9608             "MOV     $dst, #-1\n\t"
 9609             "JP,s    done\n\t"
 9610             "JB,s    done\n\t"
 9611             "SETNE   $dst\n\t"
 9612             "MOVZB   $dst, $dst\n"
 9613     "done:" %}
 9614   ins_encode %{
 9615     __ ucomisd($src1$$XMMRegister, $src2$$Address);
 9616     emit_cmpfp3(_masm, $dst$$Register);
 9617   %}
 9618   ins_pipe( pipe_slow );
 9619 %}
 9620 
 9621 
 9622 instruct subDPR_reg(regDPR dst, regDPR src) %{
 9623   predicate (UseSSE <=1);
 9624   match(Set dst (SubD dst src));
 9625 
 9626   format %{ "FLD    $src\n\t"
 9627             "DSUBp  $dst,ST" %}
 9628   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
 9629   ins_cost(150);
 9630   ins_encode( Push_Reg_DPR(src),
 9631               OpcP, RegOpc(dst) );
 9632   ins_pipe( fpu_reg_reg );
 9633 %}
 9634 
 9635 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9636   predicate (UseSSE <=1);
 9637   match(Set dst (RoundDouble (SubD src1 src2)));
 9638   ins_cost(250);
 9639 
 9640   format %{ "FLD    $src2\n\t"
 9641             "DSUB   ST,$src1\n\t"
 9642             "FSTP_D $dst\t# D-round" %}
 9643   opcode(0xD8, 0x5);
 9644   ins_encode( Push_Reg_DPR(src2),
 9645               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9646   ins_pipe( fpu_mem_reg_reg );
 9647 %}
 9648 
 9649 
 9650 instruct subDPR_reg_mem(regDPR dst, memory src) %{
 9651   predicate (UseSSE <=1);
 9652   match(Set dst (SubD dst (LoadD src)));
 9653   ins_cost(150);
 9654 
 9655   format %{ "FLD    $src\n\t"
 9656             "DSUBp  $dst,ST" %}
 9657   opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
 9658   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9659               OpcP, RegOpc(dst) );
 9660   ins_pipe( fpu_reg_mem );
 9661 %}
 9662 
 9663 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
 9664   predicate (UseSSE<=1);
 9665   match(Set dst (AbsD src));
 9666   ins_cost(100);
 9667   format %{ "FABS" %}
 9668   opcode(0xE1, 0xD9);
 9669   ins_encode( OpcS, OpcP );
 9670   ins_pipe( fpu_reg_reg );
 9671 %}
 9672 
 9673 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
 9674   predicate(UseSSE<=1);
 9675   match(Set dst (NegD src));
 9676   ins_cost(100);
 9677   format %{ "FCHS" %}
 9678   opcode(0xE0, 0xD9);
 9679   ins_encode( OpcS, OpcP );
 9680   ins_pipe( fpu_reg_reg );
 9681 %}
 9682 
 9683 instruct addDPR_reg(regDPR dst, regDPR src) %{
 9684   predicate(UseSSE<=1);
 9685   match(Set dst (AddD dst src));
 9686   format %{ "FLD    $src\n\t"
 9687             "DADD   $dst,ST" %}
 9688   size(4);
 9689   ins_cost(150);
 9690   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
 9691   ins_encode( Push_Reg_DPR(src),
 9692               OpcP, RegOpc(dst) );
 9693   ins_pipe( fpu_reg_reg );
 9694 %}
 9695 
 9696 
 9697 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
 9698   predicate(UseSSE<=1);
 9699   match(Set dst (RoundDouble (AddD src1 src2)));
 9700   ins_cost(250);
 9701 
 9702   format %{ "FLD    $src2\n\t"
 9703             "DADD   ST,$src1\n\t"
 9704             "FSTP_D $dst\t# D-round" %}
 9705   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
 9706   ins_encode( Push_Reg_DPR(src2),
 9707               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
 9708   ins_pipe( fpu_mem_reg_reg );
 9709 %}
 9710 
 9711 
 9712 instruct addDPR_reg_mem(regDPR dst, memory src) %{
 9713   predicate(UseSSE<=1);
 9714   match(Set dst (AddD dst (LoadD src)));
 9715   ins_cost(150);
 9716 
 9717   format %{ "FLD    $src\n\t"
 9718             "DADDp  $dst,ST" %}
 9719   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
 9720   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9721               OpcP, RegOpc(dst) );
 9722   ins_pipe( fpu_reg_mem );
 9723 %}
 9724 
 9725 // add-to-memory
 9726 instruct addDPR_mem_reg(memory dst, regDPR src) %{
 9727   predicate(UseSSE<=1);
 9728   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
 9729   ins_cost(150);
 9730 
 9731   format %{ "FLD_D  $dst\n\t"
 9732             "DADD   ST,$src\n\t"
 9733             "FST_D  $dst" %}
 9734   opcode(0xDD, 0x0);
 9735   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
 9736               Opcode(0xD8), RegOpc(src),
 9737               set_instruction_start,
 9738               Opcode(0xDD), RMopc_Mem(0x03,dst) );
 9739   ins_pipe( fpu_reg_mem );
 9740 %}
 9741 
 9742 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
 9743   predicate(UseSSE<=1);
 9744   match(Set dst (AddD dst con));
 9745   ins_cost(125);
 9746   format %{ "FLD1\n\t"
 9747             "DADDp  $dst,ST" %}
 9748   ins_encode %{
 9749     __ fld1();
 9750     __ faddp($dst$$reg);
 9751   %}
 9752   ins_pipe(fpu_reg);
 9753 %}
 9754 
 9755 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
 9756   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9757   match(Set dst (AddD dst con));
 9758   ins_cost(200);
 9759   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9760             "DADDp  $dst,ST" %}
 9761   ins_encode %{
 9762     __ fld_d($constantaddress($con));
 9763     __ faddp($dst$$reg);
 9764   %}
 9765   ins_pipe(fpu_reg_mem);
 9766 %}
 9767 
 9768 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
 9769   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
 9770   match(Set dst (RoundDouble (AddD src con)));
 9771   ins_cost(200);
 9772   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9773             "DADD   ST,$src\n\t"
 9774             "FSTP_D $dst\t# D-round" %}
 9775   ins_encode %{
 9776     __ fld_d($constantaddress($con));
 9777     __ fadd($src$$reg);
 9778     __ fstp_d(Address(rsp, $dst$$disp));
 9779   %}
 9780   ins_pipe(fpu_mem_reg_con);
 9781 %}
 9782 
 9783 instruct mulDPR_reg(regDPR dst, regDPR src) %{
 9784   predicate(UseSSE<=1);
 9785   match(Set dst (MulD dst src));
 9786   format %{ "FLD    $src\n\t"
 9787             "DMULp  $dst,ST" %}
 9788   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9789   ins_cost(150);
 9790   ins_encode( Push_Reg_DPR(src),
 9791               OpcP, RegOpc(dst) );
 9792   ins_pipe( fpu_reg_reg );
 9793 %}
 9794 
 9795 // Strict FP instruction biases argument before multiply then
 9796 // biases result to avoid double rounding of subnormals.
 9797 //
 9798 // scale arg1 by multiplying arg1 by 2^(-15360)
 9799 // load arg2
 9800 // multiply scaled arg1 by arg2
 9801 // rescale product by 2^(15360)
 9802 //
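      // As a rough sketch of the sequence above (the bias stubs are assumed to
      // hold exactly 2^(-15360) and 2^(+15360), i.e. the difference between the
      // extended and double exponent biases, 16383 - 1023):
      //
      //   dst = ((dst * 2^(-15360)) * src) * 2^(+15360)
      //
      // Pre-scaling forces a product that would be a subnormal double to be
      // rounded only once, down in the subnormal range of the 80-bit temporary;
      // the final rescale by an exact power of two adds no further rounding.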
 9803 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9804   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9805   match(Set dst (MulD dst src));
 9806   ins_cost(1);   // Select this instruction for all FP double multiplies
 9807 
 9808   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9809             "DMULp  $dst,ST\n\t"
 9810             "FLD    $src\n\t"
 9811             "DMULp  $dst,ST\n\t"
 9812             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9813             "DMULp  $dst,ST\n\t" %}
 9814   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
 9815   ins_encode( strictfp_bias1(dst),
 9816               Push_Reg_DPR(src),
 9817               OpcP, RegOpc(dst),
 9818               strictfp_bias2(dst) );
 9819   ins_pipe( fpu_reg_reg );
 9820 %}
 9821 
 9822 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
 9823   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
 9824   match(Set dst (MulD dst con));
 9825   ins_cost(200);
 9826   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
 9827             "DMULp  $dst,ST" %}
 9828   ins_encode %{
 9829     __ fld_d($constantaddress($con));
 9830     __ fmulp($dst$$reg);
 9831   %}
 9832   ins_pipe(fpu_reg_mem);
 9833 %}
 9834 
 9835 
 9836 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
 9837   predicate( UseSSE<=1 );
 9838   match(Set dst (MulD dst (LoadD src)));
 9839   ins_cost(200);
 9840   format %{ "FLD_D  $src\n\t"
 9841             "DMULp  $dst,ST" %}
 9842   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
 9843   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
 9844               OpcP, RegOpc(dst) );
 9845   ins_pipe( fpu_reg_mem );
 9846 %}
 9847 
 9848 //
 9849 // Cisc-alternate to reg-reg multiply
 9850 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
 9851   predicate( UseSSE<=1 );
 9852   match(Set dst (MulD src (LoadD mem)));
 9853   ins_cost(250);
 9854   format %{ "FLD_D  $mem\n\t"
 9855             "DMUL   ST,$src\n\t"
 9856             "FSTP_D $dst" %}
 9857   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
 9858   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
 9859               OpcReg_FPR(src),
 9860               Pop_Reg_DPR(dst) );
 9861   ins_pipe( fpu_reg_reg_mem );
 9862 %}
 9863 
 9864 
 9865 // MACRO3 -- addDPR a mulDPR
 9866 // This instruction is a '2-address' instruction in that the result goes
 9867 // back to src2.  This eliminates a move from the macro; possibly the
 9868 // register allocator will have to add it back (and maybe not).
 9869 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9870   predicate( UseSSE<=1 );
 9871   match(Set src2 (AddD (MulD src0 src1) src2));
 9872   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9873             "DMUL   ST,$src1\n\t"
 9874             "DADDp  $src2,ST" %}
 9875   ins_cost(250);
 9876   opcode(0xDD); /* LoadD DD /0 */
 9877   ins_encode( Push_Reg_FPR(src0),
 9878               FMul_ST_reg(src1),
 9879               FAddP_reg_ST(src2) );
 9880   ins_pipe( fpu_reg_reg_reg );
 9881 %}
 9882 
 9883 
 9884 // MACRO3 -- subDPR a mulDPR
 9885 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
 9886   predicate( UseSSE<=1 );
 9887   match(Set src2 (SubD (MulD src0 src1) src2));
 9888   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
 9889             "DMUL   ST,$src1\n\t"
 9890             "DSUBRp $src2,ST" %}
 9891   ins_cost(250);
 9892   ins_encode( Push_Reg_FPR(src0),
 9893               FMul_ST_reg(src1),
 9894               Opcode(0xDE), Opc_plus(0xE0,src2));
 9895   ins_pipe( fpu_reg_reg_reg );
 9896 %}
 9897 
 9898 
 9899 instruct divDPR_reg(regDPR dst, regDPR src) %{
 9900   predicate( UseSSE<=1 );
 9901   match(Set dst (DivD dst src));
 9902 
 9903   format %{ "FLD    $src\n\t"
 9904             "FDIVp  $dst,ST" %}
 9905   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9906   ins_cost(150);
 9907   ins_encode( Push_Reg_DPR(src),
 9908               OpcP, RegOpc(dst) );
 9909   ins_pipe( fpu_reg_reg );
 9910 %}
 9911 
 9912 // Strict FP instruction biases argument before division then
 9913 // biases result, to avoid double rounding of subnormals.
 9914 //
 9915 // scale dividend by multiplying dividend by 2^(-15360)
 9916 // load divisor
 9917 // divide scaled dividend by divisor
 9918 // rescale quotient by 2^(15360)
 9919 //
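      // Equivalent sketch for the strict divide, using the same bias stubs as
      // the strict multiply above (only the dividend is pre-scaled; the divisor
      // is loaded unmodified and the quotient is rescaled afterwards):
      //
      //   dst = ((dst * 2^(-15360)) / src) * 2^(+15360)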
 9920 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
 9921   predicate( UseSSE<=1 && Compile::current()->has_method() );
 9922   match(Set dst (DivD dst src));
 9924   ins_cost(1);   // Select this instruction for all FP double divides
 9925 
 9926   format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
 9927             "DMULp  $dst,ST\n\t"
 9928             "FLD    $src\n\t"
 9929             "FDIVp  $dst,ST\n\t"
 9930             "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
 9931             "DMULp  $dst,ST\n\t" %}
 9932   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
 9933   ins_encode( strictfp_bias1(dst),
 9934               Push_Reg_DPR(src),
 9935               OpcP, RegOpc(dst),
 9936               strictfp_bias2(dst) );
 9937   ins_pipe( fpu_reg_reg );
 9938 %}
 9939 
 9940 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
 9941   predicate(UseSSE<=1);
 9942   match(Set dst (ModD dst src));
 9943   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
 9944 
 9945   format %{ "DMOD   $dst,$src" %}
 9946   ins_cost(250);
 9947   ins_encode(Push_Reg_Mod_DPR(dst, src),
 9948               emitModDPR(),
 9949               Push_Result_Mod_DPR(src),
 9950               Pop_Reg_DPR(dst));
 9951   ins_pipe( pipe_slow );
 9952 %}
 9953 
 9954 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
 9955   predicate(UseSSE>=2);
 9956   match(Set dst (ModD src0 src1));
 9957   effect(KILL rax, KILL cr);
 9958 
 9959   format %{ "SUB    ESP,8\t # DMOD\n"
 9960           "\tMOVSD  [ESP+0],$src1\n"
 9961           "\tFLD_D  [ESP+0]\n"
 9962           "\tMOVSD  [ESP+0],$src0\n"
 9963           "\tFLD_D  [ESP+0]\n"
 9964      "loop:\tFPREM\n"
 9965           "\tFWAIT\n"
 9966           "\tFNSTSW AX\n"
 9967           "\tSAHF\n"
 9968           "\tJP     loop\n"
 9969           "\tFSTP_D [ESP+0]\n"
 9970           "\tMOVSD  $dst,[ESP+0]\n"
 9971           "\tADD    ESP,8\n"
 9972           "\tFSTP   ST0\t # Restore FPU Stack"
 9973     %}
 9974   ins_cost(250);
 9975   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
 9976   ins_pipe( pipe_slow );
 9977 %}
 9978 
 9979 instruct atanDPR_reg(regDPR dst, regDPR src) %{
 9980   predicate (UseSSE<=1);
 9981   match(Set dst(AtanD dst src));
 9982   format %{ "DATA   $dst,$src" %}
 9983   opcode(0xD9, 0xF3);
 9984   ins_encode( Push_Reg_DPR(src),
 9985               OpcP, OpcS, RegOpc(dst) );
 9986   ins_pipe( pipe_slow );
 9987 %}
 9988 
 9989 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
 9990   predicate (UseSSE>=2);
 9991   match(Set dst(AtanD dst src));
 9992   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
 9993   format %{ "DATA   $dst,$src" %}
 9994   opcode(0xD9, 0xF3);
 9995   ins_encode( Push_SrcD(src),
 9996               OpcP, OpcS, Push_ResultD(dst) );
 9997   ins_pipe( pipe_slow );
 9998 %}
 9999 
10000 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10001   predicate (UseSSE<=1);
10002   match(Set dst (SqrtD src));
10003   format %{ "DSQRT  $dst,$src" %}
10004   opcode(0xFA, 0xD9);
10005   ins_encode( Push_Reg_DPR(src),
10006               OpcS, OpcP, Pop_Reg_DPR(dst) );
10007   ins_pipe( pipe_slow );
10008 %}
10009 
10010 //-------------Float Instructions-------------------------------
10011 // Float Math
10012 
10013 // Code for float compare:
10014 //     fcompp();
10015 //     fwait(); fnstsw_ax();
10016 //     sahf();
10017 //     movl(dst, unordered_result);
10018 //     jcc(Assembler::parity, exit);
10019 //     movl(dst, less_result);
10020 //     jcc(Assembler::below, exit);
10021 //     movl(dst, equal_result);
10022 //     jcc(Assembler::equal, exit);
10023 //     movl(dst, greater_result);
10024 //   exit:
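      // After FNSTSW AX / SAHF, the x87 condition bits land in EFLAGS as
      // C0 -> CF, C2 -> PF, C3 -> ZF, so in the sequence above:
      //   unordered (NaN)  => C0 = C2 = C3 = 1  => CF = PF = ZF = 1 (parity test)
      //   less             => C0 = 1            => CF = 1 (below test)
      //   equal            => C3 = 1            => ZF = 1 (equal test)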
10025 
10026 // P6 version of float compare, sets condition codes in EFLAGS
10027 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10028   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10029   match(Set cr (CmpF src1 src2));
10030   effect(KILL rax);
10031   ins_cost(150);
10032   format %{ "FLD    $src1\n\t"
10033             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10034             "JNP    exit\n\t"
10035             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10036             "SAHF\n"
10037      "exit:\tNOP               // avoid branch to branch" %}
10038   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10039   ins_encode( Push_Reg_DPR(src1),
10040               OpcP, RegOpc(src2),
10041               cmpF_P6_fixup );
10042   ins_pipe( pipe_slow );
10043 %}
10044 
10045 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10046   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10047   match(Set cr (CmpF src1 src2));
10048   ins_cost(100);
10049   format %{ "FLD    $src1\n\t"
10050             "FUCOMIP ST,$src2  // P6 instruction" %}
10051   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10052   ins_encode( Push_Reg_DPR(src1),
10053               OpcP, RegOpc(src2));
10054   ins_pipe( pipe_slow );
10055 %}
10056 
10057 
10058 // Compare & branch
10059 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10060   predicate(UseSSE == 0);
10061   match(Set cr (CmpF src1 src2));
10062   effect(KILL rax);
10063   ins_cost(200);
10064   format %{ "FLD    $src1\n\t"
10065             "FCOMp  $src2\n\t"
10066             "FNSTSW AX\n\t"
10067             "TEST   AX,0x400\n\t"
10068             "JZ,s   flags\n\t"
10069             "MOV    AH,1\t# unordered treat as LT\n"
10070     "flags:\tSAHF" %}
10071   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10072   ins_encode( Push_Reg_DPR(src1),
10073               OpcP, RegOpc(src2),
10074               fpu_flags);
10075   ins_pipe( pipe_slow );
10076 %}
10077 
10078 // Compare vs zero into -1,0,1
10079 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10080   predicate(UseSSE == 0);
10081   match(Set dst (CmpF3 src1 zero));
10082   effect(KILL cr, KILL rax);
10083   ins_cost(280);
10084   format %{ "FTSTF  $dst,$src1" %}
10085   opcode(0xE4, 0xD9);
10086   ins_encode( Push_Reg_DPR(src1),
10087               OpcS, OpcP, PopFPU,
10088               CmpF_Result(dst));
10089   ins_pipe( pipe_slow );
10090 %}
10091 
10092 // Compare into -1,0,1
10093 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10094   predicate(UseSSE == 0);
10095   match(Set dst (CmpF3 src1 src2));
10096   effect(KILL cr, KILL rax);
10097   ins_cost(300);
10098   format %{ "FCMPF  $dst,$src1,$src2" %}
10099   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10100   ins_encode( Push_Reg_DPR(src1),
10101               OpcP, RegOpc(src2),
10102               CmpF_Result(dst));
10103   ins_pipe( pipe_slow );
10104 %}
10105 
10106 // float compare and set condition codes in EFLAGS by XMM regs
10107 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10108   predicate(UseSSE>=1);
10109   match(Set cr (CmpF src1 src2));
10110   ins_cost(145);
10111   format %{ "UCOMISS $src1,$src2\n\t"
10112             "JNP,s   exit\n\t"
10113             "PUSHF\t# saw NaN, set CF\n\t"
10114             "AND     [rsp], #0xffffff2b\n\t"
10115             "POPF\n"
10116     "exit:" %}
10117   ins_encode %{
10118     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10119     emit_cmpfp_fixup(_masm);
10120   %}
10121   ins_pipe( pipe_slow );
10122 %}
10123 
10124 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10125   predicate(UseSSE>=1);
10126   match(Set cr (CmpF src1 src2));
10127   ins_cost(100);
10128   format %{ "UCOMISS $src1,$src2" %}
10129   ins_encode %{
10130     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10131   %}
10132   ins_pipe( pipe_slow );
10133 %}
10134 
10135 // float compare and set condition codes in EFLAGS by XMM regs
10136 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10137   predicate(UseSSE>=1);
10138   match(Set cr (CmpF src1 (LoadF src2)));
10139   ins_cost(165);
10140   format %{ "UCOMISS $src1,$src2\n\t"
10141             "JNP,s   exit\n\t"
10142             "PUSHF\t# saw NaN, set CF\n\t"
10143             "AND     [rsp], #0xffffff2b\n\t"
10144             "POPF\n"
10145     "exit:" %}
10146   ins_encode %{
10147     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10148     emit_cmpfp_fixup(_masm);
10149   %}
10150   ins_pipe( pipe_slow );
10151 %}
10152 
10153 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10154   predicate(UseSSE>=1);
10155   match(Set cr (CmpF src1 (LoadF src2)));
10156   ins_cost(100);
10157   format %{ "UCOMISS $src1,$src2" %}
10158   ins_encode %{
10159     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10160   %}
10161   ins_pipe( pipe_slow );
10162 %}
10163 
10164 // Compare into -1,0,1 in XMM
10165 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10166   predicate(UseSSE>=1);
10167   match(Set dst (CmpF3 src1 src2));
10168   effect(KILL cr);
10169   ins_cost(255);
10170   format %{ "UCOMISS $src1, $src2\n\t"
10171             "MOV     $dst, #-1\n\t"
10172             "JP,s    done\n\t"
10173             "JB,s    done\n\t"
10174             "SETNE   $dst\n\t"
10175             "MOVZB   $dst, $dst\n"
10176     "done:" %}
10177   ins_encode %{
10178     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10179     emit_cmpfp3(_masm, $dst$$Register);
10180   %}
10181   ins_pipe( pipe_slow );
10182 %}
10183 
10184 // Compare into -1,0,1 in XMM and memory
10185 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10186   predicate(UseSSE>=1);
10187   match(Set dst (CmpF3 src1 (LoadF src2)));
10188   effect(KILL cr);
10189   ins_cost(275);
10190   format %{ "UCOMISS $src1, $src2\n\t"
10191             "MOV     $dst, #-1\n\t"
10192             "JP,s    done\n\t"
10193             "JB,s    done\n\t"
10194             "SETNE   $dst\n\t"
10195             "MOVZB   $dst, $dst\n"
10196     "done:" %}
10197   ins_encode %{
10198     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10199     emit_cmpfp3(_masm, $dst$$Register);
10200   %}
10201   ins_pipe( pipe_slow );
10202 %}
10203 
10204 // Spill to obtain 24-bit precision
10205 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10206   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10207   match(Set dst (SubF src1 src2));
10208 
10209   format %{ "FSUB   $dst,$src1 - $src2" %}
10210   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10211   ins_encode( Push_Reg_FPR(src1),
10212               OpcReg_FPR(src2),
10213               Pop_Mem_FPR(dst) );
10214   ins_pipe( fpu_mem_reg_reg );
10215 %}
10216 //
10217 // This instruction does not round to 24-bits
10218 instruct subFPR_reg(regFPR dst, regFPR src) %{
10219   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10220   match(Set dst (SubF dst src));
10221 
10222   format %{ "FSUB   $dst,$src" %}
10223   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10224   ins_encode( Push_Reg_FPR(src),
10225               OpcP, RegOpc(dst) );
10226   ins_pipe( fpu_reg_reg );
10227 %}
10228 
10229 // Spill to obtain 24-bit precision
10230 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10231   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10232   match(Set dst (AddF src1 src2));
10233 
10234   format %{ "FADD   $dst,$src1,$src2" %}
10235   opcode(0xD8, 0x0); /* D8 C0+i */
10236   ins_encode( Push_Reg_FPR(src2),
10237               OpcReg_FPR(src1),
10238               Pop_Mem_FPR(dst) );
10239   ins_pipe( fpu_mem_reg_reg );
10240 %}
10241 //
10242 // This instruction does not round to 24-bits
10243 instruct addFPR_reg(regFPR dst, regFPR src) %{
10244   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10245   match(Set dst (AddF dst src));
10246 
10247   format %{ "FLD    $src\n\t"
10248             "FADDp  $dst,ST" %}
10249   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10250   ins_encode( Push_Reg_FPR(src),
10251               OpcP, RegOpc(dst) );
10252   ins_pipe( fpu_reg_reg );
10253 %}
10254 
10255 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10256   predicate(UseSSE==0);
10257   match(Set dst (AbsF src));
10258   ins_cost(100);
10259   format %{ "FABS" %}
10260   opcode(0xE1, 0xD9);
10261   ins_encode( OpcS, OpcP );
10262   ins_pipe( fpu_reg_reg );
10263 %}
10264 
10265 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10266   predicate(UseSSE==0);
10267   match(Set dst (NegF src));
10268   ins_cost(100);
10269   format %{ "FCHS" %}
10270   opcode(0xE0, 0xD9);
10271   ins_encode( OpcS, OpcP );
10272   ins_pipe( fpu_reg_reg );
10273 %}
10274 
10275 // Cisc-alternate to addFPR_reg
10276 // Spill to obtain 24-bit precision
10277 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10278   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10279   match(Set dst (AddF src1 (LoadF src2)));
10280 
10281   format %{ "FLD    $src2\n\t"
10282             "FADD   ST,$src1\n\t"
10283             "FSTP_S $dst" %}
10284   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10285   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10286               OpcReg_FPR(src1),
10287               Pop_Mem_FPR(dst) );
10288   ins_pipe( fpu_mem_reg_mem );
10289 %}
10290 //
10291 // Cisc-alternate to addFPR_reg
10292 // This instruction does not round to 24-bits
10293 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10294   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10295   match(Set dst (AddF dst (LoadF src)));
10296 
10297   format %{ "FADD   $dst,$src" %}
10298   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10299   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10300               OpcP, RegOpc(dst) );
10301   ins_pipe( fpu_reg_mem );
10302 %}
10303 
10304 // // Following two instructions for _222_mpegaudio
10305 // Spill to obtain 24-bit precision
10306 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10307   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10308   match(Set dst (AddF src1 src2));
10309 
10310   format %{ "FADD   $dst,$src1,$src2" %}
10311   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10312   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10313               OpcReg_FPR(src2),
10314               Pop_Mem_FPR(dst) );
10315   ins_pipe( fpu_mem_reg_mem );
10316 %}
10317 
10318 // Cisc-spill variant
10319 // Spill to obtain 24-bit precision
10320 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10321   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10322   match(Set dst (AddF src1 (LoadF src2)));
10323 
10324   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10325   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10326   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10327               set_instruction_start,
10328               OpcP, RMopc_Mem(secondary,src1),
10329               Pop_Mem_FPR(dst) );
10330   ins_pipe( fpu_mem_mem_mem );
10331 %}
10332 
10333 // Spill to obtain 24-bit precision
10334 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10335   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10336   match(Set dst (AddF src1 src2));
10337 
10338   format %{ "FADD   $dst,$src1,$src2" %}
10339   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10340   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10341               set_instruction_start,
10342               OpcP, RMopc_Mem(secondary,src1),
10343               Pop_Mem_FPR(dst) );
10344   ins_pipe( fpu_mem_mem_mem );
10345 %}
10346 
10347 
10348 // Spill to obtain 24-bit precision
10349 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10350   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10351   match(Set dst (AddF src con));
10352   format %{ "FLD    $src\n\t"
10353             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10354             "FSTP_S $dst"  %}
10355   ins_encode %{
10356     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10357     __ fadd_s($constantaddress($con));
10358     __ fstp_s(Address(rsp, $dst$$disp));
10359   %}
10360   ins_pipe(fpu_mem_reg_con);
10361 %}
10362 //
10363 // This instruction does not round to 24-bits
10364 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10365   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10366   match(Set dst (AddF src con));
10367   format %{ "FLD    $src\n\t"
10368             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10369             "FSTP   $dst"  %}
10370   ins_encode %{
10371     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10372     __ fadd_s($constantaddress($con));
10373     __ fstp_d($dst$$reg);
10374   %}
10375   ins_pipe(fpu_reg_reg_con);
10376 %}
10377 
10378 // Spill to obtain 24-bit precision
10379 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10380   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10381   match(Set dst (MulF src1 src2));
10382 
10383   format %{ "FLD    $src1\n\t"
10384             "FMUL   $src2\n\t"
10385             "FSTP_S $dst"  %}
10386   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10387   ins_encode( Push_Reg_FPR(src1),
10388               OpcReg_FPR(src2),
10389               Pop_Mem_FPR(dst) );
10390   ins_pipe( fpu_mem_reg_reg );
10391 %}
10392 //
10393 // This instruction does not round to 24-bits
10394 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10395   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10396   match(Set dst (MulF src1 src2));
10397 
10398   format %{ "FLD    $src1\n\t"
10399             "FMUL   $src2\n\t"
10400             "FSTP_S $dst"  %}
10401   opcode(0xD8, 0x1); /* D8 C8+i */
10402   ins_encode( Push_Reg_FPR(src2),
10403               OpcReg_FPR(src1),
10404               Pop_Reg_FPR(dst) );
10405   ins_pipe( fpu_reg_reg_reg );
10406 %}
10407 
10408 
10409 // Spill to obtain 24-bit precision
10410 // Cisc-alternate to reg-reg multiply
10411 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10412   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10413   match(Set dst (MulF src1 (LoadF src2)));
10414 
10415   format %{ "FLD_S  $src2\n\t"
10416             "FMUL   $src1\n\t"
10417             "FSTP_S $dst"  %}
10418   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10419   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10420               OpcReg_FPR(src1),
10421               Pop_Mem_FPR(dst) );
10422   ins_pipe( fpu_mem_reg_mem );
10423 %}
10424 //
10425 // This instruction does not round to 24-bits
10426 // Cisc-alternate to reg-reg multiply
10427 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10428   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10429   match(Set dst (MulF src1 (LoadF src2)));
10430 
10431   format %{ "FMUL   $dst,$src1,$src2" %}
10432   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10433   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10434               OpcReg_FPR(src1),
10435               Pop_Reg_FPR(dst) );
10436   ins_pipe( fpu_reg_reg_mem );
10437 %}
10438 
10439 // Spill to obtain 24-bit precision
10440 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10441   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10442   match(Set dst (MulF src1 src2));
10443 
10444   format %{ "FMUL   $dst,$src1,$src2" %}
10445   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10446   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10447               set_instruction_start,
10448               OpcP, RMopc_Mem(secondary,src1),
10449               Pop_Mem_FPR(dst) );
10450   ins_pipe( fpu_mem_mem_mem );
10451 %}
10452 
10453 // Spill to obtain 24-bit precision
10454 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10455   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10456   match(Set dst (MulF src con));
10457 
10458   format %{ "FLD    $src\n\t"
10459             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10460             "FSTP_S $dst"  %}
10461   ins_encode %{
10462     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10463     __ fmul_s($constantaddress($con));
10464     __ fstp_s(Address(rsp, $dst$$disp));
10465   %}
10466   ins_pipe(fpu_mem_reg_con);
10467 %}
10468 //
10469 // This instruction does not round to 24-bits
10470 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10471   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10472   match(Set dst (MulF src con));
10473 
10474   format %{ "FLD    $src\n\t"
10475             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10476             "FSTP   $dst"  %}
10477   ins_encode %{
10478     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10479     __ fmul_s($constantaddress($con));
10480     __ fstp_d($dst$$reg);
10481   %}
10482   ins_pipe(fpu_reg_reg_con);
10483 %}
10484 
10485 
10486 //
10487 // MACRO1 -- subsume unshared load into mulFPR
10488 // This instruction does not round to 24-bits
10489 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10490   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10491   match(Set dst (MulF (LoadF mem1) src));
10492 
10493   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10494             "FMUL   ST,$src\n\t"
10495             "FSTP   $dst" %}
10496   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10497   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10498               OpcReg_FPR(src),
10499               Pop_Reg_FPR(dst) );
10500   ins_pipe( fpu_reg_reg_mem );
10501 %}
10502 //
10503 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10504 // This instruction does not round to 24-bits
10505 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10506   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10507   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10508   ins_cost(95);
10509 
10510   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10511             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10512             "FADD   ST,$src2\n\t"
10513             "FSTP   $dst" %}
10514   opcode(0xD9); /* LoadF D9 /0 */
10515   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10516               FMul_ST_reg(src1),
10517               FAdd_ST_reg(src2),
10518               Pop_Reg_FPR(dst) );
10519   ins_pipe( fpu_reg_mem_reg_reg );
10520 %}
10521 
10522 // MACRO3 -- addFPR a mulFPR
10523 // This instruction does not round to 24-bits.  It is a '2-address'
10524 // instruction in that the result goes back to src2.  This eliminates
10525 // a move from the macro; possibly the register allocator will have
10526 // to add it back (and maybe not).
10527 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10528   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10529   match(Set src2 (AddF (MulF src0 src1) src2));
10530 
10531   format %{ "FLD    $src0     ===MACRO3===\n\t"
10532             "FMUL   ST,$src1\n\t"
10533             "FADDP  $src2,ST" %}
10534   opcode(0xD9); /* LoadF D9 /0 */
10535   ins_encode( Push_Reg_FPR(src0),
10536               FMul_ST_reg(src1),
10537               FAddP_reg_ST(src2) );
10538   ins_pipe( fpu_reg_reg_reg );
10539 %}
10540 
10541 // MACRO4 -- divFPR subFPR
10542 // This instruction does not round to 24-bits
10543 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10544   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10545   match(Set dst (DivF (SubF src2 src1) src3));
10546 
10547   format %{ "FLD    $src2   ===MACRO4===\n\t"
10548             "FSUB   ST,$src1\n\t"
10549             "FDIV   ST,$src3\n\t"
10550             "FSTP  $dst" %}
10551   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10552   ins_encode( Push_Reg_FPR(src2),
10553               subFPR_divFPR_encode(src1,src3),
10554               Pop_Reg_FPR(dst) );
10555   ins_pipe( fpu_reg_reg_reg_reg );
10556 %}
10557 
10558 // Spill to obtain 24-bit precision
10559 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10560   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10561   match(Set dst (DivF src1 src2));
10562 
10563   format %{ "FDIV   $dst,$src1,$src2" %}
10564   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10565   ins_encode( Push_Reg_FPR(src1),
10566               OpcReg_FPR(src2),
10567               Pop_Mem_FPR(dst) );
10568   ins_pipe( fpu_mem_reg_reg );
10569 %}
10570 //
10571 // This instruction does not round to 24-bits
10572 instruct divFPR_reg(regFPR dst, regFPR src) %{
10573   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10574   match(Set dst (DivF dst src));
10575 
10576   format %{ "FDIV   $dst,$src" %}
10577   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10578   ins_encode( Push_Reg_FPR(src),
10579               OpcP, RegOpc(dst) );
10580   ins_pipe( fpu_reg_reg );
10581 %}
10582 
10583 
10584 // Spill to obtain 24-bit precision
10585 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10586   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10587   match(Set dst (ModF src1 src2));
10588   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10589 
10590   format %{ "FMOD   $dst,$src1,$src2" %}
10591   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10592               emitModDPR(),
10593               Push_Result_Mod_DPR(src2),
10594               Pop_Mem_FPR(dst));
10595   ins_pipe( pipe_slow );
10596 %}
10597 //
10598 // This instruction does not round to 24-bits
10599 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10600   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10601   match(Set dst (ModF dst src));
10602   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10603 
10604   format %{ "FMOD   $dst,$src" %}
10605   ins_encode(Push_Reg_Mod_DPR(dst, src),
10606               emitModDPR(),
10607               Push_Result_Mod_DPR(src),
10608               Pop_Reg_FPR(dst));
10609   ins_pipe( pipe_slow );
10610 %}
10611 
10612 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10613   predicate(UseSSE>=1);
10614   match(Set dst (ModF src0 src1));
10615   effect(KILL rax, KILL cr);
10616   format %{ "SUB    ESP,4\t # FMOD\n"
10617           "\tMOVSS  [ESP+0],$src1\n"
10618           "\tFLD_S  [ESP+0]\n"
10619           "\tMOVSS  [ESP+0],$src0\n"
10620           "\tFLD_S  [ESP+0]\n"
10621      "loop:\tFPREM\n"
10622           "\tFWAIT\n"
10623           "\tFNSTSW AX\n"
10624           "\tSAHF\n"
10625           "\tJP     loop\n"
10626           "\tFSTP_S [ESP+0]\n"
10627           "\tMOVSS  $dst,[ESP+0]\n"
10628           "\tADD    ESP,4\n"
10629           "\tFSTP   ST0\t # Restore FPU Stack"
10630     %}
10631   ins_cost(250);
10632   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10633   ins_pipe( pipe_slow );
10634 %}
10635 
10636 
10637 //----------Arithmetic Conversion Instructions---------------------------------
10638 // The conversion operations are all alphabetically sorted.  Please keep it that way!
10639 
10640 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10641   predicate(UseSSE==0);
10642   match(Set dst (RoundFloat src));
10643   ins_cost(125);
10644   format %{ "FST_S  $dst,$src\t# F-round" %}
10645   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10646   ins_pipe( fpu_mem_reg );
10647 %}
10648 
10649 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10650   predicate(UseSSE<=1);
10651   match(Set dst (RoundDouble src));
10652   ins_cost(125);
10653   format %{ "FST_D  $dst,$src\t# D-round" %}
10654   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10655   ins_pipe( fpu_mem_reg );
10656 %}
10657 
10658 // Force rounding to 24-bit precision and 8-bit exponent
10659 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10660   predicate(UseSSE==0);
10661   match(Set dst (ConvD2F src));
10662   format %{ "FST_S  $dst,$src\t# F-round" %}
10663   expand %{
10664     roundFloat_mem_reg(dst,src);
10665   %}
10666 %}
10667 
10668 // Force rounding to 24-bit precision and 8-bit exponent
10669 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10670   predicate(UseSSE==1);
10671   match(Set dst (ConvD2F src));
10672   effect( KILL cr );
10673   format %{ "SUB    ESP,4\n\t"
10674             "FST_S  [ESP],$src\t# F-round\n\t"
10675             "MOVSS  $dst,[ESP]\n\t"
10676             "ADD ESP,4" %}
10677   ins_encode %{
10678     __ subptr(rsp, 4);
10679     if ($src$$reg != FPR1L_enc) {
10680       __ fld_s($src$$reg-1);
10681       __ fstp_s(Address(rsp, 0));
10682     } else {
10683       __ fst_s(Address(rsp, 0));
10684     }
10685     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10686     __ addptr(rsp, 4);
10687   %}
10688   ins_pipe( pipe_slow );
10689 %}
10690 
10691 // Force rounding double precision to single precision
10692 instruct convD2F_reg(regF dst, regD src) %{
10693   predicate(UseSSE>=2);
10694   match(Set dst (ConvD2F src));
10695   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10696   ins_encode %{
10697     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10698   %}
10699   ins_pipe( pipe_slow );
10700 %}
10701 
10702 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10703   predicate(UseSSE==0);
10704   match(Set dst (ConvF2D src));
10705   format %{ "FST_S  $dst,$src\t# D-round" %}
10706   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10707   ins_pipe( fpu_reg_reg );
10708 %}
10709 
10710 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10711   predicate(UseSSE==1);
10712   match(Set dst (ConvF2D src));
10713   format %{ "FST_D  $dst,$src\t# D-round" %}
10714   expand %{
10715     roundDouble_mem_reg(dst,src);
10716   %}
10717 %}
10718 
10719 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10720   predicate(UseSSE==1);
10721   match(Set dst (ConvF2D src));
10722   effect( KILL cr );
10723   format %{ "SUB    ESP,4\n\t"
10724             "MOVSS  [ESP] $src\n\t"
10725             "FLD_S  [ESP]\n\t"
10726             "ADD    ESP,4\n\t"
10727             "FSTP   $dst\t# D-round" %}
10728   ins_encode %{
10729     __ subptr(rsp, 4);
10730     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10731     __ fld_s(Address(rsp, 0));
10732     __ addptr(rsp, 4);
10733     __ fstp_d($dst$$reg);
10734   %}
10735   ins_pipe( pipe_slow );
10736 %}
10737 
10738 instruct convF2D_reg(regD dst, regF src) %{
10739   predicate(UseSSE>=2);
10740   match(Set dst (ConvF2D src));
10741   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10742   ins_encode %{
10743     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10744   %}
10745   ins_pipe( pipe_slow );
10746 %}
10747 
10748 // Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10749 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10750   predicate(UseSSE<=1);
10751   match(Set dst (ConvD2I src));
10752   effect( KILL tmp, KILL cr );
10753   format %{ "FLD    $src\t# Convert double to int \n\t"
10754             "FLDCW  trunc mode\n\t"
10755             "SUB    ESP,4\n\t"
10756             "FISTp  [ESP + #0]\n\t"
10757             "FLDCW  std/24-bit mode\n\t"
10758             "POP    EAX\n\t"
10759             "CMP    EAX,0x80000000\n\t"
10760             "JNE,s  fast\n\t"
10761             "FLD_D  $src\n\t"
10762             "CALL   d2i_wrapper\n"
10763       "fast:" %}
10764   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10765   ins_pipe( pipe_slow );
10766 %}
10767 
10768 // Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10769 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10770   predicate(UseSSE>=2);
10771   match(Set dst (ConvD2I src));
10772   effect( KILL tmp, KILL cr );
10773   format %{ "CVTTSD2SI $dst, $src\n\t"
10774             "CMP    $dst,0x80000000\n\t"
10775             "JNE,s  fast\n\t"
10776             "SUB    ESP, 8\n\t"
10777             "MOVSD  [ESP], $src\n\t"
10778             "FLD_D  [ESP]\n\t"
10779             "ADD    ESP, 8\n\t"
10780             "CALL   d2i_wrapper\n"
10781       "fast:" %}
10782   ins_encode %{
10783     Label fast;
10784     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10785     __ cmpl($dst$$Register, 0x80000000);
10786     __ jccb(Assembler::notEqual, fast);
10787     __ subptr(rsp, 8);
10788     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10789     __ fld_d(Address(rsp, 0));
10790     __ addptr(rsp, 8);
10791     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10792     __ post_call_nop();
10793     __ bind(fast);
10794   %}
10795   ins_pipe( pipe_slow );
10796 %}
10797 
10798 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10799   predicate(UseSSE<=1);
10800   match(Set dst (ConvD2L src));
10801   effect( KILL cr );
10802   format %{ "FLD    $src\t# Convert double to long\n\t"
10803             "FLDCW  trunc mode\n\t"
10804             "SUB    ESP,8\n\t"
10805             "FISTp  [ESP + #0]\n\t"
10806             "FLDCW  std/24-bit mode\n\t"
10807             "POP    EAX\n\t"
10808             "POP    EDX\n\t"
10809             "CMP    EDX,0x80000000\n\t"
10810             "JNE,s  fast\n\t"
10811             "TEST   EAX,EAX\n\t"
10812             "JNE,s  fast\n\t"
10813             "FLD    $src\n\t"
10814             "CALL   d2l_wrapper\n"
10815       "fast:" %}
10816   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10817   ins_pipe( pipe_slow );
10818 %}
10819 
10820 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10821 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10822   predicate (UseSSE>=2);
10823   match(Set dst (ConvD2L src));
10824   effect( KILL cr );
10825   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10826             "MOVSD  [ESP],$src\n\t"
10827             "FLD_D  [ESP]\n\t"
10828             "FLDCW  trunc mode\n\t"
10829             "FISTp  [ESP + #0]\n\t"
10830             "FLDCW  std/24-bit mode\n\t"
10831             "POP    EAX\n\t"
10832             "POP    EDX\n\t"
10833             "CMP    EDX,0x80000000\n\t"
10834             "JNE,s  fast\n\t"
10835             "TEST   EAX,EAX\n\t"
10836             "JNE,s  fast\n\t"
10837             "SUB    ESP,8\n\t"
10838             "MOVSD  [ESP],$src\n\t"
10839             "FLD_D  [ESP]\n\t"
10840             "ADD    ESP,8\n\t"
10841             "CALL   d2l_wrapper\n"
10842       "fast:" %}
10843   ins_encode %{
10844     Label fast;
10845     __ subptr(rsp, 8);
10846     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10847     __ fld_d(Address(rsp, 0));
10848     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10849     __ fistp_d(Address(rsp, 0));
10850     // Restore the rounding mode, mask the exception
10851     if (Compile::current()->in_24_bit_fp_mode()) {
10852       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10853     } else {
10854       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10855     }
10856     // Load the converted long, adjust CPU stack
10857     __ pop(rax);
10858     __ pop(rdx);
10859     __ cmpl(rdx, 0x80000000);
10860     __ jccb(Assembler::notEqual, fast);
10861     __ testl(rax, rax);
10862     __ jccb(Assembler::notEqual, fast);
10863     __ subptr(rsp, 8);
10864     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10865     __ fld_d(Address(rsp, 0));
10866     __ addptr(rsp, 8);
10867     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
10868     __ post_call_nop();
10869     __ bind(fast);
10870   %}
10871   ins_pipe( pipe_slow );
10872 %}
10873 
10874 // Convert a double to an int.  Java semantics require we do complex
10875 // manipulations in the corner cases.  So we set the rounding mode to
10876 // 'zero', store the darned double down as an int, and reset the
10877 // rounding mode to 'nearest'.  The hardware stores a flag value down
10878 // if we would overflow or converted a NaN; we check for this and go
10879 // the slow path if needed.
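      // For reference, the Java narrowing conversion the slow path has to
      // reproduce (JLS 5.1.3):
      //
      //   (int) Double.NaN               == 0
      //   (int) Double.POSITIVE_INFINITY == Integer.MAX_VALUE
      //   (int) Double.NEGATIVE_INFINITY == Integer.MIN_VALUE
      //
      // FIST stores the integer-indefinite value 0x80000000 for NaN and
      // out-of-range inputs, which is why the fast paths below compare the
      // result against 0x80000000 before falling back to the wrapper stub.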
10880 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10881   predicate(UseSSE==0);
10882   match(Set dst (ConvF2I src));
10883   effect( KILL tmp, KILL cr );
10884   format %{ "FLD    $src\t# Convert float to int \n\t"
10885             "FLDCW  trunc mode\n\t"
10886             "SUB    ESP,4\n\t"
10887             "FISTp  [ESP + #0]\n\t"
10888             "FLDCW  std/24-bit mode\n\t"
10889             "POP    EAX\n\t"
10890             "CMP    EAX,0x80000000\n\t"
10891             "JNE,s  fast\n\t"
10892             "FLD    $src\n\t"
10893             "CALL   d2i_wrapper\n"
10894       "fast:" %}
10895   // DPR2I_encoding works for FPR2I
10896   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10897   ins_pipe( pipe_slow );
10898 %}
10899 
10900 // Convert a float in xmm to an int reg.
10901 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10902   predicate(UseSSE>=1);
10903   match(Set dst (ConvF2I src));
10904   effect( KILL tmp, KILL cr );
10905   format %{ "CVTTSS2SI $dst, $src\n\t"
10906             "CMP    $dst,0x80000000\n\t"
10907             "JNE,s  fast\n\t"
10908             "SUB    ESP, 4\n\t"
10909             "MOVSS  [ESP], $src\n\t"
10910             "FLD    [ESP]\n\t"
10911             "ADD    ESP, 4\n\t"
10912             "CALL   d2i_wrapper\n"
10913       "fast:" %}
10914   ins_encode %{
10915     Label fast;
10916     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
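    // CVTTSS2SI returns the "integer indefinite" value 0x80000000 when the
    // input is a NaN or out of int range, so only that result needs the
    // slow-path call; a genuine Integer.MIN_VALUE result also takes the slow
    // path and is simply recomputed by the stub.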
10917     __ cmpl($dst$$Register, 0x80000000);
10918     __ jccb(Assembler::notEqual, fast);
10919     __ subptr(rsp, 4);
10920     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10921     __ fld_s(Address(rsp, 0));
10922     __ addptr(rsp, 4);
10923     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
10924     __ post_call_nop();
10925     __ bind(fast);
10926   %}
10927   ins_pipe( pipe_slow );
10928 %}
10929 
10930 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10931   predicate(UseSSE==0);
10932   match(Set dst (ConvF2L src));
10933   effect( KILL cr );
10934   format %{ "FLD    $src\t# Convert float to long\n\t"
10935             "FLDCW  trunc mode\n\t"
10936             "SUB    ESP,8\n\t"
10937             "FISTp  [ESP + #0]\n\t"
10938             "FLDCW  std/24-bit mode\n\t"
10939             "POP    EAX\n\t"
10940             "POP    EDX\n\t"
10941             "CMP    EDX,0x80000000\n\t"
10942             "JNE,s  fast\n\t"
10943             "TEST   EAX,EAX\n\t"
10944             "JNE,s  fast\n\t"
10945             "FLD    $src\n\t"
10946             "CALL   d2l_wrapper\n"
10947       "fast:" %}
10948   // DPR2L_encoding works for FPR2L
10949   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10950   ins_pipe( pipe_slow );
10951 %}
10952 
10953 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10954 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10955   predicate (UseSSE>=1);
10956   match(Set dst (ConvF2L src));
10957   effect( KILL cr );
10958   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10959             "MOVSS  [ESP],$src\n\t"
10960             "FLD_S  [ESP]\n\t"
10961             "FLDCW  trunc mode\n\t"
10962             "FISTp  [ESP + #0]\n\t"
10963             "FLDCW  std/24-bit mode\n\t"
10964             "POP    EAX\n\t"
10965             "POP    EDX\n\t"
10966             "CMP    EDX,0x80000000\n\t"
10967             "JNE,s  fast\n\t"
10968             "TEST   EAX,EAX\n\t"
10969             "JNE,s  fast\n\t"
10970             "SUB    ESP,4\t# Convert float to long\n\t"
10971             "MOVSS  [ESP],$src\n\t"
10972             "FLD_S  [ESP]\n\t"
10973             "ADD    ESP,4\n\t"
10974             "CALL   d2l_wrapper\n"
10975       "fast:" %}
10976   ins_encode %{
10977     Label fast;
10978     __ subptr(rsp, 8);
10979     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10980     __ fld_s(Address(rsp, 0));
10981     __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
10982     __ fistp_d(Address(rsp, 0));
10983     // Restore the rounding mode, mask the exception
10984     if (Compile::current()->in_24_bit_fp_mode()) {
10985       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
10986     } else {
10987       __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
10988     }
10989     // Load the converted long, adjust CPU stack
10990     __ pop(rax);
10991     __ pop(rdx);
10992     __ cmpl(rdx, 0x80000000);
10993     __ jccb(Assembler::notEqual, fast);
10994     __ testl(rax, rax);
10995     __ jccb(Assembler::notEqual, fast);
10996     __ subptr(rsp, 4);
10997     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10998     __ fld_s(Address(rsp, 0));
10999     __ addptr(rsp, 4);
11000     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
11001     __ post_call_nop();
11002     __ bind(fast);
11003   %}
11004   ins_pipe( pipe_slow );
11005 %}
11006 
11007 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11008   predicate( UseSSE<=1 );
11009   match(Set dst (ConvI2D src));
11010   format %{ "FILD   $src\n\t"
11011             "FSTP   $dst" %}
11012   opcode(0xDB, 0x0);  /* DB /0 */
11013   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11014   ins_pipe( fpu_reg_mem );
11015 %}
11016 
11017 instruct convI2D_reg(regD dst, rRegI src) %{
11018   predicate( UseSSE>=2 && !UseXmmI2D );
11019   match(Set dst (ConvI2D src));
11020   format %{ "CVTSI2SD $dst,$src" %}
11021   ins_encode %{
11022     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11023   %}
11024   ins_pipe( pipe_slow );
11025 %}
11026 
11027 instruct convI2D_mem(regD dst, memory mem) %{
11028   predicate( UseSSE>=2 );
11029   match(Set dst (ConvI2D (LoadI mem)));
11030   format %{ "CVTSI2SD $dst,$mem" %}
11031   ins_encode %{
11032     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11033   %}
11034   ins_pipe( pipe_slow );
11035 %}
11036 
11037 instruct convXI2D_reg(regD dst, rRegI src)
11038 %{
11039   predicate( UseSSE>=2 && UseXmmI2D );
11040   match(Set dst (ConvI2D src));
11041 
11042   format %{ "MOVD  $dst,$src\n\t"
11043             "CVTDQ2PD $dst,$dst\t# i2d" %}
11044   ins_encode %{
11045     __ movdl($dst$$XMMRegister, $src$$Register);
11046     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11047   %}
11048   ins_pipe(pipe_slow); // XXX
11049 %}
11050 
11051 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11052   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11053   match(Set dst (ConvI2D (LoadI mem)));
11054   format %{ "FILD   $mem\n\t"
11055             "FSTP   $dst" %}
11056   opcode(0xDB);      /* DB /0 */
11057   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11058               Pop_Reg_DPR(dst));
11059   ins_pipe( fpu_reg_mem );
11060 %}
11061 
11062 // Convert a byte to a float; no rounding step needed.
11063 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11064   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11065   match(Set dst (ConvI2F src));
11066   format %{ "FILD   $src\n\t"
11067             "FSTP   $dst" %}
11068 
11069   opcode(0xDB, 0x0);  /* DB /0 */
11070   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11071   ins_pipe( fpu_reg_mem );
11072 %}
11073 
11074 // In 24-bit mode, force exponent rounding by storing back out
11075 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11076   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11077   match(Set dst (ConvI2F src));
11078   ins_cost(200);
11079   format %{ "FILD   $src\n\t"
11080             "FSTP_S $dst" %}
11081   opcode(0xDB, 0x0);  /* DB /0 */
11082   ins_encode( Push_Mem_I(src),
11083               Pop_Mem_FPR(dst));
11084   ins_pipe( fpu_mem_mem );
11085 %}
11086 
11087 // In 24-bit mode, force exponent rounding by storing back out
11088 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11089   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11090   match(Set dst (ConvI2F (LoadI mem)));
11091   ins_cost(200);
11092   format %{ "FILD   $mem\n\t"
11093             "FSTP_S $dst" %}
11094   opcode(0xDB);  /* DB /0 */
11095   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11096               Pop_Mem_FPR(dst));
11097   ins_pipe( fpu_mem_mem );
11098 %}
11099 
// This instruction does not round to 24-bit precision
11101 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11102   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11103   match(Set dst (ConvI2F src));
11104   format %{ "FILD   $src\n\t"
11105             "FSTP   $dst" %}
11106   opcode(0xDB, 0x0);  /* DB /0 */
11107   ins_encode( Push_Mem_I(src),
11108               Pop_Reg_FPR(dst));
11109   ins_pipe( fpu_reg_mem );
11110 %}
11111 
// This instruction does not round to 24-bit precision
11113 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11114   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11115   match(Set dst (ConvI2F (LoadI mem)));
11116   format %{ "FILD   $mem\n\t"
11117             "FSTP   $dst" %}
11118   opcode(0xDB);      /* DB /0 */
11119   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11120               Pop_Reg_FPR(dst));
11121   ins_pipe( fpu_reg_mem );
11122 %}
11123 
11124 // Convert an int to a float in xmm; no rounding step needed.
11125 instruct convI2F_reg(regF dst, rRegI src) %{
11126   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11127   match(Set dst (ConvI2F src));
11128   format %{ "CVTSI2SS $dst, $src" %}
11129   ins_encode %{
11130     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11131   %}
11132   ins_pipe( pipe_slow );
11133 %}
11134 
instruct convXI2F_reg(regF dst, rRegI src)
11136 %{
11137   predicate( UseSSE>=2 && UseXmmI2F );
11138   match(Set dst (ConvI2F src));
11139 
11140   format %{ "MOVD  $dst,$src\n\t"
11141             "CVTDQ2PS $dst,$dst\t# i2f" %}
11142   ins_encode %{
11143     __ movdl($dst$$XMMRegister, $src$$Register);
11144     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11145   %}
11146   ins_pipe(pipe_slow); // XXX
11147 %}
11148 
11149 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11150   match(Set dst (ConvI2L src));
11151   effect(KILL cr);
11152   ins_cost(375);
11153   format %{ "MOV    $dst.lo,$src\n\t"
11154             "MOV    $dst.hi,$src\n\t"
11155             "SAR    $dst.hi,31" %}
11156   ins_encode(convert_int_long(dst,src));
11157   ins_pipe( ialu_reg_reg_long );
11158 %}
11159 
11160 // Zero-extend convert int to long
11161 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11162   match(Set dst (AndL (ConvI2L src) mask) );
11163   effect( KILL flags );
11164   ins_cost(250);
11165   format %{ "MOV    $dst.lo,$src\n\t"
11166             "XOR    $dst.hi,$dst.hi" %}
11167   opcode(0x33); // XOR
11168   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11169   ins_pipe( ialu_reg_reg_long );
11170 %}
11171 
11172 // Zero-extend long
11173 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11174   match(Set dst (AndL src mask) );
11175   effect( KILL flags );
11176   ins_cost(250);
11177   format %{ "MOV    $dst.lo,$src.lo\n\t"
11178             "XOR    $dst.hi,$dst.hi\n\t" %}
11179   opcode(0x33); // XOR
11180   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11181   ins_pipe( ialu_reg_reg_long );
11182 %}
11183 
11184 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11185   predicate (UseSSE<=1);
11186   match(Set dst (ConvL2D src));
11187   effect( KILL cr );
11188   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11189             "PUSH   $src.lo\n\t"
11190             "FILD   ST,[ESP + #0]\n\t"
11191             "ADD    ESP,8\n\t"
11192             "FSTP_D $dst\t# D-round" %}
11193   opcode(0xDF, 0x5);  /* DF /5 */
11194   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11195   ins_pipe( pipe_slow );
11196 %}
11197 
11198 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11199   predicate (UseSSE>=2);
11200   match(Set dst (ConvL2D src));
11201   effect( KILL cr );
11202   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11203             "PUSH   $src.lo\n\t"
11204             "FILD_D [ESP]\n\t"
11205             "FSTP_D [ESP]\n\t"
11206             "MOVSD  $dst,[ESP]\n\t"
11207             "ADD    ESP,8" %}
11208   opcode(0xDF, 0x5);  /* DF /5 */
11209   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11210   ins_pipe( pipe_slow );
11211 %}
11212 
11213 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11214   predicate (UseSSE>=1);
11215   match(Set dst (ConvL2F src));
11216   effect( KILL cr );
11217   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11218             "PUSH   $src.lo\n\t"
11219             "FILD_D [ESP]\n\t"
11220             "FSTP_S [ESP]\n\t"
11221             "MOVSS  $dst,[ESP]\n\t"
11222             "ADD    ESP,8" %}
11223   opcode(0xDF, 0x5);  /* DF /5 */
11224   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11225   ins_pipe( pipe_slow );
11226 %}
11227 
11228 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11229   match(Set dst (ConvL2F src));
11230   effect( KILL cr );
11231   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11232             "PUSH   $src.lo\n\t"
11233             "FILD   ST,[ESP + #0]\n\t"
11234             "ADD    ESP,8\n\t"
11235             "FSTP_S $dst\t# F-round" %}
11236   opcode(0xDF, 0x5);  /* DF /5 */
11237   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11238   ins_pipe( pipe_slow );
11239 %}
11240 
11241 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11242   match(Set dst (ConvL2I src));
11243   effect( DEF dst, USE src );
11244   format %{ "MOV    $dst,$src.lo" %}
11245   ins_encode(enc_CopyL_Lo(dst,src));
11246   ins_pipe( ialu_reg_reg );
11247 %}
11248 
11249 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11250   match(Set dst (MoveF2I src));
11251   effect( DEF dst, USE src );
11252   ins_cost(100);
11253   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11254   ins_encode %{
11255     __ movl($dst$$Register, Address(rsp, $src$$disp));
11256   %}
11257   ins_pipe( ialu_reg_mem );
11258 %}
11259 
11260 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11261   predicate(UseSSE==0);
11262   match(Set dst (MoveF2I src));
11263   effect( DEF dst, USE src );
11264 
11265   ins_cost(125);
11266   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11267   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11268   ins_pipe( fpu_mem_reg );
11269 %}
11270 
11271 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11272   predicate(UseSSE>=1);
11273   match(Set dst (MoveF2I src));
11274   effect( DEF dst, USE src );
11275 
11276   ins_cost(95);
11277   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11278   ins_encode %{
11279     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11280   %}
11281   ins_pipe( pipe_slow );
11282 %}
11283 
11284 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11285   predicate(UseSSE>=2);
11286   match(Set dst (MoveF2I src));
11287   effect( DEF dst, USE src );
11288   ins_cost(85);
11289   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11290   ins_encode %{
11291     __ movdl($dst$$Register, $src$$XMMRegister);
11292   %}
11293   ins_pipe( pipe_slow );
11294 %}
11295 
11296 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11297   match(Set dst (MoveI2F src));
11298   effect( DEF dst, USE src );
11299 
11300   ins_cost(100);
11301   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11302   ins_encode %{
11303     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11304   %}
11305   ins_pipe( ialu_mem_reg );
11306 %}
11307 
11308 
11309 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11310   predicate(UseSSE==0);
11311   match(Set dst (MoveI2F src));
11312   effect(DEF dst, USE src);
11313 
11314   ins_cost(125);
11315   format %{ "FLD_S  $src\n\t"
11316             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11317   opcode(0xD9);               /* D9 /0, FLD m32real */
11318   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11319               Pop_Reg_FPR(dst) );
11320   ins_pipe( fpu_reg_mem );
11321 %}
11322 
11323 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11324   predicate(UseSSE>=1);
11325   match(Set dst (MoveI2F src));
11326   effect( DEF dst, USE src );
11327 
11328   ins_cost(95);
11329   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11330   ins_encode %{
11331     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11332   %}
11333   ins_pipe( pipe_slow );
11334 %}
11335 
11336 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11337   predicate(UseSSE>=2);
11338   match(Set dst (MoveI2F src));
11339   effect( DEF dst, USE src );
11340 
11341   ins_cost(85);
11342   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11343   ins_encode %{
11344     __ movdl($dst$$XMMRegister, $src$$Register);
11345   %}
11346   ins_pipe( pipe_slow );
11347 %}
11348 
11349 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11350   match(Set dst (MoveD2L src));
11351   effect(DEF dst, USE src);
11352 
11353   ins_cost(250);
11354   format %{ "MOV    $dst.lo,$src\n\t"
11355             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11356   opcode(0x8B, 0x8B);
11357   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11358   ins_pipe( ialu_mem_long_reg );
11359 %}
11360 
11361 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11362   predicate(UseSSE<=1);
11363   match(Set dst (MoveD2L src));
11364   effect(DEF dst, USE src);
11365 
11366   ins_cost(125);
11367   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11368   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11369   ins_pipe( fpu_mem_reg );
11370 %}
11371 
11372 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11373   predicate(UseSSE>=2);
11374   match(Set dst (MoveD2L src));
11375   effect(DEF dst, USE src);
11376   ins_cost(95);
11377   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11378   ins_encode %{
11379     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11380   %}
11381   ins_pipe( pipe_slow );
11382 %}
11383 
11384 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11385   predicate(UseSSE>=2);
11386   match(Set dst (MoveD2L src));
11387   effect(DEF dst, USE src, TEMP tmp);
11388   ins_cost(85);
11389   format %{ "MOVD   $dst.lo,$src\n\t"
11390             "PSHUFLW $tmp,$src,0x4E\n\t"
11391             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11392   ins_encode %{
11393     __ movdl($dst$$Register, $src$$XMMRegister);
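    // Immediate 0x4E (= 01 00 11 10b) swaps the two 32-bit halves of the low
    // quadword, so the upper half of the double lands in the low dword of
    // $tmp for the second MOVD.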
11394     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11395     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11396   %}
11397   ins_pipe( pipe_slow );
11398 %}
11399 
11400 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11401   match(Set dst (MoveL2D src));
11402   effect(DEF dst, USE src);
11403 
11404   ins_cost(200);
11405   format %{ "MOV    $dst,$src.lo\n\t"
11406             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11407   opcode(0x89, 0x89);
11408   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11409   ins_pipe( ialu_mem_long_reg );
11410 %}
11411 
11412 
11413 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11414   predicate(UseSSE<=1);
11415   match(Set dst (MoveL2D src));
11416   effect(DEF dst, USE src);
11417   ins_cost(125);
11418 
11419   format %{ "FLD_D  $src\n\t"
11420             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11421   opcode(0xDD);               /* DD /0, FLD m64real */
11422   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11423               Pop_Reg_DPR(dst) );
11424   ins_pipe( fpu_reg_mem );
11425 %}
11426 
11427 
11428 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11429   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11430   match(Set dst (MoveL2D src));
11431   effect(DEF dst, USE src);
11432 
11433   ins_cost(95);
11434   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11435   ins_encode %{
11436     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11437   %}
11438   ins_pipe( pipe_slow );
11439 %}
11440 
11441 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11442   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11443   match(Set dst (MoveL2D src));
11444   effect(DEF dst, USE src);
11445 
11446   ins_cost(95);
11447   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11448   ins_encode %{
11449     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11450   %}
11451   ins_pipe( pipe_slow );
11452 %}
11453 
11454 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11455   predicate(UseSSE>=2);
11456   match(Set dst (MoveL2D src));
11457   effect(TEMP dst, USE src, TEMP tmp);
11458   ins_cost(85);
11459   format %{ "MOVD   $dst,$src.lo\n\t"
11460             "MOVD   $tmp,$src.hi\n\t"
11461             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11462   ins_encode %{
11463     __ movdl($dst$$XMMRegister, $src$$Register);
11464     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
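    // PUNPCKLDQ interleaves the low dwords: $dst[63:32] = $tmp[31:0] (src.hi)
    // while $dst[31:0] keeps src.lo, forming the 64-bit bit pattern of the
    // double in a single XMM register.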
11465     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11466   %}
11467   ins_pipe( pipe_slow );
11468 %}
11469 
11470 //----------------------------- CompressBits/ExpandBits ------------------------
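//
// On a 32-bit target the BMI2 PEXT/PDEP instructions operate on 32-bit GPRs
// only, so the 64-bit CompressBits/ExpandBits nodes below are synthesized
// from two 32-bit operations on the register halves plus a merge step.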
11471 
11472 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11473   predicate(n->bottom_type()->isa_long());
11474   match(Set dst (CompressBits src mask));
11475   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11476   format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11477   ins_encode %{
    Label exit, partial_result;
    // Extract the upper and lower 32 bits of the source into the destination register pair in
    // parallel, then merge the two halves so that the bits extracted from the upper half are
    // laid out contiguously above those extracted from the lower half.
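    // Worked example (illustrative): with mask = 0x0000000F_000000F0 and
    // src = 0xABCDEF12_34567890, the lower PEXT extracts src bits 4..7 (0x9)
    // and the upper PEXT extracts src bits 32..35 (0x2).  popcount(mask.lo)
    // is 4, so the upper result is shifted left by 4 and OR'ed in, giving
    // dst = 0x00000000_00000029 -- the same value a 64-bit PEXT would produce.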
11482     __ pextl($dst$$Register, $src$$Register, $mask$$Register);
11483     __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11484     __ popcntl($rtmp$$Register, $mask$$Register);
11485     // Skip merging if bit count of lower mask register is equal to 32 (register size).
11486     __ cmpl($rtmp$$Register, 32);
11487     __ jccb(Assembler::equal, exit);
    // Due to the limited number of GPRs on a 32-bit target, use an XMM register as a spill slot.
11489     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11490     // Shift left the contents of upper destination register by true bit count of lower mask register
11491     // and merge with lower destination register.
11492     __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11493     __ orl($dst$$Register, $rtmp$$Register);
11494     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    // Zero out the upper destination register if the true bit count of the lower 32-bit mask
    // is zero, since the entire contents of the upper destination register have already been
    // merged into the lower destination register.
11498     __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
11500     __ movl(HIGH_FROM_LOW($dst$$Register), 0);
11501     __ jmp(exit);
    __ bind(partial_result);
11503     // Perform right shift over upper destination register to move out bits already copied
11504     // to lower destination register.
11505     __ subl($rtmp$$Register, 32);
11506     __ negl($rtmp$$Register);
11507     __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11508     __ bind(exit);
11509   %}
11510   ins_pipe( pipe_slow );
11511 %}
11512 
11513 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
11514   predicate(n->bottom_type()->isa_long());
11515   match(Set dst (ExpandBits src mask));
11516   effect(TEMP rtmp, TEMP xtmp, KILL cr);
11517   format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
11518   ins_encode %{
    // The expand (deposit) operation sequentially reads bits from the source register starting
    // at the LSB and writes them into the destination register at the bit positions that are
    // set in the mask register.  Thus the number of source bits consumed equals the combined
    // true bit count of the mask register pair.
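    // Worked example (illustrative): with mask = 0x00000003_000000F0 and
    // src.lo = 0x0000002B, the lower PDEP deposits src bits 0..3 (0xB) into
    // mask.lo positions 4..7, giving dst.lo = 0xB0; the remaining source bits
    // (src.lo >> 4 = 0x2) are then deposited under mask.hi, giving
    // dst.hi = 0x2 -- the same result a 64-bit PDEP would produce.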
11523     Label exit, mask_clipping;
11524     __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
11525     __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
11526     __ popcntl($rtmp$$Register, $mask$$Register);
    // If the true bit count of the lower mask register is 32, then no bits of the lower source
    // register feed into the upper destination register.
11529     __ cmpl($rtmp$$Register, 32);
11530     __ jccb(Assembler::equal, exit);
    // Due to the limited number of GPRs on a 32-bit target, use an XMM register as a spill slot.
11532     __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
11533     // Shift right the contents of lower source register to remove already consumed bits.
11534     __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
11535     // Extract the bits from lower source register starting from LSB under the influence
11536     // of upper mask register.
11537     __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
11538     __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
11539     __ subl($rtmp$$Register, 32);
11540     __ negl($rtmp$$Register);
11541     __ movdl($xtmp$$XMMRegister, $mask$$Register);
11542     __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
11543     // Clear the set bits in upper mask register which have been used to extract the contents
11544     // from lower source register.
11545     __ bind(mask_clipping);
11546     __ blsrl($mask$$Register, $mask$$Register);
11547     __ decrementl($rtmp$$Register, 1);
11548     __ jccb(Assembler::greater, mask_clipping);
11549     // Starting from LSB extract the bits from upper source register under the influence of
11550     // remaining set bits in upper mask register.
11551     __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
11552     // Merge the partial results extracted from lower and upper source register bits.
11553     __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
11554     __ movdl($mask$$Register, $xtmp$$XMMRegister);
11555     __ bind(exit);
11556   %}
11557   ins_pipe( pipe_slow );
11558 %}
11559 
11560 // =======================================================================
11561 // fast clearing of an array
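// Five variants follow: small vs. large (ClearArrayNode::is_large()),
// non-AVX512 (UseAVX <= 2) vs. AVX-512 (UseAVX > 2), plus a constant-length
// form; the AVX-512 forms pass an extra kReg opmask temporary down to
// clear_mem().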
11562 // Small ClearArray non-AVX512.
11563 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11564   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
11565   match(Set dummy (ClearArray cnt base));
11566   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11567 
11568   format %{ $$template
11569     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11570     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11571     $$emit$$"JG     LARGE\n\t"
11572     $$emit$$"SHL    ECX, 1\n\t"
11573     $$emit$$"DEC    ECX\n\t"
11574     $$emit$$"JS     DONE\t# Zero length\n\t"
11575     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11576     $$emit$$"DEC    ECX\n\t"
11577     $$emit$$"JGE    LOOP\n\t"
11578     $$emit$$"JMP    DONE\n\t"
11579     $$emit$$"# LARGE:\n\t"
11580     if (UseFastStosb) {
11581        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11582        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11583     } else if (UseXMMForObjInit) {
11584        $$emit$$"MOV     RDI,RAX\n\t"
11585        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11586        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11587        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11588        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11589        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11590        $$emit$$"ADD     0x40,RAX\n\t"
11591        $$emit$$"# L_zero_64_bytes:\n\t"
11592        $$emit$$"SUB     0x8,RCX\n\t"
11593        $$emit$$"JGE     L_loop\n\t"
11594        $$emit$$"ADD     0x4,RCX\n\t"
11595        $$emit$$"JL      L_tail\n\t"
11596        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11597        $$emit$$"ADD     0x20,RAX\n\t"
11598        $$emit$$"SUB     0x4,RCX\n\t"
11599        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11600        $$emit$$"ADD     0x4,RCX\n\t"
11601        $$emit$$"JLE     L_end\n\t"
11602        $$emit$$"DEC     RCX\n\t"
11603        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11604        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11605        $$emit$$"ADD     0x8,RAX\n\t"
11606        $$emit$$"DEC     RCX\n\t"
11607        $$emit$$"JGE     L_sloop\n\t"
11608        $$emit$$"# L_end:\n\t"
11609     } else {
11610        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11611        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11612     }
11613     $$emit$$"# DONE"
11614   %}
11615   ins_encode %{
11616     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11617                  $tmp$$XMMRegister, false, knoreg);
11618   %}
11619   ins_pipe( pipe_slow );
11620 %}
11621 
11622 // Small ClearArray AVX512 non-constant length.
11623 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11624   predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
11625   match(Set dummy (ClearArray cnt base));
11626   ins_cost(125);
11627   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11628 
11629   format %{ $$template
11630     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11631     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11632     $$emit$$"JG     LARGE\n\t"
11633     $$emit$$"SHL    ECX, 1\n\t"
11634     $$emit$$"DEC    ECX\n\t"
11635     $$emit$$"JS     DONE\t# Zero length\n\t"
11636     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11637     $$emit$$"DEC    ECX\n\t"
11638     $$emit$$"JGE    LOOP\n\t"
11639     $$emit$$"JMP    DONE\n\t"
11640     $$emit$$"# LARGE:\n\t"
11641     if (UseFastStosb) {
11642        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11643        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11644     } else if (UseXMMForObjInit) {
11645        $$emit$$"MOV     RDI,RAX\n\t"
11646        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11647        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11648        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11649        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11650        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11651        $$emit$$"ADD     0x40,RAX\n\t"
11652        $$emit$$"# L_zero_64_bytes:\n\t"
11653        $$emit$$"SUB     0x8,RCX\n\t"
11654        $$emit$$"JGE     L_loop\n\t"
11655        $$emit$$"ADD     0x4,RCX\n\t"
11656        $$emit$$"JL      L_tail\n\t"
11657        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11658        $$emit$$"ADD     0x20,RAX\n\t"
11659        $$emit$$"SUB     0x4,RCX\n\t"
11660        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11661        $$emit$$"ADD     0x4,RCX\n\t"
11662        $$emit$$"JLE     L_end\n\t"
11663        $$emit$$"DEC     RCX\n\t"
11664        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11665        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11666        $$emit$$"ADD     0x8,RAX\n\t"
11667        $$emit$$"DEC     RCX\n\t"
11668        $$emit$$"JGE     L_sloop\n\t"
11669        $$emit$$"# L_end:\n\t"
11670     } else {
11671        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11672        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11673     }
11674     $$emit$$"# DONE"
11675   %}
11676   ins_encode %{
11677     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11678                  $tmp$$XMMRegister, false, $ktmp$$KRegister);
11679   %}
11680   ins_pipe( pipe_slow );
11681 %}
11682 
11683 // Large ClearArray non-AVX512.
11684 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11685   predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
11686   match(Set dummy (ClearArray cnt base));
11687   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11688   format %{ $$template
11689     if (UseFastStosb) {
11690        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11691        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11692        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11693     } else if (UseXMMForObjInit) {
11694        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11695        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11696        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11697        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11698        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11699        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11700        $$emit$$"ADD     0x40,RAX\n\t"
11701        $$emit$$"# L_zero_64_bytes:\n\t"
11702        $$emit$$"SUB     0x8,RCX\n\t"
11703        $$emit$$"JGE     L_loop\n\t"
11704        $$emit$$"ADD     0x4,RCX\n\t"
11705        $$emit$$"JL      L_tail\n\t"
11706        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11707        $$emit$$"ADD     0x20,RAX\n\t"
11708        $$emit$$"SUB     0x4,RCX\n\t"
11709        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11710        $$emit$$"ADD     0x4,RCX\n\t"
11711        $$emit$$"JLE     L_end\n\t"
11712        $$emit$$"DEC     RCX\n\t"
11713        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11714        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11715        $$emit$$"ADD     0x8,RAX\n\t"
11716        $$emit$$"DEC     RCX\n\t"
11717        $$emit$$"JGE     L_sloop\n\t"
11718        $$emit$$"# L_end:\n\t"
11719     } else {
11720        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11721        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11722        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11723     }
11724     $$emit$$"# DONE"
11725   %}
11726   ins_encode %{
11727     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11728                  $tmp$$XMMRegister, true, knoreg);
11729   %}
11730   ins_pipe( pipe_slow );
11731 %}
11732 
11733 // Large ClearArray AVX512.
11734 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11735   predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
11736   match(Set dummy (ClearArray cnt base));
11737   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
11738   format %{ $$template
11739     if (UseFastStosb) {
11740        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11741        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11742        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11743     } else if (UseXMMForObjInit) {
11744        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11745        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11746        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11747        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11748        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11749        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11750        $$emit$$"ADD     0x40,RAX\n\t"
11751        $$emit$$"# L_zero_64_bytes:\n\t"
11752        $$emit$$"SUB     0x8,RCX\n\t"
11753        $$emit$$"JGE     L_loop\n\t"
11754        $$emit$$"ADD     0x4,RCX\n\t"
11755        $$emit$$"JL      L_tail\n\t"
11756        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11757        $$emit$$"ADD     0x20,RAX\n\t"
11758        $$emit$$"SUB     0x4,RCX\n\t"
11759        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11760        $$emit$$"ADD     0x4,RCX\n\t"
11761        $$emit$$"JLE     L_end\n\t"
11762        $$emit$$"DEC     RCX\n\t"
11763        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11764        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11765        $$emit$$"ADD     0x8,RAX\n\t"
11766        $$emit$$"DEC     RCX\n\t"
11767        $$emit$$"JGE     L_sloop\n\t"
11768        $$emit$$"# L_end:\n\t"
11769     } else {
11770        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11771        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11772        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11773     }
11774     $$emit$$"# DONE"
11775   %}
11776   ins_encode %{
11777     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11778                  $tmp$$XMMRegister, true, $ktmp$$KRegister);
11779   %}
11780   ins_pipe( pipe_slow );
11781 %}
11782 
11783 // Small ClearArray AVX512 constant length.
11784 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
11785 %{
11786   predicate(!((ClearArrayNode*)n)->is_large() &&
11787                ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
11788   match(Set dummy (ClearArray cnt base));
11789   ins_cost(100);
11790   effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
11791   format %{ "clear_mem_imm $base , $cnt  \n\t" %}
11792   ins_encode %{
11793    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
11794   %}
11795   ins_pipe(pipe_slow);
11796 %}
11797 
11798 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11799                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11800   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11801   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11802   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11803 
11804   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11805   ins_encode %{
11806     __ string_compare($str1$$Register, $str2$$Register,
11807                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11808                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
11809   %}
11810   ins_pipe( pipe_slow );
11811 %}
11812 
11813 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11814                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11815   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11816   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11817   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11818 
11819   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11820   ins_encode %{
11821     __ string_compare($str1$$Register, $str2$$Register,
11822                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11823                       $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
11824   %}
11825   ins_pipe( pipe_slow );
11826 %}
11827 
11828 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11829                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11830   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11831   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11832   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11833 
11834   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11835   ins_encode %{
11836     __ string_compare($str1$$Register, $str2$$Register,
11837                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11838                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
11839   %}
11840   ins_pipe( pipe_slow );
11841 %}
11842 
11843 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11844                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11845   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11846   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11847   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11848 
11849   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11850   ins_encode %{
11851     __ string_compare($str1$$Register, $str2$$Register,
11852                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11853                       $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
11854   %}
11855   ins_pipe( pipe_slow );
11856 %}
11857 
11858 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11859                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11860   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11861   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11862   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11863 
11864   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11865   ins_encode %{
11866     __ string_compare($str1$$Register, $str2$$Register,
11867                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11868                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
11869   %}
11870   ins_pipe( pipe_slow );
11871 %}
11872 
11873 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11874                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11875   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11876   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11877   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11878 
11879   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11880   ins_encode %{
11881     __ string_compare($str1$$Register, $str2$$Register,
11882                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11883                       $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
11884   %}
11885   ins_pipe( pipe_slow );
11886 %}
11887 
11888 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11889                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11890   predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11891   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11892   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11893 
11894   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11895   ins_encode %{
11896     __ string_compare($str2$$Register, $str1$$Register,
11897                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11898                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
11899   %}
11900   ins_pipe( pipe_slow );
11901 %}
11902 
11903 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11904                                eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
11905   predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11906   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11907   effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11908 
11909   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11910   ins_encode %{
11911     __ string_compare($str2$$Register, $str1$$Register,
11912                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11913                       $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
11914   %}
11915   ins_pipe( pipe_slow );
11916 %}
11917 
11918 // fast string equals
11919 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11920                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11921   predicate(!VM_Version::supports_avx512vlbw());
11922   match(Set result (StrEquals (Binary str1 str2) cnt));
11923   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11924 
11925   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11926   ins_encode %{
11927     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11928                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11929                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
11930   %}
11931 
11932   ins_pipe( pipe_slow );
11933 %}
11934 
11935 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11936                             regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
11937   predicate(VM_Version::supports_avx512vlbw());
11938   match(Set result (StrEquals (Binary str1 str2) cnt));
11939   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11940 
11941   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11942   ins_encode %{
11943     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11944                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11945                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
11946   %}
11947 
11948   ins_pipe( pipe_slow );
11949 %}
11950 
11951 
11952 // fast search of substring with known size.
11953 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11954                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11955   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11956   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11957   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11958 
11959   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11960   ins_encode %{
11961     int icnt2 = (int)$int_cnt2$$constant;
11962     if (icnt2 >= 16) {
11963       // IndexOf for constant substrings with size >= 16 elements
11964       // which don't need to be loaded through stack.
11965       __ string_indexofC8($str1$$Register, $str2$$Register,
11966                           $cnt1$$Register, $cnt2$$Register,
11967                           icnt2, $result$$Register,
11968                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11969     } else {
11970       // Small strings are loaded through stack if they cross page boundary.
11971       __ string_indexof($str1$$Register, $str2$$Register,
11972                         $cnt1$$Register, $cnt2$$Register,
11973                         icnt2, $result$$Register,
11974                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11975     }
11976   %}
11977   ins_pipe( pipe_slow );
11978 %}
11979 
11980 // fast search of substring with known size.
11981 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11982                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11983   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11984   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11985   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11986 
11987   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11988   ins_encode %{
11989     int icnt2 = (int)$int_cnt2$$constant;
11990     if (icnt2 >= 8) {
11991       // IndexOf for constant substrings with size >= 8 elements
11992       // which don't need to be loaded through stack.
11993       __ string_indexofC8($str1$$Register, $str2$$Register,
11994                           $cnt1$$Register, $cnt2$$Register,
11995                           icnt2, $result$$Register,
11996                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11997     } else {
11998       // Small strings are loaded through stack if they cross page boundary.
11999       __ string_indexof($str1$$Register, $str2$$Register,
12000                         $cnt1$$Register, $cnt2$$Register,
12001                         icnt2, $result$$Register,
12002                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12003     }
12004   %}
12005   ins_pipe( pipe_slow );
12006 %}
12007 
12008 // fast search of substring with known size.
12009 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12010                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12011   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12012   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12013   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12014 
12015   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
12016   ins_encode %{
12017     int icnt2 = (int)$int_cnt2$$constant;
12018     if (icnt2 >= 8) {
12019       // IndexOf for constant substrings with size >= 8 elements
12020       // which don't need to be loaded through stack.
12021       __ string_indexofC8($str1$$Register, $str2$$Register,
12022                           $cnt1$$Register, $cnt2$$Register,
12023                           icnt2, $result$$Register,
12024                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12025     } else {
12026       // Small strings are loaded through stack if they cross page boundary.
12027       __ string_indexof($str1$$Register, $str2$$Register,
12028                         $cnt1$$Register, $cnt2$$Register,
12029                         icnt2, $result$$Register,
12030                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12031     }
12032   %}
12033   ins_pipe( pipe_slow );
12034 %}
12035 
12036 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12037                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12038   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
12039   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12040   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12041 
12042   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12043   ins_encode %{
12044     __ string_indexof($str1$$Register, $str2$$Register,
12045                       $cnt1$$Register, $cnt2$$Register,
12046                       (-1), $result$$Register,
12047                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
12048   %}
12049   ins_pipe( pipe_slow );
12050 %}
12051 
12052 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12053                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12054   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
12055   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12056   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12057 
12058   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12059   ins_encode %{
12060     __ string_indexof($str1$$Register, $str2$$Register,
12061                       $cnt1$$Register, $cnt2$$Register,
12062                       (-1), $result$$Register,
12063                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
12064   %}
12065   ins_pipe( pipe_slow );
12066 %}
12067 
12068 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12069                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
12070   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
12071   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12072   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12073 
12074   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12075   ins_encode %{
12076     __ string_indexof($str1$$Register, $str2$$Register,
12077                       $cnt1$$Register, $cnt2$$Register,
12078                       (-1), $result$$Register,
12079                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
12080   %}
12081   ins_pipe( pipe_slow );
12082 %}
12083 
12084 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12085                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12086   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
12087   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12088   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12089   format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12090   ins_encode %{
12091     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12092                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12093   %}
12094   ins_pipe( pipe_slow );
12095 %}
12096 
12097 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
12098                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
12099   predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
12100   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
12101   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
12102   format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
12103   ins_encode %{
12104     __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
12105                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
12106   %}
12107   ins_pipe( pipe_slow );
12108 %}
12109 
12110 
12111 // fast array equals
12112 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12113                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12114 %{
12115   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12116   match(Set result (AryEq ary1 ary2));
12117   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12118   //ins_cost(300);
12119 
12120   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12121   ins_encode %{
12122     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12123                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12124                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
12125   %}
12126   ins_pipe( pipe_slow );
12127 %}
12128 
12129 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12130                        regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12131 %{
12132   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
12133   match(Set result (AryEq ary1 ary2));
12134   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12135   //ins_cost(300);
12136 
12137   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12138   ins_encode %{
12139     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12140                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12141                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
12142   %}
12143   ins_pipe( pipe_slow );
12144 %}
12145 
12146 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12147                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12148 %{
12149   predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12150   match(Set result (AryEq ary1 ary2));
12151   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12152   //ins_cost(300);
12153 
12154   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12155   ins_encode %{
12156     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12157                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12158                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
12159   %}
12160   ins_pipe( pipe_slow );
12161 %}
12162 
12163 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12164                             regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12165 %{
12166   predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
12167   match(Set result (AryEq ary1 ary2));
12168   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12169   //ins_cost(300);
12170 
12171   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12172   ins_encode %{
12173     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
12174                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
12175                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
12176   %}
12177   ins_pipe( pipe_slow );
12178 %}
12179 
12180 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
12181                          regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
12182 %{
12183   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12184   match(Set result (CountPositives ary1 len));
12185   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12186 
12187   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12188   ins_encode %{
12189     __ count_positives($ary1$$Register, $len$$Register,
12190                        $result$$Register, $tmp3$$Register,
12191                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
12192   %}
12193   ins_pipe( pipe_slow );
12194 %}
12195 
12196 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
12197                               regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
12198 %{
12199   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12200   match(Set result (CountPositives ary1 len));
12201   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
12202 
12203   format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
12204   ins_encode %{
12205     __ count_positives($ary1$$Register, $len$$Register,
12206                        $result$$Register, $tmp3$$Register,
12207                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
12208   %}
12209   ins_pipe( pipe_slow );
12210 %}
12211 
12212 
12213 // fast char[] to byte[] compression
12214 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12215                          regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12216   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12217   match(Set result (StrCompressedCopy src (Binary dst len)));
12218   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12219 
12220   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12221   ins_encode %{
12222     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12223                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12224                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12225                            knoreg, knoreg);
12226   %}
12227   ins_pipe( pipe_slow );
12228 %}
12229 
12230 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
12231                               regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12232   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12233   match(Set result (StrCompressedCopy src (Binary dst len)));
12234   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12235 
12236   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
12237   ins_encode %{
12238     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
12239                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12240                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
12241                            $ktmp1$$KRegister, $ktmp2$$KRegister);
12242   %}
12243   ins_pipe( pipe_slow );
12244 %}
12245 
12246 // fast byte[] to char[] inflation
12247 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12248                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12249   predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
12250   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12251   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12252 
12253   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12254   ins_encode %{
12255     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12256                           $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
12257   %}
12258   ins_pipe( pipe_slow );
12259 %}
12260 
12261 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12262                              regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
12263   predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
12264   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12265   effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12266 
12267   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12268   ins_encode %{
12269     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12270                           $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
12271   %}
12272   ins_pipe( pipe_slow );
12273 %}
12274 
12275 // encode char[] to byte[] in ISO_8859_1
12276 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12277                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12278                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12279   predicate(!((EncodeISOArrayNode*)n)->is_ascii());
12280   match(Set result (EncodeISOArray src (Binary dst len)));
12281   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12282 
12283   format %{ "Encode iso array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12284   ins_encode %{
12285     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12286                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12287                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
12288   %}
12289   ins_pipe( pipe_slow );
12290 %}
12291 
12292 // encode char[] to byte[] in ASCII
12293 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12294                             regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12295                             eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12296   predicate(((EncodeISOArrayNode*)n)->is_ascii());
12297   match(Set result (EncodeISOArray src (Binary dst len)));
12298   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12299 
12300   format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12301   ins_encode %{
12302     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12303                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12304                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
12305   %}
12306   ins_pipe( pipe_slow );
12307 %}
12308 
12309 //----------Control Flow Instructions------------------------------------------
12310 // Signed compare Instructions
12311 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12312   match(Set cr (CmpI op1 op2));
12313   effect( DEF cr, USE op1, USE op2 );
12314   format %{ "CMP    $op1,$op2" %}
12315   opcode(0x3B);  /* Opcode 3B /r */
12316   ins_encode( OpcP, RegReg( op1, op2) );
12317   ins_pipe( ialu_cr_reg_reg );
12318 %}
12319 
12320 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12321   match(Set cr (CmpI op1 op2));
12322   effect( DEF cr, USE op1 );
12323   format %{ "CMP    $op1,$op2" %}
12324   opcode(0x81,0x07);  /* Opcode 81 /7 */
12325   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12326   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12327   ins_pipe( ialu_cr_reg_imm );
12328 %}
12329 
12330 // Cisc-spilled version of cmpI_eReg
12331 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12332   match(Set cr (CmpI op1 (LoadI op2)));
12333 
12334   format %{ "CMP    $op1,$op2" %}
12335   ins_cost(500);
12336   opcode(0x3B);  /* Opcode 3B /r */
12337   ins_encode( OpcP, RegMem( op1, op2) );
12338   ins_pipe( ialu_cr_reg_mem );
12339 %}
12340 
12341 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12342   match(Set cr (CmpI src zero));
12343   effect( DEF cr, USE src );
12344 
12345   format %{ "TEST   $src,$src" %}
12346   opcode(0x85);
12347   ins_encode( OpcP, RegReg( src, src ) );
12348   ins_pipe( ialu_cr_reg_imm );
12349 %}
12350 
12351 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12352   match(Set cr (CmpI (AndI src con) zero));
12353 
12354   format %{ "TEST   $src,$con" %}
12355   opcode(0xF7,0x00);
12356   ins_encode( OpcP, RegOpc(src), Con32(con) );
12357   ins_pipe( ialu_cr_reg_imm );
12358 %}
12359 
12360 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12361   match(Set cr (CmpI (AndI src mem) zero));
12362 
12363   format %{ "TEST   $src,$mem" %}
12364   opcode(0x85);
12365   ins_encode( OpcP, RegMem( src, mem ) );
12366   ins_pipe( ialu_cr_reg_mem );
12367 %}
12368 
12369 // Unsigned compare Instructions; really the same as signed, except they
12370 // produce an eFlagsRegU instead of an eFlagsReg.
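// (The CMP encoding itself is identical to the signed form; the unsignedness
// only matters for which condition codes get paired with the result later,
// e.g. JB/JAE rather than JL/JGE, hence the separate flags register class.)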
12371 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12372   match(Set cr (CmpU op1 op2));
12373 
12374   format %{ "CMPu   $op1,$op2" %}
12375   opcode(0x3B);  /* Opcode 3B /r */
12376   ins_encode( OpcP, RegReg( op1, op2) );
12377   ins_pipe( ialu_cr_reg_reg );
12378 %}
12379 
12380 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12381   match(Set cr (CmpU op1 op2));
12382 
12383   format %{ "CMPu   $op1,$op2" %}
12384   opcode(0x81,0x07);  /* Opcode 81 /7 */
12385   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12386   ins_pipe( ialu_cr_reg_imm );
12387 %}
12388 
12389 // // Cisc-spilled version of cmpU_eReg
12390 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12391   match(Set cr (CmpU op1 (LoadI op2)));
12392 
12393   format %{ "CMPu   $op1,$op2" %}
12394   ins_cost(500);
12395   opcode(0x3B);  /* Opcode 3B /r */
12396   ins_encode( OpcP, RegMem( op1, op2) );
12397   ins_pipe( ialu_cr_reg_mem );
12398 %}
12399 
12400 // // Cisc-spilled version of cmpU_eReg
12401 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12402 //  match(Set cr (CmpU (LoadI op1) op2));
12403 //
12404 //  format %{ "CMPu   $op1,$op2" %}
12405 //  ins_cost(500);
12406 //  opcode(0x39);  /* Opcode 39 /r */
12407 //  ins_encode( OpcP, RegMem( op1, op2) );
12408 //%}
12409 
12410 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12411   match(Set cr (CmpU src zero));
12412 
12413   format %{ "TESTu  $src,$src" %}
12414   opcode(0x85);
12415   ins_encode( OpcP, RegReg( src, src ) );
12416   ins_pipe( ialu_cr_reg_imm );
12417 %}
12418 
12419 // Unsigned pointer compare Instructions
12420 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12421   match(Set cr (CmpP op1 op2));
12422 
12423   format %{ "CMPu   $op1,$op2" %}
12424   opcode(0x3B);  /* Opcode 3B /r */
12425   ins_encode( OpcP, RegReg( op1, op2) );
12426   ins_pipe( ialu_cr_reg_reg );
12427 %}
12428 
12429 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12430   match(Set cr (CmpP op1 op2));
12431 
12432   format %{ "CMPu   $op1,$op2" %}
12433   opcode(0x81,0x07);  /* Opcode 81 /7 */
12434   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12435   ins_pipe( ialu_cr_reg_imm );
12436 %}
12437 
12438 // // Cisc-spilled version of cmpP_eReg
12439 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12440   match(Set cr (CmpP op1 (LoadP op2)));
12441 
12442   format %{ "CMPu   $op1,$op2" %}
12443   ins_cost(500);
12444   opcode(0x3B);  /* Opcode 3B /r */
12445   ins_encode( OpcP, RegMem( op1, op2) );
12446   ins_pipe( ialu_cr_reg_mem );
12447 %}
12448 
12449 // // Cisc-spilled version of cmpP_eReg
12450 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12451 //  match(Set cr (CmpP (LoadP op1) op2));
12452 //
12453 //  format %{ "CMPu   $op1,$op2" %}
12454 //  ins_cost(500);
12455 //  opcode(0x39);  /* Opcode 39 /r */
12456 //  ins_encode( OpcP, RegMem( op1, op2) );
12457 //%}
12458 
12459 // Compare raw pointer (used in out-of-heap check).
12460 // Only works because non-oop pointers must be raw pointers
12461 // and raw pointers have no anti-dependencies.
12462 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12463   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12464   match(Set cr (CmpP op1 (LoadP op2)));
12465 
12466   format %{ "CMPu   $op1,$op2" %}
12467   opcode(0x3B);  /* Opcode 3B /r */
12468   ins_encode( OpcP, RegMem( op1, op2) );
12469   ins_pipe( ialu_cr_reg_mem );
12470 %}
12471 
12472 //
12473 // This will generate a signed flags result. This should be ok
12474 // since any compare to a zero should be eq/neq.
12475 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12476   match(Set cr (CmpP src zero));
12477 
12478   format %{ "TEST   $src,$src" %}
12479   opcode(0x85);
12480   ins_encode( OpcP, RegReg( src, src ) );
12481   ins_pipe( ialu_cr_reg_imm );
12482 %}
12483 
12484 // Cisc-spilled version of testP_reg
12485 // This will generate a signed flags result. This should be ok
12486 // since any compare to a zero should be eq/neq.
12487 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12488   match(Set cr (CmpP (LoadP op) zero));
12489 
12490   format %{ "TEST   $op,0xFFFFFFFF" %}
12491   ins_cost(500);
12492   opcode(0xF7);               /* Opcode F7 /0 */
12493   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12494   ins_pipe( ialu_cr_reg_imm );
12495 %}
12496 
12497 // Yanked all unsigned pointer compare operations.
12498 // Pointer compares are done with CmpP which is already unsigned.
12499 
12500 //----------Max and Min--------------------------------------------------------
12501 // Min Instructions
12502 ////
12503 //   *** Min and Max using the conditional move are slower than the
12504 //   *** branch version on a Pentium III.
12505 // // Conditional move for min
12506 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12507 //  effect( USE_DEF op2, USE op1, USE cr );
12508 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12509 //  opcode(0x4C,0x0F);
12510 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12511 //  ins_pipe( pipe_cmov_reg );
12512 //%}
12513 //
12514 //// Min Register with Register (P6 version)
12515 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12516 //  predicate(VM_Version::supports_cmov() );
12517 //  match(Set op2 (MinI op1 op2));
12518 //  ins_cost(200);
12519 //  expand %{
12520 //    eFlagsReg cr;
12521 //    compI_eReg(cr,op1,op2);
12522 //    cmovI_reg_lt(op2,op1,cr);
12523 //  %}
12524 //%}
12525 
12526 // Min Register with Register (generic version)
12527 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12528   match(Set dst (MinI dst src));
12529   effect(KILL flags);
12530   ins_cost(300);
12531 
12532   format %{ "MIN    $dst,$src" %}
12533   opcode(0xCC);
12534   ins_encode( min_enc(dst,src) );
12535   ins_pipe( pipe_slow );
12536 %}
12537 
12538 // Max Register with Register
12539 //   *** Min and Max using the conditional move are slower than the
12540 //   *** branch version on a Pentium III.
12541 // // Conditional move for max
12542 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12543 //  effect( USE_DEF op2, USE op1, USE cr );
12544 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12545 //  opcode(0x4F,0x0F);
12546 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12547 //  ins_pipe( pipe_cmov_reg );
12548 //%}
12549 //
12550 // // Max Register with Register (P6 version)
12551 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12552 //  predicate(VM_Version::supports_cmov() );
12553 //  match(Set op2 (MaxI op1 op2));
12554 //  ins_cost(200);
12555 //  expand %{
12556 //    eFlagsReg cr;
12557 //    compI_eReg(cr,op1,op2);
12558 //    cmovI_reg_gt(op2,op1,cr);
12559 //  %}
12560 //%}
12561 
12562 // Max Register with Register (generic version)
12563 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12564   match(Set dst (MaxI dst src));
12565   effect(KILL flags);
12566   ins_cost(300);
12567 
12568   format %{ "MAX    $dst,$src" %}
12569   opcode(0xCC);
12570   ins_encode( max_enc(dst,src) );
12571   ins_pipe( pipe_slow );
12572 %}
12573 
12574 // ============================================================================
12575 // Counted Loop limit node which represents the exact final iterator value.
12576 // Note: the resulting value should fit into the integer range since
12577 // counted loops have a limit check on overflow.
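// Illustrative arithmetic (not generated code): for init=0, limit=10, stride=3
// the exact limit is init + stride * ((limit - init + stride - 1) / stride)
//   = 0 + 3 * ((10 - 0 + 2) / 3) = 3 * 4 = 12,
// i.e. the first value on the stride that reaches or passes the original limit.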
12578 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12579   match(Set limit (LoopLimit (Binary init limit) stride));
12580   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12581   ins_cost(300);
12582 
12583   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12584   ins_encode %{
12585     int strd = (int)$stride$$constant;
12586     assert(strd != 1 && strd != -1, "sanity");
12587     int m1 = (strd > 0) ? 1 : -1;
12588     // Convert limit to long (EAX:EDX)
12589     __ cdql();
12590     // Convert init to long (init:tmp)
12591     __ movl($tmp$$Register, $init$$Register);
12592     __ sarl($tmp$$Register, 31);
12593     // $limit - $init
12594     __ subl($limit$$Register, $init$$Register);
12595     __ sbbl($limit_hi$$Register, $tmp$$Register);
12596     // + ($stride - 1)
12597     if (strd > 0) {
12598       __ addl($limit$$Register, (strd - 1));
12599       __ adcl($limit_hi$$Register, 0);
12600       __ movl($tmp$$Register, strd);
12601     } else {
12602       __ addl($limit$$Register, (strd + 1));
12603       __ adcl($limit_hi$$Register, -1);
12604       __ lneg($limit_hi$$Register, $limit$$Register);
12605       __ movl($tmp$$Register, -strd);
12606     }
12607     // signed division: (EAX:EDX) / pos_stride
12608     __ idivl($tmp$$Register);
12609     if (strd < 0) {
12610       // restore sign
12611       __ negl($tmp$$Register);
12612     }
12613     // (EAX) * stride
12614     __ mull($tmp$$Register);
12615     // + init (ignore upper bits)
12616     __ addl($limit$$Register, $init$$Register);
12617   %}
12618   ins_pipe( pipe_slow );
12619 %}
12620 
12621 // ============================================================================
12622 // Branch Instructions
12623 // Jump Table
12624 instruct jumpXtnd(rRegI switch_val) %{
12625   match(Jump switch_val);
12626   ins_cost(350);
12627   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12628   ins_encode %{
12629     // Jump to Address(table_base + switch_reg)
12630     Address index(noreg, $switch_val$$Register, Address::times_1);
12631     __ jump(ArrayAddress($constantaddress, index), noreg);
12632   %}
12633   ins_pipe(pipe_jmp);
12634 %}
12635 
12636 // Jump Direct - Label defines a relative address from JMP+1
12637 instruct jmpDir(label labl) %{
12638   match(Goto);
12639   effect(USE labl);
12640 
12641   ins_cost(300);
12642   format %{ "JMP    $labl" %}
12643   size(5);
12644   ins_encode %{
12645     Label* L = $labl$$label;
12646     __ jmp(*L, false); // Always long jump
12647   %}
12648   ins_pipe( pipe_jmp );
12649 %}
12650 
12651 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12652 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12653   match(If cop cr);
12654   effect(USE labl);
12655 
12656   ins_cost(300);
12657   format %{ "J$cop    $labl" %}
12658   size(6);
12659   ins_encode %{
12660     Label* L = $labl$$label;
12661     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12662   %}
12663   ins_pipe( pipe_jcc );
12664 %}
12665 
12666 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12667 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12668   match(CountedLoopEnd cop cr);
12669   effect(USE labl);
12670 
12671   ins_cost(300);
12672   format %{ "J$cop    $labl\t# Loop end" %}
12673   size(6);
12674   ins_encode %{
12675     Label* L = $labl$$label;
12676     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12677   %}
12678   ins_pipe( pipe_jcc );
12679 %}
12680 
12681 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12682 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12683   match(CountedLoopEnd cop cmp);
12684   effect(USE labl);
12685 
12686   ins_cost(300);
12687   format %{ "J$cop,u  $labl\t# Loop end" %}
12688   size(6);
12689   ins_encode %{
12690     Label* L = $labl$$label;
12691     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12692   %}
12693   ins_pipe( pipe_jcc );
12694 %}
12695 
12696 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12697   match(CountedLoopEnd cop cmp);
12698   effect(USE labl);
12699 
12700   ins_cost(200);
12701   format %{ "J$cop,u  $labl\t# Loop end" %}
12702   size(6);
12703   ins_encode %{
12704     Label* L = $labl$$label;
12705     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12706   %}
12707   ins_pipe( pipe_jcc );
12708 %}
12709 
12710 // Jump Direct Conditional - using unsigned comparison
12711 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12712   match(If cop cmp);
12713   effect(USE labl);
12714 
12715   ins_cost(300);
12716   format %{ "J$cop,u  $labl" %}
12717   size(6);
12718   ins_encode %{
12719     Label* L = $labl$$label;
12720     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12721   %}
12722   ins_pipe(pipe_jcc);
12723 %}
12724 
12725 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12726   match(If cop cmp);
12727   effect(USE labl);
12728 
12729   ins_cost(200);
12730   format %{ "J$cop,u  $labl" %}
12731   size(6);
12732   ins_encode %{
12733     Label* L = $labl$$label;
12734     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12735   %}
12736   ins_pipe(pipe_jcc);
12737 %}
12738 
12739 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12740   match(If cop cmp);
12741   effect(USE labl);
12742 
12743   ins_cost(200);
12744   format %{ $$template
12745     if ($cop$$cmpcode == Assembler::notEqual) {
12746       $$emit$$"JP,u   $labl\n\t"
12747       $$emit$$"J$cop,u   $labl"
12748     } else {
12749       $$emit$$"JP,u   done\n\t"
12750       $$emit$$"J$cop,u   $labl\n\t"
12751       $$emit$$"done:"
12752     }
12753   %}
12754   ins_encode %{
12755     Label* l = $labl$$label;
12756     if ($cop$$cmpcode == Assembler::notEqual) {
12757       __ jcc(Assembler::parity, *l, false);
12758       __ jcc(Assembler::notEqual, *l, false);
12759     } else if ($cop$$cmpcode == Assembler::equal) {
12760       Label done;
12761       __ jccb(Assembler::parity, done);
12762       __ jcc(Assembler::equal, *l, false);
12763       __ bind(done);
12764     } else {
12765        ShouldNotReachHere();
12766     }
12767   %}
12768   ins_pipe(pipe_jcc);
12769 %}
12770 
12771 // ============================================================================
12772 // The second, slow half of a subtype check.  Scan the subklass's secondary
12773 // superklass array for an instance of the superklass.  Set a hidden internal
12774 // cache on a hit (the cache is checked with exposed code in gen_subtype_check()).
12775 // Return NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
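// Roughly, the emitted scan behaves like the following illustrative C++-style
// sketch (conceptual names, not the actual encoding in enc_PartialSubtypeCheck):
//   for (int i = 0; i < sub->secondary_supers()->length(); i++) {
//     if (sub->secondary_supers()->at(i) == super) {
//       sub->set_secondary_super_cache(super);  // hit: remember it, result = 0
//       return 0;
//     }
//   }
//   return 1;                                   // miss: result non-zero, flags NZ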
12776 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12777   match(Set result (PartialSubtypeCheck sub super));
12778   effect( KILL rcx, KILL cr );
12779 
12780   ins_cost(1100);  // slightly larger than the next version
12781   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12782             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12783             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12784             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12785             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12786             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12787             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12788      "miss:\t" %}
12789 
12790   opcode(0x1); // Force a XOR of EDI
12791   ins_encode( enc_PartialSubtypeCheck() );
12792   ins_pipe( pipe_slow );
12793 %}
12794 
12795 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12796   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12797   effect( KILL rcx, KILL result );
12798 
12799   ins_cost(1000);
12800   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12801             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12802             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12803             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12804             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12805             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12806      "miss:\t" %}
12807 
12808   opcode(0x0);  // No need to XOR EDI
12809   ins_encode( enc_PartialSubtypeCheck() );
12810   ins_pipe( pipe_slow );
12811 %}
12812 
12813 // ============================================================================
12814 // Branch Instructions -- short offset versions
12815 //
12816 // These instructions are used to replace jumps of a long offset (the default
12817 // match) with jumps of a shorter offset.  These instructions are all tagged
12818 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12819 // match rules in general matching.  Instead, the ADLC generates a conversion
12820 // method in the MachNode which can be used to do in-place replacement of the
12821 // long variant with the shorter variant.  The compiler determines whether the
12822 // short variant can be used via the is_short_branch_offset() predicate in the
12823 // machine-specific code section of the file.
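// For example, the long jmpDir below is the 5-byte E9 rel32 form while its
// jmpDir_short twin is the 2-byte EB rel8 form; likewise Jcc shrinks from the
// 6-byte 0F 8x rel32 encoding to the 2-byte 7x rel8 encoding, which is what
// the size() attributes of the long and short variants reflect.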
12824 
12825 // Jump Direct - Label defines a relative address from JMP+1
12826 instruct jmpDir_short(label labl) %{
12827   match(Goto);
12828   effect(USE labl);
12829 
12830   ins_cost(300);
12831   format %{ "JMP,s  $labl" %}
12832   size(2);
12833   ins_encode %{
12834     Label* L = $labl$$label;
12835     __ jmpb(*L);
12836   %}
12837   ins_pipe( pipe_jmp );
12838   ins_short_branch(1);
12839 %}
12840 
12841 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12842 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12843   match(If cop cr);
12844   effect(USE labl);
12845 
12846   ins_cost(300);
12847   format %{ "J$cop,s  $labl" %}
12848   size(2);
12849   ins_encode %{
12850     Label* L = $labl$$label;
12851     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12852   %}
12853   ins_pipe( pipe_jcc );
12854   ins_short_branch(1);
12855 %}
12856 
12857 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12858 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12859   match(CountedLoopEnd cop cr);
12860   effect(USE labl);
12861 
12862   ins_cost(300);
12863   format %{ "J$cop,s  $labl\t# Loop end" %}
12864   size(2);
12865   ins_encode %{
12866     Label* L = $labl$$label;
12867     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12868   %}
12869   ins_pipe( pipe_jcc );
12870   ins_short_branch(1);
12871 %}
12872 
12873 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12874 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12875   match(CountedLoopEnd cop cmp);
12876   effect(USE labl);
12877 
12878   ins_cost(300);
12879   format %{ "J$cop,us $labl\t# Loop end" %}
12880   size(2);
12881   ins_encode %{
12882     Label* L = $labl$$label;
12883     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12884   %}
12885   ins_pipe( pipe_jcc );
12886   ins_short_branch(1);
12887 %}
12888 
12889 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12890   match(CountedLoopEnd cop cmp);
12891   effect(USE labl);
12892 
12893   ins_cost(300);
12894   format %{ "J$cop,us $labl\t# Loop end" %}
12895   size(2);
12896   ins_encode %{
12897     Label* L = $labl$$label;
12898     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12899   %}
12900   ins_pipe( pipe_jcc );
12901   ins_short_branch(1);
12902 %}
12903 
12904 // Jump Direct Conditional - using unsigned comparison
12905 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12906   match(If cop cmp);
12907   effect(USE labl);
12908 
12909   ins_cost(300);
12910   format %{ "J$cop,us $labl" %}
12911   size(2);
12912   ins_encode %{
12913     Label* L = $labl$$label;
12914     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12915   %}
12916   ins_pipe( pipe_jcc );
12917   ins_short_branch(1);
12918 %}
12919 
12920 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12921   match(If cop cmp);
12922   effect(USE labl);
12923 
12924   ins_cost(300);
12925   format %{ "J$cop,us $labl" %}
12926   size(2);
12927   ins_encode %{
12928     Label* L = $labl$$label;
12929     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12930   %}
12931   ins_pipe( pipe_jcc );
12932   ins_short_branch(1);
12933 %}
12934 
12935 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12936   match(If cop cmp);
12937   effect(USE labl);
12938 
12939   ins_cost(300);
12940   format %{ $$template
12941     if ($cop$$cmpcode == Assembler::notEqual) {
12942       $$emit$$"JP,u,s   $labl\n\t"
12943       $$emit$$"J$cop,u,s   $labl"
12944     } else {
12945       $$emit$$"JP,u,s   done\n\t"
12946       $$emit$$"J$cop,u,s  $labl\n\t"
12947       $$emit$$"done:"
12948     }
12949   %}
12950   size(4);
12951   ins_encode %{
12952     Label* l = $labl$$label;
12953     if ($cop$$cmpcode == Assembler::notEqual) {
12954       __ jccb(Assembler::parity, *l);
12955       __ jccb(Assembler::notEqual, *l);
12956     } else if ($cop$$cmpcode == Assembler::equal) {
12957       Label done;
12958       __ jccb(Assembler::parity, done);
12959       __ jccb(Assembler::equal, *l);
12960       __ bind(done);
12961     } else {
12962        ShouldNotReachHere();
12963     }
12964   %}
12965   ins_pipe(pipe_jcc);
12966   ins_short_branch(1);
12967 %}
12968 
12969 // ============================================================================
12970 // Long Compare
12971 //
12972 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12973 // is tricky.  The flavor of compare used depends on whether we are testing
12974 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12975 // The GE test is the negated LT test.  The LE test can be had by commuting
12976 // the operands (yielding a GE test) and then negating; negate again for the
12977 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12978 // NE test is negated from that.
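// As an illustrative C++ sketch (an interpretation, not generated code), the
// signed LT test on a register pair is simply
//   bool lt64(jint xh, juint xl, jint yh, juint yl) {
//     return (xh != yh) ? (xh < yh) : (xl < yl);  // signed on hi, unsigned on lo
//   }
// The flag-setting forms further below compute the same answer with CMP/SBB so
// that it lands in the condition codes instead of a register.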
12979 
12980 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12981 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12982 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12983 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12984 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12985 // foo match ends up with the wrong leaf.  One fix is to not match both
12986 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12987 // both forms beat the ternary form of long-compare and both are very useful
12988 // on Intel, which has so few registers.
12989 
12990 // Manifest a CmpL result in an integer register.  Very painful.
12991 // This is the test to avoid.
12992 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12993   match(Set dst (CmpL3 src1 src2));
12994   effect( KILL flags );
12995   ins_cost(1000);
12996   format %{ "XOR    $dst,$dst\n\t"
12997             "CMP    $src1.hi,$src2.hi\n\t"
12998             "JLT,s  m_one\n\t"
12999             "JGT,s  p_one\n\t"
13000             "CMP    $src1.lo,$src2.lo\n\t"
13001             "JB,s   m_one\n\t"
13002             "JEQ,s  done\n"
13003     "p_one:\tINC    $dst\n\t"
13004             "JMP,s  done\n"
13005     "m_one:\tDEC    $dst\n"
13006      "done:" %}
13007   ins_encode %{
13008     Label p_one, m_one, done;
13009     __ xorptr($dst$$Register, $dst$$Register);
13010     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13011     __ jccb(Assembler::less,    m_one);
13012     __ jccb(Assembler::greater, p_one);
13013     __ cmpl($src1$$Register, $src2$$Register);
13014     __ jccb(Assembler::below,   m_one);
13015     __ jccb(Assembler::equal,   done);
13016     __ bind(p_one);
13017     __ incrementl($dst$$Register);
13018     __ jmpb(done);
13019     __ bind(m_one);
13020     __ decrementl($dst$$Register);
13021     __ bind(done);
13022   %}
13023   ins_pipe( pipe_slow );
13024 %}
13025 
13026 //======
13027 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13028 // compares.  Can be used for LE or GT compares by reversing arguments.
13029 // NOT GOOD FOR EQ/NE tests.
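// (For a compare against zero, LT/GE depend only on the sign of the 64-bit
// value, and that sign bit lives in the high half, hence the single TEST on
// $src.hi below.)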
13030 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13031   match( Set flags (CmpL src zero ));
13032   ins_cost(100);
13033   format %{ "TEST   $src.hi,$src.hi" %}
13034   opcode(0x85);
13035   ins_encode( OpcP, RegReg_Hi2( src, src ) );
13036   ins_pipe( ialu_cr_reg_reg );
13037 %}
13038 
13039 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13040 // compares.  Can be used for LE or GT compares by reversing arguments.
13041 // NOT GOOD FOR EQ/NE tests.
13042 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13043   match( Set flags (CmpL src1 src2 ));
13044   effect( TEMP tmp );
13045   ins_cost(300);
13046   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13047             "MOV    $tmp,$src1.hi\n\t"
13048             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13049   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13050   ins_pipe( ialu_cr_reg_reg );
13051 %}
13052 
13053 // Long compares reg < zero/reg OR reg >= zero/reg.
13054 // Just a wrapper for a normal branch, plus the predicate test.
13055 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13056   match(If cmp flags);
13057   effect(USE labl);
13058   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13059   expand %{
13060     jmpCon(cmp,flags,labl);    // JLT or JGE...
13061   %}
13062 %}
13063 
13064 //======
13065 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13066 // compares.  Can be used for LE or GT compares by reversing arguments.
13067 // NOT GOOD FOR EQ/NE tests.
13068 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
13069   match(Set flags (CmpUL src zero));
13070   ins_cost(100);
13071   format %{ "TEST   $src.hi,$src.hi" %}
13072   opcode(0x85);
13073   ins_encode(OpcP, RegReg_Hi2(src, src));
13074   ins_pipe(ialu_cr_reg_reg);
13075 %}
13076 
13077 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
13078 // compares.  Can be used for LE or GT compares by reversing arguments.
13079 // NOT GOOD FOR EQ/NE tests.
13080 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
13081   match(Set flags (CmpUL src1 src2));
13082   effect(TEMP tmp);
13083   ins_cost(300);
13084   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13085             "MOV    $tmp,$src1.hi\n\t"
13086             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
13087   ins_encode(long_cmp_flags2(src1, src2, tmp));
13088   ins_pipe(ialu_cr_reg_reg);
13089 %}
13090 
13091 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13092 // Just a wrapper for a normal branch, plus the predicate test.
13093 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
13094   match(If cmp flags);
13095   effect(USE labl);
13096   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
13097   expand %{
13098     jmpCon(cmp, flags, labl);    // JLT or JGE...
13099   %}
13100 %}
13101 
13102 // Compare 2 longs and CMOVE longs.
13103 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13104   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13105   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13106   ins_cost(400);
13107   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13108             "CMOV$cmp $dst.hi,$src.hi" %}
13109   opcode(0x0F,0x40);
13110   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13111   ins_pipe( pipe_cmov_reg_long );
13112 %}
13113 
13114 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
13115   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13116   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13117   ins_cost(500);
13118   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13119             "CMOV$cmp $dst.hi,$src.hi" %}
13120   opcode(0x0F,0x40);
13121   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13122   ins_pipe( pipe_cmov_reg_long );
13123 %}
13124 
13125 instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
13126   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13127   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13128   ins_cost(400);
13129   expand %{
13130     cmovLL_reg_LTGE(cmp, flags, dst, src);
13131   %}
13132 %}
13133 
13134 instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
13135   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13136   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13137   ins_cost(500);
13138   expand %{
13139     cmovLL_mem_LTGE(cmp, flags, dst, src);
13140   %}
13141 %}
13142 
13143 // Compare 2 longs and CMOVE ints.
13144 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
13145   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13146   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13147   ins_cost(200);
13148   format %{ "CMOV$cmp $dst,$src" %}
13149   opcode(0x0F,0x40);
13150   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13151   ins_pipe( pipe_cmov_reg );
13152 %}
13153 
13154 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
13155   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13156   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13157   ins_cost(250);
13158   format %{ "CMOV$cmp $dst,$src" %}
13159   opcode(0x0F,0x40);
13160   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13161   ins_pipe( pipe_cmov_mem );
13162 %}
13163 
13164 instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
13165   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13166   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13167   ins_cost(200);
13168   expand %{
13169     cmovII_reg_LTGE(cmp, flags, dst, src);
13170   %}
13171 %}
13172 
13173 instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
13174   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13175   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13176   ins_cost(250);
13177   expand %{
13178     cmovII_mem_LTGE(cmp, flags, dst, src);
13179   %}
13180 %}
13181 
13182 // Compare 2 longs and CMOVE ptrs.
13183 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13184   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13185   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13186   ins_cost(200);
13187   format %{ "CMOV$cmp $dst,$src" %}
13188   opcode(0x0F,0x40);
13189   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13190   ins_pipe( pipe_cmov_reg );
13191 %}
13192 
13193 // Compare 2 unsigned longs and CMOVE ptrs.
13194 instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
13195   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13196   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13197   ins_cost(200);
13198   expand %{
13199     cmovPP_reg_LTGE(cmp,flags,dst,src);
13200   %}
13201 %}
13202 
13203 // Compare 2 longs and CMOVE doubles
13204 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
13205   predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13206   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13207   ins_cost(200);
13208   expand %{
13209     fcmovDPR_regS(cmp,flags,dst,src);
13210   %}
13211 %}
13212 
13213 // Compare 2 longs and CMOVE doubles
13214 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
13215   predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13216   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13217   ins_cost(200);
13218   expand %{
13219     fcmovD_regS(cmp,flags,dst,src);
13220   %}
13221 %}
13222 
13223 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
13224   predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13225   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13226   ins_cost(200);
13227   expand %{
13228     fcmovFPR_regS(cmp,flags,dst,src);
13229   %}
13230 %}
13231 
13232 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
13233   predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13234   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13235   ins_cost(200);
13236   expand %{
13237     fcmovF_regS(cmp,flags,dst,src);
13238   %}
13239 %}
13240 
13241 //======
13242 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
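// (A long is zero exactly when (lo | hi) == 0, so a single OR of the two halves
// sets ZF correctly for an EQ/NE test against zero.)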
13243 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
13244   match( Set flags (CmpL src zero ));
13245   effect(TEMP tmp);
13246   ins_cost(200);
13247   format %{ "MOV    $tmp,$src.lo\n\t"
13248             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13249   ins_encode( long_cmp_flags0( src, tmp ) );
13250   ins_pipe( ialu_reg_reg_long );
13251 %}
13252 
13253 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
13254 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13255   match( Set flags (CmpL src1 src2 ));
13256   ins_cost(200+300);
13257   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13258             "JNE,s  skip\n\t"
13259             "CMP    $src1.hi,$src2.hi\n\t"
13260      "skip:\t" %}
13261   ins_encode( long_cmp_flags1( src1, src2 ) );
13262   ins_pipe( ialu_cr_reg_reg );
13263 %}
13264 
13265 // Long compare reg == zero/reg OR reg != zero/reg
13266 // Just a wrapper for a normal branch, plus the predicate test.
13267 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13268   match(If cmp flags);
13269   effect(USE labl);
13270   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13271   expand %{
13272     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13273   %}
13274 %}
13275 
13276 //======
13277 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13278 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13279   match(Set flags (CmpUL src zero));
13280   effect(TEMP tmp);
13281   ins_cost(200);
13282   format %{ "MOV    $tmp,$src.lo\n\t"
13283             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13284   ins_encode(long_cmp_flags0(src, tmp));
13285   ins_pipe(ialu_reg_reg_long);
13286 %}
13287 
13288 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13289 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13290   match(Set flags (CmpUL src1 src2));
13291   ins_cost(200+300);
13292   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13293             "JNE,s  skip\n\t"
13294             "CMP    $src1.hi,$src2.hi\n\t"
13295      "skip:\t" %}
13296   ins_encode(long_cmp_flags1(src1, src2));
13297   ins_pipe(ialu_cr_reg_reg);
13298 %}
13299 
13300 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13301 // Just a wrapper for a normal branch, plus the predicate test.
13302 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13303   match(If cmp flags);
13304   effect(USE labl);
13305   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13306   expand %{
13307     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13308   %}
13309 %}
13310 
13311 // Compare 2 longs and CMOVE longs.
13312 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13313   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13314   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13315   ins_cost(400);
13316   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13317             "CMOV$cmp $dst.hi,$src.hi" %}
13318   opcode(0x0F,0x40);
13319   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13320   ins_pipe( pipe_cmov_reg_long );
13321 %}
13322 
13323 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13324   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13325   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13326   ins_cost(500);
13327   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13328             "CMOV$cmp $dst.hi,$src.hi" %}
13329   opcode(0x0F,0x40);
13330   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13331   ins_pipe( pipe_cmov_reg_long );
13332 %}
13333 
13334 // Compare 2 longs and CMOVE ints.
13335 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13336   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13337   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13338   ins_cost(200);
13339   format %{ "CMOV$cmp $dst,$src" %}
13340   opcode(0x0F,0x40);
13341   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13342   ins_pipe( pipe_cmov_reg );
13343 %}
13344 
13345 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13346   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13347   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13348   ins_cost(250);
13349   format %{ "CMOV$cmp $dst,$src" %}
13350   opcode(0x0F,0x40);
13351   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13352   ins_pipe( pipe_cmov_mem );
13353 %}
13354 
13355 instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
13356   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13357   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13358   ins_cost(200);
13359   expand %{
13360     cmovII_reg_EQNE(cmp, flags, dst, src);
13361   %}
13362 %}
13363 
13364 instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
13365   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13366   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13367   ins_cost(250);
13368   expand %{
13369     cmovII_mem_EQNE(cmp, flags, dst, src);
13370   %}
13371 %}
13372 
13373 // Compare 2 longs and CMOVE ptrs.
13374 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13375   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13376   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13377   ins_cost(200);
13378   format %{ "CMOV$cmp $dst,$src" %}
13379   opcode(0x0F,0x40);
13380   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13381   ins_pipe( pipe_cmov_reg );
13382 %}
13383 
13384 // Compare 2 unsigned longs and CMOVE ptrs.
13385 instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
13386   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13387   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13388   ins_cost(200);
13389   expand %{
13390     cmovPP_reg_EQNE(cmp,flags,dst,src);
13391   %}
13392 %}
13393 
13394 // Compare 2 longs and CMOVE doubles
13395 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13396   predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13397   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13398   ins_cost(200);
13399   expand %{
13400     fcmovDPR_regS(cmp,flags,dst,src);
13401   %}
13402 %}
13403 
13404 // Compare 2 longs and CMOVE doubles
13405 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13406   predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13407   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13408   ins_cost(200);
13409   expand %{
13410     fcmovD_regS(cmp,flags,dst,src);
13411   %}
13412 %}
13413 
13414 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13415   predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13416   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13417   ins_cost(200);
13418   expand %{
13419     fcmovFPR_regS(cmp,flags,dst,src);
13420   %}
13421 %}
13422 
13423 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13424   predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13425   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13426   ins_cost(200);
13427   expand %{
13428     fcmovF_regS(cmp,flags,dst,src);
13429   %}
13430 %}
13431 
13432 //======
13433 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13434 // Same as cmpL_reg_flags_LEGT except must negate src
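// (The XOR/CMP/SBB sequence below computes the flags of 0 - $src; read with the
// commuted condition from cmpOp_commute, "less" then means $src > 0 and
// "greater-or-equal" means $src <= 0.)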
13435 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13436   match( Set flags (CmpL src zero ));
13437   effect( TEMP tmp );
13438   ins_cost(300);
13439   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13440             "CMP    $tmp,$src.lo\n\t"
13441             "SBB    $tmp,$src.hi\n\t" %}
13442   ins_encode( long_cmp_flags3(src, tmp) );
13443   ins_pipe( ialu_reg_reg_long );
13444 %}
13445 
13446 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13447 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13448 // requires a commuted test to get the same result.
13449 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13450   match( Set flags (CmpL src1 src2 ));
13451   effect( TEMP tmp );
13452   ins_cost(300);
13453   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13454             "MOV    $tmp,$src2.hi\n\t"
13455             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13456   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13457   ins_pipe( ialu_cr_reg_reg );
13458 %}
13459 
// Long compares reg <= zero/reg OR reg > zero/reg.
13461 // Just a wrapper for a normal branch, plus the predicate test
13462 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13463   match(If cmp flags);
13464   effect(USE labl);
13465   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13466   ins_cost(300);
13467   expand %{
13468     jmpCon(cmp,flags,labl);    // JGT or JLE...
13469   %}
13470 %}
13471 
13472 //======
13473 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13474 // Same as cmpUL_reg_flags_LEGT except must negate src
13475 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13476   match(Set flags (CmpUL src zero));
13477   effect(TEMP tmp);
13478   ins_cost(300);
13479   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13480             "CMP    $tmp,$src.lo\n\t"
13481             "SBB    $tmp,$src.hi\n\t" %}
13482   ins_encode(long_cmp_flags3(src, tmp));
13483   ins_pipe(ialu_reg_reg_long);
13484 %}
13485 
13486 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13487 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13488 // requires a commuted test to get the same result.
13489 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13490   match(Set flags (CmpUL src1 src2));
13491   effect(TEMP tmp);
13492   ins_cost(300);
13493   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13494             "MOV    $tmp,$src2.hi\n\t"
13495             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13496   ins_encode(long_cmp_flags2( src2, src1, tmp));
13497   ins_pipe(ialu_cr_reg_reg);
13498 %}
13499 
// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13501 // Just a wrapper for a normal branch, plus the predicate test
13502 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13503   match(If cmp flags);
13504   effect(USE labl);
13505   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13506   ins_cost(300);
13507   expand %{
13508     jmpCon(cmp, flags, labl);    // JGT or JLE...
13509   %}
13510 %}
13511 
13512 // Compare 2 longs and CMOVE longs.
13513 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13514   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13515   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13516   ins_cost(400);
13517   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13518             "CMOV$cmp $dst.hi,$src.hi" %}
13519   opcode(0x0F,0x40);
13520   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13521   ins_pipe( pipe_cmov_reg_long );
13522 %}
13523 
13524 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13525   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13526   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13527   ins_cost(500);
13528   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13529             "CMOV$cmp $dst.hi,$src.hi+4" %}
13530   opcode(0x0F,0x40);
13531   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13532   ins_pipe( pipe_cmov_reg_long );
13533 %}
13534 
13535 instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
13536   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13537   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13538   ins_cost(400);
13539   expand %{
13540     cmovLL_reg_LEGT(cmp, flags, dst, src);
13541   %}
13542 %}
13543 
13544 instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
13545   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13546   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13547   ins_cost(500);
13548   expand %{
13549     cmovLL_mem_LEGT(cmp, flags, dst, src);
13550   %}
13551 %}
13552 
13553 // Compare 2 longs and CMOVE ints.
13554 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13555   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13556   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13557   ins_cost(200);
13558   format %{ "CMOV$cmp $dst,$src" %}
13559   opcode(0x0F,0x40);
13560   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13561   ins_pipe( pipe_cmov_reg );
13562 %}
13563 
13564 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13565   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13566   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13567   ins_cost(250);
13568   format %{ "CMOV$cmp $dst,$src" %}
13569   opcode(0x0F,0x40);
13570   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13571   ins_pipe( pipe_cmov_mem );
13572 %}
13573 
13574 instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
13575   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13576   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13577   ins_cost(200);
13578   expand %{
13579     cmovII_reg_LEGT(cmp, flags, dst, src);
13580   %}
13581 %}
13582 
13583 instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
13584   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13585   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13586   ins_cost(250);
13587   expand %{
13588     cmovII_mem_LEGT(cmp, flags, dst, src);
13589   %}
13590 %}
13591 
13592 // Compare 2 longs and CMOVE ptrs.
13593 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13594   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13595   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13596   ins_cost(200);
13597   format %{ "CMOV$cmp $dst,$src" %}
13598   opcode(0x0F,0x40);
13599   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13600   ins_pipe( pipe_cmov_reg );
13601 %}
13602 
13603 // Compare 2 unsigned longs and CMOVE ptrs.
13604 instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
13605   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13606   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13607   ins_cost(200);
13608   expand %{
13609     cmovPP_reg_LEGT(cmp,flags,dst,src);
13610   %}
13611 %}
13612 
13613 // Compare 2 longs and CMOVE doubles
13614 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13616   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13617   ins_cost(200);
13618   expand %{
13619     fcmovDPR_regS(cmp,flags,dst,src);
13620   %}
13621 %}
13622 
13623 // Compare 2 longs and CMOVE doubles
13624 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13626   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13627   ins_cost(200);
13628   expand %{
13629     fcmovD_regS(cmp,flags,dst,src);
13630   %}
13631 %}
13632 
13633 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13635   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13636   ins_cost(200);
13637   expand %{
13638     fcmovFPR_regS(cmp,flags,dst,src);
13639   %}
13640 %}
13641 
13642 
13643 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13645   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13646   ins_cost(200);
13647   expand %{
13648     fcmovF_regS(cmp,flags,dst,src);
13649   %}
13650 %}
13651 
13652 
13653 // ============================================================================
13654 // Procedure Call/Return Instructions
13655 // Call Java Static Instruction
13656 // Note: If this code changes, the corresponding ret_addr_offset() and
13657 //       compute_padding() functions will have to be adjusted.
13658 instruct CallStaticJavaDirect(method meth) %{
13659   match(CallStaticJava);
13660   effect(USE meth);
13661 
13662   ins_cost(300);
13663   format %{ "CALL,static " %}
13664   opcode(0xE8); /* E8 cd */
13665   ins_encode( pre_call_resets,
13666               Java_Static_Call( meth ),
13667               call_epilog,
13668               post_call_FPU );
13669   ins_pipe( pipe_slow );
13670   ins_alignment(4);
13671 %}
13672 
13673 // Call Java Dynamic Instruction
13674 // Note: If this code changes, the corresponding ret_addr_offset() and
13675 //       compute_padding() functions will have to be adjusted.
13676 instruct CallDynamicJavaDirect(method meth) %{
13677   match(CallDynamicJava);
13678   effect(USE meth);
13679 
13680   ins_cost(300);
13681   format %{ "MOV    EAX,(oop)-1\n\t"
13682             "CALL,dynamic" %}
13683   opcode(0xE8); /* E8 cd */
13684   ins_encode( pre_call_resets,
13685               Java_Dynamic_Call( meth ),
13686               call_epilog,
13687               post_call_FPU );
13688   ins_pipe( pipe_slow );
13689   ins_alignment(4);
13690 %}
13691 
13692 // Call Runtime Instruction
13693 instruct CallRuntimeDirect(method meth) %{
13694   match(CallRuntime );
13695   effect(USE meth);
13696 
13697   ins_cost(300);
13698   format %{ "CALL,runtime " %}
13699   opcode(0xE8); /* E8 cd */
13700   // Use FFREEs to clear entries in float stack
13701   ins_encode( pre_call_resets,
13702               FFree_Float_Stack_All,
13703               Java_To_Runtime( meth ),
13704               post_call_FPU );
13705   ins_pipe( pipe_slow );
13706 %}
13707 
13708 // Call runtime without safepoint
13709 instruct CallLeafDirect(method meth) %{
13710   match(CallLeaf);
13711   effect(USE meth);
13712 
13713   ins_cost(300);
13714   format %{ "CALL_LEAF,runtime " %}
13715   opcode(0xE8); /* E8 cd */
13716   ins_encode( pre_call_resets,
13717               FFree_Float_Stack_All,
13718               Java_To_Runtime( meth ),
13719               Verify_FPU_For_Leaf, post_call_FPU );
13720   ins_pipe( pipe_slow );
13721 %}
13722 
13723 instruct CallLeafNoFPDirect(method meth) %{
13724   match(CallLeafNoFP);
13725   effect(USE meth);
13726 
13727   ins_cost(300);
13728   format %{ "CALL_LEAF_NOFP,runtime " %}
13729   opcode(0xE8); /* E8 cd */
13730   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13731   ins_pipe( pipe_slow );
13732 %}
13733 
13734 
13735 // Return Instruction
13736 // Remove the return address & jump to it.
13737 instruct Ret() %{
13738   match(Return);
13739   format %{ "RET" %}
13740   opcode(0xC3);
13741   ins_encode(OpcP);
13742   ins_pipe( pipe_jmp );
13743 %}
13744 
13745 // Tail Call; Jump from runtime stub to Java code.
13746 // Also known as an 'interprocedural jump'.
13747 // Target of jump will eventually return to caller.
13748 // TailJump below removes the return address.
13749 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
13750   match(TailCall jump_target method_ptr);
13751   ins_cost(300);
13752   format %{ "JMP    $jump_target \t# EBX holds method" %}
13753   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13754   ins_encode( OpcP, RegOpc(jump_target) );
13755   ins_pipe( pipe_jmp );
13756 %}
13757 
13758 
13759 // Tail Jump; remove the return address; jump to target.
13760 // TailCall above leaves the return address around.
13761 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13762   match( TailJump jump_target ex_oop );
13763   ins_cost(300);
13764   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13765             "JMP    $jump_target " %}
13766   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13767   ins_encode( enc_pop_rdx,
13768               OpcP, RegOpc(jump_target) );
13769   ins_pipe( pipe_jmp );
13770 %}
13771 
13772 // Create exception oop: created by stack-crawling runtime code.
13773 // Created exception is now available to this handler, and is setup
13774 // just prior to jumping to this handler.  No code emitted.
13775 instruct CreateException( eAXRegP ex_oop )
13776 %{
13777   match(Set ex_oop (CreateEx));
13778 
13779   size(0);
13780   // use the following format syntax
13781   format %{ "# exception oop is in EAX; no code emitted" %}
13782   ins_encode();
13783   ins_pipe( empty );
13784 %}
13785 
13786 
13787 // Rethrow exception:
13788 // The exception oop will come in the first argument position.
13789 // Then JUMP (not call) to the rethrow stub code.
13790 instruct RethrowException()
13791 %{
13792   match(Rethrow);
13793 
13794   // use the following format syntax
13795   format %{ "JMP    rethrow_stub" %}
13796   ins_encode(enc_rethrow);
13797   ins_pipe( pipe_jmp );
13798 %}
13799 
13800 // inlined locking and unlocking
13801 
13802 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13803   predicate(Compile::current()->use_rtm());
13804   match(Set cr (FastLock object box));
13805   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13806   ins_cost(300);
13807   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13808   ins_encode %{
13809     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13810                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13811                  _rtm_counters, _stack_rtm_counters,
13812                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13813                  true, ra_->C->profile_rtm());
13814   %}
13815   ins_pipe(pipe_slow);
13816 %}
13817 
13818 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13819   predicate(!Compile::current()->use_rtm());
13820   match(Set cr (FastLock object box));
13821   effect(TEMP tmp, TEMP scr, USE_KILL box);
13822   ins_cost(300);
13823   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13824   ins_encode %{
13825     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13826                  $scr$$Register, noreg, noreg, NULL, NULL, NULL, false, false);
13827   %}
13828   ins_pipe(pipe_slow);
13829 %}
13830 
13831 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13832   match(Set cr (FastUnlock object box));
13833   effect(TEMP tmp, USE_KILL box);
13834   ins_cost(300);
13835   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13836   ins_encode %{
13837     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13838   %}
13839   ins_pipe(pipe_slow);
13840 %}
13841 
13842 instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
13843   predicate(Matcher::vector_length(n) <= 32);
13844   match(Set dst (MaskAll src));
13845   format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
13846   ins_encode %{
13847     int mask_len = Matcher::vector_length(this);
13848     __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
13849   %}
13850   ins_pipe( pipe_slow );
13851 %}
13852 
13853 instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
13854   predicate(Matcher::vector_length(n) > 32);
13855   match(Set dst (MaskAll src));
13856   effect(TEMP ktmp);
13857   format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
13858   ins_encode %{
13859     int mask_len = Matcher::vector_length(this);
13860     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13861   %}
13862   ins_pipe( pipe_slow );
13863 %}
13864 
13865 instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
13866   predicate(Matcher::vector_length(n) > 32);
13867   match(Set dst (MaskAll src));
13868   effect(TEMP ktmp);
13869   format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
13870   ins_encode %{
13871     int mask_len = Matcher::vector_length(this);
13872     __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
13873   %}
13874   ins_pipe( pipe_slow );
13875 %}
13876 
13877 // ============================================================================
13878 // Safepoint Instruction
13879 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13880   match(SafePoint poll);
13881   effect(KILL cr, USE poll);
13882 
13883   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13884   ins_cost(125);
13885   // EBP would need size(3)
13886   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13887   ins_encode %{
13888     __ relocate(relocInfo::poll_type);
13889     address pre_pc = __ pc();
13890     __ testl(rax, Address($poll$$Register, 0));
13891     address post_pc = __ pc();
13892     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13893   %}
13894   ins_pipe(ialu_reg_mem);
13895 %}
13896 
13897 
13898 // ============================================================================
13899 // This name is KNOWN by the ADLC and cannot be changed.
13900 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13901 // for this guy.
13902 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13903   match(Set dst (ThreadLocal));
13904   effect(DEF dst, KILL cr);
13905 
13906   format %{ "MOV    $dst, Thread::current()" %}
13907   ins_encode %{
13908     Register dstReg = as_Register($dst$$reg);
13909     __ get_thread(dstReg);
13910   %}
13911   ins_pipe( ialu_reg_fat );
13912 %}
13913 
13914 
13915 
13916 //----------PEEPHOLE RULES-----------------------------------------------------
13917 // These must follow all instruction definitions as they use the names
13918 // defined in the instructions definitions.
13919 //
13920 // peepmatch ( root_instr_name [preceding_instruction]* );
13921 //
13922 // peepconstraint %{
13923 // (instruction_number.operand_name relational_op instruction_number.operand_name
13924 //  [, ...] );
13925 // // instruction numbers are zero-based using left to right order in peepmatch
13926 //
13927 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13928 // // provide an instruction_number.operand_name for each operand that appears
13929 // // in the replacement instruction's match rule
13930 //
13931 // ---------VM FLAGS---------------------------------------------------------
13932 //
13933 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13934 //
13935 // Each peephole rule is given an identifying number starting with zero and
13936 // increasing by one in the order seen by the parser.  An individual peephole
13937 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13938 // on the command-line.
13939 //
13940 // ---------CURRENT LIMITATIONS----------------------------------------------
13941 //
13942 // Only match adjacent instructions in same basic block
13943 // Only equality constraints
13944 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13945 // Only one replacement instruction
13946 //
13947 // ---------EXAMPLE----------------------------------------------------------
13948 //
13949 // // pertinent parts of existing instructions in architecture description
13950 // instruct movI(rRegI dst, rRegI src) %{
13951 //   match(Set dst (CopyI src));
13952 // %}
13953 //
13954 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13955 //   match(Set dst (AddI dst src));
13956 //   effect(KILL cr);
13957 // %}
13958 //
13959 // // Change (inc mov) to lea
13960 // peephole %{
13961 //   // increment preceded by register-register move
13962 //   peepmatch ( incI_eReg movI );
13963 //   // require that the destination register of the increment
13964 //   // match the destination register of the move
13965 //   peepconstraint ( 0.dst == 1.dst );
13966 //   // construct a replacement instruction that sets
13967 //   // the destination to ( move's source register + one )
13968 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13969 // %}
13970 //
13971 // Implementation no longer uses movX instructions since
13972 // machine-independent system no longer uses CopyX nodes.
13973 //
13974 // peephole %{
13975 //   peepmatch ( incI_eReg movI );
13976 //   peepconstraint ( 0.dst == 1.dst );
13977 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13978 // %}
13979 //
13980 // peephole %{
13981 //   peepmatch ( decI_eReg movI );
13982 //   peepconstraint ( 0.dst == 1.dst );
13983 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13984 // %}
13985 //
13986 // peephole %{
13987 //   peepmatch ( addI_eReg_imm movI );
13988 //   peepconstraint ( 0.dst == 1.dst );
13989 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13990 // %}
13991 //
13992 // peephole %{
13993 //   peepmatch ( addP_eReg_imm movP );
13994 //   peepconstraint ( 0.dst == 1.dst );
13995 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13996 // %}
13997 
13998 // // Change load of spilled value to only a spill
13999 // instruct storeI(memory mem, rRegI src) %{
14000 //   match(Set mem (StoreI mem src));
14001 // %}
14002 //
14003 // instruct loadI(rRegI dst, memory mem) %{
14004 //   match(Set dst (LoadI mem));
14005 // %}
14006 //
14007 peephole %{
14008   peepmatch ( loadI storeI );
14009   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14010   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
14011 %}
14012 
14013 //----------SMARTSPILL RULES---------------------------------------------------
14014 // These must follow all instruction definitions as they use the names
14015 // defined in the instructions definitions.