1 //
   2 // Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
  64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
  66 
// The eight 32-bit integer registers.  Arguments, in order: register save
// type, C convention save type, ideal register type, encoding (the bit
// pattern placed into the opcodes), and the corresponding VMReg.
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// ESP is No-Save under both conventions.
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
// FPR0 (top of stack): both halves use encoding 0 and have no VMReg
// (VMRegImpl::Bad()).
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here is the trick: FPR1 is really st(0), except in the midst of emitting
// assembly for a machnode.  During emission the FPU stack is pushed, making
// FPR1 == st(1) temporarily.  However, at any safepoint the stack will not
// have this element, so FPR1 == st(0) from the oopMap viewpoint.  This same
// weirdness with numbering forces instruction encoding to play games with
// the register encode to correct for this 0/1 issue.  See
// MachSpillCopyNode::implementation, where it does flt->flt moves, for an
// example.
//
// Each FPRn is defined as a Low/High pair: FPRnL maps to
// as_FloatRegister(n-1) and FPRnH to the next() VMReg slot after it.
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
// Eight placeholder registers (encodings 8..15) with no backing VMReg.
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
// Single allocation chunk: the eight integer registers first, then the x87
// Low/High pairs, then the FILL placeholders.  Order is selection priority.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag
// PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_no_ebp and int_reg_with_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_no_ebp and ncx_reg_with_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_no_ebp and nabx_reg_with_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_no_ebp and long_reg_with_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (nor EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_no_ebp and nadx_reg_with_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

// Double classes list both the Low and High half of every register.
reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

// Singleton classes for FPR1 / FPR2, and the double class without FPR1.
reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
// (declaration only; the definition is not in this part of the file).
extern bool is_operand_hi32_zero(Node* n);
%}
 252 
 253 source %{
// Shorthands for the relocation formats of 32-bit immediates and displacements.
#define   RELOC_IMM32    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// "__ foo()" expands to "_masm.foo()"; it requires a MacroAssembler named
// _masm in the enclosing scope.
#define __ _masm.

// How to find the high register of a Long pair, given the low register.
// Adds 2 to the encoding, matching the reg_def encodings of the pairs
// EAX(0)/EDX(2), ECX(1)/EBX(3) and EBP(5)/EDI(7).
#define   HIGH_FROM_LOW(x) ((x)+2)
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
 266 // Note: 'double' and 'long long' have 32-bits alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 268   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
 269   // of 128-bits operands for SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 271   // Store the value to a 128-bits operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
 277 // Buffer for 128-bits masks used by SSE instructions.
// Backing storage for the four 128-bit masks below.  One extra 128-bit slot
// is reserved because double_quadword() rounds each address DOWN to a
// 16-byte boundary.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// signmask values clear the sign bit(s) (0x7FFF... patterns); signflip values
// contain only the sign bit(s) (0x8000... patterns).  The "float" variants
// repeat the pattern per 32-bit lane, the "double" variants per 64-bit lane.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (VM_Version::supports_vzeroupper()) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
 299 // !!!!! Special hack to get all type of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
// Indicate if the safepoint node needs the polling page as an input.
// The answer follows the safepoint mechanism: the input is needed exactly
// when SafepointMechanism::uses_thread_local_poll() reports true.
bool SafePointNode::needs_polling_address_input() {
  return SafepointMechanism::uses_thread_local_poll();
}
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return align_up(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return align_up(current_offset, alignment_required()) - current_offset;
 342 }
 343 
 344 // EMIT_RM()
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 412   }
 413 }
 414 
 415    // rRegI ereg, memory mem) %{    // emit_reg_mem
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 417   // There is no index & no scale, use form without SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
 521 
 522 
 523 //=============================================================================
// The constant base node produces no register: its output mask is empty.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

// The constant table is addressed absolutely, so its base offset is always 0.
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

// No post-allocation expansion is used; postalloc_expand must never be called.
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

// Emits no code.
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

// Size is 0 bytes, matching the empty encoding above.
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
// Non-product builds only: print a one-line description of the node.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
 548 
 549 
 550 //=============================================================================
#ifndef PRODUCT
// Non-product builds only: print a textual description of the method prolog
// to 'st'.  'framesize' is decremented step by step as words are accounted
// for, so the printed offsets depend on the order of the statements below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    // With a stack bang: EBP is pushed, so one more word is already accounted
    // for before the remaining frame is created with SUB.
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH   EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB    ESP, #%d\t# Create frame",framesize);
    }
  } else {
    // Without a stack bang: the whole frame is created first and EBP is
    // stored into the slot one word below the frame top.
    st->print("SUB    ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD    EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    // The cookie occupies the word below the saved EBP.
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW  \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
 627     // NOTE: We set the table base offset here because users might be
 628     // emitted before MachConstantBaseNode.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
// Size of the prolog in bytes; delegated to the generic MachNode::size.
uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

// NOTE(review): the trailing comment says "a large enough number" but the
// value returned is 0 -- confirm which one is intended.
int MachPrologNode::reloc() const {
  return 0; // a large enough number
}
 641 
 642 //=============================================================================
#ifndef PRODUCT
// Non-product builds only: print a textual description of the method epilog
// to 'st', in the same order as MachEpilogNode::emit produces it.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Printed only when the compiled code uses vectors wider than 16 bytes.
  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW  standard control word");
    st->cr(); st->print("\t");
  }
  // The ADD is omitted when nothing remains besides return address and EBP.
  if (framesize) {
    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL   EBP"); st->print("\t");
  // Return poll, only for method compilations that poll.
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST   PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 687   // Remove two words for return addr and rbp,
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
 708   if (do_polling() && C->is_method_compilation()) {
 709     if (SafepointMechanism::uses_thread_local_poll()) {
 710       Register pollReg = as_Register(EBX_enc);
 711       MacroAssembler masm(&cbuf);
 712       masm.get_thread(pollReg);
 713       masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
 714       masm.relocate(relocInfo::poll_return_type);
 715       masm.testl(rax, Address(pollReg, 0));
 716     } else {
 717       cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 718       emit_opcode(cbuf,0x85);
 719       emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 720       emit_d32(cbuf, (intptr_t)os::get_polling_page());
 721     }
 722   }
 723 }
 724 
// Size of the epilog in bytes; delegated to the generic MachNode::size.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

// NOTE(review): the trailing comment says "a large enough number" but the
// value returned is 0 -- confirm which one is intended.
int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

// The epilog uses the generic MachNode pipeline class.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// Always reports offset 0.
int MachEpilogNode::safepoint_offset() const { return 0; }
 739 
 740 //=============================================================================
 741 
 742 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 743 static enum RC rc_class( OptoReg::Name reg ) {
 744 
 745   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 746   if (OptoReg::is_stack(reg)) return rc_stack;
 747 
 748   VMReg r = OptoReg::as_VMReg(reg);
 749   if (r->is_Register()) return rc_int;
 750   if (r->is_FloatRegister()) {
 751     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 752     return rc_float;
 753   }
 754   assert(r->is_XMMRegister(), "must be");
 755   return rc_xmm;
 756 }
 757 
 758 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 759                         int opcode, const char *op_str, int size, outputStream* st ) {
 760   if( cbuf ) {
 761     emit_opcode  (*cbuf, opcode );
 762     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 763 #ifndef PRODUCT
 764   } else if( !do_size ) {
 765     if( size != 0 ) st->print("\n\t");
 766     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 767       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 768       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 769     } else { // FLD, FST, PUSH, POP
 770       st->print("%s [ESP + #%d]",op_str,offset);
 771     }
 772 #endif
 773   }
 774   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 775   return size+3+offset_size;
 776 }
 777 
 778 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 779 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 780                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 781   int in_size_in_bits = Assembler::EVEX_32bit;
 782   int evex_encoding = 0;
 783   if (reg_lo+1 == reg_hi) {
 784     in_size_in_bits = Assembler::EVEX_64bit;
 785     evex_encoding = Assembler::VEX_W;
 786   }
 787   if (cbuf) {
 788     MacroAssembler _masm(cbuf);
 789     // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations,
 790     //                          it maps more cases to single byte displacement
 791     _masm.set_managed();
 792     if (reg_lo+1 == reg_hi) { // double move?
 793       if (is_load) {
 794         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 795       } else {
 796         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 797       }
 798     } else {
 799       if (is_load) {
 800         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 801       } else {
 802         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 803       }
 804     }
 805 #ifndef PRODUCT
 806   } else if (!do_size) {
 807     if (size != 0) st->print("\n\t");
 808     if (reg_lo+1 == reg_hi) { // double move?
 809       if (is_load) st->print("%s %s,[ESP + #%d]",
 810                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 811                               Matcher::regName[reg_lo], offset);
 812       else         st->print("MOVSD  [ESP + #%d],%s",
 813                               offset, Matcher::regName[reg_lo]);
 814     } else {
 815       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 816                               Matcher::regName[reg_lo], offset);
 817       else         st->print("MOVSS  [ESP + #%d],%s",
 818                               offset, Matcher::regName[reg_lo]);
 819     }
 820 #endif
 821   }
 822   bool is_single_byte = false;
 823   if ((UseAVX > 2) && (offset != 0)) {
 824     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 825   }
 826   int offset_size = 0;
 827   if (UseAVX > 2 ) {
 828     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 829   } else {
 830     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 831   }
 832   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 833   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 834   return size+5+offset_size;
 835 }
 836 
 837 
 838 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 839                             int src_hi, int dst_hi, int size, outputStream* st ) {
 840   if (cbuf) {
 841     MacroAssembler _masm(cbuf);
 842     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
 843     _masm.set_managed();
 844     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 845       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 846                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 847     } else {
 848       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 849                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 850     }
 851 #ifndef PRODUCT
 852   } else if (!do_size) {
 853     if (size != 0) st->print("\n\t");
 854     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 855       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 856         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 857       } else {
 858         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 859       }
 860     } else {
 861       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 862         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       } else {
 864         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 865       }
 866     }
 867 #endif
 868   }
 869   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 870   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 871   int sz = (UseAVX > 2) ? 6 : 4;
 872   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 873       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 874   return size + sz;
 875 }
 876 
 877 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 878                             int src_hi, int dst_hi, int size, outputStream* st ) {
 879   // 32-bit
 880   if (cbuf) {
 881     MacroAssembler _masm(cbuf);
 882     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
 883     _masm.set_managed();
 884     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 885              as_Register(Matcher::_regEncode[src_lo]));
 886 #ifndef PRODUCT
 887   } else if (!do_size) {
 888     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 889 #endif
 890   }
 891   return (UseAVX> 2) ? 6 : 4;
 892 }
 893 
 894 
 895 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 896                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 897   // 32-bit
 898   if (cbuf) {
 899     MacroAssembler _masm(cbuf);
 900     // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
 901     _masm.set_managed();
 902     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 903              as_XMMRegister(Matcher::_regEncode[src_lo]));
 904 #ifndef PRODUCT
 905   } else if (!do_size) {
 906     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 907 #endif
 908   }
 909   return (UseAVX> 2) ? 6 : 4;
 910 }
 911 
 912 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 913   if( cbuf ) {
 914     emit_opcode(*cbuf, 0x8B );
 915     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 916 #ifndef PRODUCT
 917   } else if( !do_size ) {
 918     if( size != 0 ) st->print("\n\t");
 919     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 920 #endif
 921   }
 922   return size+2;
 923 }
 924 
 925 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 926                                  int offset, int size, outputStream* st ) {
 927   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 928     if( cbuf ) {
 929       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 930       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 931 #ifndef PRODUCT
 932     } else if( !do_size ) {
 933       if( size != 0 ) st->print("\n\t");
 934       st->print("FLD    %s",Matcher::regName[src_lo]);
 935 #endif
 936     }
 937     size += 2;
 938   }
 939 
 940   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 941   const char *op_str;
 942   int op;
 943   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 944     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 945     op = 0xDD;
 946   } else {                   // 32-bit store
 947     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 948     op = 0xD9;
 949     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 950   }
 951 
 952   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 953 }
 954 
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.

// Vector XMM-to-XMM copy.  Used by MachSpillCopyNode::implementation() for the
// xmm -> xmm vector case, which returns this helper's result directly.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

// Vector move between an XMM register and a stack slot.  Callers in this file
// pass is_load == true for stack -> register and false for register -> stack.
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);
 961 
 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 963                                      int dst_offset, uint ireg, outputStream* st) {
 964   int calc_size = 0;
 965   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 966   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 967   switch (ireg) {
 968   case Op_VecS:
 969     calc_size = 3+src_offset_size + 3+dst_offset_size;
 970     break;
 971   case Op_VecD: {
 972     calc_size = 3+src_offset_size + 3+dst_offset_size;
 973     int tmp_src_offset = src_offset + 4;
 974     int tmp_dst_offset = dst_offset + 4;
 975     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 976     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 977     calc_size += 3+src_offset_size + 3+dst_offset_size;
 978     break;
 979   }
 980   case Op_VecX:
 981   case Op_VecY:
 982   case Op_VecZ:
 983     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 984     break;
 985   default:
 986     ShouldNotReachHere();
 987   }
 988   if (cbuf) {
 989     MacroAssembler _masm(cbuf);
 990     int offset = __ offset();
 991     switch (ireg) {
 992     case Op_VecS:
 993       __ pushl(Address(rsp, src_offset));
 994       __ popl (Address(rsp, dst_offset));
 995       break;
 996     case Op_VecD:
 997       __ pushl(Address(rsp, src_offset));
 998       __ popl (Address(rsp, dst_offset));
 999       __ pushl(Address(rsp, src_offset+4));
1000       __ popl (Address(rsp, dst_offset+4));
1001       break;
1002     case Op_VecX:
1003       __ movdqu(Address(rsp, -16), xmm0);
1004       __ movdqu(xmm0, Address(rsp, src_offset));
1005       __ movdqu(Address(rsp, dst_offset), xmm0);
1006       __ movdqu(xmm0, Address(rsp, -16));
1007       break;
1008     case Op_VecY:
1009       __ vmovdqu(Address(rsp, -32), xmm0);
1010       __ vmovdqu(xmm0, Address(rsp, src_offset));
1011       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1012       __ vmovdqu(xmm0, Address(rsp, -32));
1013       break;
1014     case Op_VecZ:
1015       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1016       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1017       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1018       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1019       break;
1020     default:
1021       ShouldNotReachHere();
1022     }
1023     int size = __ offset() - offset;
1024     assert(size == calc_size, "incorrect size calculation");
1025     return size;
1026 #ifndef PRODUCT
1027   } else if (!do_size) {
1028     switch (ireg) {
1029     case Op_VecS:
1030       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1031                 "popl    [rsp + #%d]",
1032                 src_offset, dst_offset);
1033       break;
1034     case Op_VecD:
1035       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1036                 "popq    [rsp + #%d]\n\t"
1037                 "pushl   [rsp + #%d]\n\t"
1038                 "popq    [rsp + #%d]",
1039                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1040       break;
1041      case Op_VecX:
1042       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1043                 "movdqu  xmm0, [rsp + #%d]\n\t"
1044                 "movdqu  [rsp + #%d], xmm0\n\t"
1045                 "movdqu  xmm0, [rsp - #16]",
1046                 src_offset, dst_offset);
1047       break;
1048     case Op_VecY:
1049       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1050                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1051                 "vmovdqu [rsp + #%d], xmm0\n\t"
1052                 "vmovdqu xmm0, [rsp - #32]",
1053                 src_offset, dst_offset);
1054       break;
1055     case Op_VecZ:
1056       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1057                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1058                 "vmovdqu [rsp + #%d], xmm0\n\t"
1059                 "vmovdqu xmm0, [rsp - #64]",
1060                 src_offset, dst_offset);
1061       break;
1062     default:
1063       ShouldNotReachHere();
1064     }
1065 #endif
1066   }
1067   return calc_size;
1068 }
1069 
// Shared worker behind MachSpillCopyNode::format(), emit() and size().
//
//   cbuf    - non-NULL: emit the copy into this buffer.
//   do_size - with cbuf == NULL: true means only compute the size; false means
//             print a listing to st (printing exists in non-PRODUCT builds only).
//   ra_     - register allocator, queried for the source (in(1)) and
//             destination (this) locations and for stack offsets.
//   st      - listing stream; only used in print mode.
//
// The copy is dispatched on the register classes (see rc_class) of the first
// and second halves of source and destination.  Most cases return directly;
// the integer first-word cases fall through to the second-word handling at
// the end of the function.
// Returns the size in bytes of the generated sequence (0 for a self copy).
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // --------------------------------------
  // Vector values: only stack and XMM locations are expected; each
  // combination is handed to a dedicated vector helper.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  // ESI_num / EAX_num are not operands here: impl_helper places their
  // encodings in the instruction's register field next to opcodes 0xFF (PUSH)
  // and 0x8F (POP) -- presumably acting as the opcode extension; confirm
  // against the instruction reference.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Writing the first word would overwrite the source's second word, so
      // the second word is moved before the first one.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // The three integer first-word cases below do not return: they accumulate
  // into 'size' and the second word is handled at the end of the function.
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
     } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
     }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
#endif
    }
    // Two 2-byte instructions when the source had to be pushed first, one otherwise.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // Load (3 bytes + displacement) followed by the 2-byte FSTP.
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  // The value is bounced through 8 bytes of temporary stack space: reserve
  // the space, store the x87 value at [ESP], load it into the XMM register,
  // then release the space again.
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // Reaching this point means one of the integer first-word cases above must
  // have produced code.
  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  // No other second-word combination is handled.
  Unimplemented();
  return 0; // Mute compiler
}
1300 
1301 #ifndef PRODUCT
1302 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1303   implementation( NULL, ra_, false, st );
1304 }
1305 #endif
1306 
1307 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1308   implementation( &cbuf, ra_, false, NULL );
1309 }
1310 
1311 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1312   return implementation( NULL, ra_, true, NULL );
1313 }
1314 
1315 
1316 //=============================================================================
1317 #ifndef PRODUCT
1318 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1319   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1320   int reg = ra_->get_reg_first(this);
1321   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1322 }
1323 #endif
1324 
1325 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1326   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1327   int reg = ra_->get_encode(this);
1328   if( offset >= 128 ) {
1329     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1330     emit_rm(cbuf, 0x2, reg, 0x04);
1331     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1332     emit_d32(cbuf, offset);
1333   }
1334   else {
1335     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1336     emit_rm(cbuf, 0x1, reg, 0x04);
1337     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1338     emit_d8(cbuf, offset);
1339   }
1340 }
1341 
1342 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1343   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1344   if( offset >= 128 ) {
1345     return 7;
1346   }
1347   else {
1348     return 4;
1349   }
1350 }
1351 
1352 //=============================================================================
1353 #ifndef PRODUCT
1354 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1355   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1356   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1357   st->print_cr("\tNOP");
1358   st->print_cr("\tNOP");
1359   if( !OptoBreakpoint )
1360     st->print_cr("\tNOP");
1361 }
1362 #endif
1363 
1364 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1365   MacroAssembler masm(&cbuf);
1366 #ifdef ASSERT
1367   uint insts_size = cbuf.insts_size();
1368 #endif
1369   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1370   masm.jump_cc(Assembler::notEqual,
1371                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1372   /* WARNING these NOPs are critical so that verified entry point is properly
1373      aligned for patching by NativeJump::patch_verified_entry() */
1374   int nops_cnt = 2;
1375   if( !OptoBreakpoint ) // Leave space for int3
1376      nops_cnt += 1;
1377   masm.nop(nops_cnt);
1378 
1379   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1380 }
1381 
1382 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1383   return OptoBreakpoint ? 11 : 12;
1384 }
1385 
1386 
1387 //=============================================================================
1388 
1389 int Matcher::regnum_to_fpu_offset(int regnum) {
1390   return regnum - 32; // The FP registers are in the second chunk
1391 }
1392 
// The distinction this query exists for is UltraSparc specific; answering
// true here just means that a fast long-to-float conversion is available.
const bool Matcher::convL2FSupported(void) {
  return true;
}
1397 
1398 // Is this branch offset short enough that a short branch can be used?
1399 //
1400 // NOTE: If the platform does not provide any short branch variants, then
1401 //       this method should return false for offset 0.
1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1403   // The passed offset is relative to address of the branch.
1404   // On 86 a branch displacement is calculated relative to address
1405   // of a next instruction.
1406   offset -= br_size;
1407 
1408   // the short version of jmpConUCF2 contains multiple branches,
1409   // making the reach slightly less
1410   if (rule == jmpConUCF2_rule)
1411     return (-126 <= offset && offset <= 125);
1412   return (-128 <= offset && offset <= 127);
1413 }
1414 
// Answers false for every value; the argument is not inspected.
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}
1419 
// The ECX count passed to "rep stos" for the ClearArray node is measured in
// dwords rather than bytes, hence false.
const bool Matcher::init_array_count_is_in_bytes = false;
1422 
// Needs 2 CMOV's for longs.
// NOTE(review): the reported cost is 1 although the comment above speaks of
// two CMOVs -- confirm which unit the cost is expressed in.
const int Matcher::long_cmove_cost() { return 1; }
1425 
1426 // No CMOVF/CMOVD with SSE/SSE2
1427 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1428 
// Does the CPU require late expand (see block.cpp for description of late expand)?
// Not on this platform.
const bool Matcher::require_postalloc_expand = false;
1431 
// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// false: no explicit masking of the shift count is requested.
const bool Matcher::need_masked_shift_count = false;
1435 
// Not expected to be called in this file: the body reports
// ShouldNotCallThis().  The return statement only satisfies the signature.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}
1440 
// Not expected to be called in this file: the body reports
// ShouldNotCallThis().  The return statement only satisfies the signature.
bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}
1445 
// Not expected to be called in this file: the body reports
// ShouldNotCallThis().  The return statement only satisfies the signature.
bool Matcher::const_oop_prefer_decode() {
  ShouldNotCallThis();
  return true;
}
1450 
// Not expected to be called in this file: the body reports
// ShouldNotCallThis().  The return statement only satisfies the signature.
bool Matcher::const_klass_prefer_decode() {
  ShouldNotCallThis();
  return true;
}
1455 
// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
// Hence true on this platform.
const bool Matcher::rematerialize_float_constants = true;
1461 
// If the CPU can load and store misaligned doubles directly then no fixup is
// needed.  Otherwise the double is split into 2 integer pieces and moved
// piece-by-piece.  This only happens when passing doubles into C code, as the
// Java calling convention forces doubles to be aligned.
// true: misaligned doubles are accepted here, so no fixup is requested.
const bool Matcher::misaligned_doubles_ok = true;
1467 
1468 
1469 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1470   // Replace the memory operand that owns input edge 'idx' of 'node' with its
1471   // "_win95_safe" counterpart; operand kinds that need no change are left alone.
1472   const uint operand_total = node->num_opnds();    // Virtual call for number of operands
1473   uint first_edge = node->oper_input_base();       // Edges skipped so far
1474   assert( idx >= first_edge, "idx too low in pd_implicit_null_fixup" );
1475   // Walk the operands, starting at operand 1 and skipping each one's input
1476   // edges, until the operand whose edge range contains 'idx' is reached.
1477   uint which = 1;
1478   for (uint width = node->_opnds[which]->num_edges();
1479        idx >= first_edge + width;
1480        width = node->_opnds[which]->num_edges()) {
1481     first_edge += width;
1482     ++which;                          // Move on to the next operand
1483     assert( which < operand_total, "Accessing non-existent operand" );
1484   }
1485   MachOper *old_oper  = node->_opnds[which];
1486   MachOper *safe_oper = NULL;
1487   switch (old_oper->opcode()) {
1488   case DIRECT:
1489   case INDOFFSET32X:
1490     return;                           // No transformation necessary.
1491   case LOAD_LONG_INDIRECT:
1492   case LOAD_LONG_INDOFFSET32:
1493     return;                           // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1494   case INDIRECT:
1495     safe_oper = new indirect_win95_safeOper( );
1496     break;
1497   case INDOFFSET8:
1498     safe_oper = new indOffset8_win95_safeOper(old_oper->disp(NULL, NULL, 0));
1499     break;
1500   case INDOFFSET32:
1501     safe_oper = new indOffset32_win95_safeOper(old_oper->disp(NULL, NULL, 0));
1502     break;
1503   case INDINDEXOFFSET:
1504     safe_oper = new indIndexOffset_win95_safeOper(old_oper->disp(NULL, NULL, 0));
1505     break;
1506   case INDINDEXSCALE:
1507     safe_oper = new indIndexScale_win95_safeOper(old_oper->scale());
1508     break;
1509   case INDINDEXSCALEOFFSET:
1510     safe_oper = new indIndexScaleOffset_win95_safeOper(old_oper->scale(), old_oper->disp(NULL, NULL, 0));
1511     break;
1512   default:
1513     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1514     return;
1515   }
1516   node->_opnds[which] = safe_oper;    // Install the replacement operand
1517 }
1518 
1519 // Advertise here if the CPU requires explicit rounding operations
1520 // to implement the UseStrictFP mode.
1521 const bool Matcher::strict_fp_requires_explicit_rounding = true;   // true: explicit rounding is required
1522 
1523 // Are floats converted to double when stored to the stack during deoptimization?
1524 // On x32 a float is stored with conversion only when the FPU is used for floats (UseSSE == 0).
1525 bool Matcher::float_in_double() { const bool fpu_holds_floats = (0 == UseSSE); return fpu_holds_floats; }
1526 
1527 // Do ints take an entire long register or just half?  false = just half.
1528 const bool Matcher::int_in_long = false;
1529 
1530 // Tell whether 'reg' is ever used to carry an argument.  Used on startup to
1531 // build the trampoline stubs in generateOptoStub: registers not reported
1532 // here will be killed by the VM call in the trampoline, so arguments held in
1533 // them will not be available to the callee.
1534 bool Matcher::can_be_java_arg( int reg ) {
1535   if (reg == ECX_num || reg == EDX_num) { return true; }   // always usable
1536   const bool is_xmm01  = (reg == XMM0_num  || reg == XMM1_num );   // counts only with UseSSE>=1
1537   const bool is_xmm01b = (reg == XMM0b_num || reg == XMM1b_num);   // counts only with UseSSE>=2
1538   return (is_xmm01 && UseSSE >= 1) || (is_xmm01b && UseSSE >= 2);
1539 }
1540 
1541 bool Matcher::is_spillable_arg( int reg ) {
1542   return can_be_java_arg(reg);   // same register set as can_be_java_arg()
1543 }
1544 
1545 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1546   // Prefer the hardware integer DIV instruction over multiply-based code
1547   // only when the CPU reports that its idiv is fast ...
1548   if (!VM_Version::has_fast_idiv()) { return false; }
1549   // ... and the constant divisor fits into 32 bits.  min_jint is excluded
1550   // to get only correct positive 32 bit values from negative.
1551   const bool fits_in_32_bits = (divisor == (int)divisor);
1552   return fits_in_32_bits && divisor != min_jint;
1553 }
1554 
1555 // Register mask for the DIVI (quotient) projection of divmodI: EAX only.
1556 RegMask Matcher::divI_proj_mask() {
1557   return EAX_REG_mask();
1558 }
1559 
1560 // Register mask for the MODI (remainder) projection of divmodI: EDX only.
1561 RegMask Matcher::modI_proj_mask() {
1562   return EDX_REG_mask();
1563 }
1564 
1565 // Register for DIVL projection of divmodL.  Not expected to be reached on this platform.
1566 RegMask Matcher::divL_proj_mask() {
1567   ShouldNotReachHere();   // presumably divmodL is never matched on 32-bit x86 -- confirm
1568   return RegMask();       // empty mask, only to satisfy the return type
1569 }
1570 
1571 // Register for MODL projection of divmodL.  Not expected to be reached on this platform.
1572 RegMask Matcher::modL_proj_mask() {
1573   ShouldNotReachHere();   // presumably divmodL is never matched on 32-bit x86 -- confirm
1574   return RegMask();       // empty mask, only to satisfy the return type
1575 }
1576 
1577 const RegMask Matcher::method_handle_invoke_SP_save_mask() {   // mask of the register used to save SP around method handle invokes
1578   return NO_REG_mask();   // NO_REG mask: no register is designated here
1579 }
1580 
1581 // Returns true if the high 32 bits of the value are known to be zero: either
1582 // a long constant with a clear upper word, or an AndL whose constant mask has one.
1583 bool is_operand_hi32_zero(Node* n) {
1584   switch (n->Opcode()) {
1585   case Op_AndL: {
1586     Node* mask = n->in(2);            // second input of the AndL
1587     return mask->is_Con() && (mask->get_long() & 0xFFFFFFFF00000000LL) == 0LL;
1588   }
1589   case Op_ConL:
1590     return (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL;
1591   default:
1592     return false;                     // nothing known about any other node
1593   }
1594 }
1595 
1596 %}
1597 
1598 //----------ENCODING BLOCK-----------------------------------------------------
1599 // This block specifies the encoding classes used by the compiler to output
1600 // byte streams.  Encoding classes generate functions which are called by
1601 // Machine Instruction Nodes in order to generate the bit encoding of the
1602 // instruction.  Operands specify their base encoding interface with the
1603 // interface keyword.  Four interfaces are currently supported:
1604 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1605 // operand to generate a function which returns its register number when
1606 // queried.   CONST_INTER causes an operand to generate a function which
1607 // returns the value of the constant when queried.  MEMORY_INTER causes an
1608 // operand to generate four functions which return the Base Register, the
1609 // Index Register, the Scale Value, and the Offset Value of the operand when
1610 // queried.  COND_INTER causes an operand to generate six functions which
1611 // return the encoding code (ie - encoding bits for the instruction)
1612 // associated with each basic boolean condition for a conditional instruction.
1613 // Instructions specify two basic values for encoding.  They use the
1614 // ins_encode keyword to specify their encoding class (which must be one of
1615 // the class names specified in the encoding block), and they use the
1616 // opcode keyword to specify, in order, their primary, secondary, and
1617 // tertiary opcode.  Only the opcode sections which a particular instruction
1618 // needs for encoding need to be specified.
1619 encode %{
1620   // Build emit functions for each basic byte or larger field in the intel
1621   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1622   // code in the enc_class source block.  Emit functions will live in the
1623   // main source block for now.  In future, we can generalize this by
1624   // adding a syntax that specifies the sizes of fields in an order,
1625   // so that the adlc can build the emit functions automagically
1626 
1627   // Emit the instruction's primary opcode (the first value given to its opcode keyword)
1628   enc_class OpcP %{
1629     emit_opcode(cbuf, $primary);
1630   %}
1631 
1632   // Emit the instruction's secondary opcode (the second value given to its opcode keyword)
1633   enc_class OpcS %{
1634     emit_opcode(cbuf, $secondary);
1635   %}
1636 
1637   // Emit an opcode passed directly as the constant operand d8, not taken from the opcode keyword
1638   enc_class Opcode(immI d8) %{
1639     emit_opcode(cbuf, $d8$$constant);
1640   %}
1641 
1642   enc_class SizePrefix %{             // Emit the 0x66 prefix byte
1643     emit_opcode(cbuf,0x66);           // 0x66 is the x86 operand-size override prefix
1644   %}
1645 
1646   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many): register-to-register r/m byte only
1647     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);   // mod 0x3, then dst and src register encodings
1648   %}
1649 
1650   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many): explicit opcode + register r/m byte
1651     emit_opcode(cbuf,$opcode$$constant);        // opcode supplied as a constant operand
1652     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);   // mod 0x3, then dst and src register encodings
1653   %}
1654 
1655   enc_class mov_r32_imm0( rRegI dst ) %{   // MOV dst,0 using the B8+rd imm32 form
1656     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1657     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1658   %}
1659 
1660   enc_class cdq_enc %{
1661     // Full implementation of Java idiv and irem; checks for the
1662     // special case as described in JVM spec., p.243 & p.271.
1663     //
1664     //         normal case                           special case
1665     //
1666     // input : eax: dividend                         min_int
1667     //         reg: divisor                          -1
1668     //
1669     // output: eax: quotient  (= eax idiv reg)       min_int
1670     //         edx: remainder (= eax irem reg)       0
1671     //
1672     //  Code sequence (the emitted bytes test ECX as the divisor):
1673     //
1674     //  81 F8 00 00 00 80    cmp         eax,80000000h
1675     //  0F 85 0B 00 00 00    jne         normal_case
1676     //  33 D2                xor         edx,edx
1677     //  83 F9 FF             cmp         ecx,0FFh
1678     //  0F 84 03 00 00 00    je          done
1679     //                  normal_case:
1680     //  99                   cdq
1681     //  F7 F9                idiv        ecx
1682     //                  done:
1683     //
1684     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1685     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1686     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp eax,80000000h
1687     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1688     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1689     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case (rel32 = 0x0B)
1690     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
1691     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,0FFh
1692     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1693     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1694     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done (rel32 = 0x03, skips cdq + idiv)
1695     // normal_case:
1696     emit_opcode(cbuf,0x99);                                         // cdq
1697     // idiv (note: must be emitted by the user of this rule)
1698     // done:
1699   %}
1700 
1701   // Dense encoding for older common ops: the register number is added into the opcode itself
1702   enc_class Opc_plus(immI opcode, rRegI reg) %{
1703     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1704   %}
1705 
1706 
1707   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1708   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1709     // An immediate in [-128, 127] fits in 8 bits: signal that by OR-ing the
1710     // sign-extend bit (0x02) into the primary opcode.  Any other immediate is
1711     // a 32-bit immediate and uses the plain primary opcode.
1712     const bool imm_fits_8_bits = ($imm$$constant >= -128) && ($imm$$constant <= 127);
1713     emit_opcode(cbuf, imm_fits_8_bits ? ($primary | 0x02) : $primary);
1714   %}
1717 
1718   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1719     // Primary opcode first: an immediate in [-128, 127] fits in 8 bits, which
1720     // is signalled by OR-ing the sign-extend bit (0x02) into the opcode.
1721     const bool imm_fits_8_bits = ($imm$$constant >= -128) && ($imm$$constant <= 127);
1722     emit_opcode(cbuf, imm_fits_8_bits ? ($primary | 0x02) : $primary);
1723     // Then the r/m byte carrying the secondary opcode and the destination register.
1724     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1725   %}
1729 
1730   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1731     // Emit the immediate in the narrowest of the two forms that holds it.
1732     if (($imm$$constant < -128) || ($imm$$constant > 127)) {
1733       // Outside the signed 8-bit range: output the full 32-bit immediate
1734       $$$emit32$imm$$constant;
1735     }
1736     else {                          // Fits in an 8-bit immediate
1737       $$$emit8$imm$$constant;
1738     }
1739   %}
1740 
1741   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1742     // Low-word half of a long op with immediate: the top bits of the constant are thrown away.
1743     const int lo_word = (int)$imm$$constant;
1744     const bool fits_8_bits = (lo_word >= -128) && (lo_word <= 127);
1745     // Primary opcode, with the sign-extend bit set for an 8-bit immediate
1746     emit_opcode(cbuf, fits_8_bits ? ($primary | 0x02) : $primary);
1747     // r/m byte with secondary opcode, then the immediate in the matching width
1748     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1749     if (fits_8_bits) { emit_d8 (cbuf, lo_word); } else { emit_d32(cbuf, lo_word); }
1750   %}
1751 
1752   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1753     // High-word half of a long op with immediate: the bottom 32 bits of the constant are thrown away.
1754     const int hi_word = (int)($imm$$constant >> 32);
1755     const bool fits_8_bits = (hi_word >= -128) && (hi_word <= 127);
1756     // Primary opcode, with the sign-extend bit set for an 8-bit immediate
1757     emit_opcode(cbuf, fits_8_bits ? ($primary | 0x02) : $primary);
1758     // r/m byte with tertiary opcode and the high register, then the immediate in the matching width
1759     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1760     if (fits_8_bits) { emit_d8 (cbuf, hi_word); } else { emit_d32(cbuf, hi_word); }
1761   %}
1762 
1763   enc_class OpcSReg (rRegI dst) %{    // BSWAP: secondary opcode combined with the register encoding
1764     emit_cc(cbuf, $secondary, $dst$$reg );
1765   %}
1766 
1767   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1768     // Byte-reverse a long held in a register pair: bswap each half, then
1769     // exchange the two halves.
1770     const int lo_enc = $dst$$reg;
1771     const int hi_enc = HIGH_FROM_LOW(lo_enc);
1772     emit_opcode(cbuf, 0x0F);            // bswap lo
1773     emit_cc(cbuf, 0xC8, lo_enc);
1774     emit_opcode(cbuf, 0x0F);            // bswap hi
1775     emit_cc(cbuf, 0xC8, hi_enc);
1776     emit_opcode(cbuf, 0x87);            // xchg lo and hi
1777     emit_rm(cbuf, 0x3, lo_enc, hi_enc);
1778   %}
1780 
1781   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...: r/m byte with the secondary opcode in the reg/opcode field
1782     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1783   %}
1784 
1785   enc_class enc_cmov(cmpOp cop ) %{ // CMOV: primary byte, then secondary opcode combined with the condition code
1786     $$$emit8$primary;
1787     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1788   %}
1789 
1790   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV on FPU registers: two-byte opcode built from 0xDA00
1791     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);   // add condition code and (register encoding - 1)
1792     emit_d8(cbuf, op >> 8 );                           // high byte first
1793     emit_d8(cbuf, op & 255);                           // then low byte
1794   %}
1795 
1796   // Emulate a CMOV with a short conditional branch around a MOV; brOffs is the branch displacement
1797   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1798     // Invert sense of branch from sense of CMOV (flip the low bit of the condition code)
1799     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1800     emit_d8( cbuf, $brOffs$$constant );
1801   %}
1802 
1803   enc_class enc_PartialSubtypeCheck( ) %{   // slow-path subtype check on fixed registers
1804     Register Redi = as_Register(EDI_enc); // result register
1805     Register Reax = as_Register(EAX_enc); // super class
1806     Register Recx = as_Register(ECX_enc); // killed
1807     Register Resi = as_Register(ESI_enc); // sub class
1808     Label miss;                           // bound right after the check, below
1809 
1810     MacroAssembler _masm(&cbuf);
1811     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1812                                      NULL, &miss,
1813                                      /*set_cond_codes:*/ true);
1814     if ($primary) {                       // only variants with a non-zero primary opcode clear the result here
1815       __ xorptr(Redi, Redi);
1816     }
1817     __ bind(miss);                        // the miss path jumps past the xor above
1818   %}
1819 
1820   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1821     MacroAssembler masm(&cbuf);
1822     const int begin = masm.offset();
1823     if (UseSSE < 2) {
1824       // External c_calling_convention expects the FPU stack to be 'clean'.
1825       // Compiled code leaves it dirty.  Do cleanup now.
1826       masm.empty_FPU_stack();
1827     } else if (VerifyFPU) {
1828       masm.verify_FPU(0, "must be empty in SSE2+ mode");
1829     }
1830     // Remember the emitted size on first use (-1 = not recorded yet); later expansions must match it.
1831     const int emitted = masm.offset() - begin;
1832     if (sizeof_FFree_Float_Stack_All != -1) {
1833       assert(emitted == sizeof_FFree_Float_Stack_All, "wrong size");
1834     } else {
1835       sizeof_FFree_Float_Stack_All = emitted;
1836     }
1837   %}
1838 
1839   enc_class Verify_FPU_For_Leaf %{    // emits FPU verification code only when VerifyFPU is set
1840     if( VerifyFPU ) {
1841       MacroAssembler masm(&cbuf);
1842       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1843     }
1844   %}
1845 
1846   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1847     // This is the instruction starting address for relocation info.
1848     cbuf.set_insts_mark();
1849     $$$emit8$primary;
1850     // CALL directly to the runtime: 32-bit displacement relative to the end of the 4-byte field
1851     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1852                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1853 
1854     if (UseSSE >= 2) {
1855       MacroAssembler _masm(&cbuf);
1856       BasicType rt = tf()->return_type();
1857 
1858       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1859         // A C runtime call where the return value is unused.  In SSE2+
1860         // mode the result needs to be removed from the FPU stack.  It's
1861         // likely that this function call could be removed by the
1862         // optimizer if the C function is a pure function.
1863         __ ffree(0);
1864       } else if (rt == T_FLOAT) {
1865         __ lea(rsp, Address(rsp, -4));        // reserve a 4-byte stack temp
1866         __ fstp_s(Address(rsp, 0));           // pop the FPU result into it
1867         __ movflt(xmm0, Address(rsp, 0));     // reload it into xmm0
1868         __ lea(rsp, Address(rsp,  4));        // release the temp
1869       } else if (rt == T_DOUBLE) {
1870         __ lea(rsp, Address(rsp, -8));        // reserve an 8-byte stack temp
1871         __ fstp_d(Address(rsp, 0));           // pop the FPU result into it
1872         __ movdbl(xmm0, Address(rsp, 0));     // reload it into xmm0
1873         __ lea(rsp, Address(rsp,  8));        // release the temp
1874       }
1875     }
1876   %}
1877 
1878   enc_class pre_call_resets %{
1879     // If method sets FPU control word restore it here (load the standard control word)
1880     debug_only(int off0 = cbuf.insts_size());
1881     if (ra_->C->in_24_bit_fp_mode()) {
1882       MacroAssembler _masm(&cbuf);
1883       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1884     }
1885     // Clear upper bits of YMM registers when current compiled code uses
1886     // wide vectors to avoid AVX <-> SSE transition penalty during call.
1887     MacroAssembler _masm(&cbuf);   // NOTE(review): vzeroupper() is called unconditionally here; the "wide vectors" condition is presumably checked inside it -- confirm
1888     __ vzeroupper();
1889     debug_only(int off1 = cbuf.insts_size());
1890     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");   // emitted size must equal pre_call_resets_size()
1891   %}
1892 
1893   enc_class post_call_FPU %{
1894     // If method sets FPU control word do it here also (load the 24-bit control word after the call)
1895     if (Compile::current()->in_24_bit_fp_mode()) {
1896       MacroAssembler masm(&cbuf);
1897       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1898     }
1899   %}
1900 
1901   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1902     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1903     // who we intended to call.
1904     cbuf.set_insts_mark();
1905     $$$emit8$primary;
1906 
1907     if (!_method) {
1908       // No target method recorded: emit the displacement with a runtime-call relocation
1909       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1910                      runtime_call_Relocation::spec(),
1911                      RELOC_IMM32);
1912     } else {
1913       int method_index = resolved_method_index(cbuf);
1914       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1915                                                   : static_call_Relocation::spec(method_index);
1916       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1917                      rspec, RELOC_DISP32);
1918       // Emit stubs for static call.  A NULL stub is recorded as a compilation failure.
1919       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1920       if (stub == NULL) {
1921         ciEnv::current()->record_failure("CodeCache is full");
1922         return;
1923       }
1924     }
1925   %}
1925 
1926   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL: inline-cache call emitted through the MacroAssembler
1927     MacroAssembler _masm(&cbuf);
1928     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1929   %}
1930 
1931   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1932     int disp = in_bytes(Method::from_compiled_offset());
1933     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");   // must fit the 8-bit displacement emitted below
1934 
1935     // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1936     cbuf.set_insts_mark();
1937     $$$emit8$primary;
1938     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1939     emit_d8(cbuf, disp);             // Displacement
1940 
1941   %}
1942 
1943 //   Following encoding is no longer used, but may be restored if calling
1944 //   convention changes significantly.
1945 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1946 //
1947 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1948 //     // int ic_reg     = Matcher::inline_cache_reg();
1949 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1950 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1951 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1952 //
1953 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1954 //     // // so we load it immediately before the call
1955 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1956 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1957 //
1958 //     // xor rbp,ebp
1959 //     emit_opcode(cbuf, 0x33);
1960 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1961 //
1962 //     // CALL to interpreter.
1963 //     cbuf.set_insts_mark();
1964 //     $$$emit8$primary;
1965 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1966 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1967 //   %}
1968 
1969   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR: primary opcode, r/m byte, 8-bit count
1970     $$$emit8$primary;
1971     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);       // secondary opcode goes in the r/m byte
1972     $$$emit8$shift$$constant;
1973   %}
1974 
1975   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1976     // Load immediate does not have a zero or sign extended version
1977     // for 8-bit immediates, so the full 32-bit constant is always emitted
1978     emit_opcode(cbuf, 0xB8 + $dst$$reg);       // opcode 0xB8 plus the register encoding
1979     $$$emit32$src$$constant;
1980   %}
1981 
1982   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1983     // Load immediate does not have a zero or sign extended version
1984     // for 8-bit immediates, so the full 32-bit constant is always emitted
1985     emit_opcode(cbuf, $primary + $dst$$reg);   // primary opcode plus the register encoding
1986     $$$emit32$src$$constant;
1987   %}
1988 
1989   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1990     // Load the low 32 bits of a long constant into the low register.  Load
1991     // immediate does not have a zero or sign extended version for 8-bit immediates.
1992     const int lo_reg  = $dst$$reg;
1993     const int lo_bits = $src$$constant & 0x0FFFFFFFFL;
1994     if (lo_bits != 0) {
1995       emit_opcode(cbuf, $primary + lo_reg);
1996       emit_d32(cbuf, lo_bits);
1997     } else {
1998       // xor dst, dst
1999       emit_opcode(cbuf, 0x33);
2000       emit_rm(cbuf, 0x3, lo_reg, lo_reg);
2001     }
2002   %}
2003 
2004   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2005     // Load the high 32 bits of a long constant into the high register.  Load
2006     // immediate does not have a zero or sign extended version for 8-bit immediates.
2007     int dst_enc = HIGH_FROM_LOW($dst$$reg);   // high register of the pair, derived like the other high-half encodings
2008     int src_con = ((julong)($src$$constant)) >> 32;   // upper 32 bits of the constant
2009     if (src_con == 0) {
2010       // xor dst, dst
2011       emit_opcode(cbuf, 0x33);
2012       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2013     } else {
2014       emit_opcode(cbuf, $primary + dst_enc);
2015       emit_d32(cbuf, src_con);
2016     }
2017   %}
2018 
2019 
2020   // Encode a reg-reg copy through encode_Copy.  If it is useless, then empty encoding.
2021   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2022     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2023   %}
2024 
2025   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{   // copy the low register of a long pair into an int register
2026     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2027   %}
2028 
2029   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many).  NOTE(review): an enc_class with this same name and body is also defined earlier in this encode block
2030     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);   // mod 0x3, then dst and src register encodings
2031   %}
2032 
2033   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many): primary opcode applied to the low registers of two longs
2034     $$$emit8$primary;
2035     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2036   %}
2037 
2038   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many): secondary opcode applied to the high registers of two longs
2039     $$$emit8$secondary;
2040     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2041   %}
2042 
2043   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many): r/m byte only, low registers of two longs (no opcode emitted)
2044     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2045   %}
2046 
2047   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many): r/m byte only, high registers of two longs (no opcode emitted)
2048     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2049   %}
2050 
2051   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{   // r/m byte pairing an int register with the high register of a long
2052     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2053   %}
2054 
2055   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2056     // Output the constant as a 32-bit immediate
2057     $$$emit32$src$$constant;
2058   %}
2059 
2060   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2061     // Emit the float immediate as its raw 32-bit pattern
2062     const jfloat value = $src$$constant;
2063     emit_d32(cbuf, jint_cast( value ));
2064   %}
2066 
2067   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2068     // Emit the float immediate as its raw 32-bit pattern
2069     const jfloat value = $src$$constant;
2070     emit_d32(cbuf, jint_cast( value ));
2071   %}
2073 
2074   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2075     // Output the constant as a 16-bit immediate
2076     $$$emit16$src$$constant;
2077   %}
2078 
2079   enc_class Con_d32(immI src) %{      // 32-bit constant through a plain emit_d32 call
2080     emit_d32(cbuf,$src$$constant);
2081   %}
2082 
2083   enc_class conmemref (eRegP t1) %{    // register against an absolute 32-bit address
2084     // Output immediate memory reference: r/m byte with mod 0x00 and r/m 0x05, followed by a zero 32-bit address
2085     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2086     emit_d32(cbuf, 0x00);
2087   %}
2088 
2089   enc_class lock_prefix( ) %{         // emits only the prefix byte
2090     emit_opcode(cbuf,0xF0);         // [Lock]
2091   %}
2092 
2093   // Cmp-xchg long value.
2094   // Note: we need to swap ebx and ecx before and after the
2095   //       cmpxchg8 instruction because the instruction uses
2096   //       ecx as the high order word of the new value to store but
2097   //       our register encoding uses ebx.
2098   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2099 
2100     // XCHG  ebx,ecx
2101     emit_opcode(cbuf,0x87);
2102     emit_opcode(cbuf,0xD9);
2103     // [Lock]
2104     emit_opcode(cbuf,0xF0);
2105     // CMPXCHG8 [Eptr]
2106     emit_opcode(cbuf,0x0F);
2107     emit_opcode(cbuf,0xC7);
2108     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2109     // XCHG  ebx,ecx (swap back)
2110     emit_opcode(cbuf,0x87);
2111     emit_opcode(cbuf,0xD9);
2112   %}
2113 
2114   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{   // locked 32-bit compare-and-exchange on [mem_ptr]
2115     // [Lock]
2116     emit_opcode(cbuf,0xF0);
2117 
2118     // CMPXCHG [Eptr]
2119     emit_opcode(cbuf,0x0F);
2120     emit_opcode(cbuf,0xB1);
2121     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );   // register field fixed at encoding 1 (ECX in x86 numbering)
2122   %}
2123 
2124   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{   // locked byte compare-and-exchange on [mem_ptr]
2125     // [Lock]
2126     emit_opcode(cbuf,0xF0);
2127 
2128     // CMPXCHGB [Eptr]
2129     emit_opcode(cbuf,0x0F);
2130     emit_opcode(cbuf,0xB0);
2131     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );   // register field fixed at encoding 1 (CL in x86 numbering)
2132   %}
2133 
2134   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{   // locked 16-bit compare-and-exchange on [mem_ptr]
2135     // [Lock]
2136     emit_opcode(cbuf,0xF0);
2137 
2138     // 16-bit mode (0x66 prefix)
2139     emit_opcode(cbuf, 0x66);
2140 
2141     // CMPXCHGW [Eptr]
2142     emit_opcode(cbuf,0x0F);
2143     emit_opcode(cbuf,0xB1);
2144     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );   // register field fixed at encoding 1 (CX in x86 numbering)
2145   %}
2146 
2147   enc_class enc_flags_ne_to_boolean( iRegI res ) %{   // res = 0 if the flags say "not equal", else 1
2148     int res_encoding = $res$$reg;
2149 
2150     // MOV  res,0
2151     emit_opcode( cbuf, 0xB8 + res_encoding);
2152     emit_d32( cbuf, 0 );
2153     // JNE,s  fail  (displacement 5 skips the MOV below: one opcode byte plus a 32-bit immediate)
2154     emit_opcode(cbuf,0x75);
2155     emit_d8(cbuf, 5 );
2156     // MOV  res,1
2157     emit_opcode( cbuf, 0xB8 + res_encoding);
2158     emit_d32( cbuf, 1 );
2159     // fail:
2160   %}
2161 
2162   enc_class set_instruction_start( ) %{   // emits no bytes; only records the mark
2163     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2164   %}
2165 
2166   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2167     // Encode register 'ereg' against the memory operand's base, index, scale
2168     // and displacement, passing along the displacement's relocation type.
2169     encode_RegMem(cbuf, $ereg$$reg,
2170                   $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2171                   $mem->disp_reloc());
2172   %}
2175 
2176   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2177     // Upper half of a long: use the hi register of the pair (computed from lo)
2178     // and an offset 4 further in memory.  No relocation is allowed on the displacement.
2179     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2180     encode_RegMem(cbuf, HIGH_FROM_LOW($ereg$$reg),
2181                   $mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4,
2182                   relocInfo::none);
2183   %}
2185 
2186   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2187     // Tertiary opcode 0xA4 selects (r1, r2) = (lo, hi) of the register pair;
2188     // any other tertiary opcode selects (hi, lo).
2189     const bool tertiary_is_A4 = ($tertiary == 0xA4);
2190     const int r1 = tertiary_is_A4 ? $dst$$reg : HIGH_FROM_LOW($dst$$reg);
2191     const int r2 = tertiary_is_A4 ? HIGH_FROM_LOW($dst$$reg) : $dst$$reg;
2192     emit_opcode(cbuf,0x0F);  emit_opcode(cbuf,$tertiary);          // 0F <tertiary>, r/m(r1,r2), count
2193     emit_rm(cbuf, 0x3, r1, r2);
2194     emit_d8(cbuf,$cnt$$constant);
2195     emit_d8(cbuf,$primary);  emit_rm(cbuf, 0x3, $secondary, r1);   // <primary>, r/m(secondary,r1), count
2196     emit_d8(cbuf,$cnt$$constant);
2197   %}
2198 
2199   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{   // long shift by 32..63 that fills the high word by shifting it 31
2200     emit_opcode( cbuf, 0x8B ); // Move the high register into the low register
2201     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2202     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2203       emit_d8(cbuf,$primary);
2204       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2205       emit_d8(cbuf,$cnt$$constant-32);   // remaining count after the 32-bit move
2206     }
2207     emit_d8(cbuf,$primary);              // same shift applied to the high register ...
2208     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2209     emit_d8(cbuf,31);                    // ... by 31
2210   %}
2211 
2212   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2213     // r1 receives the moved half and r2 is cleared.  Secondary opcode 0x5
2214     // selects (r1, r2) = (lo, hi) of the pair; any other value selects (hi, lo).
2215     const bool secondary_is_5 = ($secondary == 0x5);
2216     const int r1 = secondary_is_5 ? $dst$$reg : HIGH_FROM_LOW($dst$$reg);
2217     const int r2 = secondary_is_5 ? HIGH_FROM_LOW($dst$$reg) : $dst$$reg;
2218 
2219     emit_opcode( cbuf, 0x8B );          // Move r1,r2
2220     emit_rm(cbuf, 0x3, r1, r2);
2221     if( $cnt$$constant > 32 ) {         // Shift, if not by zero
2222       emit_opcode(cbuf,$primary);  emit_rm(cbuf, 0x3, $secondary, r1);
2223       emit_d8(cbuf,$cnt$$constant-32);
2224     }
2225     emit_opcode(cbuf,0x33);  emit_rm(cbuf, 0x3, r2, r2);   // XOR r2,r2
2226   %}
2227 
2228   // Clone of RegMem but accepts an extra parameter to access each
2229   // half of a double in memory; it never needs relocation info.
2230   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2231     // Opcode first, then the reg/mem encoding of the selected half: the
2232     // displacement is offset by disp_for_half and carries no relocation.
2233     emit_opcode(cbuf,$opcode$$constant);
2234     encode_RegMem(cbuf, $rm_reg$$reg,
2235                   $mem$$base, $mem$$index, $mem$$scale,
2236                   $mem$$disp + $disp_for_half$$constant,
2237                   relocInfo::none);
2238   %}
2240 
2241   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2242   //
2243   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2244   // and it never needs relocation information.
2245   // Frequently used to move data between FPU's Stack Top and memory.
2246   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2247     // The constant rm_opcode takes the place of a register encoding, and the
2248     // displacement must not carry relocation info.
2249     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2250     encode_RegMem(cbuf, $rm_opcode$$constant,
2251                   $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2252                   relocInfo::none);
2253   %}
2255 
2256   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2257     // The constant rm_opcode takes the place of a register encoding.  The
2258     // displacement keeps its relocation type (disp-as-oop when working with static globals).
2259     encode_RegMem(cbuf, $rm_opcode$$constant,
2260                   $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2261                   $mem->disp_reloc());
2262   %}
2265 
2266   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2267     int reg_encoding = $dst$$reg;
2268     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2269     int index        = 0x04;            // 0x04 indicates no index
2270     int scale        = 0x00;            // 0x00 indicates no scale
2271     int displace     = $src1$$constant; // 0x00 indicates no displacement
2272     relocInfo::relocType disp_reloc = relocInfo::none;
2273     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2274   %}
2275 
2276   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2277     // Compare dst,src
2278     emit_opcode(cbuf,0x3B);
2279     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2280     // jmp dst < src around move
2281     emit_opcode(cbuf,0x7C);
2282     emit_d8(cbuf,2);
2283     // move dst,src
2284     emit_opcode(cbuf,0x8B);
2285     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2286   %}
2287 
2288   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2289     // Compare dst,src
2290     emit_opcode(cbuf,0x3B);
2291     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2292     // jmp dst > src around move
2293     emit_opcode(cbuf,0x7F);
2294     emit_d8(cbuf,2);
2295     // move dst,src
2296     emit_opcode(cbuf,0x8B);
2297     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2298   %}
2299 
2300   enc_class enc_FPR_store(memory mem, regDPR src) %{
2301     // If src is FPR1, we can just FST to store it.
2302     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2303     int reg_encoding = 0x2; // Just store
2304     int base  = $mem$$base;
2305     int index = $mem$$index;
2306     int scale = $mem$$scale;
2307     int displace = $mem$$disp;
2308     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2309     if( $src$$reg != FPR1L_enc ) {
2310       reg_encoding = 0x3;  // Store & pop
2311       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2312       emit_d8( cbuf, 0xC0-1+$src$$reg );
2313     }
2314     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2315     emit_opcode(cbuf,$primary);
2316     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2317   %}
2318 
2319   enc_class neg_reg(rRegI dst) %{
2320     // NEG $dst
2321     emit_opcode(cbuf,0xF7);
2322     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2323   %}
2324 
2325   enc_class setLT_reg(eCXRegI dst) %{
2326     // SETLT $dst
2327     emit_opcode(cbuf,0x0F);
2328     emit_opcode(cbuf,0x9C);
2329     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2330   %}
2331 
2332   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2333     int tmpReg = $tmp$$reg;
2334 
2335     // SUB $p,$q
2336     emit_opcode(cbuf,0x2B);
2337     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2338     // SBB $tmp,$tmp
2339     emit_opcode(cbuf,0x1B);
2340     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2341     // AND $tmp,$y
2342     emit_opcode(cbuf,0x23);
2343     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2344     // ADD $p,$tmp
2345     emit_opcode(cbuf,0x03);
2346     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2347   %}
2348 
2349   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2350     // TEST shift,32
2351     emit_opcode(cbuf,0xF7);
2352     emit_rm(cbuf, 0x3, 0, ECX_enc);
2353     emit_d32(cbuf,0x20);
2354     // JEQ,s small
2355     emit_opcode(cbuf, 0x74);
2356     emit_d8(cbuf, 0x04);
2357     // MOV    $dst.hi,$dst.lo
2358     emit_opcode( cbuf, 0x8B );
2359     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2360     // CLR    $dst.lo
2361     emit_opcode(cbuf, 0x33);
2362     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2363 // small:
2364     // SHLD   $dst.hi,$dst.lo,$shift
2365     emit_opcode(cbuf,0x0F);
2366     emit_opcode(cbuf,0xA5);
2367     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2368     // SHL    $dst.lo,$shift"
2369     emit_opcode(cbuf,0xD3);
2370     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2371   %}
2372 
2373   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2374     // TEST shift,32
2375     emit_opcode(cbuf,0xF7);
2376     emit_rm(cbuf, 0x3, 0, ECX_enc);
2377     emit_d32(cbuf,0x20);
2378     // JEQ,s small
2379     emit_opcode(cbuf, 0x74);
2380     emit_d8(cbuf, 0x04);
2381     // MOV    $dst.lo,$dst.hi
2382     emit_opcode( cbuf, 0x8B );
2383     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2384     // CLR    $dst.hi
2385     emit_opcode(cbuf, 0x33);
2386     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2387 // small:
2388     // SHRD   $dst.lo,$dst.hi,$shift
2389     emit_opcode(cbuf,0x0F);
2390     emit_opcode(cbuf,0xAD);
2391     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2392     // SHR    $dst.hi,$shift"
2393     emit_opcode(cbuf,0xD3);
2394     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2395   %}
2396 
2397   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2398     // TEST shift,32
2399     emit_opcode(cbuf,0xF7);
2400     emit_rm(cbuf, 0x3, 0, ECX_enc);
2401     emit_d32(cbuf,0x20);
2402     // JEQ,s small
2403     emit_opcode(cbuf, 0x74);
2404     emit_d8(cbuf, 0x05);
2405     // MOV    $dst.lo,$dst.hi
2406     emit_opcode( cbuf, 0x8B );
2407     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2408     // SAR    $dst.hi,31
2409     emit_opcode(cbuf, 0xC1);
2410     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2411     emit_d8(cbuf, 0x1F );
2412 // small:
2413     // SHRD   $dst.lo,$dst.hi,$shift
2414     emit_opcode(cbuf,0x0F);
2415     emit_opcode(cbuf,0xAD);
2416     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2417     // SAR    $dst.hi,$shift"
2418     emit_opcode(cbuf,0xD3);
2419     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2420   %}
2421 
2422 
2423   // ----------------- Encodings for floating point unit -----------------
2424   // May leave result in FPU-TOS or FPU reg depending on opcodes
2425   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2426     $$$emit8$primary;
2427     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2428   %}
2429 
2430   // Pop argument in FPR0 with FSTP ST(0)
2431   enc_class PopFPU() %{
2432     emit_opcode( cbuf, 0xDD );
2433     emit_d8( cbuf, 0xD8 );
2434   %}
2435 
2436   // !!!!! equivalent to Pop_Reg_F
2437   enc_class Pop_Reg_DPR( regDPR dst ) %{
2438     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2439     emit_d8( cbuf, 0xD8+$dst$$reg );
2440   %}
2441 
2442   enc_class Push_Reg_DPR( regDPR dst ) %{
2443     emit_opcode( cbuf, 0xD9 );
2444     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2445   %}
2446 
2447   enc_class strictfp_bias1( regDPR dst ) %{
2448     emit_opcode( cbuf, 0xDB );           // FLD m80real
2449     emit_opcode( cbuf, 0x2D );
2450     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2451     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2452     emit_opcode( cbuf, 0xC8+$dst$$reg );
2453   %}
2454 
2455   enc_class strictfp_bias2( regDPR dst ) %{
2456     emit_opcode( cbuf, 0xDB );           // FLD m80real
2457     emit_opcode( cbuf, 0x2D );
2458     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2459     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2460     emit_opcode( cbuf, 0xC8+$dst$$reg );
2461   %}
2462 
2463   // Special case for moving an integer register to a stack slot.
2464   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2465     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2466   %}
2467 
2468   // Special case for moving a register to a stack slot.
2469   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2470     // Opcode already emitted
2471     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2472     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2473     emit_d32(cbuf, $dst$$disp);   // Displacement
2474   %}
2475 
2476   // Push the integer in stackSlot 'src' onto FP-stack
2477   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2478     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2479   %}
2480 
2481   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2482   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2483     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2484   %}
2485 
2486   // Same as Pop_Mem_F except for opcode
2487   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2488   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2489     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2490   %}
2491 
2492   enc_class Pop_Reg_FPR( regFPR dst ) %{
2493     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2494     emit_d8( cbuf, 0xD8+$dst$$reg );
2495   %}
2496 
2497   enc_class Push_Reg_FPR( regFPR dst ) %{
2498     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2499     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2500   %}
2501 
2502   // Push FPU's float to a stack-slot, and pop FPU-stack
2503   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2504     int pop = 0x02;
2505     if ($src$$reg != FPR1L_enc) {
2506       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2507       emit_d8( cbuf, 0xC0-1+$src$$reg );
2508       pop = 0x03;
2509     }
2510     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2511   %}
2512 
2513   // Push FPU's double to a stack-slot, and pop FPU-stack
2514   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2515     int pop = 0x02;
2516     if ($src$$reg != FPR1L_enc) {
2517       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2518       emit_d8( cbuf, 0xC0-1+$src$$reg );
2519       pop = 0x03;
2520     }
2521     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2522   %}
2523 
2524   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2525   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2526     int pop = 0xD0 - 1; // -1 since we skip FLD
2527     if ($src$$reg != FPR1L_enc) {
2528       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2529       emit_d8( cbuf, 0xC0-1+$src$$reg );
2530       pop = 0xD8;
2531     }
2532     emit_opcode( cbuf, 0xDD );
2533     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2534   %}
2535 
2536 
2537   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2538     // load dst in FPR0
2539     emit_opcode( cbuf, 0xD9 );
2540     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2541     if ($src$$reg != FPR1L_enc) {
2542       // fincstp
2543       emit_opcode (cbuf, 0xD9);
2544       emit_opcode (cbuf, 0xF7);
2545       // swap src with FPR1:
2546       // FXCH FPR1 with src
2547       emit_opcode(cbuf, 0xD9);
2548       emit_d8(cbuf, 0xC8-1+$src$$reg );
2549       // fdecstp
2550       emit_opcode (cbuf, 0xD9);
2551       emit_opcode (cbuf, 0xF6);
2552     }
2553   %}
2554 
2555   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2556     MacroAssembler _masm(&cbuf);
2557     __ subptr(rsp, 8);
2558     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2559     __ fld_d(Address(rsp, 0));
2560     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2561     __ fld_d(Address(rsp, 0));
2562   %}
2563 
2564   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2565     MacroAssembler _masm(&cbuf);
2566     __ subptr(rsp, 4);
2567     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2568     __ fld_s(Address(rsp, 0));
2569     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2570     __ fld_s(Address(rsp, 0));
2571   %}
2572 
2573   enc_class Push_ResultD(regD dst) %{
2574     MacroAssembler _masm(&cbuf);
2575     __ fstp_d(Address(rsp, 0));
2576     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2577     __ addptr(rsp, 8);
2578   %}
2579 
2580   enc_class Push_ResultF(regF dst, immI d8) %{
2581     MacroAssembler _masm(&cbuf);
2582     __ fstp_s(Address(rsp, 0));
2583     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2584     __ addptr(rsp, $d8$$constant);
2585   %}
2586 
2587   enc_class Push_SrcD(regD src) %{
2588     MacroAssembler _masm(&cbuf);
2589     __ subptr(rsp, 8);
2590     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2591     __ fld_d(Address(rsp, 0));
2592   %}
2593 
2594   enc_class push_stack_temp_qword() %{
2595     MacroAssembler _masm(&cbuf);
2596     __ subptr(rsp, 8);
2597   %}
2598 
2599   enc_class pop_stack_temp_qword() %{
2600     MacroAssembler _masm(&cbuf);
2601     __ addptr(rsp, 8);
2602   %}
2603 
2604   enc_class push_xmm_to_fpr1(regD src) %{
2605     MacroAssembler _masm(&cbuf);
2606     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2607     __ fld_d(Address(rsp, 0));
2608   %}
2609 
2610   enc_class Push_Result_Mod_DPR( regDPR src) %{
2611     if ($src$$reg != FPR1L_enc) {
2612       // fincstp
2613       emit_opcode (cbuf, 0xD9);
2614       emit_opcode (cbuf, 0xF7);
2615       // FXCH FPR1 with src
2616       emit_opcode(cbuf, 0xD9);
2617       emit_d8(cbuf, 0xC8-1+$src$$reg );
2618       // fdecstp
2619       emit_opcode (cbuf, 0xD9);
2620       emit_opcode (cbuf, 0xF6);
2621     }
2622     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2623     // // FSTP   FPR$dst$$reg
2624     // emit_opcode( cbuf, 0xDD );
2625     // emit_d8( cbuf, 0xD8+$dst$$reg );
2626   %}
2627 
2628   enc_class fnstsw_sahf_skip_parity() %{
2629     // fnstsw ax
2630     emit_opcode( cbuf, 0xDF );
2631     emit_opcode( cbuf, 0xE0 );
2632     // sahf
2633     emit_opcode( cbuf, 0x9E );
2634     // jnp  ::skip
2635     emit_opcode( cbuf, 0x7B );
2636     emit_opcode( cbuf, 0x05 );
2637   %}
2638 
2639   enc_class emitModDPR() %{
2640     // fprem must be iterative
2641     // :: loop
2642     // fprem
2643     emit_opcode( cbuf, 0xD9 );
2644     emit_opcode( cbuf, 0xF8 );
2645     // wait
2646     emit_opcode( cbuf, 0x9b );
2647     // fnstsw ax
2648     emit_opcode( cbuf, 0xDF );
2649     emit_opcode( cbuf, 0xE0 );
2650     // sahf
2651     emit_opcode( cbuf, 0x9E );
2652     // jp  ::loop
2653     emit_opcode( cbuf, 0x0F );
2654     emit_opcode( cbuf, 0x8A );
2655     emit_opcode( cbuf, 0xF4 );
2656     emit_opcode( cbuf, 0xFF );
2657     emit_opcode( cbuf, 0xFF );
2658     emit_opcode( cbuf, 0xFF );
2659   %}
2660 
2661   enc_class fpu_flags() %{
2662     // fnstsw_ax
2663     emit_opcode( cbuf, 0xDF);
2664     emit_opcode( cbuf, 0xE0);
2665     // test ax,0x0400
2666     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2667     emit_opcode( cbuf, 0xA9 );
2668     emit_d16   ( cbuf, 0x0400 );
2669     // // // This sequence works, but stalls for 12-16 cycles on PPro
2670     // // test rax,0x0400
2671     // emit_opcode( cbuf, 0xA9 );
2672     // emit_d32   ( cbuf, 0x00000400 );
2673     //
2674     // jz exit (no unordered comparison)
2675     emit_opcode( cbuf, 0x74 );
2676     emit_d8    ( cbuf, 0x02 );
2677     // mov ah,1 - treat as LT case (set carry flag)
2678     emit_opcode( cbuf, 0xB4 );
2679     emit_d8    ( cbuf, 0x01 );
2680     // sahf
2681     emit_opcode( cbuf, 0x9E);
2682   %}
2683 
2684   enc_class cmpF_P6_fixup() %{
2685     // Fixup the integer flags in case comparison involved a NaN
2686     //
2687     // JNP exit (no unordered comparison, P-flag is set by NaN)
2688     emit_opcode( cbuf, 0x7B );
2689     emit_d8    ( cbuf, 0x03 );
2690     // MOV AH,1 - treat as LT case (set carry flag)
2691     emit_opcode( cbuf, 0xB4 );
2692     emit_d8    ( cbuf, 0x01 );
2693     // SAHF
2694     emit_opcode( cbuf, 0x9E);
2695     // NOP     // target for branch to avoid branch to branch
2696     emit_opcode( cbuf, 0x90);
2697   %}
2698 
2699 //     fnstsw_ax();
2700 //     sahf();
2701 //     movl(dst, nan_result);
2702 //     jcc(Assembler::parity, exit);
2703 //     movl(dst, less_result);
2704 //     jcc(Assembler::below, exit);
2705 //     movl(dst, equal_result);
2706 //     jcc(Assembler::equal, exit);
2707 //     movl(dst, greater_result);
2708 
// less_result     = -1;
// greater_result  =  1;
// equal_result    =  0;
// nan_result      = -1;
2713 
2714   enc_class CmpF_Result(rRegI dst) %{
2715     // fnstsw_ax();
2716     emit_opcode( cbuf, 0xDF);
2717     emit_opcode( cbuf, 0xE0);
2718     // sahf
2719     emit_opcode( cbuf, 0x9E);
2720     // movl(dst, nan_result);
2721     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2722     emit_d32( cbuf, -1 );
2723     // jcc(Assembler::parity, exit);
2724     emit_opcode( cbuf, 0x7A );
2725     emit_d8    ( cbuf, 0x13 );
2726     // movl(dst, less_result);
2727     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2728     emit_d32( cbuf, -1 );
2729     // jcc(Assembler::below, exit);
2730     emit_opcode( cbuf, 0x72 );
2731     emit_d8    ( cbuf, 0x0C );
2732     // movl(dst, equal_result);
2733     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2734     emit_d32( cbuf, 0 );
2735     // jcc(Assembler::equal, exit);
2736     emit_opcode( cbuf, 0x74 );
2737     emit_d8    ( cbuf, 0x05 );
2738     // movl(dst, greater_result);
2739     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2740     emit_d32( cbuf, 1 );
2741   %}
2742 
2743 
2744   // Compare the longs and set flags
2745   // BROKEN!  Do Not use as-is
2746   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2747     // CMP    $src1.hi,$src2.hi
2748     emit_opcode( cbuf, 0x3B );
2749     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2750     // JNE,s  done
2751     emit_opcode(cbuf,0x75);
2752     emit_d8(cbuf, 2 );
2753     // CMP    $src1.lo,$src2.lo
2754     emit_opcode( cbuf, 0x3B );
2755     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2756 // done:
2757   %}
2758 
2759   enc_class convert_int_long( regL dst, rRegI src ) %{
2760     // mov $dst.lo,$src
2761     int dst_encoding = $dst$$reg;
2762     int src_encoding = $src$$reg;
2763     encode_Copy( cbuf, dst_encoding  , src_encoding );
2764     // mov $dst.hi,$src
2765     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2766     // sar $dst.hi,31
2767     emit_opcode( cbuf, 0xC1 );
2768     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2769     emit_d8(cbuf, 0x1F );
2770   %}
2771 
2772   enc_class convert_long_double( eRegL src ) %{
2773     // push $src.hi
2774     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2775     // push $src.lo
2776     emit_opcode(cbuf, 0x50+$src$$reg  );
2777     // fild 64-bits at [SP]
2778     emit_opcode(cbuf,0xdf);
2779     emit_d8(cbuf, 0x6C);
2780     emit_d8(cbuf, 0x24);
2781     emit_d8(cbuf, 0x00);
2782     // pop stack
2783     emit_opcode(cbuf, 0x83); // add  SP, #8
2784     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2785     emit_d8(cbuf, 0x8);
2786   %}
2787 
2788   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2789     // IMUL   EDX:EAX,$src1
2790     emit_opcode( cbuf, 0xF7 );
2791     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2792     // SAR    EDX,$cnt-32
2793     int shift_count = ((int)$cnt$$constant) - 32;
2794     if (shift_count > 0) {
2795       emit_opcode(cbuf, 0xC1);
2796       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2797       emit_d8(cbuf, shift_count);
2798     }
2799   %}
2800 
2801   // this version doesn't have add sp, 8
2802   enc_class convert_long_double2( eRegL src ) %{
2803     // push $src.hi
2804     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2805     // push $src.lo
2806     emit_opcode(cbuf, 0x50+$src$$reg  );
2807     // fild 64-bits at [SP]
2808     emit_opcode(cbuf,0xdf);
2809     emit_d8(cbuf, 0x6C);
2810     emit_d8(cbuf, 0x24);
2811     emit_d8(cbuf, 0x00);
2812   %}
2813 
2814   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2815     // Basic idea: long = (long)int * (long)int
2816     // IMUL EDX:EAX, src
2817     emit_opcode( cbuf, 0xF7 );
2818     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2819   %}
2820 
2821   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2822     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2823     // MUL EDX:EAX, src
2824     emit_opcode( cbuf, 0xF7 );
2825     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2826   %}
2827 
2828   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2829     // Basic idea: lo(result) = lo(x_lo * y_lo)
2830     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2831     // MOV    $tmp,$src.lo
2832     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2833     // IMUL   $tmp,EDX
2834     emit_opcode( cbuf, 0x0F );
2835     emit_opcode( cbuf, 0xAF );
2836     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2837     // MOV    EDX,$src.hi
2838     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2839     // IMUL   EDX,EAX
2840     emit_opcode( cbuf, 0x0F );
2841     emit_opcode( cbuf, 0xAF );
2842     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2843     // ADD    $tmp,EDX
2844     emit_opcode( cbuf, 0x03 );
2845     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2846     // MUL   EDX:EAX,$src.lo
2847     emit_opcode( cbuf, 0xF7 );
2848     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2849     // ADD    EDX,ESI
2850     emit_opcode( cbuf, 0x03 );
2851     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2852   %}
2853 
2854   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2855     // Basic idea: lo(result) = lo(src * y_lo)
2856     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2857     // IMUL   $tmp,EDX,$src
2858     emit_opcode( cbuf, 0x6B );
2859     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2860     emit_d8( cbuf, (int)$src$$constant );
2861     // MOV    EDX,$src
2862     emit_opcode(cbuf, 0xB8 + EDX_enc);
2863     emit_d32( cbuf, (int)$src$$constant );
2864     // MUL   EDX:EAX,EDX
2865     emit_opcode( cbuf, 0xF7 );
2866     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2867     // ADD    EDX,ESI
2868     emit_opcode( cbuf, 0x03 );
2869     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2870   %}
2871 
2872   enc_class long_div( eRegL src1, eRegL src2 ) %{
2873     // PUSH src1.hi
2874     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2875     // PUSH src1.lo
2876     emit_opcode(cbuf,               0x50+$src1$$reg  );
2877     // PUSH src2.hi
2878     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2879     // PUSH src2.lo
2880     emit_opcode(cbuf,               0x50+$src2$$reg  );
2881     // CALL directly to the runtime
2882     cbuf.set_insts_mark();
2883     emit_opcode(cbuf,0xE8);       // Call into runtime
2884     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2885     // Restore stack
2886     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2887     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2888     emit_d8(cbuf, 4*4);
2889   %}
2890 
2891   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2892     // PUSH src1.hi
2893     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2894     // PUSH src1.lo
2895     emit_opcode(cbuf,               0x50+$src1$$reg  );
2896     // PUSH src2.hi
2897     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2898     // PUSH src2.lo
2899     emit_opcode(cbuf,               0x50+$src2$$reg  );
2900     // CALL directly to the runtime
2901     cbuf.set_insts_mark();
2902     emit_opcode(cbuf,0xE8);       // Call into runtime
2903     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2904     // Restore stack
2905     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2906     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2907     emit_d8(cbuf, 4*4);
2908   %}
2909 
2910   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2911     // MOV   $tmp,$src.lo
2912     emit_opcode(cbuf, 0x8B);
2913     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2914     // OR    $tmp,$src.hi
2915     emit_opcode(cbuf, 0x0B);
2916     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2917   %}
2918 
2919   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2920     // CMP    $src1.lo,$src2.lo
2921     emit_opcode( cbuf, 0x3B );
2922     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2923     // JNE,s  skip
2924     emit_cc(cbuf, 0x70, 0x5);
2925     emit_d8(cbuf,2);
2926     // CMP    $src1.hi,$src2.hi
2927     emit_opcode( cbuf, 0x3B );
2928     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2929   %}
2930 
2931   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2932     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2933     emit_opcode( cbuf, 0x3B );
2934     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2935     // MOV    $tmp,$src1.hi
2936     emit_opcode( cbuf, 0x8B );
2937     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2938     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2939     emit_opcode( cbuf, 0x1B );
2940     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2941   %}
2942 
2943   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2944     // XOR    $tmp,$tmp
2945     emit_opcode(cbuf,0x33);  // XOR
2946     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2947     // CMP    $tmp,$src.lo
2948     emit_opcode( cbuf, 0x3B );
2949     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2950     // SBB    $tmp,$src.hi
2951     emit_opcode( cbuf, 0x1B );
2952     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2953   %}
2954 
2955  // Sniff, sniff... smells like Gnu Superoptimizer
2956   enc_class neg_long( eRegL dst ) %{
2957     emit_opcode(cbuf,0xF7);    // NEG hi
2958     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2959     emit_opcode(cbuf,0xF7);    // NEG lo
2960     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2961     emit_opcode(cbuf,0x83);    // SBB hi,0
2962     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2963     emit_d8    (cbuf,0 );
2964   %}
2965 
2966   enc_class enc_pop_rdx() %{
2967     emit_opcode(cbuf,0x5A);
2968   %}
2969 
2970   enc_class enc_rethrow() %{
2971     cbuf.set_insts_mark();
2972     emit_opcode(cbuf, 0xE9);        // jmp    entry
2973     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2974                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2975   %}
2976 
2977 
2978   // Convert a double to an int.  Java semantics require we do complex
2979   // manglelations in the corner cases.  So we set the rounding mode to
2980   // 'zero', store the darned double down as an int, and reset the
2981   // rounding mode to 'nearest'.  The hardware throws an exception which
2982   // patches up the correct value directly to the stack.
2983   enc_class DPR2I_encoding( regDPR src ) %{
2984     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2985     // exceptions here, so that a NAN or other corner-case value will
2986     // thrown an exception (but normal values get converted at full speed).
2987     // However, I2C adapters and other float-stack manglers leave pending
2988     // invalid-op exceptions hanging.  We would have to clear them before
2989     // enabling them and that is more expensive than just testing for the
2990     // invalid value Intel stores down in the corner cases.
2991     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2992     emit_opcode(cbuf,0x2D);
2993     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2994     // Allocate a word
2995     emit_opcode(cbuf,0x83);            // SUB ESP,4
2996     emit_opcode(cbuf,0xEC);
2997     emit_d8(cbuf,0x04);
2998     // Encoding assumes a double has been pushed into FPR0.
2999     // Store down the double as an int, popping the FPU stack
3000     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3001     emit_opcode(cbuf,0x1C);
3002     emit_d8(cbuf,0x24);
3003     // Restore the rounding mode; mask the exception
3004     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3005     emit_opcode(cbuf,0x2D);
3006     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3007         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3008         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3009 
3010     // Load the converted int; adjust CPU stack
3011     emit_opcode(cbuf,0x58);       // POP EAX
3012     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3013     emit_d32   (cbuf,0x80000000); //         0x80000000
3014     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3015     emit_d8    (cbuf,0x07);       // Size of slow_call
3016     // Push src onto stack slow-path
3017     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3018     emit_d8    (cbuf,0xC0-1+$src$$reg );
3019     // CALL directly to the runtime
3020     cbuf.set_insts_mark();
3021     emit_opcode(cbuf,0xE8);       // Call into runtime
3022     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3023     // Carry on here...
3024   %}
3025 
3026   enc_class DPR2L_encoding( regDPR src ) %{
3027     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3028     emit_opcode(cbuf,0x2D);
3029     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3030     // Allocate a word
3031     emit_opcode(cbuf,0x83);            // SUB ESP,8
3032     emit_opcode(cbuf,0xEC);
3033     emit_d8(cbuf,0x08);
3034     // Encoding assumes a double has been pushed into FPR0.
3035     // Store down the double as a long, popping the FPU stack
3036     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3037     emit_opcode(cbuf,0x3C);
3038     emit_d8(cbuf,0x24);
3039     // Restore the rounding mode; mask the exception
3040     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3041     emit_opcode(cbuf,0x2D);
3042     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3043         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3044         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3045 
3046     // Load the converted int; adjust CPU stack
3047     emit_opcode(cbuf,0x58);       // POP EAX
3048     emit_opcode(cbuf,0x5A);       // POP EDX
3049     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3050     emit_d8    (cbuf,0xFA);       // rdx
3051     emit_d32   (cbuf,0x80000000); //         0x80000000
3052     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3053     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3054     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3055     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3056     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3057     emit_d8    (cbuf,0x07);       // Size of slow_call
3058     // Push src onto stack slow-path
3059     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3060     emit_d8    (cbuf,0xC0-1+$src$$reg );
3061     // CALL directly to the runtime
3062     cbuf.set_insts_mark();
3063     emit_opcode(cbuf,0xE8);       // Call into runtime
3064     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3065     // Carry on here...
3066   %}
3067 
3068   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3069     // Operand was loaded from memory into fp ST (stack top)
3070     // FMUL   ST,$src  /* D8 C8+i */
3071     emit_opcode(cbuf, 0xD8);
3072     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3073   %}
3074 
3075   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3076     // FADDP  ST,src2  /* D8 C0+i */
3077     emit_opcode(cbuf, 0xD8);
3078     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3079     //could use FADDP  src2,fpST  /* DE C0+i */
3080   %}
3081 
3082   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3083     // FADDP  src2,ST  /* DE C0+i */
3084     emit_opcode(cbuf, 0xDE);
3085     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3086   %}
3087 
3088   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3089     // Operand has been loaded into fp ST (stack top); emits a subtract followed by a divide, both leaving the result in ST
3090       // FSUB   ST,$src1  /* D8 E0+i */
3091       emit_opcode(cbuf, 0xD8);
3092       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3093 
3094       // FDIV   ST,$src2  /* D8 F0+i */
3095       emit_opcode(cbuf, 0xD8);
3096       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3097   %}
3098 
3099   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3100     // Operand was loaded from memory into fp ST (stack top); emits an add of src1 followed by a multiply by src2
3101     // FADD   ST,$src1  /* D8 C0+i */
3102     emit_opcode(cbuf, 0xD8);
3103     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3104 
3105     // FMUL   ST,$src2  /* D8 C8+i */  -- non-popping, result stays in ST
3106     emit_opcode(cbuf, 0xD8);
3107     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3108   %}
3109 
3110 
3111   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3112     // Operand was loaded from memory into fp ST (stack top); same add as MulFAddF, but the multiply is the popping form
3113     // FADD   ST,$src1  /* D8 C0+i */
3114     emit_opcode(cbuf, 0xD8);
3115     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3116 
3117     // FMULP  src2,ST  /* DE C8+i */  -- product goes to src2 and the FPU stack is popped
3118     emit_opcode(cbuf, 0xDE);
3119     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3120   %}
3121 
3122   // Atomically load the volatile long: one 64-bit x87 integer load from $mem (DF /5), then a 64-bit store-and-pop into stack slot $dst (DF /7)
3123   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3124     emit_opcode(cbuf,0xDF);               // opcode byte of the load from $mem
3125     int rm_byte_opcode = 0x05;            // /5 opcode extension handed to encode_RegMem
3126     int base     = $mem$$base;
3127     int index    = $mem$$index;
3128     int scale    = $mem$$scale;
3129     int displace = $mem$$disp;
3130     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3131     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);  // emits the addressing bytes for $mem
3132     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );  // opcode DF with extension /7, addressed by the displacement of $dst
3133   %}
3134 
3135   // Volatile Store Long.  Must be atomic, so move it into
3136   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3137   // target address before the store (for null-ptr checks)
3138   // so the memory operand is used twice in the encoding.
3139   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3140     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );  // opcode DF with extension /5 on the stack slot of $src: brings the value to the FP TOS
3141     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3142     emit_opcode(cbuf,0xDF);           // opcode byte of the 64-bit FIST to $mem
3143     int rm_byte_opcode = 0x07;        // /7 opcode extension handed to encode_RegMem
3144     int base     = $mem$$base;
3145     int index    = $mem$$index;
3146     int scale    = $mem$$scale;
3147     int displace = $mem$$disp;
3148     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3149     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);  // emits the addressing bytes for $mem
3150   %}
3151 
3152   // Safepoint Poll.  This polls the safepoint page, and causes an
3153   // exception if it is not readable. Unfortunately, it kills the condition code
3154   // in the process.
3155   // We currently use TESTL [spp],EDI
3156   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3157 
3158   enc_class Safepoint_Poll() %{
3159     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);  // record a poll_type relocation at the current instruction mark
3160     emit_opcode(cbuf,0x85);                                      // opcode byte of the TESTL
3161     emit_rm (cbuf, 0x0, 0x7, 0x5);                               // ModRM: mod=0, reg=7, rm=5 (the [spp],EDI form named above)
3162     emit_d32(cbuf, (intptr_t)os::get_polling_page());            // 32-bit address of the polling page
3163   %}
3164 %}
3165 
3166 
3167 //----------FRAME--------------------------------------------------------------
3168 // Definition of frame structure and management information.
3169 //
3170 //  S T A C K   L A Y O U T    Allocators stack-slot number
3171 //                             |   (to get allocators register number
3172 //  G  Owned by    |        |  v    add OptoReg::stack0())
3173 //  r   CALLER     |        |
3174 //  o     |        +--------+      pad to even-align allocators stack-slot
3175 //  w     V        |  pad0  |        numbers; owned by CALLER
3176 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3177 //  h     ^        |   in   |  5
3178 //        |        |  args  |  4   Holes in incoming args owned by SELF
3179 //  |     |        |        |  3
3180 //  |     |        +--------+
3181 //  V     |        | old out|      Empty on Intel, window on Sparc
3182 //        |    old |preserve|      Must be even aligned.
3183 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3184 //        |        |   in   |  3   area for Intel ret address
3185 //     Owned by    |preserve|      Empty on Sparc.
3186 //       SELF      +--------+
3187 //        |        |  pad2  |  2   pad to align old SP
3188 //        |        +--------+  1
3189 //        |        | locks  |  0
3190 //        |        +--------+----> OptoReg::stack0(), even aligned
3191 //        |        |  pad1  | 11   pad to align new SP
3192 //        |        +--------+
3193 //        |        |        | 10
3194 //        |        | spills |  9   spills
3195 //        V        |        |  8   (pad0 slot for callee)
3196 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3197 //        ^        |  out   |  7
3198 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3199 //     Owned by    +--------+
3200 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3201 //        |    new |preserve|      Must be even-aligned.
3202 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3203 //        |        |        |
3204 //
3205 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3206 //         known from SELF's arguments and the Java calling convention.
3207 //         Region 6-7 is determined per call site.
3208 // Note 2: If the calling convention leaves holes in the incoming argument
3209 //         area, those holes are owned by SELF.  Holes in the outgoing area
3210 //         are owned by the CALLEE.  Holes should not be necessary in the
3211 //         incoming area, as the Java calling convention is completely under
3212 //         the control of the AD file.  Doubles can be sorted and packed to
3213 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3214 //         varargs C calling conventions.
3215 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3216 //         even aligned with pad0 as needed.
3217 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3218 //         region 6-11 is even aligned; it may be padded out more so that
3219 //         the region from SP to FP meets the minimum stack alignment.
3220 
3221 frame %{
3222   // What direction does stack grow in (assumed to be same for C & Java)
3223   stack_direction(TOWARDS_LOW);
3224 
3225   // These two registers define part of the calling convention
3226   // between compiled code and the interpreter.
3227   inline_cache_reg(EAX);                // Inline Cache Register
3228   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3229 
3230   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3231   cisc_spilling_operand_name(indOffset32);
3232 
3233   // Number of stack slots consumed by locking an object
3234   sync_stack_slots(1);
3235 
3236   // Compiled code's Frame Pointer
3237   frame_pointer(ESP);
3238   // Interpreter stores its frame pointer in a register which is
3239   // stored to the stack by I2CAdaptors.
3240   // I2CAdaptors convert from interpreted java to compiled java.
3241   interpreter_frame_pointer(EBP);
3242 
3243   // Stack alignment requirement
3244   // Alignment size in bytes, taken from StackAlignmentInBytes (128-bit -> 16 bytes)
3245   stack_alignment(StackAlignmentInBytes);
3246 
3247   // Number of stack slots between incoming argument block and the start of
3248   // a new frame.  The PROLOG must add this many slots to the stack.  The
3249   // EPILOG must remove this many slots.  Intel needs one slot for the return
3250   // address and one for EBP (must save EBP); VerifyStackAtCalls is added on top.
3251   in_preserve_stack_slots(2+VerifyStackAtCalls);
3252 
3253   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3254   // for calls to C.  Supports the var-args backing area for register parms.
3255   varargs_C_out_slots_killed(0);
3256 
3257   // The after-PROLOG location of the return address.  Location of
3258   // return address specifies a type (REG or STACK) and a number
3259   // representing the register number (i.e. - use a register name) or
3260   // stack slot.
3261   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3262   // Otherwise, it is above the locks and verification slot and alignment word
3263   return_addr(STACK - 1 +
3264               align_up((Compile::current()->in_preserve_stack_slots() +
3265                         Compile::current()->fixed_slots()),
3266                        stack_alignment_in_slots()));
3267 
3268   // Body of function which returns an integer array locating
3269   // arguments either in registers or in stack slots.  Passed an array
3270   // of ideal registers called "sig" and a "length" count.  Stack-slot
3271   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3272   // arguments for a CALLEE.  Incoming stack arguments are
3273   // automatically biased by the preserve_stack_slots field above.
3274   calling_convention %{
3275     // No difference between ingoing/outgoing just pass false
3276     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3277   %}
3278 
3279 
3280   // Body of function which returns an integer array locating the
3281   // arguments of a call to C, either in registers or in stack slots.
3282   // Passed an array of ideal registers called "sig" and a "length"
3283   // count.  Delegates to SharedRuntime::c_calling_convention; no
3284   // second register array is supplied (regs2 is NULL) and the value
3285   // returned by that call is deliberately discarded.
3286   c_calling_convention %{
3287     // This is obviously always outgoing
3288     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3289   %}
3290 
3291   // Location of C & interpreter return values; lo[]/hi[] give the low and high halves of the pair, indexed by ideal_reg
3292   c_return_value %{
3293     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3294     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3295     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3296 
3297     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3298     // that C functions return float and double results in XMM0.
3299     if( ideal_reg == Op_RegD && UseSSE>=2 )
3300       return OptoRegPair(XMM0b_num,XMM0_num);
3301     if( ideal_reg == Op_RegF && UseSSE>=2 )
3302       return OptoRegPair(OptoReg::Bad,XMM0_num);
3303 
3304     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);  // everything else comes straight from the tables
3305   %}
3306 
3307   // Location of return values.  Same tables as c_return_value; the only difference is that floats use XMM0 as soon as UseSSE>=1
3308   return_value %{
3309     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3310     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3311     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3312     if( ideal_reg == Op_RegD && UseSSE>=2 )
3313       return OptoRegPair(XMM0b_num,XMM0_num);
3314     if( ideal_reg == Op_RegF && UseSSE>=1 )
3315       return OptoRegPair(OptoReg::Bad,XMM0_num);
3316     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);  // everything else comes straight from the tables
3317   %}
3318 
3319 %}
3320 
3321 //----------ATTRIBUTES---------------------------------------------------------
3322 //----------Operand Attributes-------------------------------------------------
3323 op_attrib op_cost(0);        // Required cost attribute
3324 
3325 //----------Instruction Attributes---------------------------------------------
3326 ins_attrib ins_cost(100);       // Required cost attribute
3327 ins_attrib ins_size(8);         // Required size attribute (in bits)
3328 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3329                                 // non-matching short branch variant of some
3330                                 // long branch?
3331 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3332                                 // specifies the alignment that some part of the instruction (not
3333                                 // necessarily the start) requires.  If > 1, a compute_padding()
3334                                 // function must be provided for the instruction
3335 
3336 //----------OPERANDS-----------------------------------------------------------
3337 // Operand definitions must precede instruction definitions for correct parsing
3338 // in the ADLC because operands constitute user defined types which are used in
3339 // instruction definitions.
3340 
3341 //----------Simple Operands----------------------------------------------------
3342 // Immediate Operands
3343 // Integer Immediate: any int constant (no predicate restricts the value)
3344 operand immI() %{
3345   match(ConI);
3346 
3347   op_cost(10);
3348   format %{ %}
3349   interface(CONST_INTER);
3350 %}
3351 
3352 // Constant for test vs zero: the int constant 0 only
3353 operand immI0() %{
3354   predicate(n->get_int() == 0);
3355   match(ConI);
3356 
3357   op_cost(0);
3358   format %{ %}
3359   interface(CONST_INTER);
3360 %}
3361 
3362 // Constant for increment: the int constant 1 only
3363 operand immI1() %{
3364   predicate(n->get_int() == 1);
3365   match(ConI);
3366 
3367   op_cost(0);
3368   format %{ %}
3369   interface(CONST_INTER);
3370 %}
3371 
3372 // Constant for decrement: the int constant -1 only
3373 operand immI_M1() %{
3374   predicate(n->get_int() == -1);
3375   match(ConI);
3376 
3377   op_cost(0);
3378   format %{ %}
3379   interface(CONST_INTER);
3380 %}
3381 
3382 // Valid scale values for addressing modes: int constants 0 through 3 inclusive
3383 operand immI2() %{
3384   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3385   match(ConI);
3386 
3387   format %{ %}
3388   interface(CONST_INTER);
3389 %}
3390 
3391 operand immI8() %{
3392   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));  // int constant in the signed 8-bit range
3393   match(ConI);
3394 
3395   op_cost(5);
3396   format %{ %}
3397   interface(CONST_INTER);
3398 %}
3399 
3400 operand immI16() %{
3401   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));  // int constant in the signed 16-bit range
3402   match(ConI);
3403 
3404   op_cost(10);
3405   format %{ %}
3406   interface(CONST_INTER);
3407 %}
3408 
3409 // Int Immediate non-negative: any int constant >= 0 (zero included)
3410 operand immU31()
3411 %{
3412   predicate(n->get_int() >= 0);
3413   match(ConI);
3414 
3415   op_cost(0);
3416   format %{ %}
3417   interface(CONST_INTER);
3418 %}
3419 
3420 // Constant for long shifts: the int constant 32 only
3421 operand immI_32() %{
3422   predicate( n->get_int() == 32 );
3423   match(ConI);
3424 
3425   op_cost(0);
3426   format %{ %}
3427   interface(CONST_INTER);
3428 %}
3429 
3430 operand immI_1_31() %{
3431   predicate( n->get_int() >= 1 && n->get_int() <= 31 );  // int constant in the range 1..31 inclusive
3432   match(ConI);
3433 
3434   op_cost(0);
3435   format %{ %}
3436   interface(CONST_INTER);
3437 %}
3438 
3439 operand immI_32_63() %{
3440   predicate( n->get_int() >= 32 && n->get_int() <= 63 );  // int constant in the range 32..63 inclusive
3441   match(ConI);
3442   op_cost(0);
3443 
3444   format %{ %}
3445   interface(CONST_INTER);
3446 %}
3447 
3448 operand immI_1() %{
3449   predicate( n->get_int() == 1 );  // the int constant 1 only (same predicate as immI1)
3450   match(ConI);
3451 
3452   op_cost(0);
3453   format %{ %}
3454   interface(CONST_INTER);
3455 %}
3456 
3457 operand immI_2() %{
3458   predicate( n->get_int() == 2 );  // the int constant 2 only
3459   match(ConI);
3460 
3461   op_cost(0);
3462   format %{ %}
3463   interface(CONST_INTER);
3464 %}
3465 
3466 operand immI_3() %{
3467   predicate( n->get_int() == 3 );  // the int constant 3 only
3468   match(ConI);
3469 
3470   op_cost(0);
3471   format %{ %}
3472   interface(CONST_INTER);
3473 %}
3474 
3475 // Pointer Immediate: any pointer constant (no predicate restricts the value)
3476 operand immP() %{
3477   match(ConP);
3478 
3479   op_cost(10);
3480   format %{ %}
3481   interface(CONST_INTER);
3482 %}
3483 
3484 // NULL Pointer Immediate: a pointer constant whose value is 0
3485 operand immP0() %{
3486   predicate( n->get_ptr() == 0 );
3487   match(ConP);
3488   op_cost(0);
3489 
3490   format %{ %}
3491   interface(CONST_INTER);
3492 %}
3493 
3494 // Long Immediate: any long constant (no predicate restricts the value)
3495 operand immL() %{
3496   match(ConL);
3497 
3498   op_cost(20);
3499   format %{ %}
3500   interface(CONST_INTER);
3501 %}
3502 
3503 // Long Immediate zero: the long constant 0 only
3504 operand immL0() %{
3505   predicate( n->get_long() == 0L );
3506   match(ConL);
3507   op_cost(0);
3508 
3509   format %{ %}
3510   interface(CONST_INTER);
3511 %}
3512 
3513 // Long Immediate minus one: the long constant -1 only
3514 operand immL_M1() %{
3515   predicate( n->get_long() == -1L );
3516   match(ConL);
3517   op_cost(0);
3518 
3519   format %{ %}
3520   interface(CONST_INTER);
3521 %}
3522 
3523 // Long immediate from 0 to 127 (both bounds inclusive).
3524 // Used for a shorter form of long mul by 10.
3525 operand immL_127() %{
3526   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3527   match(ConL);
3528   op_cost(0);
3529 
3530   format %{ %}
3531   interface(CONST_INTER);
3532 %}
3533 
3534 // Long Immediate: low 32-bit mask, i.e. exactly the constant 0xFFFFFFFF
3535 operand immL_32bits() %{
3536   predicate(n->get_long() == 0xFFFFFFFFL);
3537   match(ConL);
3538   op_cost(0);
3539 
3540   format %{ %}
3541   interface(CONST_INTER);
3542 %}
3543 
3544 // Long Immediate whose value survives a round trip through int, i.e. fits in a signed 32-bit immediate
3545 operand immL32() %{
3546   predicate(n->get_long() == (int)(n->get_long()));
3547   match(ConL);
3548   op_cost(20);
3549 
3550   format %{ %}
3551   interface(CONST_INTER);
3552 %}
3553 
3554 // Double Immediate zero, only when UseSSE<=1
3555 operand immDPR0() %{
3556   // Do additional (and counter-intuitive) test against NaN to work around VC++
3557   // bug that generates code such that NaNs compare equal to 0.0
3558   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );  // NOTE(review): == 0.0 also holds for -0.0 -- confirm users tolerate that
3559   match(ConD);
3560 
3561   op_cost(5);
3562   format %{ %}
3563   interface(CONST_INTER);
3564 %}
3565 
3566 // Double Immediate one, only when UseSSE<=1
3567 operand immDPR1() %{
3568   predicate( UseSSE<=1 && n->getd() == 1.0 );
3569   match(ConD);
3570 
3571   op_cost(5);
3572   format %{ %}
3573   interface(CONST_INTER);
3574 %}
3575 
3576 // Double Immediate: any double constant, only when UseSSE<=1
3577 operand immDPR() %{
3578   predicate(UseSSE<=1);
3579   match(ConD);
3580 
3581   op_cost(5);
3582   format %{ %}
3583   interface(CONST_INTER);
3584 %}
3585 
3586 operand immD() %{
3587   predicate(UseSSE>=2);  // any double constant, only when UseSSE>=2 (complement of immDPR's UseSSE<=1)
3588   match(ConD);
3589 
3590   op_cost(5);
3591   format %{ %}
3592   interface(CONST_INTER);
3593 %}
3594 
3595 // Double Immediate zero, only when UseSSE>=2
3596 operand immD0() %{
3597   // The value is compared through jlong_cast rather than with a floating-point
3598   // ==, which avoids the VC++ problem that NaNs compare equal to 0.0 and also
3599   // keeps -0.0 out (presumably jlong_cast yields the raw bit pattern -- confirm).
3600   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3601   match(ConD);
3602 
3603   format %{ %}
3604   interface(CONST_INTER);
3605 %}
3606 
3607 // Float Immediate zero, only when UseSSE == 0
3608 operand immFPR0() %{
3609   predicate(UseSSE == 0 && n->getf() == 0.0F);  // NOTE(review): == 0.0F also holds for -0.0F -- confirm users tolerate that
3610   match(ConF);
3611 
3612   op_cost(5);
3613   format %{ %}
3614   interface(CONST_INTER);
3615 %}
3616 
3617 // Float Immediate one, only when UseSSE == 0
3618 operand immFPR1() %{
3619   predicate(UseSSE == 0 && n->getf() == 1.0F);
3620   match(ConF);
3621 
3622   op_cost(5);
3623   format %{ %}
3624   interface(CONST_INTER);
3625 %}
3626 
3627 // Float Immediate: any float constant, only when UseSSE == 0
3628 operand immFPR() %{
3629   predicate( UseSSE == 0 );
3630   match(ConF);
3631 
3632   op_cost(5);
3633   format %{ %}
3634   interface(CONST_INTER);
3635 %}
3636 
3637 // Float Immediate: any float constant, only when UseSSE >= 1
3638 operand immF() %{
3639   predicate(UseSSE >= 1);
3640   match(ConF);
3641 
3642   op_cost(5);
3643   format %{ %}
3644   interface(CONST_INTER);
3645 %}
3646 
3647 // Float Immediate zero.  Zero and not -0.0; only when UseSSE >= 1
3648 operand immF0() %{
3649   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );  // tested through jint_cast instead of a floating-point ==
3650   match(ConF);
3651 
3652   op_cost(5);
3653   format %{ %}
3654   interface(CONST_INTER);
3655 %}
3656 
3657 // Immediates for special shifts (sign extend)
3658 
3659 // Shift-count constant: the int constant 16 only
3660 operand immI_16() %{
3661   predicate( n->get_int() == 16 );
3662   match(ConI);
3663 
3664   format %{ %}
3665   interface(CONST_INTER);
3666 %}
3667 
3668 operand immI_24() %{
3669   predicate( n->get_int() == 24 );  // the int constant 24 only
3670   match(ConI);
3671 
3672   format %{ %}
3673   interface(CONST_INTER);
3674 %}
3675 
3676 // Constant for byte-wide masking: the int constant 255 (0xFF) only
3677 operand immI_255() %{
3678   predicate( n->get_int() == 255 );
3679   match(ConI);
3680 
3681   format %{ %}
3682   interface(CONST_INTER);
3683 %}
3684 
3685 // Constant for short-wide masking: the int constant 65535 (0xFFFF) only
3686 operand immI_65535() %{
3687   predicate(n->get_int() == 65535);
3688   match(ConI);
3689 
3690   format %{ %}
3691   interface(CONST_INTER);
3692 %}
3693 
3694 // Register Operands
3695 // Integer Register: an int value allocated in register class int_reg
3696 operand rRegI() %{
3697   constraint(ALLOC_IN_RC(int_reg));
3698   match(RegI);
3699   match(xRegI);    // the narrower integer-register operands are accepted as well
3700   match(eAXRegI);
3701   match(eBXRegI);
3702   match(eCXRegI);
3703   match(eDXRegI);
3704   match(eDIRegI);
3705   match(eSIRegI);
3706 
3707   format %{ %}
3708   interface(REG_INTER);
3709 %}
3710 
3711 // Subset of Integer Register: allocated in register class int_x_reg
3712 operand xRegI(rRegI reg) %{
3713   constraint(ALLOC_IN_RC(int_x_reg));
3714   match(reg);
3715   match(eAXRegI);  // of the single-register operands only EAX, EBX, ECX and EDX are listed
3716   match(eBXRegI);
3717   match(eCXRegI);
3718   match(eDXRegI);
3719 
3720   format %{ %}
3721   interface(REG_INTER);
3722 %}
3723 
3724 // Special Registers: int value pinned to register class eax_reg
3725 operand eAXRegI(xRegI reg) %{
3726   constraint(ALLOC_IN_RC(eax_reg));
3727   match(reg);
3728   match(rRegI);
3729 
3730   format %{ "EAX" %}
3731   interface(REG_INTER);
3732 %}
3733 
3734 // Special Registers: int value pinned to register class ebx_reg
3735 operand eBXRegI(xRegI reg) %{
3736   constraint(ALLOC_IN_RC(ebx_reg));
3737   match(reg);
3738   match(rRegI);
3739 
3740   format %{ "EBX" %}
3741   interface(REG_INTER);
3742 %}
3743 
3744 operand eCXRegI(xRegI reg) %{
3745   constraint(ALLOC_IN_RC(ecx_reg));  // int value pinned to register class ecx_reg
3746   match(reg);
3747   match(rRegI);
3748 
3749   format %{ "ECX" %}
3750   interface(REG_INTER);
3751 %}
3752 
3753 operand eDXRegI(xRegI reg) %{
3754   constraint(ALLOC_IN_RC(edx_reg));  // int value pinned to register class edx_reg
3755   match(reg);
3756   match(rRegI);
3757 
3758   format %{ "EDX" %}
3759   interface(REG_INTER);
3760 %}
3761 
3762 operand eDIRegI(xRegI reg) %{
3763   constraint(ALLOC_IN_RC(edi_reg));  // int value pinned to register class edi_reg
3764   match(reg);
3765   match(rRegI);
3766 
3767   format %{ "EDI" %}
3768   interface(REG_INTER);
3769 %}
3770 
3771 operand naxRegI() %{
3772   constraint(ALLOC_IN_RC(nax_reg));  // int value in register class nax_reg (by its name, presumably "not EAX" -- confirm in the register block)
3773   match(RegI);
3774   match(eCXRegI);                    // the listed single-register operands do not include eAXRegI
3775   match(eDXRegI);
3776   match(eSIRegI);
3777   match(eDIRegI);
3778 
3779   format %{ %}
3780   interface(REG_INTER);
3781 %}
3782 
3783 operand nadxRegI() %{
3784   constraint(ALLOC_IN_RC(nadx_reg));  // int value in register class nadx_reg (by its name, presumably "not EAX/EDX" -- confirm in the register block)
3785   match(RegI);
3786   match(eBXRegI);                     // the listed single-register operands include neither eAXRegI nor eDXRegI
3787   match(eCXRegI);
3788   match(eSIRegI);
3789   match(eDIRegI);
3790 
3791   format %{ %}
3792   interface(REG_INTER);
3793 %}
3794 
3795 operand ncxRegI() %{
3796   constraint(ALLOC_IN_RC(ncx_reg));  // int value in register class ncx_reg (by its name, presumably "not ECX" -- confirm in the register block)
3797   match(RegI);
3798   match(eAXRegI);                    // the listed single-register operands do not include eCXRegI
3799   match(eDXRegI);
3800   match(eSIRegI);
3801   match(eDIRegI);
3802 
3803   format %{ %}
3804   interface(REG_INTER);
3805 %}
3806 
3807 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3808 // //
3809 operand eSIRegI(xRegI reg) %{
3810    constraint(ALLOC_IN_RC(esi_reg));  // int value pinned to register class esi_reg
3811    match(reg);
3812    match(rRegI);
3813 
3814    format %{ "ESI" %}
3815    interface(REG_INTER);
3816 %}
3817 
3818 // Pointer Register: a pointer value allocated in register class any_reg
3819 operand anyRegP() %{
3820   constraint(ALLOC_IN_RC(any_reg));
3821   match(RegP);
3822   match(eAXRegP);
3823   match(eBXRegP);
3824   match(eCXRegP);
3825   match(eDIRegP);
3826   match(eRegP);     // the general pointer-register operand is accepted as well
3827 
3828   format %{ %}
3829   interface(REG_INTER);
3830 %}
3831 
3832 operand eRegP() %{
3833   constraint(ALLOC_IN_RC(int_reg));  // pointer value in register class int_reg, the same class rRegI uses
3834   match(RegP);
3835   match(eAXRegP);
3836   match(eBXRegP);
3837   match(eCXRegP);
3838   match(eDIRegP);
3839 
3840   format %{ %}
3841   interface(REG_INTER);
3842 %}
3843 
3844 // On windows95, EBP is not safe to use for implicit null tests, so this pointer operand allocates from int_reg_no_ebp.
3845 operand eRegP_no_EBP() %{
3846   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3847   match(RegP);
3848   match(eAXRegP);
3849   match(eBXRegP);
3850   match(eCXRegP);
3851   match(eDIRegP);
3852 
3853   op_cost(100);     // explicit cost of 100, unlike the other pointer-register operands here, which set none
3854   format %{ %}
3855   interface(REG_INTER);
3856 %}
3857 
3858 operand naxRegP() %{
3859   constraint(ALLOC_IN_RC(nax_reg));  // pointer value in register class nax_reg (by its name, presumably "not EAX" -- confirm in the register block)
3860   match(RegP);
3861   match(eBXRegP);                    // the listed single-register operands do not include eAXRegP
3862   match(eDXRegP);
3863   match(eCXRegP);
3864   match(eSIRegP);
3865   match(eDIRegP);
3866 
3867   format %{ %}
3868   interface(REG_INTER);
3869 %}
3870 
3871 operand nabxRegP() %{
3872   constraint(ALLOC_IN_RC(nabx_reg));  // pointer value in register class nabx_reg (by its name, presumably "not EAX/EBX" -- confirm in the register block)
3873   match(RegP);
3874   match(eCXRegP);                     // the listed single-register operands include neither eAXRegP nor eBXRegP
3875   match(eDXRegP);
3876   match(eSIRegP);
3877   match(eDIRegP);
3878 
3879   format %{ %}
3880   interface(REG_INTER);
3881 %}
3882 
3883 operand pRegP() %{
3884   constraint(ALLOC_IN_RC(p_reg));  // pointer value in register class p_reg
3885   match(RegP);
3886   match(eBXRegP);                  // the listed single-register operands are EBX, EDX, ESI and EDI
3887   match(eDXRegP);
3888   match(eSIRegP);
3889   match(eDIRegP);
3890 
3891   format %{ %}
3892   interface(REG_INTER);
3893 %}
3894 
3895 // Special Registers
3896 // Return a pointer value: pointer pinned to register class eax_reg
3897 operand eAXRegP(eRegP reg) %{
3898   constraint(ALLOC_IN_RC(eax_reg));
3899   match(reg);
3900   format %{ "EAX" %}
3901   interface(REG_INTER);
3902 %}
3903 
3904 // Used in AtomicAdd: pointer pinned to register class ebx_reg
3905 operand eBXRegP(eRegP reg) %{
3906   constraint(ALLOC_IN_RC(ebx_reg));
3907   match(reg);
3908   format %{ "EBX" %}
3909   interface(REG_INTER);
3910 %}
3911 
3912 // Tail-call (interprocedural jump) to interpreter: pointer pinned to register class ecx_reg
3913 operand eCXRegP(eRegP reg) %{
3914   constraint(ALLOC_IN_RC(ecx_reg));
3915   match(reg);
3916   format %{ "ECX" %}
3917   interface(REG_INTER);
3918 %}
3919 
3920 operand eSIRegP(eRegP reg) %{
3921   constraint(ALLOC_IN_RC(esi_reg));  // pointer pinned to register class esi_reg
3922   match(reg);
3923   format %{ "ESI" %}
3924   interface(REG_INTER);
3925 %}
3926 
3927 // Used in rep stosw: pointer pinned to register class edi_reg
3928 operand eDIRegP(eRegP reg) %{
3929   constraint(ALLOC_IN_RC(edi_reg));
3930   match(reg);
3931   format %{ "EDI" %}
3932   interface(REG_INTER);
3933 %}
3934 
3935 operand eRegL() %{
3936   constraint(ALLOC_IN_RC(long_reg));  // long value in register class long_reg
3937   match(RegL);
3938   match(eADXRegL);                    // the EDX:EAX operand is accepted as well
3939 
3940   format %{ %}
3941   interface(REG_INTER);
3942 %}
3943 
3944 operand eADXRegL( eRegL reg ) %{
3945   constraint(ALLOC_IN_RC(eadx_reg));  // long value pinned to register class eadx_reg, printed as EDX:EAX
3946   match(reg);
3947 
3948   format %{ "EDX:EAX" %}
3949   interface(REG_INTER);
3950 %}
3951 
3952 operand eBCXRegL( eRegL reg ) %{
3953   constraint(ALLOC_IN_RC(ebcx_reg));  // long value pinned to register class ebcx_reg, printed as EBX:ECX
3954   match(reg);
3955 
3956   format %{ "EBX:ECX" %}
3957   interface(REG_INTER);
3958 %}
3959 
3960 // Special case for integer high multiply: same register class as eADXRegL, but formatted as just "EAX"
3961 operand eADXRegL_low_only() %{
3962   constraint(ALLOC_IN_RC(eadx_reg));
3963   match(RegL);
3964 
3965   format %{ "EAX" %}
3966   interface(REG_INTER);
3967 %}
3968 
3969 // Flags register, used as output of compare instructions; allocated in register class int_flags
3970 operand eFlagsReg() %{
3971   constraint(ALLOC_IN_RC(int_flags));
3972   match(RegFlags);
3973 
3974   format %{ "EFLAGS" %}
3975   interface(REG_INTER);
3976 %}
3977 
3978 // Flags register, used as output of FLOATING POINT compare instructions; same class as eFlagsReg, distinct operand type
3979 operand eFlagsRegU() %{
3980   constraint(ALLOC_IN_RC(int_flags));
3981   match(RegFlags);
3982 
3983   format %{ "EFLAGS_U" %}
3984   interface(REG_INTER);
3985 %}
3986 
3987 operand eFlagsRegUCF() %{
3988   constraint(ALLOC_IN_RC(int_flags));  // flags value in register class int_flags
3989   match(RegFlags);
3990   predicate(false);                    // constant-false predicate; presumably this operand is only named explicitly by instructions -- confirm
3991 
3992   format %{ "EFLAGS_U_CF" %}
3993   interface(REG_INTER);
3994 %}
3995 
3996 // Condition Code Register used by long compare (one distinct operand type per LTGE / EQNE / LEGT group)
3997 operand flagsReg_long_LTGE() %{
3998   constraint(ALLOC_IN_RC(int_flags));
3999   match(RegFlags);
4000   format %{ "FLAGS_LTGE" %}
4001   interface(REG_INTER);
4002 %}
4003 operand flagsReg_long_EQNE() %{
4004   constraint(ALLOC_IN_RC(int_flags));  // long-compare flags, EQNE variant
4005   match(RegFlags);
4006   format %{ "FLAGS_EQNE" %}
4007   interface(REG_INTER);
4008 %}
4009 operand flagsReg_long_LEGT() %{
4010   constraint(ALLOC_IN_RC(int_flags));  // long-compare flags, LEGT variant
4011   match(RegFlags);
4012   format %{ "FLAGS_LEGT" %}
4013   interface(REG_INTER);
4014 %}
4015 
4016 // Condition Code Register used by unsigned long compare (one distinct operand type per LTGE / EQNE / LEGT group)
4017 operand flagsReg_ulong_LTGE() %{
4018   constraint(ALLOC_IN_RC(int_flags));
4019   match(RegFlags);
4020   format %{ "FLAGS_U_LTGE" %}
4021   interface(REG_INTER);
4022 %}
4023 operand flagsReg_ulong_EQNE() %{
4024   constraint(ALLOC_IN_RC(int_flags));  // unsigned-long-compare flags, EQNE variant
4025   match(RegFlags);
4026   format %{ "FLAGS_U_EQNE" %}
4027   interface(REG_INTER);
4028 %}
4029 operand flagsReg_ulong_LEGT() %{
4030   constraint(ALLOC_IN_RC(int_flags));  // unsigned-long-compare flags, LEGT variant
4031   match(RegFlags);
4032   format %{ "FLAGS_U_LEGT" %}
4033   interface(REG_INTER);
4034 %}
4035 
4036 // Double register operand in register class fp_dbl_reg; only when UseSSE < 2
4037 operand regDPR() %{
4038   predicate( UseSSE < 2 );
4039   constraint(ALLOC_IN_RC(fp_dbl_reg));
4040   match(RegD);
4041   match(regDPR1);
4042   match(regDPR2);
4043   format %{ %}
4044   interface(REG_INTER);
4045 %}
4046 
4047 operand regDPR1(regDPR reg) %{
4048   predicate( UseSSE < 2 );
4049   constraint(ALLOC_IN_RC(fp_dbl_reg0));  // double pinned to register class fp_dbl_reg0, printed as FPR1
4050   match(reg);
4051   format %{ "FPR1" %}
4052   interface(REG_INTER);
4053 %}
4054 
4055 operand regDPR2(regDPR reg) %{
4056   predicate( UseSSE < 2 );
4057   constraint(ALLOC_IN_RC(fp_dbl_reg1));  // double pinned to register class fp_dbl_reg1, printed as FPR2
4058   match(reg);
4059   format %{ "FPR2" %}
4060   interface(REG_INTER);
4061 %}
4062 
4063 operand regnotDPR1(regDPR reg) %{
4064   predicate( UseSSE < 2 );
4065   constraint(ALLOC_IN_RC(fp_dbl_notreg0));  // double in register class fp_dbl_notreg0 (by its name, presumably everything except FPR1 -- confirm)
4066   match(reg);
4067   format %{ %}
4068   interface(REG_INTER);
4069 %}
4070 
4071 // Float register operand in register class fp_flt_reg; only when UseSSE < 2
4072 operand regFPR() %{
4073   predicate( UseSSE < 2 );
4074   constraint(ALLOC_IN_RC(fp_flt_reg));
4075   match(RegF);
4076   match(regFPR1);
4077   format %{ %}
4078   interface(REG_INTER);
4079 %}
4080 
4081 // Float register operand pinned to register class fp_flt_reg0 (printed as FPR1); only when UseSSE < 2
4082 operand regFPR1(regFPR reg) %{
4083   predicate( UseSSE < 2 );
4084   constraint(ALLOC_IN_RC(fp_flt_reg0));
4085   match(reg);
4086   format %{ "FPR1" %}
4087   interface(REG_INTER);
4088 %}
4089 
4090 // XMM Float register operands: register class float_reg_legacy, only when UseSSE>=1
4091 operand regF() %{
4092   predicate( UseSSE>=1 );
4093   constraint(ALLOC_IN_RC(float_reg_legacy));
4094   match(RegF);
4095   format %{ %}
4096   interface(REG_INTER);
4097 %}
4098 
4099 // Float register operands in register class float_reg_vl (no UseSSE predicate on this one)
4100 operand vlRegF() %{
4101    constraint(ALLOC_IN_RC(float_reg_vl));
4102    match(RegF);
4103 
4104    format %{ %}
4105    interface(REG_INTER);
4106 %}
4107 
4108 // XMM Double register operands: register class double_reg_legacy, only when UseSSE>=2
4109 operand regD() %{
4110   predicate( UseSSE>=2 );
4111   constraint(ALLOC_IN_RC(double_reg_legacy));
4112   match(RegD);
4113   format %{ %}
4114   interface(REG_INTER);
4115 %}
4116 
4117 // Double register operands in register class double_reg_vl (no UseSSE predicate on this one)
4118 operand vlRegD() %{
4119    constraint(ALLOC_IN_RC(double_reg_vl));
4120    match(RegD);
4121 
4122    format %{ %}
4123    interface(REG_INTER);
4124 %}
4125 
4126 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
4127 // runtime code generation via reg_class_dynamic.
4128 operand vecS() %{
4129   constraint(ALLOC_IN_RC(vectors_reg_legacy));  // VecS value in register class vectors_reg_legacy
4130   match(VecS);
4131 
4132   format %{ %}
4133   interface(REG_INTER);
4134 %}
4135 
4136 operand legVecS() %{
4137   constraint(ALLOC_IN_RC(vectors_reg_legacy));  // same register class and match rule as vecS
4138   match(VecS);
4139 
4140   format %{ %}
4141   interface(REG_INTER);
4142 %}
4143 
4144 operand vecD() %{
4145   constraint(ALLOC_IN_RC(vectord_reg_legacy));  // VecD value in register class vectord_reg_legacy
4146   match(VecD);
4147 
4148   format %{ %}
4149   interface(REG_INTER);
4150 %}
4151 
4152 operand legVecD() %{
4153   constraint(ALLOC_IN_RC(vectord_reg_legacy));  // same register class and match rule as vecD
4154   match(VecD);
4155 
4156   format %{ %}
4157   interface(REG_INTER);
4158 %}
4159 
4160 operand vecX() %{
4161   constraint(ALLOC_IN_RC(vectorx_reg_legacy));  // VecX value in register class vectorx_reg_legacy
4162   match(VecX);
4163 
4164   format %{ %}
4165   interface(REG_INTER);
4166 %}
4167 
4168 operand legVecX() %{
4169   constraint(ALLOC_IN_RC(vectorx_reg_legacy));  // same register class and match rule as vecX
4170   match(VecX);
4171 
4172   format %{ %}
4173   interface(REG_INTER);
4174 %}
4175 
4176 operand vecY() %{
4177   constraint(ALLOC_IN_RC(vectory_reg_legacy));  // VecY value in register class vectory_reg_legacy
4178   match(VecY);
4179 
4180   format %{ %}
4181   interface(REG_INTER);
4182 %}
4183 
4184 operand legVecY() %{
4185   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4186   match(VecY);
4187   // Same match rule and register class as vecY (vectory_reg_legacy).
4188   format %{ %}
4189   interface(REG_INTER);
4190 %}
4191 
4192 //----------Memory Operands----------------------------------------------------
4193 // Direct Memory Operand: absolute address given by the pointer immediate $addr.
4194 operand direct(immP addr) %{
4195   match(addr);
4196 
4197   format %{ "[$addr]" %}
4198   interface(MEMORY_INTER) %{
4199     base(0xFFFFFFFF);  // presumably the "no base register" marker -- TODO confirm against the encoder
4200     index(0x4);        // 0x4 = no index (as annotated on the stack-slot operands)
4201     scale(0x0);        // no scale
4202     disp($addr);       // the immediate address itself is the displacement
4203   %}
4204 %}
4205 
4206 // Indirect Memory Operand: the address is the pointer held in $reg.
4207 operand indirect(eRegP reg) %{
4208   constraint(ALLOC_IN_RC(int_reg));
4209   match(reg);
4210 
4211   format %{ "[$reg]" %}
4212   interface(MEMORY_INTER) %{
4213     base($reg);
4214     index(0x4);   // no index register
4215     scale(0x0);   // no scale
4216     disp(0x0);    // no displacement
4217   %}
4218 %}
4219 
4220 // Indirect Memory Plus Short Offset Operand: address is $reg + $off, with $off an immI8 immediate.
4221 operand indOffset8(eRegP reg, immI8 off) %{
4222   match(AddP reg off);
4223 
4224   format %{ "[$reg + $off]" %}
4225   interface(MEMORY_INTER) %{
4226     base($reg);
4227     index(0x4);   // no index register
4228     scale(0x0);   // no scale
4229     disp($off);   // immediate offset becomes the displacement
4230   %}
4231 %}
4232 
4233 // Indirect Memory Plus Long Offset Operand: address is $reg + $off, with $off an immI immediate.
4234 operand indOffset32(eRegP reg, immI off) %{
4235   match(AddP reg off);
4236 
4237   format %{ "[$reg + $off]" %}
4238   interface(MEMORY_INTER) %{
4239     base($reg);
4240     index(0x4);   // no index register
4241     scale(0x0);   // no scale
4242     disp($off);   // immediate offset becomes the displacement
4243   %}
4244 %}
4245 
4246 // Pointer Immediate Plus Integer Register Operand: matches AddP whose address input is the immP $off and whose offset input is the integer register $reg.
4247 operand indOffset32X(rRegI reg, immP off) %{
4248   match(AddP off reg);
4249 
4250   format %{ "[$reg + $off]" %}
4251   interface(MEMORY_INTER) %{
4252     base($reg);   // the integer register is encoded as the base
4253     index(0x4);   // no index register
4254     scale(0x0);   // no scale
4255     disp($off);   // the pointer constant is encoded as the displacement
4256   %}
4257 %}
4258 
4259 // Indirect Memory Plus Index Register Plus Offset Operand: address is $reg + $ireg + $off.
4260 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4261   match(AddP (AddP reg ireg) off);
4262 
4263   op_cost(10);
4264   format %{"[$reg + $off + $ireg]" %}
4265   interface(MEMORY_INTER) %{
4266     base($reg);
4267     index($ireg);
4268     scale(0x0);   // index is not scaled
4269     disp($off);
4270   %}
4271 %}
4272 
4273 // Indirect Memory Plus Index Register Operand: address is $reg + $ireg (no offset).
4274 operand indIndex(eRegP reg, rRegI ireg) %{
4275   match(AddP reg ireg);
4276 
4277   op_cost(10);
4278   format %{"[$reg + $ireg]" %}
4279   interface(MEMORY_INTER) %{
4280     base($reg);
4281     index($ireg);
4282     scale(0x0);   // index is not scaled
4283     disp(0x0);    // no displacement
4284   %}
4285 %}
4286 
4287 // // -------------------------------------------------------------------------
4288 // // 486 architecture doesn't support "scale * index + offset" without a base
4289 // // -------------------------------------------------------------------------
4290 // // Scaled Memory Operands
4291 // // Indirect Memory Times Scale Plus Offset Operand
4292 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4293 //   match(AddP off (LShiftI ireg scale));
4294 //
4295 //   op_cost(10);
4296 //   format %{"[$off + $ireg << $scale]" %}
4297 //   interface(MEMORY_INTER) %{
4298 //     base(0x4);
4299 //     index($ireg);
4300 //     scale($scale);
4301 //     disp($off);
4302 //   %}
4303 // %}
4304 
4305 // Indirect Memory Plus Scaled Index Register Operand: address is $reg + ($ireg << $scale).
4306 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4307   match(AddP reg (LShiftI ireg scale));
4308 
4309   op_cost(10);
4310   format %{"[$reg + $ireg << $scale]" %}
4311   interface(MEMORY_INTER) %{
4312     base($reg);
4313     index($ireg);
4314     scale($scale);  // shift amount taken from the matched LShiftI constant
4315     disp(0x0);      // no displacement
4316   %}
4317 %}
4318 
4319 // Indirect Memory Plus Scaled Index Register Plus Offset Operand: address is $reg + ($ireg << $scale) + $off.
4320 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4321   match(AddP (AddP reg (LShiftI ireg scale)) off);
4322 
4323   op_cost(10);
4324   format %{"[$reg + $off + $ireg << $scale]" %}
4325   interface(MEMORY_INTER) %{
4326     base($reg);
4327     index($ireg);
4328     scale($scale);  // shift amount taken from the matched LShiftI constant
4329     disp($off);
4330   %}
4331 %}
4332 
4333 //----------Load Long Memory Operands------------------------------------------
4334 // The load-long idiom will use its address expression again after loading
4335 // the first word of the long.  If the load-long destination overlaps with
4336 // registers used in the addressing expression, the 2nd half will be loaded
4337 // from a clobbered address.  Fix this by requiring that load-long use
4338 // address registers that do not overlap with the load-long target.
4339 
4340 // load-long support: pointer register operand restricted to the esi_reg class (presumably so it cannot overlap a load-long destination -- confirm against the register classes).
4341 operand load_long_RegP() %{
4342   constraint(ALLOC_IN_RC(esi_reg));
4343   match(RegP);
4344   match(eSIRegP);
4345   op_cost(100);
4346   format %{  %}
4347   interface(REG_INTER);
4348 %}
4349 
4350 // Indirect Memory Operand Long: like indirect, but the base must be a load_long_RegP.
4351 operand load_long_indirect(load_long_RegP reg) %{
4352   constraint(ALLOC_IN_RC(esi_reg));
4353   match(reg);
4354 
4355   format %{ "[$reg]" %}
4356   interface(MEMORY_INTER) %{
4357     base($reg);
4358     index(0x4);   // no index register
4359     scale(0x0);   // no scale
4360     disp(0x0);    // no displacement
4361   %}
4362 %}
4363 
4364 // Indirect Memory Plus Long Offset Operand (load-long form): like indOffset32, but the base must be a load_long_RegP.
4365 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4366   match(AddP reg off);
4367 
4368   format %{ "[$reg + $off]" %}
4369   interface(MEMORY_INTER) %{
4370     base($reg);
4371     index(0x4);   // no index register
4372     scale(0x0);   // no scale
4373     disp($off);   // immediate offset becomes the displacement
4374   %}
4375 %}
4376 
4377 opclass load_long_memory(load_long_indirect, load_long_indOffset32);  // the two load-long addressing forms defined above
4378 
4379 
4380 //----------Special Memory Operands--------------------------------------------
4381 // Stack Slot Operand - This operand is used for loading and storing temporary
4382 //                      values on the stack where a match requires a value to
4383 //                      flow through memory.
4384 operand stackSlotP(sRegP reg) %{
4385   constraint(ALLOC_IN_RC(stack_slots));
4386   // Pointer stack slot (sRegP). No match rule because this operand is only generated in matching
4387   format %{ "[$reg]" %}
4388   interface(MEMORY_INTER) %{
4389     base(0x4);   // ESP
4390     index(0x4);  // No Index
4391     scale(0x0);  // No Scale
4392     disp($reg);  // Stack Offset
4393   %}
4394 %}
4395 
4396 operand stackSlotI(sRegI reg) %{
4397   constraint(ALLOC_IN_RC(stack_slots));
4398   // Int stack slot (sRegI). No match rule because this operand is only generated in matching
4399   format %{ "[$reg]" %}
4400   interface(MEMORY_INTER) %{
4401     base(0x4);   // ESP
4402     index(0x4);  // No Index
4403     scale(0x0);  // No Scale
4404     disp($reg);  // Stack Offset
4405   %}
4406 %}
4407 
4408 operand stackSlotF(sRegF reg) %{
4409   constraint(ALLOC_IN_RC(stack_slots));
4410   // Float stack slot (sRegF). No match rule because this operand is only generated in matching
4411   format %{ "[$reg]" %}
4412   interface(MEMORY_INTER) %{
4413     base(0x4);   // ESP
4414     index(0x4);  // No Index
4415     scale(0x0);  // No Scale
4416     disp($reg);  // Stack Offset
4417   %}
4418 %}
4419 
4420 operand stackSlotD(sRegD reg) %{
4421   constraint(ALLOC_IN_RC(stack_slots));
4422   // Double stack slot (sRegD). No match rule because this operand is only generated in matching
4423   format %{ "[$reg]" %}
4424   interface(MEMORY_INTER) %{
4425     base(0x4);   // ESP
4426     index(0x4);  // No Index
4427     scale(0x0);  // No Scale
4428     disp($reg);  // Stack Offset
4429   %}
4430 %}
4431 
4432 operand stackSlotL(sRegL reg) %{
4433   constraint(ALLOC_IN_RC(stack_slots));
4434   // Long stack slot (sRegL). No match rule because this operand is only generated in matching
4435   format %{ "[$reg]" %}
4436   interface(MEMORY_INTER) %{
4437     base(0x4);   // ESP
4438     index(0x4);  // No Index
4439     scale(0x0);  // No Scale
4440     disp($reg);  // Stack Offset
4441   %}
4442 %}
4443 
4444 //----------Memory Operands - Win95 Implicit Null Variants----------------
4445 // Indirect Memory Operand (win95-safe): same shape as indirect, but the base is an eRegP_no_EBP and the cost is 100.
4446 operand indirect_win95_safe(eRegP_no_EBP reg)
4447 %{
4448   constraint(ALLOC_IN_RC(int_reg));
4449   match(reg);
4450 
4451   op_cost(100);
4452   format %{ "[$reg]" %}
4453   interface(MEMORY_INTER) %{
4454     base($reg);
4455     index(0x4);   // no index register
4456     scale(0x0);   // no scale
4457     disp(0x0);    // no displacement
4458   %}
4459 %}
4460 
4461 // Indirect Memory Plus Short Offset Operand (win95-safe): as indOffset8, with an eRegP_no_EBP base and cost 100.
4462 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4463 %{
4464   match(AddP reg off);
4465 
4466   op_cost(100);
4467   format %{ "[$reg + $off]" %}
4468   interface(MEMORY_INTER) %{
4469     base($reg);
4470     index(0x4);   // no index register
4471     scale(0x0);   // no scale
4472     disp($off);   // immediate offset becomes the displacement
4473   %}
4474 %}
4475 
4476 // Indirect Memory Plus Long Offset Operand (win95-safe): as indOffset32, with an eRegP_no_EBP base and cost 100.
4477 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4478 %{
4479   match(AddP reg off);
4480 
4481   op_cost(100);
4482   format %{ "[$reg + $off]" %}
4483   interface(MEMORY_INTER) %{
4484     base($reg);
4485     index(0x4);   // no index register
4486     scale(0x0);   // no scale
4487     disp($off);   // immediate offset becomes the displacement
4488   %}
4489 %}
4490 
4491 // Indirect Memory Plus Index Register Plus Offset Operand (win95-safe): as indIndexOffset, with an eRegP_no_EBP base and cost 100.
4492 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4493 %{
4494   match(AddP (AddP reg ireg) off);
4495 
4496   op_cost(100);
4497   format %{"[$reg + $off + $ireg]" %}
4498   interface(MEMORY_INTER) %{
4499     base($reg);
4500     index($ireg);
4501     scale(0x0);   // index is not scaled
4502     disp($off);
4503   %}
4504 %}
4505 
4506 // Indirect Memory Plus Scaled Index Register Operand (win95-safe): as indIndexScale, with an eRegP_no_EBP base and cost 100.
4507 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4508 %{
4509   match(AddP reg (LShiftI ireg scale));
4510 
4511   op_cost(100);
4512   format %{"[$reg + $ireg << $scale]" %}
4513   interface(MEMORY_INTER) %{
4514     base($reg);
4515     index($ireg);
4516     scale($scale);  // shift amount taken from the matched LShiftI constant
4517     disp(0x0);      // no displacement
4518   %}
4519 %}
4520 
4521 // Indirect Memory Plus Scaled Index Register Plus Offset Operand (win95-safe): as indIndexScaleOffset, with an eRegP_no_EBP base and cost 100.
4522 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4523 %{
4524   match(AddP (AddP reg (LShiftI ireg scale)) off);
4525 
4526   op_cost(100);
4527   format %{"[$reg + $off + $ireg << $scale]" %}
4528   interface(MEMORY_INTER) %{
4529     base($reg);
4530     index($ireg);
4531     scale($scale);  // shift amount taken from the matched LShiftI constant
4532     disp($off);
4533   %}
4534 %}
4535 
4536 //----------Conditional Branch Operands----------------------------------------
4537 // Comparison Op  - This is the operation of the comparison, and is limited to
4538 //                  the following set of codes:
4539 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4540 //
4541 // Other attributes of the comparison, such as unsignedness, are specified
4542 // by the comparison instruction that sets a condition code flags register.
4543 // That result is represented by a flags operand whose subtype is appropriate
4544 // to the unsignedness (etc.) of the comparison.
4545 //
4546 // Later, the instruction which matches both the Comparison Op (a Bool) and
4547 // the flags (produced by the Cmp) specifies the coding of the comparison op
4548 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4549 
4550 // Comparison Code, signed compare: each entry pairs an x86 condition-code encoding with its mnemonic suffix.
4551 operand cmpOp() %{
4552   match(Bool);
4553 
4554   format %{ "" %}
4555   interface(COND_INTER) %{
4556     equal(0x4, "e");
4557     not_equal(0x5, "ne");
4558     less(0xC, "l");            // signed less-than
4559     greater_equal(0xD, "ge");  // signed greater-or-equal
4560     less_equal(0xE, "le");     // signed less-or-equal
4561     greater(0xF, "g");         // signed greater-than
4562     overflow(0x0, "o");
4563     no_overflow(0x1, "no");
4564   %}
4565 %}
4566 
4567 // Comparison Code, unsigned compare.  Used by FP also, with
4568 // C2 (unordered) turned into GT or LT already.  The other bits
4569 // C0 and C3 are turned into Carry & Zero flags.
4570 operand cmpOpU() %{
4571   match(Bool);
4572 
4573   format %{ "" %}
4574   interface(COND_INTER) %{
4575     equal(0x4, "e");
4576     not_equal(0x5, "ne");
4577     less(0x2, "b");            // below (unsigned <)
4578     greater_equal(0x3, "nb");  // not below (unsigned >=)
4579     less_equal(0x6, "be");     // below or equal (unsigned <=)
4580     greater(0x7, "nbe");       // not below or equal (unsigned >)
4581     overflow(0x0, "o");
4582     no_overflow(0x1, "no");
4583   %}
4584 %}
4585 
4586 // Floating comparisons that don't require any fixup for the unordered case
4587 operand cmpOpUCF() %{
4588   match(Bool);
4589   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4590             n->as_Bool()->_test._test == BoolTest::ge ||
4591             n->as_Bool()->_test._test == BoolTest::le ||
4592             n->as_Bool()->_test._test == BoolTest::gt);  // only the four ordering tests; eq/ne go to cmpOpUCF2
4593   format %{ "" %}
4594   interface(COND_INTER) %{
4595     equal(0x4, "e");
4596     not_equal(0x5, "ne");
4597     less(0x2, "b");            // same unsigned-style codes as cmpOpU
4598     greater_equal(0x3, "nb");
4599     less_equal(0x6, "be");
4600     greater(0x7, "nbe");
4601     overflow(0x0, "o");
4602     no_overflow(0x1, "no");
4603   %}
4604 %}
4605 
4606 
4607 // Floating comparisons that can be fixed up with extra conditional jumps
4608 operand cmpOpUCF2() %{
4609   match(Bool);
4610   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4611             n->as_Bool()->_test._test == BoolTest::eq);  // only the equality tests; ordering tests go to cmpOpUCF
4612   format %{ "" %}
4613   interface(COND_INTER) %{
4614     equal(0x4, "e");
4615     not_equal(0x5, "ne");
4616     less(0x2, "b");            // same unsigned-style codes as cmpOpU
4617     greater_equal(0x3, "nb");
4618     less_equal(0x6, "be");
4619     greater(0x7, "nbe");
4620     overflow(0x0, "o");
4621     no_overflow(0x1, "no");
4622   %}
4623 %}
4624 
4625 // Comparison Code for FP conditional move
4626 operand cmpOp_fcmov() %{
4627   match(Bool);
4628   // Overflow tests are excluded by the predicate below.
4629   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4630             n->as_Bool()->_test._test != BoolTest::no_overflow);
4631   format %{ "" %}
4632   interface(COND_INTER) %{
4633     equal        (0x0C8);  // NOTE(review): values look like FCMOVcc opcode selectors (low byte = second opcode byte) -- confirm against the encoder
4634     not_equal    (0x1C8);
4635     less         (0x0C0);
4636     greater_equal(0x1C0);
4637     less_equal   (0x0D0);
4638     greater      (0x1D0);
4639     overflow(0x0, "o"); // not really supported by the instruction
4640     no_overflow(0x1, "no"); // not really supported by the instruction
4641   %}
4642 %}
4643 
4644 // Comparison Code used in long compares: signed codes with the ordering tests mirrored (less uses "g", greater uses "l", and so on).
4645 operand cmpOp_commute() %{
4646   match(Bool);
4647 
4648   format %{ "" %}
4649   interface(COND_INTER) %{
4650     equal(0x4, "e");
4651     not_equal(0x5, "ne");
4652     less(0xF, "g");            // mirrored: less tests "greater"
4653     greater_equal(0xE, "le");  // mirrored: greater_equal tests "less or equal"
4654     less_equal(0xD, "ge");     // mirrored: less_equal tests "greater or equal"
4655     greater(0xC, "l");         // mirrored: greater tests "less"
4656     overflow(0x0, "o");
4657     no_overflow(0x1, "no");
4658   %}
4659 %}
4660 
4661 // Comparison Code used in unsigned long compares: unsigned codes with the ordering tests mirrored (less uses "nbe", greater uses "b", and so on).
4662 operand cmpOpU_commute() %{
4663   match(Bool);
4664 
4665   format %{ "" %}
4666   interface(COND_INTER) %{
4667     equal(0x4, "e");
4668     not_equal(0x5, "ne");
4669     less(0x7, "nbe");          // mirrored: less tests "not below or equal"
4670     greater_equal(0x6, "be");  // mirrored: greater_equal tests "below or equal"
4671     less_equal(0x3, "nb");     // mirrored: less_equal tests "not below"
4672     greater(0x2, "b");         // mirrored: greater tests "below"
4673     overflow(0x0, "o");
4674     no_overflow(0x1, "no");
4675   %}
4676 %}
4677 
4678 //----------OPERAND CLASSES----------------------------------------------------
4679 // Operand Classes are groups of operands that are used to simplify
4680 // instruction definitions by not requiring the AD writer to specify separate
4681 // instructions for every form of operand when the instruction accepts
4682 // multiple operand types with the same basic encoding and format.  The classic
4683 // case of this is memory operands.
4684 
4685 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4686                indIndex, indIndexScale, indIndexScaleOffset);  // every general addressing form defined above
4687 
4688 // Long memory operations are encoded in 2 instructions and a +4 offset.
4689 // This means some kind of offset is always required and you cannot use
4690 // an oop as the offset (done when working on static globals).
4691 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4692                     indIndex, indIndexScale, indIndexScaleOffset);  // same list as memory, minus indOffset32X
4693 
4694 
4695 //----------PIPELINE-----------------------------------------------------------
4696 // Rules which define the behavior of the target architectures pipeline.
4697 pipeline %{
4698 
4699 //----------ATTRIBUTES---------------------------------------------------------
4700 attributes %{
4701   variable_size_instructions;        // Variable size instructions
4702   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4703   instruction_unit_size = 1;         // Instruction size unit is 1 byte
4704   instruction_fetch_unit_size = 16;  // The processor fetches one line
4705   instruction_fetch_units = 1;       // of 16 bytes
4706 
4707   // List of nop instructions
4708   nops( MachNop );
4709 %}
4710 
4711 //----------RESOURCES----------------------------------------------------------
4712 // Resources are the functional units available to the machine
4713 
4714 // Generic P2/P3 pipeline
4715 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4716 // 3 instructions decoded per cycle.
4717 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4718 // 2 ALU op, only ALU0 handles mul/div instructions.
4719 resources( D0, D1, D2, DECODE = D0 | D1 | D2,  // DECODE = any of the three decoders
4720            MS0, MS1, MEM = MS0 | MS1,          // MEM = either load/store unit
4721            BR, FPU,
4722            ALU0, ALU1, ALU = ALU0 | ALU1 );    // ALU = either integer ALU
4723 
4724 //----------PIPELINE DESCRIPTION-----------------------------------------------
4725 // Pipeline Description specifies the stages in the machine's pipeline
4726 
4727 // Generic P2/P3 pipeline: six stages, named S0 through S5 in the pipe classes below
4728 pipe_desc(S0, S1, S2, S3, S4, S5);
4729 
4730 //----------PIPELINE CLASSES---------------------------------------------------
4731 // Pipeline Classes describe the stages in which input and output are
4732 // referenced by the hardware pipeline.
4733 
4734 // Naming convention: ialu or fpu
4735 // Then: _reg
4736 // Then: _reg if there is a 2nd register
4737 // Then: _long if it's a pair of instructions implementing a long
4738 // Then: _fat if it requires the big decoder
4739 //   Or: _mem if it requires the big decoder and a memory unit.
4740 
4741 // Integer ALU reg operation: one instruction that reads and rewrites dst.
4742 pipe_class ialu_reg(rRegI dst) %{
4743     single_instruction;
4744     dst    : S4(write);  // result written in stage S4
4745     dst    : S3(read);   // operand read in stage S3
4746     DECODE : S0;        // any decoder
4747     ALU    : S3;        // any alu
4748 %}
4749 
4750 // Long ALU reg operation: two-instruction form of ialu_reg on a long register.
4751 pipe_class ialu_reg_long(eRegL dst) %{
4752     instruction_count(2);
4753     dst    : S4(write);  // result written in stage S4
4754     dst    : S3(read);   // operand read in stage S3
4755     DECODE : S0(2);     // any 2 decoders
4756     ALU    : S3(2);     // both alus
4757 %}
4758 
4759 // Integer ALU reg operation using big decoder: as ialu_reg, but decoding is restricted to D0.
4760 pipe_class ialu_reg_fat(rRegI dst) %{
4761     single_instruction;
4762     dst    : S4(write);  // result written in stage S4
4763     dst    : S3(read);   // operand read in stage S3
4764     D0     : S0;        // big decoder only
4765     ALU    : S3;        // any alu
4766 %}
4767 
4768 // Long ALU reg operation using big decoder: two instructions, both decoded by D0.
4769 pipe_class ialu_reg_long_fat(eRegL dst) %{
4770     instruction_count(2);
4771     dst    : S4(write);  // result written in stage S4
4772     dst    : S3(read);   // operand read in stage S3
4773     D0     : S0(2);     // big decoder only; twice
4774     ALU    : S3(2);     // any 2 alus
4775 %}
4776 
4777 // Integer ALU reg-reg operation: one instruction reading src and writing dst.
4778 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4779     single_instruction;
4780     dst    : S4(write);  // result written in stage S4
4781     src    : S3(read);   // source read in stage S3
4782     DECODE : S0;        // any decoder
4783     ALU    : S3;        // any alu
4784 %}
4785 
4786 // Long ALU reg-reg operation: two-instruction form of ialu_reg_reg on long registers.
4787 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4788     instruction_count(2);
4789     dst    : S4(write);  // result written in stage S4
4790     src    : S3(read);   // source read in stage S3
4791     DECODE : S0(2);     // any 2 decoders
4792     ALU    : S3(2);     // both alus
4793 %}
4794 
4795 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand, and no MEM unit is reserved).
4796 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4797     single_instruction;
4798     dst    : S4(write);  // result written in stage S4
4799     src    : S3(read);   // source read in stage S3
4800     D0     : S0;        // big decoder only
4801     ALU    : S3;        // any alu
4802 %}
4803 
4804 // Long ALU reg-reg operation using big decoder: two instructions, both decoded by D0.
4805 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4806     instruction_count(2);
4807     dst    : S4(write);  // result written in stage S4
4808     src    : S3(read);   // source read in stage S3
4809     D0     : S0(2);     // big decoder only; twice
4810     ALU    : S3(2);     // both alus
4811 %}
4812 
4813 // Integer ALU reg-mem operation: memory is read in S3, the ALU runs in S4, and dst is written in S5.
4814 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4815     single_instruction;
4816     dst    : S5(write);  // one stage later than the reg-reg forms
4817     mem    : S3(read);
4818     D0     : S0;        // big decoder only
4819     ALU    : S4;        // any alu
4820     MEM    : S3;        // any mem
4821 %}
4822 
4823 // Long ALU reg-mem operation: two-instruction form of ialu_reg_mem; mem must be a load_long_memory operand.
4824 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4825     instruction_count(2);
4826     dst    : S5(write);
4827     mem    : S3(read);
4828     D0     : S0(2);     // big decoder only; twice
4829     ALU    : S4(2);     // any 2 alus
4830     MEM    : S3(2);     // both mems
4831 %}
4832 
4833 // Integer mem operation (prefetch): touches memory only; no register result and no ALU use.
4834 pipe_class ialu_mem(memory mem)
4835 %{
4836     single_instruction;
4837     mem    : S3(read);
4838     D0     : S0;        // big decoder only
4839     MEM    : S3;        // any mem
4840 %}
4841 
4842 // Integer Store to Memory: stores register src to mem.
4843 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4844     single_instruction;
4845     mem    : S3(read);   // address operand read in stage S3
4846     src    : S5(read);   // stored value read in stage S5
4847     D0     : S0;        // big decoder only
4848     ALU    : S4;        // any alu
4849     MEM    : S3;        // any mem
4850 %}
4851 
4852 // Long Store to Memory: two-instruction form of ialu_mem_reg.
4853 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4854     instruction_count(2);
4855     mem    : S3(read);   // address operand read in stage S3
4856     src    : S5(read);   // stored value read in stage S5
4857     D0     : S0(2);     // big decoder only; twice
4858     ALU    : S4(2);     // any 2 alus
4859     MEM    : S3(2);     // Both mems
4860 %}
4861 
4862 // Integer Store of an immediate to Memory: like ialu_mem_reg but with no register source.
4863 pipe_class ialu_mem_imm(memory mem) %{
4864     single_instruction;
4865     mem    : S3(read);   // address operand read in stage S3
4866     D0     : S0;        // big decoder only
4867     ALU    : S4;        // any alu
4868     MEM    : S3;        // any mem
4869 %}
4870 
4871 // Integer ALU0 reg-reg operation: as ialu_reg_reg, but pinned to decoder D0 and ALU0.
4872 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4873     single_instruction;
4874     dst    : S4(write);  // result written in stage S4
4875     src    : S3(read);   // source read in stage S3
4876     D0     : S0;        // Big decoder only
4877     ALU0   : S3;        // only alu0
4878 %}
4879 
4880 // Integer ALU0 reg-mem operation: as ialu_reg_mem, but the ALU work is pinned to ALU0.
4881 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4882     single_instruction;
4883     dst    : S5(write);
4884     mem    : S3(read);
4885     D0     : S0;        // big decoder only
4886     ALU0   : S4;        // ALU0 only
4887     MEM    : S3;        // any mem
4888 %}
4889 
4890 // Integer ALU reg-reg operation producing flags: reads src1 and src2, writes the flags register cr.
4891 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4892     single_instruction;
4893     cr     : S4(write);  // flags written in stage S4
4894     src1   : S3(read);
4895     src2   : S3(read);
4896     DECODE : S0;        // any decoder
4897     ALU    : S3;        // any alu
4898 %}
4899 
4900 // Integer ALU reg-imm operation producing flags: reads src1, writes the flags register cr.
4901 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4902     single_instruction;
4903     cr     : S4(write);  // flags written in stage S4
4904     src1   : S3(read);
4905     DECODE : S0;        // any decoder
4906     ALU    : S3;        // any alu
4907 %}
4908 
4909 // Integer ALU reg-mem operation producing flags: reads register src1 and memory src2, writes cr.
4910 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4911     single_instruction;
4912     cr     : S4(write);  // flags written in stage S4
4913     src1   : S3(read);
4914     src2   : S3(read);
4915     D0     : S0;        // big decoder only
4916     ALU    : S4;        // any alu
4917     MEM    : S3;        // any mem
4918 %}
4919 
4920 // Four-instruction compare idiom over p, q and y: all three are only read; no ALU or MEM unit is reserved.
4921 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4922     instruction_count(4);
4923     y      : S4(read);
4924     q      : S3(read);
4925     p      : S3(read);
4926     DECODE : S0(4);     // any decoder, x4 (one per instruction)
4927 %}
4928 
4929 // Conditional move reg-reg: reads src and the flags cr, writes dst.
4930 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4931     single_instruction;
4932     dst    : S4(write);
4933     src    : S3(read);
4934     cr     : S3(read);   // flags consumed in the same stage as src
4935     DECODE : S0;        // any decoder
4936 %}
4937 
4938 // Conditional move reg-mem: reads memory src and the flags cr, writes dst.
4939 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4940     single_instruction;
4941     dst    : S4(write);
4942     src    : S3(read);
4943     cr     : S3(read);   // flags consumed in the same stage as src
4944     DECODE : S0;        // any decoder
4945     MEM    : S3;        // any mem
4946 %}
4947 
4948 // Conditional move reg-reg long: reads long src and the flags cr, writes long dst.
4949 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4950     single_instruction;  // NOTE(review): declared single_instruction although DECODE is reserved twice -- confirm intent
4951     dst    : S4(write);
4952     src    : S3(read);
4953     cr     : S3(read);
4954     DECODE : S0(2);     // any 2 decoders
4955 %}
4956 
4957 // Conditional move double reg-reg: reads regDPR src and the flags cr, writes the regDPR1 dst.
4958 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4959     single_instruction;
4960     dst    : S4(write);
4961     src    : S3(read);
4962     cr     : S3(read);
4963     DECODE : S0;        // any decoder
4964 %}
4965 
4966 // Float single-register operation: two instructions; dst is only declared as read here.
4967 pipe_class fpu_reg(regDPR dst) %{
4968     instruction_count(2);
4969     dst    : S3(read);
4970     DECODE : S0(2);     // any 2 decoders
4971     FPU    : S3;        // the single FPU
4972 %}
4973 
4974 // Float reg-reg operation: two instructions reading src and writing dst.
4975 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4976     instruction_count(2);
4977     dst    : S4(write);
4978     src    : S3(read);
4979     DECODE : S0(2);     // any 2 decoders
4980     FPU    : S3;        // the single FPU
4981 %}
4982 
4983 // Float reg-reg-reg operation: three instructions reading src1 and src2 and writing dst.
4984 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4985     instruction_count(3);
4986     dst    : S4(write);
4987     src1   : S3(read);
4988     src2   : S3(read);
4989     DECODE : S0(3);     // any 3 decoders
4990     FPU    : S3(2);     // FPU reserved twice
4991 %}
4992 
4993 // Float four-register operation: four instructions reading src1..src3 and writing dst.
4994 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4995     instruction_count(4);
4996     dst    : S4(write);
4997     src1   : S3(read);
4998     src2   : S3(read);
4999     src3   : S3(read);
5000     DECODE : S0(4);     // any decoder, x4 (one per instruction)
5001     FPU    : S3(2);     // FPU reserved twice
5002 %}
5003 
5004 // Float reg-mem-reg-reg operation: four instructions; src1 comes from memory, src2 and src3 from registers.
5005 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
5006     instruction_count(4);
5007     dst    : S4(write);
5008     src1   : S3(read);
5009     src2   : S3(read);
5010     src3   : S3(read);
5011     DECODE : S1(3);     // any 3 decoders, in stage S1
5012     D0     : S0;        // Big decoder only
5013     FPU    : S3(2);     // FPU reserved twice
5014     MEM    : S3;        // any mem
5015 %}
5016 
5017 // Float reg-mem operation: two instructions; memory is read in S3 and dst written in S5.
5018 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
5019     instruction_count(2);
5020     dst    : S5(write);
5021     mem    : S3(read);
5022     D0     : S0;        // big decoder only
5023     DECODE : S1;        // any decoder for FPU POP
5024     FPU    : S4;        // the single FPU
5025     MEM    : S3;        // any mem
5026 %}
5027 
5028 // Float reg-reg-mem operation: three instructions combining register src1 with a memory operand into dst.
5029 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
5030     instruction_count(3);
5031     dst    : S5(write);
5032     src1   : S3(read);
5033     mem    : S3(read);
5034     D0     : S0;        // big decoder only
5035     DECODE : S1(2);     // any decoder for FPU POP
5036     FPU    : S4;        // the single FPU
5037     MEM    : S3;        // any mem
5038 %}
5039 
5040 // Float mem-reg operation: two instructions taking register src to the memory operand mem.
5041 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
5042     instruction_count(2);
5043     src    : S5(read);
5044     mem    : S3(read);
5045     DECODE : S0;        // any decoder for FPU PUSH
5046     D0     : S1;        // big decoder only
5047     FPU    : S4;        // the single FPU
5048     MEM    : S3;        // any mem
5049 %}
5050 
5051 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5052     instruction_count(3);  // Float mem-reg-reg operation: two register sources, memory operand mem
5053     src1   : S3(read);
5054     src2   : S3(read);
5055     mem    : S3(read);
5056     DECODE : S0(2);     // any decoder for FPU PUSH
5057     D0     : S1;        // big decoder only
5058     FPU    : S4;        // the single FPU
5059     MEM    : S3;        // any mem
5060 %}
5061 
5062 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5063     instruction_count(3);  // Float mem-reg-mem operation: register src1, memory src2, memory operand mem
5064     src1   : S3(read);
5065     src2   : S3(read);
5066     mem    : S4(read);
5067     DECODE : S0;        // any decoder for FPU PUSH
5068     D0     : S0(2);     // big decoder only
5069     FPU    : S4;        // the single FPU
5070     MEM    : S3(2);     // any mem
5071 %}
5072 
5073 pipe_class fpu_mem_mem(memory dst, memory src1) %{
5074     instruction_count(2);  // Float mem-mem operation: memory src1 to memory dst; no FPU unit is reserved
5075     src1   : S3(read);
5076     dst    : S4(read);   // dst is a memory operand, declared as read here
5077     D0     : S0(2);     // big decoder only
5078     MEM    : S3(2);     // any mem
5079 %}
5080 
5081 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5082     instruction_count(3);  // Float mem-mem-mem operation: memory src1 and src2, memory dst
5083     src1   : S3(read);
5084     src2   : S3(read);
5085     dst    : S4(read);   // dst is a memory operand, declared as read here
5086     D0     : S0(3);     // big decoder only
5087     FPU    : S4;        // the single FPU
5088     MEM    : S3(3);     // any mem
5089 %}
5090 
5091 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5092     instruction_count(3);  // Float mem-reg-constant operation: register src1 and memory operand mem (the constant is not a declared operand)
5093     src1   : S4(read);
5094     mem    : S4(read);
5095     DECODE : S0;        // any decoder for FPU PUSH
5096     D0     : S0(2);     // big decoder only
5097     FPU    : S4;        // the single FPU
5098     MEM    : S3(2);     // any mem
5099 %}
5100 
5101 // Float load constant: two instructions that load a constant into dst; a MEM unit is reserved for the load.
5102 pipe_class fpu_reg_con(regDPR dst) %{
5103     instruction_count(2);
5104     dst    : S5(write);
5105     D0     : S0;        // big decoder only for the load
5106     DECODE : S1;        // any decoder for FPU POP
5107     FPU    : S4;        // the single FPU
5108     MEM    : S3;        // any mem
5109 %}
5110 
5111 // Float reg-reg with constant load: three instructions combining register src with a loaded constant into dst.
5112 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5113     instruction_count(3);
5114     dst    : S5(write);
5115     src    : S3(read);
5116     D0     : S0;        // big decoder only for the load
5117     DECODE : S1(2);     // any decoder for FPU POP
5118     FPU    : S4;        // the single FPU
5119     MEM    : S3;        // any mem
5120 %}
5121 
5122 // Unconditional branch: one instruction that only reserves the branch unit.
5123 pipe_class pipe_jmp( label labl ) %{
5124     single_instruction;
5125     BR   : S3;
5126 %}
5127 
5128 // Conditional branch: reads the flags cr, then reserves the branch unit.
5129 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5130     single_instruction;
5131     cr    : S1(read);   // flags read in stage S1
5132     BR    : S3;
5133 %}
5134 
5135 // Allocation idiom: compare-and-exchange on heap_ptr producing dst, with a fixed latency of 6.
5136 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5137     instruction_count(1); force_serialization;  // presumably keeps the scheduler from overlapping this with neighbours -- confirm adlc semantics
5138     fixed_latency(6);
5139     heap_ptr : S3(read);
5140     DECODE   : S0(3);
5141     D0       : S2;
5142     MEM      : S3;
5143     ALU      : S3(2);
5144     dst      : S5(write);
5145     BR       : S5;
5146 %}
5147 
5148 // Generic big/slow expanded idiom: modeled as 10 instructions over multiple bundles with a fixed latency of 100.
5149 pipe_class pipe_slow(  ) %{
5150     instruction_count(10); multiple_bundles; force_serialization;
5151     fixed_latency(100);
5152     D0  : S0(2);        // big decoder only; twice
5153     MEM : S3(2);        // both mems
5154 %}
5155 
5156 // The real do-nothing guy: zero instructions and no resources; used below as the class for MachNop.
5157 pipe_class empty( ) %{
5158     instruction_count(0);
5159 %}
5160 
5161 // Define the class for the Nop node: MachNop (the nop named in the attributes block) uses the empty pipe class.
5162 define %{
5163    MachNop = empty;
5164 %}
5165 
5166 %}
5167 
5168 //----------INSTRUCTIONS-------------------------------------------------------
5169 //
5170 // match      -- States which machine-independent subtree may be replaced
5171 //               by this instruction.
5172 // ins_cost   -- The estimated cost of this instruction is used by instruction
5173 //               selection to identify a minimum cost tree of machine
5174 //               instructions that matches a tree of machine-independent
5175 //               instructions.
5176 // format     -- A string providing the disassembly for this instruction.
5177 //               The value of an instruction's operand may be inserted
5178 //               by referring to it with a '$' prefix.
5179 // opcode     -- Three instruction opcodes may be provided.  These are referred
5180 //               to within an encode class as $primary, $secondary, and $tertiary
5181 //               respectively.  The primary opcode is commonly used to
5182 //               indicate the type of machine instruction, while secondary
5183 //               and tertiary are often used for prefix options or addressing
5184 //               modes.
5185 // ins_encode -- A list of encode classes with parameters. The encode class
5186 //               name must have been defined in an 'enc_class' specification
5187 //               in the encode section of the architecture description.
5188 
5189 //----------BSWAP-Instruction--------------------------------------------------
// Reverse the byte order of a 32-bit int in place (ReverseBytesI) with a
// single BSWAP.  The opcode bytes 0x0F and 0xC8 are supplied as primary and
// secondary; OpcSReg is given dst (presumably it folds the register number
// into the secondary byte -- confirm against the enc_class definition).
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP  $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}
5198 
// Reverse the byte order of a 64-bit long held in a register pair
// (ReverseBytesL).  Per the format: byte-swap each 32-bit half, then
// exchange the two halves.  The actual bytes are produced by the
// bswap_long_bytes enc_class, which is defined outside this section.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP  $dst.lo\n\t"
            "BSWAP  $dst.hi\n\t"
            "XCHG   $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}
5210 
// Reverse the two bytes of an unsigned 16-bit value held in a 32-bit
// register (ReverseBytesUS): swap all four bytes, then shift the swapped
// halfword down logically so the upper 16 bits of the result are zero.
// The shift modifies EFLAGS, hence the KILL of cr.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP  $dst\n\t"
            "SHR    $dst,16" %}
  ins_encode %{
    // After the swap the reversed halfword sits in bits 31..16.
    __ bswapl($dst$$Register);
    // Logical shift: zero-fill from the top.
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
5223 
// Reverse the two bytes of a signed 16-bit value held in a 32-bit register
// (ReverseBytesS): swap all four bytes, then shift the swapped halfword
// down arithmetically so the result is sign-extended to 32 bits.
// The shift modifies EFLAGS, hence the KILL of cr.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP  $dst\n\t"
            "SAR    $dst,16" %}
  ins_encode %{
    // After the swap the reversed halfword sits in bits 31..16.
    __ bswapl($dst$$Register);
    // Arithmetic shift: replicate the sign bit from the top.
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}
5236 
5237 
5238 //---------- Zeros Count Instructions ------------------------------------------
5239 
// Leading-zero count of an int via LZCNT.  Selected only when
// UseCountLeadingZerosInstruction is set; flags are clobbered.
instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    __ lzcntl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
5251 
// Leading-zero count of an int without LZCNT (used when
// UseCountLeadingZerosInstruction is off).  BSR gives the index of the
// highest set bit; the count is 31 minus that index, with a zero source
// mapped to index -1 so that the result is 32.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ    skip\n\t"
            "MOV    $dst, -1\n"
      "skip:\n\t"
            "NEG    $dst\n\t"
            "ADD    $dst, 31" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    Label have_index;
    __ bsrl(result, value);
    // ZF is set when the source was zero; otherwise result holds the index.
    __ jccb(Assembler::notZero, have_index);
    __ movl(result, -1);
    __ bind(have_index);
    // result = (BitsPerInt - 1) - index
    __ negl(result);
    __ addl(result, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}
5276 
// Leading-zero count of a long (register pair) via LZCNT.  The high word is
// counted first; only if it was entirely zero (LZCNT reports that through
// the carry flag) is the low word counted and 32 added.  dst is a TEMP
// because it is written before the low word of src is read.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC    done\n\t"
            "LZCNT  $dst, $src.lo\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lo     = $src$$Register;
    Register hi     = HIGH_FROM_LOW(lo);
    Label finished;
    __ lzcntl(result, hi);
    // Carry clear: the high word had a set bit, so result is final.
    __ jccb(Assembler::carryClear, finished);
    __ lzcntl(result, lo);
    __ addl(result, BitsPerInt);
    __ bind(finished);
  %}
  ins_pipe(ialu_reg);
%}
5299 
// Leading-zero count of a long (register pair) without LZCNT (used when
// UseCountLeadingZerosInstruction is off).  BSR locates the highest set
// bit: in the high word (index + 32), else in the low word, else -1 when
// the whole value is zero.  The count is 63 minus that 64-bit index, so a
// zero source yields 64.  dst is a TEMP because it is written before the
// low word of src is read.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ     msw_is_zero\n\t"
            "ADD    $dst, 32\n\t"
            "JMP    not_zero\n"
      "msw_is_zero:\n\t"
            "BSR    $dst, $src.lo\n\t"
            "JNZ    not_zero\n\t"
            "MOV    $dst, -1\n"
      "not_zero:\n\t"
            "NEG    $dst\n\t"
            "ADD    $dst, 63" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    // Try the most significant word first; ZF set means it was zero.
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    // Bit found in the high word: convert to a 64-bit bit index.
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    // Both words zero: index -1 makes the arithmetic below produce 64.
    __ movl(Rdst, -1);
    __ bind(not_zero);
    // result = (BitsPerLong - 1) - index
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}
5335 
// Trailing-zero count of an int via TZCNT.  Selected only when
// UseCountTrailingZerosInstruction is set; flags are clobbered.
instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    __ tzcntl(result, value);
  %}
  ins_pipe(ialu_reg);
%}
5347 
// Trailing-zero count of an int without TZCNT (used when
// UseCountTrailingZerosInstruction is off).  BSF gives the index of the
// lowest set bit, which is the answer; a zero source is patched to 32.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ    done\n\t"
            "MOV    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;
    Label finished;
    __ bsfl(result, value);
    // ZF is set when the source was zero; only then load the fallback.
    __ jccb(Assembler::notZero, finished);
    __ movl(result, BitsPerInt);
    __ bind(finished);
  %}
  ins_pipe(ialu_reg);
%}
5367 
// Trailing-zero count of a long (register pair) via TZCNT.  The low word is
// counted first; only if it was entirely zero (TZCNT reports that through
// the carry flag) is the high word counted and 32 added.  dst is a TEMP
// because it is written before the high word of src is read.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC    done\n\t"
            "TZCNT  $dst, $src.hi\n\t"
            "ADD    $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lo     = $src$$Register;
    Register hi     = HIGH_FROM_LOW(lo);
    Label finished;
    __ tzcntl(result, lo);
    // Carry clear: the low word had a set bit, so result is final.
    __ jccb(Assembler::carryClear, finished);
    __ tzcntl(result, hi);
    __ addl(result, BitsPerInt);
    __ bind(finished);
  %}
  ins_pipe(ialu_reg);
%}
5390 
// Trailing-zero count of a long (register pair) without TZCNT (used when
// UseCountTrailingZerosInstruction is off).  BSF on the low word answers
// directly when that word is non-zero.  Otherwise BSF on the high word
// plus 32 is used, with an all-zero source patched to 32 + 32 = 64.
// dst is a TEMP because it is written before the high word of src is read.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ    done\n\t"
            "BSF    $dst, $src.hi\n\t"
            "JNZ    msw_not_zero\n\t"
            "MOV    $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register lo     = $src$$Register;
    Register hi     = HIGH_FROM_LOW(lo);
    Label high_has_bit;
    Label finished;
    __ bsfl(result, lo);
    __ jccb(Assembler::notZero, finished);
    // Low word was zero: look in the high word.
    __ bsfl(result, hi);
    __ jccb(Assembler::notZero, high_has_bit);
    // Whole value is zero: start from 32 so the add below gives 64.
    __ movl(result, BitsPerInt);
    __ bind(high_has_bit);
    __ addl(result, BitsPerInt);
    __ bind(finished);
  %}
  ins_pipe(ialu_reg);
%}
5420 
5421 
5422 //---------- Population Count Instructions -------------------------------------
5423 
// Population count of an int register via POPCNT.  Selected only when
// UsePopCountInstruction is set; flags are clobbered.
instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    Register count = $dst$$Register;
    Register value = $src$$Register;
    __ popcntl(count, value);
  %}
  ins_pipe(ialu_reg);
%}
5435 
// Population count of an int taken straight from memory: folds the LoadI
// into the POPCNT memory operand.  Requires UsePopCountInstruction.
instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    Register count = $dst$$Register;
    __ popcntl(count, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
5447 
// Population count of a long held in a register pair.  The two 32-bit
// halves are counted separately and the counts are summed; tmp holds the
// count for the high half.
// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    Register count    = $dst$$Register;
    Register hi_count = $tmp$$Register;
    Register lo       = $src$$Register;
    Register hi       = HIGH_FROM_LOW(lo);
    __ popcntl(count, lo);
    __ popcntl(hi_count, hi);
    __ addl(count, hi_count);
  %}
  ins_pipe(ialu_reg);
%}
5464 
// Population count of a long taken straight from memory: folds the LoadL
// into two POPCNT memory operands, one for the word at the operand address
// and one for the word 4 bytes above it, then sums the two counts.
// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    Register count    = $dst$$Register;
    Register hi_count = $tmp$$Register;
    // Build both word addresses from the raw operand components.
    Address lo_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address hi_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ popcntl(count, lo_word);
    __ popcntl(hi_count, hi_word);
    __ addl(count, hi_count);
  %}
  ins_pipe(ialu_reg);
%}
5483 
5484 
5485 //----------Load/Store/Move Instructions---------------------------------------
5486 //----------Load Instructions--------------------------------------------------
// Load Byte (8 bit signed): sign-extending byte load into an int register.
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    Register result = $dst$$Register;
    __ movsbl(result, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
5500 
// Load Byte (8 bit signed) into a Long register pair.  The low word gets
// the sign-extended byte; the high word is derived from it by copying and
// shifting the sign down.
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,7" %}

  ins_encode %{
    Register lo = $dst$$Register;
    Register hi = HIGH_FROM_LOW(lo);
    __ movsbl(lo, $mem$$Address);
    // hi is always a different register than lo.
    __ movl(hi, lo);
    // The top 24+1 bits are already sign bits, so shifting by 7 is enough.
    __ sarl(hi, 7);
  %}

  ins_pipe(ialu_reg_mem);
%}
5519 
// Load Unsigned Byte (8 bit unsigned): zero-extending byte load into an
// int register.
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    Register result = $dst$$Register;
    __ movzbl(result, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
5533 
// Load Unsigned Byte (8 bit unsigned) into a Long register pair: the low
// word gets the zero-extended byte and the high word is cleared with XOR
// (which is why the flags are killed).
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    Register lo = $dst$$Register;
    Register hi = HIGH_FROM_LOW(lo);
    __ movzbl(lo, $mem$$Address);
    __ xorl(hi, hi);
  %}

  ins_pipe(ialu_reg_mem);
%}
5551 
// Load Unsigned Byte (8 bit unsigned), AND it with an int constant, and
// widen to a Long register pair.  Only the low 8 bits of the mask can
// matter after a zero-extending byte load, so the mask is trimmed to them.
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register lo = $dst$$Register;
    Register hi = HIGH_FROM_LOW(lo);
    __ movzbl(lo, $mem$$Address);
    __ xorl(hi, hi);
    __ andl(lo, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}
5568 
// Load Short (16 bit signed): sign-extending halfword load into an int
// register.
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX  $dst,$mem\t# short" %}

  ins_encode %{
    Register result = $dst$$Register;
    __ movswl(result, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
5582 
// Load Short (16 bit signed) narrowed to Byte (8 bit signed).  Matches the
// shift-left-24 / shift-right-24 idiom applied to a loaded short and
// replaces it with a sign-extending byte load from the same address.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ movsbl(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
5594 
// Load Short (16 bit signed) into a Long register pair.  The low word gets
// the sign-extended halfword; the high word is derived from it by copying
// and shifting the sign down.
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,15" %}

  ins_encode %{
    Register lo = $dst$$Register;
    Register hi = HIGH_FROM_LOW(lo);
    __ movswl(lo, $mem$$Address);
    // hi is always a different register than lo.
    __ movl(hi, lo);
    // The top 16+1 bits are already sign bits, so shifting by 15 is enough.
    __ sarl(hi, 15);
  %}

  ins_pipe(ialu_reg_mem);
%}
5613 
// Load Unsigned Short/Char (16 bit unsigned): zero-extending halfword load
// into an int register.
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    Register result = $dst$$Register;
    __ movzwl(result, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
5627 
// Load Unsigned Short/Char (16 bit unsigned) narrowed to Byte (8 bit
// signed).  Matches the shift-left-24 / shift-right-24 idiom applied to a
// loaded char and replaces it with a sign-extending byte load from the
// same address.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ movsbl(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
5639 
// Load Unsigned Short/Char (16 bit unsigned) into a Long register pair: the
// low word gets the zero-extended halfword and the high word is cleared
// with XOR (which is why the flags are killed).
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    Register lo = $dst$$Register;
    Register hi = HIGH_FROM_LOW(lo);
    __ movzwl(lo, $mem$$Address);
    __ xorl(hi, hi);
  %}

  ins_pipe(ialu_reg_mem);
%}
5656 
// Load Unsigned Short/Char (16 bit unsigned) masked with 0xFF into a Long
// register pair.  The mask keeps only the low byte, so a zero-extending
// byte load from the same address is emitted and no AND is needed.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register lo = $dst$$Register;
    Register hi = HIGH_FROM_LOW(lo);
    __ movzbl(lo, $mem$$Address);
    __ xorl(hi, hi);
  %}
  ins_pipe(ialu_reg_mem);
%}
5671 
// Load Unsigned Short/Char (16 bit unsigned), AND it with an int constant,
// and widen to a Long register pair.  Only the low 16 bits of the mask can
// matter after a zero-extending halfword load, so the mask is trimmed.
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register lo = $dst$$Register;
    Register hi = HIGH_FROM_LOW(lo);
    __ movzwl(lo, $mem$$Address);
    __ xorl(hi, hi);
    __ andl(lo, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}
5688 
// Load Integer: plain 32-bit load into an int register.
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem\t# int" %}

  ins_encode %{
    Register result = $dst$$Register;
    __ movl(result, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
5702 
// Load Integer (32 bit signed) narrowed to Byte (8 bit signed).  Matches
// the shift-left-24 / shift-right-24 idiom applied to a loaded int and
// replaces it with a sign-extending byte load from the same address.
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ movsbl(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
5714 
// Load Integer (32 bit signed) narrowed to Unsigned Byte (8 bit unsigned).
// Matches a loaded int ANDed with 255 and replaces it with a
// zero-extending byte load from the same address.
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ movzbl(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
5726 
// Load Integer (32 bit signed) narrowed to Short (16 bit signed).  Matches
// the shift-left-16 / shift-right-16 idiom applied to a loaded int and
// replaces it with a sign-extending halfword load from the same address.
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ movswl(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
5738 
// Load Integer (32 bit signed) narrowed to Unsigned Short/Char (16 bit
// unsigned).  Matches a loaded int ANDed with 65535 and replaces it with a
// zero-extending halfword load from the same address.
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    Register result = $dst$$Register;
    __ movzwl(result, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
5750 
// Load Integer into a Long register pair with sign extension: load the low
// word, copy it to the high word, and shift the copy right arithmetically
// by 31 so the high word becomes all sign bits.
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,31" %}

  ins_encode %{
    Register lo = $dst$$Register;
    Register hi = HIGH_FROM_LOW(lo);
    __ movl(lo, $mem$$Address);
    // hi is always a different register than lo.
    __ movl(hi, lo);
    __ sarl(hi, 31);
  %}

  ins_pipe(ialu_reg_mem);
%}
5769 
// Load Integer masked with 0xFF into a Long register pair.  The mask keeps
// only the low byte, so a zero-extending byte load is emitted and the high
// word is cleared; no AND is needed.
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register lo = $dst$$Register;
    Register hi = HIGH_FROM_LOW(lo);
    __ movzbl(lo, $mem$$Address);
    __ xorl(hi, hi);
  %}
  ins_pipe(ialu_reg_mem);
%}
5784 
// Load Integer masked with 0xFFFF into a Long register pair.  The mask
// keeps only the low halfword, so a zero-extending halfword load is emitted
// and the high word is cleared; no AND is needed.
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register lo = $dst$$Register;
    Register hi = HIGH_FROM_LOW(lo);
    __ movzwl(lo, $mem$$Address);
    __ xorl(hi, hi);
  %}
  ins_pipe(ialu_reg_mem);
%}
5799 
// Load Integer ANDed with a 31-bit mask into a Long register pair.  With
// such a mask the masked value cannot be negative, so the high word is
// simply cleared instead of being sign-extended.
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,$mask" %}
  ins_encode %{
    Register lo = $dst$$Register;
    Register hi = HIGH_FROM_LOW(lo);
    __ movl(lo, $mem$$Address);
    __ xorl(hi, hi);
    __ andl(lo, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
5816 
// Load Unsigned Integer into a Long register pair.  Matches a loaded int
// converted to long and ANDed with the low-32-bits mask, which amounts to
// zero extension: load the low word and clear the high word.
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    Register lo = $dst$$Register;
    Register hi = HIGH_FROM_LOW(lo);
    __ movl(lo, $mem$$Address);
    __ xorl(hi, hi);
  %}

  ins_pipe(ialu_reg_mem);
%}
5833 
// Load Long (non-atomic form; atomic loads are handled by the volatile
// variants).  Emitted as two 32-bit loads, low word first.  The address
// must not be clobbered between the two loads, so the address register is
// restricted to ESI through the load_long_memory operand.
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
            "MOV    $dst.hi,$mem+4" %}

  ins_encode %{
    Register lo = $dst$$Register;
    Register hi = HIGH_FROM_LOW(lo);
    // Word at the operand address, and the word 4 bytes above it.
    Address lo_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address hi_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl(lo, lo_word);
    __ movl(hi, hi_word);
  %}

  ins_pipe(ialu_reg_long_mem);
%}
5853 
// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// Selected when atomic access is required and UseSSE<=1.  The result is
// produced in a long stack slot rather than a register pair; the bytes are
// emitted by the enc_loadL_volatile enc_class (defined outside this
// section).
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
            "FISTp  $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}
5867 
// Atomic (volatile) Load Long into a stack slot using SSE2: one 64-bit
// load into an XMM temp, then one 64-bit store to the rsp-relative slot.
// Selected when atomic access is required and UseSSE>=2.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD  $dst,$tmp" %}
  ins_encode %{
    XMMRegister xtmp = $tmp$$XMMRegister;
    __ movdbl(xtmp, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), xtmp);
  %}
  ins_pipe( pipe_slow );
%}
5881 
// Atomic (volatile) Load Long into a register pair using SSE2: one 64-bit
// load into an XMM temp, then the two halves are moved out to the integer
// registers (low half, shift right by 32, high half).
// Selected when atomic access is required and UseSSE>=2.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD   $dst.lo,$tmp\n\t"
            "PSRLQ  $tmp,32\n\t"
            "MOVD   $dst.hi,$tmp" %}
  ins_encode %{
    XMMRegister xtmp = $tmp$$XMMRegister;
    Register lo = $dst$$Register;
    Register hi = HIGH_FROM_LOW(lo);
    __ movdbl(xtmp, $mem$$Address);
    __ movdl(lo, xtmp);
    // Bring the upper 32 bits of the loaded value down into the low lane.
    __ psrlq(xtmp, 32);
    __ movdl(hi, xtmp);
  %}
  ins_pipe( pipe_slow );
%}
5899 
// Load Range
// Matches the LoadRange ideal node and emits a plain 32-bit MOV from
// memory (opcode 0x8B followed by the RegMem operand encoding).
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
5910 
5911 
// Load Pointer
// Matches LoadP and emits a plain 32-bit MOV from memory into a pointer
// register (opcode 0x8B followed by the RegMem operand encoding).
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
5922 
// Load Klass Pointer
// Matches LoadKlass; the encoding is identical to loadP (a 32-bit MOV from
// memory, opcode 0x8B).
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
5933 
// Load Double
// x87 variant, selected when UseSSE<=1.  Per the format: push the double
// from memory onto the FPU stack, then pop it into the destination FPU
// register.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
5947 
// Load Double to XMM.  Selected when UseSSE>=2 and UseXmmLoadAndClearUpper
// is set; the complementary case is handled by loadD_partial.
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD  $dst,$mem" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    __ movdbl (xdst, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
5959 
// Load Double to XMM when UseXmmLoadAndClearUpper is off (UseSSE>=2).
// The format prints MOVLPD while the encoding calls the same movdbl helper
// as loadD; presumably that helper picks the instruction from the flag --
// confirm against the MacroAssembler.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    __ movdbl (xdst, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
5970 
// Load Float to XMM register (single-precision floating point).
// Selected when UseSSE>=1; printed as MOVSS.
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS  $dst,$mem" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    __ movflt (xdst, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
5983 
// Load Float
// x87 variant, selected when UseSSE==0.  Per the format: push the float
// from memory onto the FPU stack, then pop it into the destination FPU
// register.
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
5997 
// Load Effective Address
// Materializes the address described by an indOffset8 operand into a
// pointer register with LEA (opcode 0x8D).  The match rule is the address
// operand itself rather than a load of it.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
6008 
// Load Effective Address for an indOffset32 operand: same LEA encoding
// (opcode 0x8D) as leaP8, differing only in the operand class matched.
instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
6018 
// Load Effective Address for an indIndexOffset operand: same LEA encoding
// (opcode 0x8D) as the other lea instructs, differing only in the operand
// class matched.
instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
6028 
// Load Effective Address for an indIndexScale operand: same LEA encoding
// (opcode 0x8D) as the other lea instructs, differing only in the operand
// class matched.
instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
6038 
// Load Effective Address for an indIndexScaleOffset operand: same LEA
// encoding (opcode 0x8D) as the other lea instructs, differing only in the
// operand class matched.
instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
6048 
// Load Constant
// Materializes an arbitrary int immediate into a register.  The bytes come
// from the LdImmI enc_class (defined outside this section); no flags
// operand is declared, unlike the XOR-based loadConI0.
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}
6057 
// Load Constant zero
// Cheaper special case (cost 50) for the int constant 0: XOR the register
// with itself (opcode 0x33).  XOR writes EFLAGS, hence the KILL of cr.
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR    $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}
6069 
// Load a pointer constant into a pointer register.  Opcode 0xB8 is the
// MOV-immediate form; the bytes (and presumably any relocation for the
// pointer -- confirm) come from the LdImmP enc_class defined outside this
// section.
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  opcode(0xB8);  /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}
6078 
// Load a long constant into a register pair as two 32-bit immediate moves,
// one per half (LdImmL_Lo then LdImmL_Hi, both defined outside this
// section).  The flags register is declared as killed.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "MOV    $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}
6089 
// Cheaper special case (cost 150 versus 200) for the long constant 0: XOR
// each half of the register pair with itself (opcode 0x33 for both).
// XOR writes EFLAGS, hence the KILL of cr.
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR    $dst.lo,$dst.lo\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}
6100 
// Load a float constant into an x87 register: push it from the constant
// table, then pop it into the destination FPU register.
// No predicate here; usage is guarded by the predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    const int fpu_dst = $dst$$reg;
    __ fld_s($constantaddress($con));
    __ fstp_d(fpu_dst);
  %}
  ins_pipe(fpu_reg_con);
%}
6113 
// Load the float constant 0.0 into an x87 register: FLDZ pushes zero, then
// it is popped into the destination FPU register.
// No predicate here; usage is guarded by the predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    const int fpu_dst = $dst$$reg;
    __ fldz();
    __ fstp_d(fpu_dst);
  %}
  ins_pipe(fpu_reg_con);
%}
6126 
// Load the float constant 1.0 into an x87 register: FLD1 pushes one, then
// it is popped into the destination FPU register.
// No predicate here; usage is guarded by the predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    const int fpu_dst = $dst$$reg;
    __ fld1();
    __ fstp_d(fpu_dst);
  %}
  ins_pipe(fpu_reg_con);
%}
6139 
// Load a float constant from the constant table into an XMM register.
// No predicate here; usage is guarded by the predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    __ movflt(xdst, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
6150 
// Load the float constant 0.0 into an XMM register by XORing the register
// with itself (cheaper than a constant-table load: cost 100 versus 125).
// No predicate here; usage is guarded by the predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS  $dst,$dst\t# float 0.0" %}
  ins_encode %{
    XMMRegister xdst = $dst$$XMMRegister;
    __ xorps(xdst, xdst);
  %}
  ins_pipe(pipe_slow);
%}
6161 
6162 // The instruction usage is guarded by predicate in operand immDPR().
6163 instruct loadConDPR(regDPR dst, immDPR con) %{
6164   match(Set dst con);
6165   ins_cost(125);
6166 
6167   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6168             "FSTP   $dst" %}
6169   ins_encode %{
6170     __ fld_d($constantaddress($con));
6171     __ fstp_d($dst$$reg);
6172   %}
6173   ins_pipe(fpu_reg_con);
6174 %}
6175 
6176 // The instruction usage is guarded by predicate in operand immDPR0().
6177 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6178   match(Set dst con);
6179   ins_cost(125);
6180 
6181   format %{ "FLDZ   ST\n\t"
6182             "FSTP   $dst" %}
6183   ins_encode %{
6184     __ fldz();
6185     __ fstp_d($dst$$reg);
6186   %}
6187   ins_pipe(fpu_reg_con);
6188 %}
6189 
6190 // The instruction usage is guarded by predicate in operand immDPR1().
6191 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6192   match(Set dst con);
6193   ins_cost(125);
6194 
6195   format %{ "FLD1   ST\n\t"
6196             "FSTP   $dst" %}
6197   ins_encode %{
6198     __ fld1();
6199     __ fstp_d($dst$$reg);
6200   %}
6201   ins_pipe(fpu_reg_con);
6202 %}
6203 
6204 // The instruction usage is guarded by predicate in operand immD().
6205 instruct loadConD(regD dst, immD con) %{
6206   match(Set dst con);
6207   ins_cost(125);
6208   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6209   ins_encode %{
6210     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6211   %}
6212   ins_pipe(pipe_slow);
6213 %}
6214 
6215 // The instruction usage is guarded by predicate in operand immD0().
6216 instruct loadConD0(regD dst, immD0 src) %{
6217   match(Set dst src);
6218   ins_cost(100);
6219   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6220   ins_encode %{
6221     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6222   %}
6223   ins_pipe( pipe_slow );
6224 %}
6225 
6226 // Load Stack Slot
6227 instruct loadSSI(rRegI dst, stackSlotI src) %{
6228   match(Set dst src);
6229   ins_cost(125);
6230 
6231   format %{ "MOV    $dst,$src" %}
6232   opcode(0x8B);
6233   ins_encode( OpcP, RegMem(dst,src));
6234   ins_pipe( ialu_reg_mem );
6235 %}
6236 
6237 instruct loadSSL(eRegL dst, stackSlotL src) %{
6238   match(Set dst src);
6239 
6240   ins_cost(200);
6241   format %{ "MOV    $dst,$src.lo\n\t"
6242             "MOV    $dst+4,$src.hi" %}
6243   opcode(0x8B, 0x8B);
6244   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6245   ins_pipe( ialu_mem_long_reg );
6246 %}
6247 
6248 // Load Stack Slot
6249 instruct loadSSP(eRegP dst, stackSlotP src) %{
6250   match(Set dst src);
6251   ins_cost(125);
6252 
6253   format %{ "MOV    $dst,$src" %}
6254   opcode(0x8B);
6255   ins_encode( OpcP, RegMem(dst,src));
6256   ins_pipe( ialu_reg_mem );
6257 %}
6258 
6259 // Load Stack Slot
6260 instruct loadSSF(regFPR dst, stackSlotF src) %{
6261   match(Set dst src);
6262   ins_cost(125);
6263 
6264   format %{ "FLD_S  $src\n\t"
6265             "FSTP   $dst" %}
6266   opcode(0xD9);               /* D9 /0, FLD m32real */
6267   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6268               Pop_Reg_FPR(dst) );
6269   ins_pipe( fpu_reg_mem );
6270 %}
6271 
6272 // Load Stack Slot
6273 instruct loadSSD(regDPR dst, stackSlotD src) %{
6274   match(Set dst src);
6275   ins_cost(125);
6276 
6277   format %{ "FLD_D  $src\n\t"
6278             "FSTP   $dst" %}
6279   opcode(0xDD);               /* DD /0, FLD m64real */
6280   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6281               Pop_Reg_DPR(dst) );
6282   ins_pipe( fpu_reg_mem );
6283 %}
6284 
6285 // Prefetch instructions for allocation.
6286 // Must be safe to execute with invalid address (cannot fault).
6287 
6288 instruct prefetchAlloc0( memory mem ) %{
6289   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6290   match(PrefetchAllocation mem);
6291   ins_cost(0);
6292   size(0);
6293   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6294   ins_encode();
6295   ins_pipe(empty);
6296 %}
6297 
6298 instruct prefetchAlloc( memory mem ) %{
6299   predicate(AllocatePrefetchInstr==3);
6300   match( PrefetchAllocation mem );
6301   ins_cost(100);
6302 
6303   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6304   ins_encode %{
6305     __ prefetchw($mem$$Address);
6306   %}
6307   ins_pipe(ialu_mem);
6308 %}
6309 
6310 instruct prefetchAllocNTA( memory mem ) %{
6311   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6312   match(PrefetchAllocation mem);
6313   ins_cost(100);
6314 
6315   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6316   ins_encode %{
6317     __ prefetchnta($mem$$Address);
6318   %}
6319   ins_pipe(ialu_mem);
6320 %}
6321 
6322 instruct prefetchAllocT0( memory mem ) %{
6323   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6324   match(PrefetchAllocation mem);
6325   ins_cost(100);
6326 
6327   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6328   ins_encode %{
6329     __ prefetcht0($mem$$Address);
6330   %}
6331   ins_pipe(ialu_mem);
6332 %}
6333 
6334 instruct prefetchAllocT2( memory mem ) %{
6335   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6336   match(PrefetchAllocation mem);
6337   ins_cost(100);
6338 
6339   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6340   ins_encode %{
6341     __ prefetcht2($mem$$Address);
6342   %}
6343   ins_pipe(ialu_mem);
6344 %}
6345 
6346 //----------Store Instructions-------------------------------------------------
6347 
6348 // Store Byte
6349 instruct storeB(memory mem, xRegI src) %{
6350   match(Set mem (StoreB mem src));
6351 
6352   ins_cost(125);
6353   format %{ "MOV8   $mem,$src" %}
6354   opcode(0x88);
6355   ins_encode( OpcP, RegMem( src, mem ) );
6356   ins_pipe( ialu_mem_reg );
6357 %}
6358 
6359 // Store Char/Short
6360 instruct storeC(memory mem, rRegI src) %{
6361   match(Set mem (StoreC mem src));
6362 
6363   ins_cost(125);
6364   format %{ "MOV16  $mem,$src" %}
6365   opcode(0x89, 0x66);
6366   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6367   ins_pipe( ialu_mem_reg );
6368 %}
6369 
6370 // Store Integer
6371 instruct storeI(memory mem, rRegI src) %{
6372   match(Set mem (StoreI mem src));
6373 
6374   ins_cost(125);
6375   format %{ "MOV    $mem,$src" %}
6376   opcode(0x89);
6377   ins_encode( OpcP, RegMem( src, mem ) );
6378   ins_pipe( ialu_mem_reg );
6379 %}
6380 
6381 // Store Long
6382 instruct storeL(long_memory mem, eRegL src) %{
6383   predicate(!((StoreLNode*)n)->require_atomic_access());
6384   match(Set mem (StoreL mem src));
6385 
6386   ins_cost(200);
6387   format %{ "MOV    $mem,$src.lo\n\t"
6388             "MOV    $mem+4,$src.hi" %}
6389   opcode(0x89, 0x89);
6390   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6391   ins_pipe( ialu_mem_long_reg );
6392 %}
6393 
6394 // Store Long to Integer
6395 instruct storeL2I(memory mem, eRegL src) %{
6396   match(Set mem (StoreI mem (ConvL2I src)));
6397 
6398   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6399   ins_encode %{
6400     __ movl($mem$$Address, $src$$Register);
6401   %}
6402   ins_pipe(ialu_mem_reg);
6403 %}
6404 
6405 // Volatile Store Long.  Must be atomic, so move it into
6406 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6407 // target address before the store (for null-ptr checks)
6408 // so the memory operand is used twice in the encoding.
6409 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6410   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6411   match(Set mem (StoreL mem src));
6412   effect( KILL cr );
6413   ins_cost(400);
6414   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6415             "FILD   $src\n\t"
6416             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6417   opcode(0x3B);
6418   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6419   ins_pipe( fpu_reg_mem );
6420 %}
6421 
6422 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6423   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6424   match(Set mem (StoreL mem src));
6425   effect( TEMP tmp, KILL cr );
6426   ins_cost(380);
6427   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6428             "MOVSD  $tmp,$src\n\t"
6429             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6430   ins_encode %{
6431     __ cmpl(rax, $mem$$Address);
6432     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6433     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6434   %}
6435   ins_pipe( pipe_slow );
6436 %}
6437 
6438 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6439   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6440   match(Set mem (StoreL mem src));
6441   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6442   ins_cost(360);
6443   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6444             "MOVD   $tmp,$src.lo\n\t"
6445             "MOVD   $tmp2,$src.hi\n\t"
6446             "PUNPCKLDQ $tmp,$tmp2\n\t"
6447             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6448   ins_encode %{
6449     __ cmpl(rax, $mem$$Address);
6450     __ movdl($tmp$$XMMRegister, $src$$Register);
6451     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6452     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6453     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6454   %}
6455   ins_pipe( pipe_slow );
6456 %}
6457 
6458 // Store Pointer; for storing unknown oops and raw pointers
6459 instruct storeP(memory mem, anyRegP src) %{
6460   match(Set mem (StoreP mem src));
6461 
6462   ins_cost(125);
6463   format %{ "MOV    $mem,$src" %}
6464   opcode(0x89);
6465   ins_encode( OpcP, RegMem( src, mem ) );
6466   ins_pipe( ialu_mem_reg );
6467 %}
6468 
6469 // Store Integer Immediate
6470 instruct storeImmI(memory mem, immI src) %{
6471   match(Set mem (StoreI mem src));
6472 
6473   ins_cost(150);
6474   format %{ "MOV    $mem,$src" %}
6475   opcode(0xC7);               /* C7 /0 */
6476   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6477   ins_pipe( ialu_mem_imm );
6478 %}
6479 
6480 // Store Short/Char Immediate
6481 instruct storeImmI16(memory mem, immI16 src) %{
6482   predicate(UseStoreImmI16);
6483   match(Set mem (StoreC mem src));
6484 
6485   ins_cost(150);
6486   format %{ "MOV16  $mem,$src" %}
6487   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6488   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6489   ins_pipe( ialu_mem_imm );
6490 %}
6491 
6492 // Store Pointer Immediate; null pointers or constant oops that do not
6493 // need card-mark barriers.
6494 instruct storeImmP(memory mem, immP src) %{
6495   match(Set mem (StoreP mem src));
6496 
6497   ins_cost(150);
6498   format %{ "MOV    $mem,$src" %}
6499   opcode(0xC7);               /* C7 /0 */
6500   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6501   ins_pipe( ialu_mem_imm );
6502 %}
6503 
6504 // Store Byte Immediate
6505 instruct storeImmB(memory mem, immI8 src) %{
6506   match(Set mem (StoreB mem src));
6507 
6508   ins_cost(150);
6509   format %{ "MOV8   $mem,$src" %}
6510   opcode(0xC6);               /* C6 /0 */
6511   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6512   ins_pipe( ialu_mem_imm );
6513 %}
6514 
6515 // Store CMS card-mark Immediate
6516 instruct storeImmCM(memory mem, immI8 src) %{
6517   match(Set mem (StoreCM mem src));
6518 
6519   ins_cost(150);
6520   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6521   opcode(0xC6);               /* C6 /0 */
6522   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6523   ins_pipe( ialu_mem_imm );
6524 %}
6525 
6526 // Store Double
6527 instruct storeDPR( memory mem, regDPR1 src) %{
6528   predicate(UseSSE<=1);
6529   match(Set mem (StoreD mem src));
6530 
6531   ins_cost(100);
6532   format %{ "FST_D  $mem,$src" %}
6533   opcode(0xDD);       /* DD /2 */
6534   ins_encode( enc_FPR_store(mem,src) );
6535   ins_pipe( fpu_mem_reg );
6536 %}
6537 
6538 // Store double does rounding on x86
6539 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6540   predicate(UseSSE<=1);
6541   match(Set mem (StoreD mem (RoundDouble src)));
6542 
6543   ins_cost(100);
6544   format %{ "FST_D  $mem,$src\t# round" %}
6545   opcode(0xDD);       /* DD /2 */
6546   ins_encode( enc_FPR_store(mem,src) );
6547   ins_pipe( fpu_mem_reg );
6548 %}
6549 
6550 // Store XMM register to memory (double-precision floating points)
6551 // MOVSD instruction
6552 instruct storeD(memory mem, regD src) %{
6553   predicate(UseSSE>=2);
6554   match(Set mem (StoreD mem src));
6555   ins_cost(95);
6556   format %{ "MOVSD  $mem,$src" %}
6557   ins_encode %{
6558     __ movdbl($mem$$Address, $src$$XMMRegister);
6559   %}
6560   ins_pipe( pipe_slow );
6561 %}
6562 
6563 // Load Double
6564 instruct MoveD2VL(vlRegD dst, regD src) %{
6565   match(Set dst src);
6566   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6567   ins_encode %{
6568     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6569   %}
6570   ins_pipe( fpu_reg_reg );
6571 %}
6572 
6573 // Load Double
6574 instruct MoveVL2D(regD dst, vlRegD src) %{
6575   match(Set dst src);
6576   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6577   ins_encode %{
6578     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6579   %}
6580   ins_pipe( fpu_reg_reg );
6581 %}
6582 
6583 // Store XMM register to memory (single-precision floating point)
6584 // MOVSS instruction
6585 instruct storeF(memory mem, regF src) %{
6586   predicate(UseSSE>=1);
6587   match(Set mem (StoreF mem src));
6588   ins_cost(95);
6589   format %{ "MOVSS  $mem,$src" %}
6590   ins_encode %{
6591     __ movflt($mem$$Address, $src$$XMMRegister);
6592   %}
6593   ins_pipe( pipe_slow );
6594 %}
6595 
6596 // Load Float
6597 instruct MoveF2VL(vlRegF dst, regF src) %{
6598   match(Set dst src);
6599   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6600   ins_encode %{
6601     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6602   %}
6603   ins_pipe( fpu_reg_reg );
6604 %}
6605 
6606 // Load Float
6607 instruct MoveVL2F(regF dst, vlRegF src) %{
6608   match(Set dst src);
6609   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6610   ins_encode %{
6611     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6612   %}
6613   ins_pipe( fpu_reg_reg );
6614 %}
6615 
6616 // Store Float
6617 instruct storeFPR( memory mem, regFPR1 src) %{
6618   predicate(UseSSE==0);
6619   match(Set mem (StoreF mem src));
6620 
6621   ins_cost(100);
6622   format %{ "FST_S  $mem,$src" %}
6623   opcode(0xD9);       /* D9 /2 */
6624   ins_encode( enc_FPR_store(mem,src) );
6625   ins_pipe( fpu_mem_reg );
6626 %}
6627 
6628 // Store Float does rounding on x86
6629 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6630   predicate(UseSSE==0);
6631   match(Set mem (StoreF mem (RoundFloat src)));
6632 
6633   ins_cost(100);
6634   format %{ "FST_S  $mem,$src\t# round" %}
6635   opcode(0xD9);       /* D9 /2 */
6636   ins_encode( enc_FPR_store(mem,src) );
6637   ins_pipe( fpu_mem_reg );
6638 %}
6639 
6640 // Store Float does rounding on x86
6641 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6642   predicate(UseSSE<=1);
6643   match(Set mem (StoreF mem (ConvD2F src)));
6644 
6645   ins_cost(100);
6646   format %{ "FST_S  $mem,$src\t# D-round" %}
6647   opcode(0xD9);       /* D9 /2 */
6648   ins_encode( enc_FPR_store(mem,src) );
6649   ins_pipe( fpu_mem_reg );
6650 %}
6651 
6652 // Store immediate Float value (it is faster than store from FPU register)
6653 // The instruction usage is guarded by predicate in operand immFPR().
6654 instruct storeFPR_imm( memory mem, immFPR src) %{
6655   match(Set mem (StoreF mem src));
6656 
6657   ins_cost(50);
6658   format %{ "MOV    $mem,$src\t# store float" %}
6659   opcode(0xC7);               /* C7 /0 */
6660   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6661   ins_pipe( ialu_mem_imm );
6662 %}
6663 
6664 // Store immediate Float value (it is faster than store from XMM register)
6665 // The instruction usage is guarded by predicate in operand immF().
6666 instruct storeF_imm( memory mem, immF src) %{
6667   match(Set mem (StoreF mem src));
6668 
6669   ins_cost(50);
6670   format %{ "MOV    $mem,$src\t# store float" %}
6671   opcode(0xC7);               /* C7 /0 */
6672   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6673   ins_pipe( ialu_mem_imm );
6674 %}
6675 
6676 // Store Integer to stack slot
6677 instruct storeSSI(stackSlotI dst, rRegI src) %{
6678   match(Set dst src);
6679 
6680   ins_cost(100);
6681   format %{ "MOV    $dst,$src" %}
6682   opcode(0x89);
6683   ins_encode( OpcPRegSS( dst, src ) );
6684   ins_pipe( ialu_mem_reg );
6685 %}
6686 
6687 // Store Integer to stack slot
6688 instruct storeSSP(stackSlotP dst, eRegP src) %{
6689   match(Set dst src);
6690 
6691   ins_cost(100);
6692   format %{ "MOV    $dst,$src" %}
6693   opcode(0x89);
6694   ins_encode( OpcPRegSS( dst, src ) );
6695   ins_pipe( ialu_mem_reg );
6696 %}
6697 
6698 // Store Long to stack slot
6699 instruct storeSSL(stackSlotL dst, eRegL src) %{
6700   match(Set dst src);
6701 
6702   ins_cost(200);
6703   format %{ "MOV    $dst,$src.lo\n\t"
6704             "MOV    $dst+4,$src.hi" %}
6705   opcode(0x89, 0x89);
6706   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6707   ins_pipe( ialu_mem_long_reg );
6708 %}
6709 
6710 //----------MemBar Instructions-----------------------------------------------
6711 // Memory barrier flavors
6712 
6713 instruct membar_acquire() %{
6714   match(MemBarAcquire);
6715   match(LoadFence);
6716   ins_cost(400);
6717 
6718   size(0);
6719   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6720   ins_encode();
6721   ins_pipe(empty);
6722 %}
6723 
6724 instruct membar_acquire_lock() %{
6725   match(MemBarAcquireLock);
6726   ins_cost(0);
6727 
6728   size(0);
6729   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6730   ins_encode( );
6731   ins_pipe(empty);
6732 %}
6733 
6734 instruct membar_release() %{
6735   match(MemBarRelease);
6736   match(StoreFence);
6737   ins_cost(400);
6738 
6739   size(0);
6740   format %{ "MEMBAR-release ! (empty encoding)" %}
6741   ins_encode( );
6742   ins_pipe(empty);
6743 %}
6744 
6745 instruct membar_release_lock() %{
6746   match(MemBarReleaseLock);
6747   ins_cost(0);
6748 
6749   size(0);
6750   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6751   ins_encode( );
6752   ins_pipe(empty);
6753 %}
6754 
6755 instruct membar_volatile(eFlagsReg cr) %{
6756   match(MemBarVolatile);
6757   effect(KILL cr);
6758   ins_cost(400);
6759 
6760   format %{
6761     $$template
6762     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6763   %}
6764   ins_encode %{
6765     __ membar(Assembler::StoreLoad);
6766   %}
6767   ins_pipe(pipe_slow);
6768 %}
6769 
6770 instruct unnecessary_membar_volatile() %{
6771   match(MemBarVolatile);
6772   predicate(Matcher::post_store_load_barrier(n));
6773   ins_cost(0);
6774 
6775   size(0);
6776   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6777   ins_encode( );
6778   ins_pipe(empty);
6779 %}
6780 
6781 instruct membar_storestore() %{
6782   match(MemBarStoreStore);
6783   ins_cost(0);
6784 
6785   size(0);
6786   format %{ "MEMBAR-storestore (empty encoding)" %}
6787   ins_encode( );
6788   ins_pipe(empty);
6789 %}
6790 
6791 //----------Move Instructions--------------------------------------------------
6792 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6793   match(Set dst (CastX2P src));
6794   format %{ "# X2P  $dst, $src" %}
6795   ins_encode( /*empty encoding*/ );
6796   ins_cost(0);
6797   ins_pipe(empty);
6798 %}
6799 
6800 instruct castP2X(rRegI dst, eRegP src ) %{
6801   match(Set dst (CastP2X src));
6802   ins_cost(50);
6803   format %{ "MOV    $dst, $src\t# CastP2X" %}
6804   ins_encode( enc_Copy( dst, src) );
6805   ins_pipe( ialu_reg_reg );
6806 %}
6807 
6808 //----------Conditional Move---------------------------------------------------
6809 // Conditional move
6810 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6811   predicate(!VM_Version::supports_cmov() );
6812   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6813   ins_cost(200);
6814   format %{ "J$cop,us skip\t# signed cmove\n\t"
6815             "MOV    $dst,$src\n"
6816       "skip:" %}
6817   ins_encode %{
6818     Label Lskip;
6819     // Invert sense of branch from sense of CMOV
6820     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6821     __ movl($dst$$Register, $src$$Register);
6822     __ bind(Lskip);
6823   %}
6824   ins_pipe( pipe_cmov_reg );
6825 %}
6826 
6827 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6828   predicate(!VM_Version::supports_cmov() );
6829   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6830   ins_cost(200);
6831   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6832             "MOV    $dst,$src\n"
6833       "skip:" %}
6834   ins_encode %{
6835     Label Lskip;
6836     // Invert sense of branch from sense of CMOV
6837     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6838     __ movl($dst$$Register, $src$$Register);
6839     __ bind(Lskip);
6840   %}
6841   ins_pipe( pipe_cmov_reg );
6842 %}
6843 
6844 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6845   predicate(VM_Version::supports_cmov() );
6846   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6847   ins_cost(200);
6848   format %{ "CMOV$cop $dst,$src" %}
6849   opcode(0x0F,0x40);
6850   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6851   ins_pipe( pipe_cmov_reg );
6852 %}
6853 
6854 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6855   predicate(VM_Version::supports_cmov() );
6856   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6857   ins_cost(200);
6858   format %{ "CMOV$cop $dst,$src" %}
6859   opcode(0x0F,0x40);
6860   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6861   ins_pipe( pipe_cmov_reg );
6862 %}
6863 
6864 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6865   predicate(VM_Version::supports_cmov() );
6866   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6867   ins_cost(200);
6868   expand %{
6869     cmovI_regU(cop, cr, dst, src);
6870   %}
6871 %}
6872 
6873 // Conditional move
6874 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6875   predicate(VM_Version::supports_cmov() );
6876   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6877   ins_cost(250);
6878   format %{ "CMOV$cop $dst,$src" %}
6879   opcode(0x0F,0x40);
6880   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6881   ins_pipe( pipe_cmov_mem );
6882 %}
6883 
6884 // Conditional move
6885 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6886   predicate(VM_Version::supports_cmov() );
6887   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6888   ins_cost(250);
6889   format %{ "CMOV$cop $dst,$src" %}
6890   opcode(0x0F,0x40);
6891   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6892   ins_pipe( pipe_cmov_mem );
6893 %}
6894 
6895 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6896   predicate(VM_Version::supports_cmov() );
6897   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6898   ins_cost(250);
6899   expand %{
6900     cmovI_memU(cop, cr, dst, src);
6901   %}
6902 %}
6903 
6904 // Conditional move
6905 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6906   predicate(VM_Version::supports_cmov() );
6907   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6908   ins_cost(200);
6909   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6910   opcode(0x0F,0x40);
6911   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6912   ins_pipe( pipe_cmov_reg );
6913 %}
6914 
6915 // Conditional move (non-P6 version)
6916 // Note:  a CMoveP is generated for  stubs and native wrappers
6917 //        regardless of whether we are on a P6, so we
6918 //        emulate a cmov here
6919 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6920   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6921   ins_cost(300);
6922   format %{ "Jn$cop   skip\n\t"
6923           "MOV    $dst,$src\t# pointer\n"
6924       "skip:" %}
6925   opcode(0x8b);
6926   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6927   ins_pipe( pipe_cmov_reg );
6928 %}
6929 
6930 // Conditional move
6931 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6932   predicate(VM_Version::supports_cmov() );
6933   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6934   ins_cost(200);
6935   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6936   opcode(0x0F,0x40);
6937   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6938   ins_pipe( pipe_cmov_reg );
6939 %}
6940 
6941 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6942   predicate(VM_Version::supports_cmov() );
6943   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6944   ins_cost(200);
6945   expand %{
6946     cmovP_regU(cop, cr, dst, src);
6947   %}
6948 %}
6949 
6950 // DISABLED: Requires the ADLC to emit a bottom_type call that
6951 // correctly meets the two pointer arguments; one is an incoming
6952 // register but the other is a memory operand.  ALSO appears to
6953 // be buggy with implicit null checks.
6954 //
6955 //// Conditional move
6956 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6957 //  predicate(VM_Version::supports_cmov() );
6958 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6959 //  ins_cost(250);
6960 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6961 //  opcode(0x0F,0x40);
6962 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6963 //  ins_pipe( pipe_cmov_mem );
6964 //%}
6965 //
6966 //// Conditional move
6967 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6968 //  predicate(VM_Version::supports_cmov() );
6969 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6970 //  ins_cost(250);
6971 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6972 //  opcode(0x0F,0x40);
6973 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6974 //  ins_pipe( pipe_cmov_mem );
6975 //%}
6976 
6977 // Conditional move
6978 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6979   predicate(UseSSE<=1);
6980   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6981   ins_cost(200);
6982   format %{ "FCMOV$cop $dst,$src\t# double" %}
6983   opcode(0xDA);
6984   ins_encode( enc_cmov_dpr(cop,src) );
6985   ins_pipe( pipe_cmovDPR_reg );
6986 %}
6987 
6988 // Conditional move
6989 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6990   predicate(UseSSE==0);
6991   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6992   ins_cost(200);
6993   format %{ "FCMOV$cop $dst,$src\t# float" %}
6994   opcode(0xDA);
6995   ins_encode( enc_cmov_dpr(cop,src) );
6996   ins_pipe( pipe_cmovDPR_reg );
6997 %}
6998 
6999 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7000 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
7001   predicate(UseSSE<=1);
7002   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7003   ins_cost(200);
7004   format %{ "Jn$cop   skip\n\t"
7005             "MOV    $dst,$src\t# double\n"
7006       "skip:" %}
7007   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7008   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
7009   ins_pipe( pipe_cmovDPR_reg );
7010 %}
7011 
7012 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7013 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
7014   predicate(UseSSE==0);
7015   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7016   ins_cost(200);
7017   format %{ "Jn$cop    skip\n\t"
7018             "MOV    $dst,$src\t# float\n"
7019       "skip:" %}
7020   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7021   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
7022   ins_pipe( pipe_cmovDPR_reg );
7023 %}
7024 
7025 // No CMOVE with SSE/SSE2
7026 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
7027   predicate (UseSSE>=1);
7028   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7029   ins_cost(200);
7030   format %{ "Jn$cop   skip\n\t"
7031             "MOVSS  $dst,$src\t# float\n"
7032       "skip:" %}
7033   ins_encode %{
7034     Label skip;
7035     // Invert sense of branch from sense of CMOV
7036     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7037     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7038     __ bind(skip);
7039   %}
7040   ins_pipe( pipe_slow );
7041 %}
7042 
7043 // No CMOVE with SSE/SSE2
7044 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
7045   predicate (UseSSE>=2);
7046   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7047   ins_cost(200);
7048   format %{ "Jn$cop   skip\n\t"
7049             "MOVSD  $dst,$src\t# float\n"
7050       "skip:" %}
7051   ins_encode %{
7052     Label skip;
7053     // Invert sense of branch from sense of CMOV
7054     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7055     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7056     __ bind(skip);
7057   %}
7058   ins_pipe( pipe_slow );
7059 %}
7060 
7061 // unsigned version
7062 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
7063   predicate (UseSSE>=1);
7064   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7065   ins_cost(200);
7066   format %{ "Jn$cop   skip\n\t"
7067             "MOVSS  $dst,$src\t# float\n"
7068       "skip:" %}
7069   ins_encode %{
7070     Label skip;
7071     // Invert sense of branch from sense of CMOV
7072     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7073     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7074     __ bind(skip);
7075   %}
7076   ins_pipe( pipe_slow );
7077 %}
7078 
7079 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
7080   predicate (UseSSE>=1);
7081   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7082   ins_cost(200);
7083   expand %{
7084     fcmovF_regU(cop, cr, dst, src);
7085   %}
7086 %}
7087 
7088 // unsigned version
7089 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7090   predicate (UseSSE>=2);
7091   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7092   ins_cost(200);
7093   format %{ "Jn$cop   skip\n\t"
7094             "MOVSD  $dst,$src\t# float\n"
7095       "skip:" %}
7096   ins_encode %{
7097     Label skip;
7098     // Invert sense of branch from sense of CMOV
7099     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7100     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7101     __ bind(skip);
7102   %}
7103   ins_pipe( pipe_slow );
7104 %}
7105 
7106 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7107   predicate (UseSSE>=2);
7108   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7109   ins_cost(200);
7110   expand %{
7111     fcmovD_regU(cop, cr, dst, src);
7112   %}
7113 %}
7114 
7115 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7116   predicate(VM_Version::supports_cmov() );  // only when the CPU reports CMOV support
7117   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));  // conditional move of a long (signed-compare operands)
7118   ins_cost(200);
7119   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7120             "CMOV$cop $dst.hi,$src.hi" %}
7121   opcode(0x0F,0x40);
7122   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );  // two conditional moves: low halves first, then high halves
7123   ins_pipe( pipe_cmov_reg_long );
7124 %}
7125 
7126 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7127   predicate(VM_Version::supports_cmov() );  // only when the CPU reports CMOV support
7128   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));  // same as cmovL_reg but for unsigned-compare operands
7129   ins_cost(200);
7130   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7131             "CMOV$cop $dst.hi,$src.hi" %}
7132   opcode(0x0F,0x40);
7133   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );  // two conditional moves: low halves first, then high halves
7134   ins_pipe( pipe_cmov_reg_long );
7135 %}
7136 
7137 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7138   predicate(VM_Version::supports_cmov() );  // only when the CPU reports CMOV support
7139   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));  // same CMoveL shape, but for cmpOpUCF / eFlagsRegUCF operands
7140   ins_cost(200);  // no encoding of its own: the expand rule below reuses cmovL_regU
7141   expand %{
7142     cmovL_regU(cop, cr, dst, src);
7143   %}
7144 %}
7145 
7146 //----------Arithmetic Instructions--------------------------------------------
7147 //----------Addition Instructions----------------------------------------------
7148 
7149 // Integer Addition Instructions
7150 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7151   match(Set dst (AddI dst src));  // register += register; dst is both input and result
7152   effect(KILL cr);  // the flags register is clobbered
7153 
7154   size(2);  // declared encoding size
7155   format %{ "ADD    $dst,$src" %}
7156   opcode(0x03);  // primary opcode for the ADD shown in the format
7157   ins_encode( OpcP, RegReg( dst, src) );
7158   ins_pipe( ialu_reg_reg );
7159 %}
7160 
7161 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7162   match(Set dst (AddI dst src));  // register += integer immediate
7163   effect(KILL cr);  // the flags register is clobbered
7164 
7165   format %{ "ADD    $dst,$src" %}
7166   opcode(0x81, 0x00); /* /0 id */
7167   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );  // NOTE(review): presumably picks an 8-bit or 32-bit immediate form, hence no fixed size() - confirm in the encoding classes
7168   ins_pipe( ialu_reg );
7169 %}
7170 
7171 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7172   predicate(UseIncDec);  // only when the UseIncDec flag is set
7173   match(Set dst (AddI dst src));  // add of the immI1 constant, emitted as INC
7174   effect(KILL cr);  // the flags register is clobbered
7175 
7176   size(1);  // declared encoding size: a single byte
7177   format %{ "INC    $dst" %}
7178   opcode(0x40); /* base opcode; the register number is added by Opc_plus below */
7179   ins_encode( Opc_plus( primary, dst ) );
7180   ins_pipe( ialu_reg );
7181 %}
7182 
7183 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7184   match(Set dst (AddI src0 src1));  // three-operand add: dst may differ from src0; no flags KILL is declared here, unlike the ADD rules
7185   ins_cost(110);
7186 
7187   format %{ "LEA    $dst,[$src0 + $src1]" %}
7188   opcode(0x8D); /* 0x8D /r */
7189   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7190   ins_pipe( ialu_reg_reg );
7191 %}
7192 
7193 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7194   match(Set dst (AddP src0 src1));  // pointer plus integer immediate via LEA; no flags KILL is declared here
7195   ins_cost(110);
7196 
7197   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7198   opcode(0x8D); /* 0x8D /r */
7199   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7200   ins_pipe( ialu_reg_reg );
7201 %}
7202 
7203 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7204   predicate(UseIncDec);  // only when the UseIncDec flag is set
7205   match(Set dst (AddI dst src));  // add of the immI_M1 constant, emitted as DEC
7206   effect(KILL cr);  // the flags register is clobbered
7207 
7208   size(1);  // declared encoding size: a single byte
7209   format %{ "DEC    $dst" %}
7210   opcode(0x48); /* base opcode; the register number is added by Opc_plus below */
7211   ins_encode( Opc_plus( primary, dst ) );
7212   ins_pipe( ialu_reg );
7213 %}
7214 
7215 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7216   match(Set dst (AddP dst src));  // pointer += integer register
7217   effect(KILL cr);  // the flags register is clobbered
7218 
7219   size(2);  // declared encoding size
7220   format %{ "ADD    $dst,$src" %}
7221   opcode(0x03);  // same opcode and encoding classes as addI_eReg
7222   ins_encode( OpcP, RegReg( dst, src) );
7223   ins_pipe( ialu_reg_reg );
7224 %}
7225 
7226 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7227   match(Set dst (AddP dst src));  // pointer += integer immediate
7228   effect(KILL cr);  // the flags register is clobbered
7229 
7230   format %{ "ADD    $dst,$src" %}
7231   opcode(0x81,0x00); /* Opcode 81 /0 id */
7232   // ins_encode( RegImm( dst, src) );  (older encoding, left disabled)
7233   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );  // same encoding classes as addI_eReg_imm
7234   ins_pipe( ialu_reg );
7235 %}
7236 
7237 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7238   match(Set dst (AddI dst (LoadI src)));  // folds the int load into the ADD: register += memory
7239   effect(KILL cr);  // the flags register is clobbered
7240 
7241   ins_cost(125);
7242   format %{ "ADD    $dst,$src" %}
7243   opcode(0x03);
7244   ins_encode( OpcP, RegMem( dst, src) );
7245   ins_pipe( ialu_reg_mem );
7246 %}
7247 
7248 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7249   match(Set dst (StoreI dst (AddI (LoadI dst) src)));  // load-add-store on one memory operand, matched as a single memory += register
7250   effect(KILL cr);  // the flags register is clobbered
7251 
7252   ins_cost(150);
7253   format %{ "ADD    $dst,$src" %}
7254   opcode(0x01);  /* Opcode 01 /r */
7255   ins_encode( OpcP, RegMem( src, dst ) );  // the register operand is passed first, the memory operand second
7256   ins_pipe( ialu_mem_reg );
7257 %}
7258 
7259 // Add Memory with Immediate: load-add-store on one memory operand, matched as a single memory += immediate
7260 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7261   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7262   effect(KILL cr);  // the flags register is clobbered
7263 
7264   ins_cost(125);
7265   format %{ "ADD    $dst,$src" %}
7266   opcode(0x81);               /* Opcode 81 /0 id */
7267   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );  // the /0 opcode extension is passed explicitly to RMopc_Mem
7268   ins_pipe( ialu_mem_imm );
7269 %}
7270 
7271 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7272   match(Set dst (StoreI dst (AddI (LoadI dst) src)));  // memory += immI1 constant, emitted as INC on memory; no UseIncDec predicate here, unlike incI_eReg
7273   effect(KILL cr);  // the flags register is clobbered
7274 
7275   ins_cost(125);
7276   format %{ "INC    $dst" %}
7277   opcode(0xFF);               /* Opcode FF /0 */
7278   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7279   ins_pipe( ialu_mem_imm );
7280 %}
7281 
7282 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7283   match(Set dst (StoreI dst (AddI (LoadI dst) src)));  // memory += immI_M1 constant, emitted as DEC on memory; no UseIncDec predicate here, unlike decI_eReg
7284   effect(KILL cr);  // the flags register is clobbered
7285 
7286   ins_cost(125);
7287   format %{ "DEC    $dst" %}
7288   opcode(0xFF);               /* Opcode FF /1 */
7289   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7290   ins_pipe( ialu_mem_imm );
7291 %}
7292 
7293 
7294 instruct checkCastPP( eRegP dst ) %{
7295   match(Set dst (CheckCastPP dst));  // in-place on dst; matched only so the node has a rule
7296 
7297   size(0);  // emits no bytes
7298   format %{ "#checkcastPP of $dst" %}
7299   ins_encode( /*empty encoding*/ );
7300   ins_pipe( empty );
7301 %}
7302 
7303 instruct castPP( eRegP dst ) %{
7304   match(Set dst (CastPP dst));  // in-place on dst; the encoding below is empty
7305   format %{ "#castPP of $dst" %}
7306   ins_encode( /*empty encoding*/ );
7307   ins_pipe( empty );
7308 %}
7309 
7310 instruct castII( rRegI dst ) %{
7311   match(Set dst (CastII dst));  // in-place on dst; the encoding below is empty
7312   format %{ "#castII of $dst" %}
7313   ins_encode( /*empty encoding*/ );
7314   ins_cost(0);  // explicitly costed as free
7315   ins_pipe( empty );
7316 %}
7317 
7318 
7319 // Load-locked - same as a regular pointer load when used with compare-swap
7320 instruct loadPLocked(eRegP dst, memory mem) %{
7321   match(Set dst (LoadPLocked mem));
7322 
7323   ins_cost(125);
7324   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7325   opcode(0x8B);  // plain MOV register-from-memory; no lock prefix is emitted
7326   ins_encode( OpcP, RegMem(dst,mem));
7327   ins_pipe( ialu_reg_mem );
7328 %}
7329 
7330 // Conditional-store of the updated heap-top.
7331 // Used during allocation of the shared heap.
7332 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7333 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7334   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));  // the result of this rule is the flags register
7335   // EAX is killed if there is contention, but then it's also unused.
7336   // In the common case of no contention, EAX holds the new oop address.
7337   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7338   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );  // lock prefix, then opcode bytes 0F B1 for the CMPXCHG named in the format
7339   ins_pipe( pipe_cmpxchg );
7340 %}
7341 
7342 // Conditional-store of an int value.
7343 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7344 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7345   match(Set cr (StoreIConditional mem (Binary oldval newval)));  // the result of this rule is the flags register
7346   effect(KILL oldval);  // the expected-value register (EAX operand class) is declared clobbered
7347   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7348   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );  // lock prefix, then opcode bytes 0F B1 for the CMPXCHG named in the format
7349   ins_pipe( pipe_cmpxchg );
7350 %}
7351 
7352 // Conditional-store of a long value.
7353 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7354 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7355   match(Set cr (StoreLConditional mem (Binary oldval newval)));  // the result of this rule is the flags register
7356   effect(KILL oldval);  // the expected-value register pair is declared clobbered
7357   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7358             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7359             "XCHG   EBX,ECX"
7360   %}
7361   ins_encode %{
7362     // Note: we need to swap rbx, and rcx before and after the
7363     //       cmpxchg8 instruction because the instruction uses
7364     //       rcx as the high order word of the new value to store but
7365     //       our register encoding uses rbx.
7366     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));  // swap into the order the instruction expects
7367     __ lock();  // lock prefix applies to the cmpxchg8 that follows
7368     __ cmpxchg8($mem$$Address);
7369     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));  // swap back to the allocator's order
7370   %}
7371   ins_pipe( pipe_cmpxchg );
7372 %}
7373 
7374 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7375 
7376 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7377   predicate(VM_Version::supports_cx8());  // only when the CPU reports 8-byte compare-exchange support
7378   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));  // strong CAS ...
7379   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));  // ... and weak CAS share this rule
7380   effect(KILL cr, KILL oldval);  // flags and the expected-value register pair are clobbered
7381   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7382             "MOV    $res,0\n\t"
7383             "JNE,s  fail\n\t"
7384             "MOV    $res,1\n"
7385           "fail:" %}
7386   ins_encode( enc_cmpxchg8(mem_ptr),
7387               enc_flags_ne_to_boolean(res) );  // per the format: res ends up 1 on success, 0 on failure
7388   ins_pipe( pipe_cmpxchg );
7389 %}
7390 
7391 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7392   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));  // strong CAS ...
7393   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));  // ... and weak CAS share this rule
7394   effect(KILL cr, KILL oldval);  // flags and the expected-value register are clobbered
7395   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7396             "MOV    $res,0\n\t"
7397             "JNE,s  fail\n\t"
7398             "MOV    $res,1\n"
7399           "fail:" %}
7400   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );  // per the format: res ends up 1 on success, 0 on failure
7401   ins_pipe( pipe_cmpxchg );
7402 %}
7403 
7404 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7405   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));  // strong CAS ...
7406   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));  // ... and weak CAS share this rule
7407   effect(KILL cr, KILL oldval);  // flags and the expected-value register are clobbered
7408   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7409             "MOV    $res,0\n\t"
7410             "JNE,s  fail\n\t"
7411             "MOV    $res,1\n"
7412           "fail:" %}
7413   ins_encode( enc_cmpxchgb(mem_ptr),
7414               enc_flags_ne_to_boolean(res) );  // byte-sized compare-exchange; per the format res ends up 1 on success, 0 on failure
7415   ins_pipe( pipe_cmpxchg );
7416 %}
7417 
7418 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7419   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));  // strong CAS ...
7420   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));  // ... and weak CAS share this rule
7421   effect(KILL cr, KILL oldval);  // flags and the expected-value register are clobbered
7422   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7423             "MOV    $res,0\n\t"
7424             "JNE,s  fail\n\t"
7425             "MOV    $res,1\n"
7426           "fail:" %}
7427   ins_encode( enc_cmpxchgw(mem_ptr),
7428               enc_flags_ne_to_boolean(res) );  // word-sized compare-exchange; per the format res ends up 1 on success, 0 on failure
7429   ins_pipe( pipe_cmpxchg );
7430 %}
7431 
7432 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7433   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));  // strong CAS ...
7434   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));  // ... and weak CAS share this rule
7435   effect(KILL cr, KILL oldval);  // flags and the expected-value register are clobbered
7436   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7437             "MOV    $res,0\n\t"
7438             "JNE,s  fail\n\t"
7439             "MOV    $res,1\n"
7440           "fail:" %}
7441   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );  // per the format: res ends up 1 on success, 0 on failure
7442   ins_pipe( pipe_cmpxchg );
7443 %}
7444 
7445 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7446   predicate(VM_Version::supports_cx8());  // only when the CPU reports 8-byte compare-exchange support
7447   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));  // the result is delivered in oldval itself, so no boolean is materialized
7448   effect(KILL cr);  // the flags register is clobbered
7449   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7450   ins_encode( enc_cmpxchg8(mem_ptr) );
7451   ins_pipe( pipe_cmpxchg );
7452 %}
7453 
7454 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7455   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));  // the result is delivered in oldval itself, so no boolean is materialized
7456   effect(KILL cr);  // the flags register is clobbered
7457   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7458   ins_encode( enc_cmpxchg(mem_ptr) );
7459   ins_pipe( pipe_cmpxchg );
7460 %}
7461 
7462 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7463   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));  // byte variant; the result is delivered in oldval itself
7464   effect(KILL cr);  // the flags register is clobbered
7465   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7466   ins_encode( enc_cmpxchgb(mem_ptr) );
7467   ins_pipe( pipe_cmpxchg );
7468 %}
7469 
7470 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7471   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));  // short variant; the result is delivered in oldval itself
7472   effect(KILL cr);  // the flags register is clobbered
7473   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7474   ins_encode( enc_cmpxchgw(mem_ptr) );
7475   ins_pipe( pipe_cmpxchg );
7476 %}
7477 
7478 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7479   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));  // int variant; the result is delivered in oldval itself
7480   effect(KILL cr);  // the flags register is clobbered
7481   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7482   ins_encode( enc_cmpxchg(mem_ptr) );
7483   ins_pipe( pipe_cmpxchg );
7484 %}
7485 
7486 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7487   predicate(n->as_LoadStore()->result_not_used());  // only when nobody consumes the fetched value
7488   match(Set dummy (GetAndAddB mem add));  // the result goes to a dummy operand
7489   effect(KILL cr);  // the flags register is clobbered
7490   format %{ "ADDB  [$mem],$add" %}
7491   ins_encode %{
7492     __ lock();  // lock prefix for the add that follows
7493     __ addb($mem$$Address, $add$$constant);  // plain byte add of the immediate; no value is read back
7494   %}
7495   ins_pipe( pipe_cmpxchg );
7496 %}
7497 
7498 // Important to match to xRegI: only 8-bit regs.
7499 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7500   match(Set newval (GetAndAddB mem newval));  // newval is both the addend and the register that receives the result
7501   effect(KILL cr);  // the flags register is clobbered
7502   format %{ "XADDB  [$mem],$newval" %}
7503   ins_encode %{
7504     __ lock();  // lock prefix for the xadd that follows
7505     __ xaddb($mem$$Address, $newval$$Register);
7506   %}
7507   ins_pipe( pipe_cmpxchg );
7508 %}
7509 
7510 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7511   predicate(n->as_LoadStore()->result_not_used());  // only when nobody consumes the fetched value
7512   match(Set dummy (GetAndAddS mem add));  // the result goes to a dummy operand
7513   effect(KILL cr);  // the flags register is clobbered
7514   format %{ "ADDW  [$mem],$add" %}
7515   ins_encode %{
7516     __ lock();  // lock prefix for the add that follows
7517     __ addw($mem$$Address, $add$$constant);  // plain 16-bit add of the immediate; no value is read back
7518   %}
7519   ins_pipe( pipe_cmpxchg );
7520 %}
7521 
7522 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7523   match(Set newval (GetAndAddS mem newval));  // newval is both the addend and the register that receives the result
7524   effect(KILL cr);  // the flags register is clobbered
7525   format %{ "XADDW  [$mem],$newval" %}
7526   ins_encode %{
7527     __ lock();  // lock prefix for the xadd that follows
7528     __ xaddw($mem$$Address, $newval$$Register);
7529   %}
7530   ins_pipe( pipe_cmpxchg );
7531 %}
7532 
7533 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7534   predicate(n->as_LoadStore()->result_not_used());  // only when nobody consumes the fetched value
7535   match(Set dummy (GetAndAddI mem add));  // the result goes to a dummy operand
7536   effect(KILL cr);  // the flags register is clobbered
7537   format %{ "ADDL  [$mem],$add" %}
7538   ins_encode %{
7539     __ lock();  // lock prefix for the add that follows
7540     __ addl($mem$$Address, $add$$constant);  // plain 32-bit add of the immediate; no value is read back
7541   %}
7542   ins_pipe( pipe_cmpxchg );
7543 %}
7544 
7545 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7546   match(Set newval (GetAndAddI mem newval));  // newval is both the addend and the register that receives the result
7547   effect(KILL cr);  // the flags register is clobbered
7548   format %{ "XADDL  [$mem],$newval" %}
7549   ins_encode %{
7550     __ lock();  // lock prefix for the xadd that follows
7551     __ xaddl($mem$$Address, $newval$$Register);
7552   %}
7553   ins_pipe( pipe_cmpxchg );
7554 %}
7555 
7556 // Important to match to xRegI: only 8-bit regs.
7557 instruct xchgB( memory mem, xRegI newval) %{
7558   match(Set newval (GetAndSetB mem newval));  // newval supplies the stored value and receives the previous one; no flags operand is declared
7559   format %{ "XCHGB  $newval,[$mem]" %}
7560   ins_encode %{
7561     __ xchgb($newval$$Register, $mem$$Address);  // no explicit lock() call, unlike the xadd rules (XCHG with a memory operand is architecturally locked on x86)
7562   %}
7563   ins_pipe( pipe_cmpxchg );
7564 %}
7565 
7566 instruct xchgS( memory mem, rRegI newval) %{
7567   match(Set newval (GetAndSetS mem newval));  // newval supplies the stored value and receives the previous one; no flags operand is declared
7568   format %{ "XCHGW  $newval,[$mem]" %}
7569   ins_encode %{
7570     __ xchgw($newval$$Register, $mem$$Address);  // no explicit lock() call, unlike the xadd rules
7571   %}
7572   ins_pipe( pipe_cmpxchg );
7573 %}
7574 
7575 instruct xchgI( memory mem, rRegI newval) %{
7576   match(Set newval (GetAndSetI mem newval));  // newval supplies the stored value and receives the previous one; no flags operand is declared
7577   format %{ "XCHGL  $newval,[$mem]" %}
7578   ins_encode %{
7579     __ xchgl($newval$$Register, $mem$$Address);  // no explicit lock() call, unlike the xadd rules
7580   %}
7581   ins_pipe( pipe_cmpxchg );
7582 %}
7583 
7584 instruct xchgP( memory mem, pRegP newval) %{
7585   match(Set newval (GetAndSetP mem newval));  // pointer variant; uses the same 32-bit exchange as xchgI
7586   format %{ "XCHGL  $newval,[$mem]" %}
7587   ins_encode %{
7588     __ xchgl($newval$$Register, $mem$$Address);  // no explicit lock() call, unlike the xadd rules
7589   %}
7590   ins_pipe( pipe_cmpxchg );
7591 %}
7592 
7593 //----------Subtraction Instructions-------------------------------------------
7594 
7595 // Integer Subtraction Instructions
7596 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7597   match(Set dst (SubI dst src));  // register -= register; dst is both input and result
7598   effect(KILL cr);  // the flags register is clobbered
7599 
7600   size(2);  // declared encoding size
7601   format %{ "SUB    $dst,$src" %}
7602   opcode(0x2B);  // primary opcode for the SUB shown in the format
7603   ins_encode( OpcP, RegReg( dst, src) );
7604   ins_pipe( ialu_reg_reg );
7605 %}
7606 
7607 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7608   match(Set dst (SubI dst src));  // register -= integer immediate
7609   effect(KILL cr);  // the flags register is clobbered
7610 
7611   format %{ "SUB    $dst,$src" %}
7612   opcode(0x81,0x05);  /* Opcode 81 /5 */
7613   // ins_encode( RegImm( dst, src) );  (older encoding, left disabled)
7614   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );  // same encoding classes as addI_eReg_imm, with opcode extension /5
7615   ins_pipe( ialu_reg );
7616 %}
7617 
7618 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7619   match(Set dst (SubI dst (LoadI src)));  // folds the int load into the SUB: register -= memory
7620   effect(KILL cr);  // the flags register is clobbered
7621 
7622   ins_cost(125);
7623   format %{ "SUB    $dst,$src" %}
7624   opcode(0x2B);
7625   ins_encode( OpcP, RegMem( dst, src) );
7626   ins_pipe( ialu_reg_mem );
7627 %}
7628 
7629 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7630   match(Set dst (StoreI dst (SubI (LoadI dst) src)));  // load-subtract-store on one memory operand, matched as a single memory -= register
7631   effect(KILL cr);  // the flags register is clobbered
7632 
7633   ins_cost(150);
7634   format %{ "SUB    $dst,$src" %}
7635   opcode(0x29);  /* Opcode 29 /r */
7636   ins_encode( OpcP, RegMem( src, dst ) );  // the register operand is passed first, the memory operand second
7637   ins_pipe( ialu_mem_reg );
7638 %}
7639 
7640 // Subtract from a pointer: matches pointer + (0 - int) and emits a single SUB
7641 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7642   match(Set dst (AddP dst (SubI zero src)));  // the zero operand exists only to match the negation shape
7643   effect(KILL cr);  // the flags register is clobbered
7644 
7645   size(2);  // declared encoding size
7646   format %{ "SUB    $dst,$src" %}
7647   opcode(0x2B);  // same opcode and encoding classes as subI_eReg
7648   ins_encode( OpcP, RegReg( dst, src) );
7649   ins_pipe( ialu_reg_reg );
7650 %}
7651 
7652 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7653   match(Set dst (SubI zero dst));  // 0 - dst, emitted as an in-place NEG
7654   effect(KILL cr);  // the flags register is clobbered
7655 
7656   size(2);  // declared encoding size
7657   format %{ "NEG    $dst" %}
7658   opcode(0xF7,0x03);  // Opcode F7 /3
7659   ins_encode( OpcP, RegOpc( dst ) );
7660   ins_pipe( ialu_reg );
7661 %}
7662 
7663 //----------Multiplication/Division Instructions-------------------------------
7664 // Integer Multiplication Instructions
7665 // Multiply Register: two-operand integer multiply, dst *= src
7666 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7667   match(Set dst (MulI dst src));
7668   effect(KILL cr);  // the flags register is clobbered
7669 
7670   size(3);  // declared encoding size
7671   ins_cost(300);
7672   format %{ "IMUL   $dst,$src" %}
7673   opcode(0xAF, 0x0F);  // primary AF, secondary 0F; the encoding emits the secondary byte first (OpcS before OpcP)
7674   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7675   ins_pipe( ialu_reg_reg_alu0 );
7676 %}
7677 
7678 // Multiply 32-bit Immediate: three-operand form, dst = src * imm
7679 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7680   match(Set dst (MulI src imm));  // dst need not be the same register as src
7681   effect(KILL cr);  // the flags register is clobbered
7682 
7683   ins_cost(300);
7684   format %{ "IMUL   $dst,$src,$imm" %}
7685   opcode(0x69);  /* 69 /r id */
7686   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7687   ins_pipe( ialu_reg_reg_alu0 );
7688 %}
7689 
7690 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7691   match(Set dst src);  // materializes an immL32 constant into the low-word-only long operand class
7692   effect(KILL cr);  // the flags register is declared clobbered
7693 
7694   // Note that this is artificially increased to make it more expensive than loadConL
7695   ins_cost(250);
7696   format %{ "MOV    EAX,$src\t// low word only" %}
7697   opcode(0xB8);
7698   ins_encode( LdImmL_Lo(dst, src) );
7699   ins_pipe( ialu_reg_fat );
7700 %}
7701 
7702 // Multiply by 32-bit Immediate, taking the shifted high order results
7703 //  (special case for shift by 32)
7704 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7705   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));  // int result of ((long)src1 * src2) >> cnt
7706   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7707              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7708              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );  // the MulL's second input must be a long constant within the jint range
7709   effect(USE src1, KILL cr);
7710 
7711   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7712   ins_cost(0*100 + 1*400 - 150);
7713   format %{ "IMUL   EDX:EAX,$src1" %}
7714   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );  // same encoding class as mulI_imm_RShift_high
7715   ins_pipe( pipe_slow );
7716 %}
7717 
7718 // Multiply by 32-bit Immediate, taking the shifted high order results (general shift counts, operand class immI_32_63)
7719 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7720   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));  // int result of ((long)src1 * src2) >> cnt
7721   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7722              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7723              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );  // the MulL's second input must be a long constant within the jint range
7724   effect(USE src1, KILL cr);
7725 
7726   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7727   ins_cost(1*100 + 1*400 - 150);  // 100 more than mulI_imm_high; the format shows an extra SAR
7728   format %{ "IMUL   EDX:EAX,$src1\n\t"
7729             "SAR    EDX,$cnt-32" %}
7730   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7731   ins_pipe( pipe_slow );
7732 %}
7733 
7734 // Multiply Memory 32-bit Immediate: dst = (int loaded from src) * imm
7735 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7736   match(Set dst (MulI (LoadI src) imm));  // the load is folded into the multiply
7737   effect(KILL cr);  // the flags register is clobbered
7738 
7739   ins_cost(300);
7740   format %{ "IMUL   $dst,$src,$imm" %}
7741   opcode(0x69);  /* 69 /r id */
7742   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7743   ins_pipe( ialu_reg_mem_alu0 );
7744 %}
7745 
7746 // Multiply Memory: dst *= (int loaded from src)
7747 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7748   match(Set dst (MulI dst (LoadI src)));  // the load is folded into the multiply
7749   effect(KILL cr);  // the flags register is clobbered
7750 
7751   ins_cost(350);
7752   format %{ "IMUL   $dst,$src" %}
7753   opcode(0xAF, 0x0F);  // same opcode pair as mulI_eReg
7754   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7755   ins_pipe( ialu_reg_mem_alu0 );
7756 %}
7757 
7758 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
7759 %{
7760   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));  // dst = dst*src1 + src2*src3, built from the three rules in the expand below
7761   effect(KILL cr, KILL src2);  // src2 is overwritten by the second multiply, so it is declared killed
7762 
7763   expand %{ mulI_eReg(dst, src1, cr);
7764            mulI_eReg(src2, src3, cr);
7765            addI_eReg(dst, src2, cr); %}
7766 %}
7767 
7768 // Multiply Register Int to Long: signed 32x32 -> 64-bit product of two sign-extended ints
7769 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7770   // Basic Idea: long = (long)int * (long)int
7771   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7772   effect(DEF dst, USE src, USE src1, KILL flags);  // dst is written without being read; both sources are only read
7773 
7774   ins_cost(300);
7775   format %{ "IMUL   $dst,$src1" %}
7776 
7777   ins_encode( long_int_multiply( dst, src1 ) );  // src is not passed: its operand class pins it to EAX
7778   ins_pipe( ialu_reg_reg_alu0 );
7779 %}
7780 
7781 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7782   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7783   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));  // both inputs are masked by the same mask operand, i.e. treated as unsigned 32-bit values
7784   effect(KILL flags);  // the flags register is clobbered
7785 
7786   ins_cost(300);
7787   format %{ "MUL    $dst,$src1" %}
7788 
7789   ins_encode( long_uint_multiply(dst, src1) );  // unsigned counterpart of the encoding used by mulI2L
7790   ins_pipe( ialu_reg_reg_alu0 );
7791 %}
7792 
7793 // Multiply Register Long: general 64x64 -> 64-bit multiply, dst *= src
7794 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7795   match(Set dst (MulL dst src));
7796   effect(KILL cr, TEMP tmp);  // flags are clobbered; tmp is a scratch register for the cross products
7797   ins_cost(4*100+3*400);
7798 // Basic idea: lo(result) = lo(x_lo * y_lo)
7799 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7800   format %{ "MOV    $tmp,$src.lo\n\t"
7801             "IMUL   $tmp,EDX\n\t"
7802             "MOV    EDX,$src.hi\n\t"
7803             "IMUL   EDX,EAX\n\t"
7804             "ADD    $tmp,EDX\n\t"
7805             "MUL    EDX:EAX,$src.lo\n\t"
7806             "ADD    EDX,$tmp" %}
7807   ins_encode( long_multiply( dst, src, tmp ) );
7808   ins_pipe( pipe_slow );
7809 %}
7810 
7811 // Multiply Register Long where the left operand's high 32 bits are zero
7812 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7813   predicate(is_operand_hi32_zero(n->in(1)));  // first input of the MulL node
7814   match(Set dst (MulL dst src));
7815   effect(KILL cr, TEMP tmp);  // flags are clobbered; tmp is a scratch register for the one remaining cross product
7816   ins_cost(2*100+2*400);
7817 // Basic idea: lo(result) = lo(x_lo * y_lo)
7818 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7819   format %{ "MOV    $tmp,$src.hi\n\t"
7820             "IMUL   $tmp,EAX\n\t"
7821             "MUL    EDX:EAX,$src.lo\n\t"
7822             "ADD    EDX,$tmp" %}
7823   ins_encode %{
7824     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));  // tmp = high word of src (y_hi)
7825     __ imull($tmp$$Register, rax);  // tmp = lo(x_lo * y_hi)
7826     __ mull($src$$Register);  // EDX:EAX = x_lo * y_lo (unsigned)
7827     __ addl(rdx, $tmp$$Register);  // fold the cross product into the high word
7828   %}
7829   ins_pipe( pipe_slow );
7830 %}
7831 
7832 // Multiply Register Long where the right operand's high 32 bits are zero
7833 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7834   predicate(is_operand_hi32_zero(n->in(2)));  // second input of the MulL node
7835   match(Set dst (MulL dst src));
7836   effect(KILL cr, TEMP tmp);  // flags are clobbered; tmp is a scratch register for the one remaining cross product
7837   ins_cost(2*100+2*400);
7838 // Basic idea: lo(result) = lo(x_lo * y_lo)
7839 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7840   format %{ "MOV    $tmp,$src.lo\n\t"
7841             "IMUL   $tmp,EDX\n\t"
7842             "MUL    EDX:EAX,$src.lo\n\t"
7843             "ADD    EDX,$tmp" %}
7844   ins_encode %{
7845     __ movl($tmp$$Register, $src$$Register);  // tmp = low word of src (y_lo)
7846     __ imull($tmp$$Register, rdx);  // tmp = lo(x_hi * y_lo)
7847     __ mull($src$$Register);  // EDX:EAX = x_lo * y_lo (unsigned)
7848     __ addl(rdx, $tmp$$Register);  // fold the cross product into the high word
7849   %}
7850   ins_pipe( pipe_slow );
7851 %}
7852 
7853 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7854 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7855   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));  // both inputs of the MulL node
7856   match(Set dst (MulL dst src));
7857   effect(KILL cr);  // no TEMP needed: there is no cross product to accumulate
7858   ins_cost(1*400);
7859 // Basic idea: lo(result) = lo(x_lo * y_lo)
7860 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7861   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7862   ins_encode %{
7863     __ mull($src$$Register);  // a single unsigned multiply produces the whole result
7864   %}
7865   ins_pipe( pipe_slow );
7866 %}
7867 
7868 // Multiply Register Long by small constant (operand class immL_127)
7869 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7870   match(Set dst (MulL dst src));
7871   effect(KILL cr, TEMP tmp);  // flags are clobbered; tmp holds the high-word partial product
7872   ins_cost(2*100+2*400);
7873   size(12);  // declared encoding size
7874 // Basic idea: lo(result) = lo(src * EAX)
7875 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7876   format %{ "IMUL   $tmp,EDX,$src\n\t"
7877             "MOV    EDX,$src\n\t"
7878             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7879             "ADD    EDX,$tmp" %}
7880   ins_encode( long_multiply_con( dst, src, tmp ) );
7881   ins_pipe( pipe_slow );
7882 %}
7883 
7884 // Integer DIV with Register: quotient in EAX; the format shows a guard that skips IDIV when EAX is 0x80000000 and the divisor is -1
7885 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7886   match(Set rax (DivI rax div));
7887   effect(KILL rdx, KILL cr);  // EDX (the remainder half) is not a result here, so it is declared killed along with the flags
7888   size(26);  // declared size of the whole guarded sequence
7889   ins_cost(30*100+10*100);
7890   format %{ "CMP    EAX,0x80000000\n\t"
7891             "JNE,s  normal\n\t"
7892             "XOR    EDX,EDX\n\t"
7893             "CMP    ECX,-1\n\t"
7894             "JE,s   done\n"
7895     "normal: CDQ\n\t"
7896             "IDIV   $div\n\t"
7897     "done:"        %}
7898   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7899   ins_encode( cdq_enc, OpcP, RegOpc(div) );  // NOTE(review): cdq_enc presumably emits everything in the format up to and including CDQ - confirm in the encoding class
7900   ins_pipe( ialu_reg_reg_alu0 );
7901 %}
7902 
7903 // Divide Register Long: per the format, pushes both operands and calls SharedRuntime::ldiv
7904 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7905   match(Set dst (DivL src1 src2));
7906   effect( KILL cr, KILL cx, KILL bx );  // flags, ECX and EBX are declared clobbered by this rule
7907   ins_cost(10000);  // far more expensive than any inline rule in this section
7908   format %{ "PUSH   $src1.hi\n\t"
7909             "PUSH   $src1.lo\n\t"
7910             "PUSH   $src2.hi\n\t"
7911             "PUSH   $src2.lo\n\t"
7912             "CALL   SharedRuntime::ldiv\n\t"
7913             "ADD    ESP,16" %}
7914   ins_encode( long_div(src1,src2) );
7915   ins_pipe( pipe_slow );
7916 %}
7917 
7918 // Integer DIVMOD with Register, both quotient and mod results
7919 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7920   match(DivModI rax div);  // no Set: the node has two results
7921   effect(KILL cr);  // rdx is not in the KILL list here, unlike divI_eReg
7922   size(26);  // declared size of the whole guarded sequence
7923   ins_cost(30*100+10*100);
7924   format %{ "CMP    EAX,0x80000000\n\t"
7925             "JNE,s  normal\n\t"
7926             "XOR    EDX,EDX\n\t"
7927             "CMP    ECX,-1\n\t"
7928             "JE,s   done\n"
7929     "normal: CDQ\n\t"
7930             "IDIV   $div\n\t"
7931     "done:"        %}
7932   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7933   ins_encode( cdq_enc, OpcP, RegOpc(div) );  // same encoding as divI_eReg
7934   ins_pipe( pipe_slow );
7935 %}
7936 
7937 // Integer MOD with Register: remainder in EDX
7938 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7939   match(Set rdx (ModI rax div));
7940   effect(KILL rax, KILL cr);  // EAX (the quotient half) is not a result here, so it is declared killed along with the flags
7941 
7942   size(26);  // NOTE(review): same size and encoding as divI_eReg, so the format below looks abbreviated (it omits the guard shown there) - confirm
7943   ins_cost(300);
7944   format %{ "CDQ\n\t"
7945             "IDIV   $div" %}
7946   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7947   ins_encode( cdq_enc, OpcP, RegOpc(div) );  // same encoding as divI_eReg
7948   ins_pipe( ialu_reg_reg_alu0 );
7949 %}
7950 
7951 // Remainder Register Long: per the format, pushes both operands and calls SharedRuntime::lrem
7952 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7953   match(Set dst (ModL src1 src2));
7954   effect( KILL cr, KILL cx, KILL bx );  // flags, ECX and EBX are declared clobbered by this rule
7955   ins_cost(10000);  // far more expensive than any inline rule in this section
7956   format %{ "PUSH   $src1.hi\n\t"
7957             "PUSH   $src1.lo\n\t"
7958             "PUSH   $src2.hi\n\t"
7959             "PUSH   $src2.lo\n\t"
7960             "CALL   SharedRuntime::lrem\n\t"
7961             "ADD    ESP,16" %}
7962   ins_encode( long_mod(src1,src2) );
7963   ins_pipe( pipe_slow );
7964 %}
7965 
7966 // Divide Register Long (no special case since divisor != -1)
// Inline signed 64-bit divide of the long in dst (EDX:EAX per the format) by a
// 32-bit constant, done with unsigned 32-bit DIV instructions on |imm|.
// Three paths:
//   fast - high word is unsigned-below |imm|: a single DIV gives the quotient;
//   pos  - non-negative dividend: divide the high word first, then the
//          remainder:low word (schoolbook long division);
//   neg  - negative dividend: negate, divide as in 'pos', negate the quotient.
// The quotient is negated once more at the end when the constant is negative.
7967 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7968   match(Set dst (DivL dst imm));
7969   effect( TEMP tmp, TEMP tmp2, KILL cr );
7970   ins_cost(1000);
7971   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7972             "XOR    $tmp2,$tmp2\n\t"
7973             "CMP    $tmp,EDX\n\t"
7974             "JA,s   fast\n\t"
7975             "MOV    $tmp2,EAX\n\t"
7976             "MOV    EAX,EDX\n\t"
7977             "MOV    EDX,0\n\t"
7978             "JLE,s  pos\n\t"
7979             "LNEG   EAX : $tmp2\n\t"
7980             "DIV    $tmp # unsigned division\n\t"
7981             "XCHG   EAX,$tmp2\n\t"
7982             "DIV    $tmp\n\t"
7983             "LNEG   $tmp2 : EAX\n\t"
7984             "JMP,s  done\n"
7985     "pos:\n\t"
7986             "DIV    $tmp\n\t"
7987             "XCHG   EAX,$tmp2\n"
7988     "fast:\n\t"
7989             "DIV    $tmp\n"
7990     "done:\n\t"
7991             "MOV    EDX,$tmp2\n\t"
7992             "NEG    EDX:EAX # if $imm < 0" %}
7993   ins_encode %{
7994     int con = (int)$imm$$constant;
7995     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); // min_jint excluded, so -con below cannot overflow
7996     int pcon = (con > 0) ? con : -con; // |con|
7997     Label Lfast, Lpos, Ldone;
7998 
7999     __ movl($tmp$$Register, pcon);
8000     __ xorl($tmp2$$Register,$tmp2$$Register); // high word of the quotient defaults to 0 (fast path)
8001     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8002     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
8003 
8004     __ movl($tmp2$$Register, $dst$$Register); // save
8005     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8006     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8007     __ jccb(Assembler::lessEqual, Lpos); // result is positive
8008 
8009     // Negative dividend.
8010     // convert value to positive to use unsigned division
8011     __ lneg($dst$$Register, $tmp2$$Register);
8012     __ divl($tmp$$Register); // high word / |con|
8013     __ xchgl($dst$$Register, $tmp2$$Register); // tmp2 = high quotient, low register = low word of dividend
8014     __ divl($tmp$$Register); // (remainder : low word) / |con|
8015     // revert result back to negative
8016     __ lneg($tmp2$$Register, $dst$$Register);
8017     __ jmpb(Ldone);
8018 
8019     __ bind(Lpos);
8020     __ divl($tmp$$Register); // Use unsigned division
8021     __ xchgl($dst$$Register, $tmp2$$Register);
8022     // Fall through for final divide, tmp2 has 32 bit hi result
8023 
8024     __ bind(Lfast);
8025     // fast path: src is positive
8026     __ divl($tmp$$Register); // Use unsigned division
8027 
8028     __ bind(Ldone);
8029     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); // install high word of the quotient
8030     if (con < 0) {
8031       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // divided by |con|; fix the sign for a negative constant
8032     }
8033   %}
8034   ins_pipe( pipe_slow );
8035 %}
8036 
8037 // Remainder Register Long (remainder fit into 32 bits)
// Inline signed 64-bit remainder of the long in dst (EDX:EAX per the format)
// by a 32-bit constant, done with unsigned 32-bit DIV instructions on |imm|.
// Three paths:
//   fast - high word is unsigned-below |imm|: a single DIV gives the remainder;
//   pos  - non-negative dividend: divide the high word first, then the
//          remainder:low word;
//   neg  - negative dividend: negate, divide as in 'pos', negate the remainder.
// The 32-bit remainder ends up in EDX and is then sign-extended back into
// EDX:EAX. The sign of the constant is not consulted after taking |imm|.
8038 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
8039   match(Set dst (ModL dst imm));
8040   effect( TEMP tmp, TEMP tmp2, KILL cr );
8041   ins_cost(1000);
8042   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8043             "CMP    $tmp,EDX\n\t"
8044             "JA,s   fast\n\t"
8045             "MOV    $tmp2,EAX\n\t"
8046             "MOV    EAX,EDX\n\t"
8047             "MOV    EDX,0\n\t"
8048             "JLE,s  pos\n\t"
8049             "LNEG   EAX : $tmp2\n\t"
8050             "DIV    $tmp # unsigned division\n\t"
8051             "MOV    EAX,$tmp2\n\t"
8052             "DIV    $tmp\n\t"
8053             "NEG    EDX\n\t"
8054             "JMP,s  done\n"
8055     "pos:\n\t"
8056             "DIV    $tmp\n\t"
8057             "MOV    EAX,$tmp2\n"
8058     "fast:\n\t"
8059             "DIV    $tmp\n"
8060     "done:\n\t"
8061             "MOV    EAX,EDX\n\t"
8062             "SAR    EDX,31" %}
8063   ins_encode %{
8064     int con = (int)$imm$$constant;
8065     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); // min_jint excluded, so -con below cannot overflow
8066     int pcon = (con > 0) ? con : -con; // |con|
8067     Label  Lfast, Lpos, Ldone;
8068 
8069     __ movl($tmp$$Register, pcon);
8070     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8071     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
8072 
8073     __ movl($tmp2$$Register, $dst$$Register); // save
8074     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8075     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8076     __ jccb(Assembler::lessEqual, Lpos); // result is positive
8077 
8078     // Negative dividend.
8079     // convert value to positive to use unsigned division
8080     __ lneg($dst$$Register, $tmp2$$Register);
8081     __ divl($tmp$$Register); // high word / |con|; only the remainder is carried forward
8082     __ movl($dst$$Register, $tmp2$$Register); // low word of the dividend
8083     __ divl($tmp$$Register); // (remainder : low word) / |con|
8084     // revert remainder back to negative
8085     __ negl(HIGH_FROM_LOW($dst$$Register));
8086     __ jmpb(Ldone);
8087 
8088     __ bind(Lpos);
8089     __ divl($tmp$$Register);
8090     __ movl($dst$$Register, $tmp2$$Register);
8091     // Fall through for the final divide
8092     __ bind(Lfast);
8093     // fast path: src is positive
8094     __ divl($tmp$$Register);
8095 
8096     __ bind(Ldone);
8097     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); // remainder becomes the low word
8098     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8100   %}
8101   ins_pipe( pipe_slow );
8102 %}
8103 
8104 // Integer Shift Instructions
8105 // Shift Left by one
// 32-bit left shift of a register by the constant 1 (operand class immI1),
// using the short one-operand form D1 /4; dst is both source and destination.
8106 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8107   match(Set dst (LShiftI dst shift));
8108   effect(KILL cr);
8109 
8110   size(2); // no immediate byte is encoded for this form
8111   format %{ "SHL    $dst,$shift" %}
8112   opcode(0xD1, 0x4);  /* D1 /4 */
8113   ins_encode( OpcP, RegOpc( dst ) );
8114   ins_pipe( ialu_reg );
8115 %}
8116 
8117 // Shift Left by 8-bit immediate
// 32-bit left shift of a register by an 8-bit immediate count (C1 /4 ib);
// dst is both source and destination.
8118 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8119   match(Set dst (LShiftI dst shift));
8120   effect(KILL cr);
8121 
8122   size(3); // opcode + ModRM + imm8
8123   format %{ "SHL    $dst,$shift" %}
8124   opcode(0xC1, 0x4);  /* C1 /4 ib */
8125   ins_encode( RegOpcImm( dst, shift) );
8126   ins_pipe( ialu_reg );
8127 %}
8128 
8129 // Shift Left by variable
// 32-bit left shift of a register by a variable count (D3 /4); the count
// operand is constrained to the eCXRegI class.
8130 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8131   match(Set dst (LShiftI dst shift));
8132   effect(KILL cr);
8133 
8134   size(2);
8135   format %{ "SHL    $dst,$shift" %}
8136   opcode(0xD3, 0x4);  /* D3 /4 */
8137   ins_encode( OpcP, RegOpc( dst ) );
8138   ins_pipe( ialu_reg_reg );
8139 %}
8140 
8141 // Arithmetic shift right by one
// 32-bit arithmetic (sign-propagating) right shift of a register by the
// constant 1, using the short one-operand form D1 /7.
8142 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8143   match(Set dst (RShiftI dst shift));
8144   effect(KILL cr);
8145 
8146   size(2); // no immediate byte is encoded for this form
8147   format %{ "SAR    $dst,$shift" %}
8148   opcode(0xD1, 0x7);  /* D1 /7 */
8149   ins_encode( OpcP, RegOpc( dst ) );
8150   ins_pipe( ialu_reg );
8151 %}
8152 
8153 // Arithmetic shift right of a memory operand by one (load-shift-store)
// Read-modify-write form: matches a StoreI to dst of (LoadI dst) >> 1 and
// emits a single SAR on the memory operand (D1 /7). No size() is given because
// the encoded length depends on the addressing mode of dst.
8154 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8155   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8156   effect(KILL cr);
8157   format %{ "SAR    $dst,$shift" %}
8158   opcode(0xD1, 0x7);  /* D1 /7 */
8159   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8160   ins_pipe( ialu_mem_imm );
8161 %}
8162 
8163 // Arithmetic Shift Right by 8-bit immediate
// 32-bit arithmetic (sign-propagating) right shift of a register by an 8-bit
// immediate count (C1 /7 ib); dst is both source and destination.
8164 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8165   match(Set dst (RShiftI dst shift));
8166   effect(KILL cr);
8167 
8168   size(3); // opcode + ModRM + imm8
8169   format %{ "SAR    $dst,$shift" %}
8170   opcode(0xC1, 0x7);  /* C1 /7 ib */
8171   ins_encode( RegOpcImm( dst, shift ) );
8172   ins_pipe( ialu_reg ); // register destination: same pipe class as salI_eReg_imm / shrI_eReg_imm
8173 %}
8174 
8175 // Arithmetic Shift Right of a memory operand by 8-bit immediate (load-shift-store)
// Read-modify-write form: matches a StoreI to dst of (LoadI dst) >> imm8 and
// emits a single SAR on the memory operand (C1 /7 ib). No size() is given
// because the encoded length depends on the addressing mode of dst.
8176 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8177   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8178   effect(KILL cr);
8179 
8180   format %{ "SAR    $dst,$shift" %}
8181   opcode(0xC1, 0x7);  /* C1 /7 ib */
8182   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8183   ins_pipe( ialu_mem_imm );
8184 %}
8185 
8186 // Arithmetic Shift Right by variable
// 32-bit arithmetic right shift of a register by a variable count (D3 /7);
// the count operand is constrained to the eCXRegI class.
8187 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8188   match(Set dst (RShiftI dst shift));
8189   effect(KILL cr);
8190 
8191   size(2);
8192   format %{ "SAR    $dst,$shift" %}
8193   opcode(0xD3, 0x7);  /* D3 /7 */
8194   ins_encode( OpcP, RegOpc( dst ) );
8195   ins_pipe( ialu_reg_reg );
8196 %}
8197 
8198 // Logical shift right by one
// 32-bit logical (zero-filling) right shift of a register by the constant 1,
// using the short one-operand form D1 /5.
8199 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8200   match(Set dst (URShiftI dst shift));
8201   effect(KILL cr);
8202 
8203   size(2); // no immediate byte is encoded for this form
8204   format %{ "SHR    $dst,$shift" %}
8205   opcode(0xD1, 0x5);  /* D1 /5 */
8206   ins_encode( OpcP, RegOpc( dst ) );
8207   ins_pipe( ialu_reg );
8208 %}
8209 
8210 // Logical Shift Right by 8-bit immediate
// 32-bit logical right shift of a register by an 8-bit immediate count
// (C1 /5 ib); dst is both source and destination.
8211 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8212   match(Set dst (URShiftI dst shift));
8213   effect(KILL cr);
8214 
8215   size(3); // opcode + ModRM + imm8
8216   format %{ "SHR    $dst,$shift" %}
8217   opcode(0xC1, 0x5);  /* C1 /5 ib */
8218   ins_encode( RegOpcImm( dst, shift) );
8219   ins_pipe( ialu_reg );
8220 %}
8221 
8222 
8223 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8224 // This idiom is used by the compiler for the i2b bytecode.
// Recognizes (src << 24) >> 24 with an arithmetic right shift, i.e. sign
// extension of the low byte, and emits one MOVSX via movsbl instead of two
// shifts. No flags operand is declared for this instruction.
8225 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8226   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8227 
8228   size(3);
8229   format %{ "MOVSX  $dst,$src :8" %}
8230   ins_encode %{
8231     __ movsbl($dst$$Register, $src$$Register);
8232   %}
8233   ins_pipe(ialu_reg_reg);
8234 %}
8235 
8236 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8237 // This idiom is used by the compiler for the i2s bytecode.
// Recognizes (src << 16) >> 16 with an arithmetic right shift, i.e. sign
// extension of the low 16 bits, and emits one MOVSX via movswl instead of two
// shifts. No flags operand is declared for this instruction.
8238 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8239   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8240 
8241   size(3);
8242   format %{ "MOVSX  $dst,$src :16" %}
8243   ins_encode %{
8244     __ movswl($dst$$Register, $src$$Register);
8245   %}
8246   ins_pipe(ialu_reg_reg);
8247 %}
8248 
8249 
8250 // Logical Shift Right by variable
// 32-bit logical right shift of a register by a variable count (D3 /5);
// the count operand is constrained to the eCXRegI class.
8251 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8252   match(Set dst (URShiftI dst shift));
8253   effect(KILL cr);
8254 
8255   size(2);
8256   format %{ "SHR    $dst,$shift" %}
8257   opcode(0xD3, 0x5);  /* D3 /5 */
8258   ins_encode( OpcP, RegOpc( dst ) );
8259   ins_pipe( ialu_reg_reg );
8260 %}
8261 
8262 
8263 //----------Logical Instructions-----------------------------------------------
8264 //----------Integer Logical Instructions---------------------------------------
8265 // And Instructions
8266 // And Register with Register
// 32-bit bitwise AND, register with register; dst is both the first input and
// the destination (two-address form), flags are clobbered.
8267 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8268   match(Set dst (AndI dst src));
8269   effect(KILL cr);
8270 
8271   size(2);
8272   format %{ "AND    $dst,$src" %}
8273   opcode(0x23);
8274   ins_encode( OpcP, RegReg( dst, src) );
8275   ins_pipe( ialu_reg_reg );
8276 %}
8277 
8278 // And Register with Immediate
// 32-bit bitwise AND of a register with an integer constant (81 /4).
// No size() is given: the Con8or32 encoding name suggests the immediate is
// emitted as either 8 or 32 bits - presumably chosen from the constant's value;
// confirm against the OpcSErm/Con8or32 encoding classes.
8279 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8280   match(Set dst (AndI dst src));
8281   effect(KILL cr);
8282 
8283   format %{ "AND    $dst,$src" %}
8284   opcode(0x81,0x04);  /* Opcode 81 /4 */
8285   // ins_encode( RegImm( dst, src) );
8286   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8287   ins_pipe( ialu_reg );
8288 %}
8289 
8290 // And Register with Memory
// 32-bit bitwise AND of a register with a value loaded from memory; the LoadI
// is folded into the AND's memory operand.
8291 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8292   match(Set dst (AndI dst (LoadI src)));
8293   effect(KILL cr);
8294 
8295   ins_cost(125);
8296   format %{ "AND    $dst,$src" %}
8297   opcode(0x23);
8298   ins_encode( OpcP, RegMem( dst, src) );
8299   ins_pipe( ialu_reg_mem );
8300 %}
8301 
8302 // And Memory with Register
// Read-modify-write AND: matches a StoreI to dst of ((LoadI dst) & src) and
// emits a single AND with a memory destination (21 /r).
8303 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8304   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8305   effect(KILL cr);
8306 
8307   ins_cost(150);
8308   format %{ "AND    $dst,$src" %}
8309   opcode(0x21);  /* Opcode 21 /r */
8310   ins_encode( OpcP, RegMem( src, dst ) ); // register operand first, memory operand second
8311   ins_pipe( ialu_mem_reg );
8312 %}
8313 
8314 // And Memory with Immediate
// Read-modify-write AND with a constant: matches a StoreI to dst of
// ((LoadI dst) & src) and emits a single AND on the memory operand (81 /4).
8315 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8316   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8317   effect(KILL cr);
8318 
8319   ins_cost(125);
8320   format %{ "AND    $dst,$src" %}
8321   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8322   // ins_encode( MemImm( dst, src) );
8323   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8324   ins_pipe( ialu_mem_imm );
8325 %}
8326 
8327 // BMI1 instructions
// BMI1 ANDN, register form: matches (src1 ^ -1) & src2, i.e. ~src1 & src2, and
// emits one three-operand andnl. Only selected when UseBMI1Instructions is set.
8328 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8329   match(Set dst (AndI (XorI src1 minus_1) src2));
8330   predicate(UseBMI1Instructions);
8331   effect(KILL cr);
8332 
8333   format %{ "ANDNL  $dst, $src1, $src2" %}
8334 
8335   ins_encode %{
8336     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8337   %}
8338   ins_pipe(ialu_reg);
8339 %}
8340 
// BMI1 ANDN, memory form: matches (src1 ^ -1) & LoadI(src2), i.e.
// ~src1 & [src2], folding the load into andnl's address operand.
// Only selected when UseBMI1Instructions is set.
8341 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8342   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8343   predicate(UseBMI1Instructions);
8344   effect(KILL cr);
8345 
8346   ins_cost(125);
8347   format %{ "ANDNL  $dst, $src1, $src2" %}
8348 
8349   ins_encode %{
8350     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8351   %}
8352   ins_pipe(ialu_reg_mem);
8353 %}
8354 
// BMI1 BLSI, register form: matches (0 - src) & src, which isolates the lowest
// set bit of src, and emits one blsil. Only selected when UseBMI1Instructions
// is set.
8355 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8356   match(Set dst (AndI (SubI imm_zero src) src));
8357   predicate(UseBMI1Instructions);
8358   effect(KILL cr);
8359 
8360   format %{ "BLSIL  $dst, $src" %}
8361 
8362   ins_encode %{
8363     __ blsil($dst$$Register, $src$$Register);
8364   %}
8365   ins_pipe(ialu_reg);
8366 %}
8367 
// BMI1 BLSI, memory form: matches (0 - LoadI(src)) & LoadI(src), where both
// loads name the same memory operand, and emits one blsil with an address
// operand. Only selected when UseBMI1Instructions is set.
8368 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8369   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8370   predicate(UseBMI1Instructions);
8371   effect(KILL cr);
8372 
8373   ins_cost(125);
8374   format %{ "BLSIL  $dst, $src" %}
8375 
8376   ins_encode %{
8377     __ blsil($dst$$Register, $src$$Address);
8378   %}
8379   ins_pipe(ialu_reg_mem);
8380 %}
8381 
// BMI1 BLSMSK, register form: matches (src + -1) ^ src, which yields a mask of
// all bits up to and including the lowest set bit of src, and emits one
// blsmskl. Only selected when UseBMI1Instructions is set.
8382 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8383 %{
8384   match(Set dst (XorI (AddI src minus_1) src));
8385   predicate(UseBMI1Instructions);
8386   effect(KILL cr);
8387 
8388   format %{ "BLSMSKL $dst, $src" %}
8389 
8390   ins_encode %{
8391     __ blsmskl($dst$$Register, $src$$Register);
8392   %}
8393 
8394   ins_pipe(ialu_reg);
8395 %}
8396 
// BMI1 BLSMSK, memory form: matches (LoadI(src) + -1) ^ LoadI(src), where both
// loads name the same memory operand, and emits one blsmskl with an address
// operand. Only selected when UseBMI1Instructions is set.
8397 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8398 %{
8399   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8400   predicate(UseBMI1Instructions);
8401   effect(KILL cr);
8402 
8403   ins_cost(125);
8404   format %{ "BLSMSKL $dst, $src" %}
8405 
8406   ins_encode %{
8407     __ blsmskl($dst$$Register, $src$$Address);
8408   %}
8409 
8410   ins_pipe(ialu_reg_mem);
8411 %}
8412 
// BMI1 BLSR, register form: matches (src + -1) & src, which clears the lowest
// set bit of src, and emits one blsrl. Only selected when UseBMI1Instructions
// is set.
8413 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8414 %{
8415   match(Set dst (AndI (AddI src minus_1) src) );
8416   predicate(UseBMI1Instructions);
8417   effect(KILL cr);
8418 
8419   format %{ "BLSRL  $dst, $src" %}
8420 
8421   ins_encode %{
8422     __ blsrl($dst$$Register, $src$$Register);
8423   %}
8424 
8425   ins_pipe(ialu_reg);
8426 %}
8427 
// BMI1 BLSR, memory form: matches (LoadI(src) + -1) & LoadI(src), where both
// loads name the same memory operand, and emits one blsrl with an address
// operand. Only selected when UseBMI1Instructions is set.
8428 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8429 %{
8430   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8431   predicate(UseBMI1Instructions);
8432   effect(KILL cr);
8433 
8434   ins_cost(125);
8435   format %{ "BLSRL  $dst, $src" %}
8436 
8437   ins_encode %{
8438     __ blsrl($dst$$Register, $src$$Address);
8439   %}
8440 
8441   ins_pipe(ialu_reg_mem);
8442 %}
8443 
8444 // Or Instructions
8445 // Or Register with Register
// 32-bit bitwise OR, register with register; dst is both the first input and
// the destination (two-address form), flags are clobbered.
8446 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8447   match(Set dst (OrI dst src));
8448   effect(KILL cr);
8449 
8450   size(2);
8451   format %{ "OR     $dst,$src" %}
8452   opcode(0x0B);
8453   ins_encode( OpcP, RegReg( dst, src) );
8454   ins_pipe( ialu_reg_reg );
8455 %}
8456 
// 32-bit bitwise OR of an int register with a pointer register reinterpreted
// as an integer (CastP2X). The cast itself emits no code: the same OR
// opcode/encoding as orI_eReg is used directly on the pointer register.
8457 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8458   match(Set dst (OrI dst (CastP2X src)));
8459   effect(KILL cr);
8460 
8461   size(2);
8462   format %{ "OR     $dst,$src" %}
8463   opcode(0x0B);
8464   ins_encode( OpcP, RegReg( dst, src) );
8465   ins_pipe( ialu_reg_reg );
8466 %}
8467 
8468 
8469 // Or Register with Immediate
// 32-bit bitwise OR of a register with an integer constant (81 /1).
// No size() is given: the Con8or32 encoding name suggests the immediate is
// emitted as either 8 or 32 bits - confirm against the encoding classes.
8470 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8471   match(Set dst (OrI dst src));
8472   effect(KILL cr);
8473 
8474   format %{ "OR     $dst,$src" %}
8475   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8476   // ins_encode( RegImm( dst, src) );
8477   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8478   ins_pipe( ialu_reg );
8479 %}
8480 
8481 // Or Register with Memory
// 32-bit bitwise OR of a register with a value loaded from memory; the LoadI
// is folded into the OR's memory operand.
8482 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8483   match(Set dst (OrI dst (LoadI src)));
8484   effect(KILL cr);
8485 
8486   ins_cost(125);
8487   format %{ "OR     $dst,$src" %}
8488   opcode(0x0B);
8489   ins_encode( OpcP, RegMem( dst, src) );
8490   ins_pipe( ialu_reg_mem );
8491 %}
8492 
8493 // Or Memory with Register
// Read-modify-write OR: matches a StoreI to dst of ((LoadI dst) | src) and
// emits a single OR with a memory destination (09 /r).
8494 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8495   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8496   effect(KILL cr);
8497 
8498   ins_cost(150);
8499   format %{ "OR     $dst,$src" %}
8500   opcode(0x09);  /* Opcode 09 /r */
8501   ins_encode( OpcP, RegMem( src, dst ) ); // register operand first, memory operand second
8502   ins_pipe( ialu_mem_reg );
8503 %}
8504 
8505 // Or Memory with Immediate
// Read-modify-write OR with a constant: matches a StoreI to dst of
// ((LoadI dst) | src) and emits a single OR on the memory operand (81 /1).
8506 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8507   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8508   effect(KILL cr);
8509 
8510   ins_cost(125);
8511   format %{ "OR     $dst,$src" %}
8512   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8513   // ins_encode( MemImm( dst, src) );
8514   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8515   ins_pipe( ialu_mem_imm );
8516 %}
8517 
8518 // ROL/ROR
8519 // ROL expand
// Expand-only helper (no match rule): rotate a register left by one (D1 /0).
// It is instantiated from the expand block of rolI_eReg_i1.
8520 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8521   effect(USE_DEF dst, USE shift, KILL cr); // operand roles are stated explicitly since there is no match tree
8522 
8523   format %{ "ROL    $dst, $shift" %}
8524   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8525   ins_encode( OpcP, RegOpc( dst ));
8526   ins_pipe( ialu_reg );
8527 %}
8528 
// Expand-only helper (no match rule): rotate a register left by an 8-bit
// immediate count (C1 /0 ib). It is instantiated from the expand block of
// rolI_eReg_i8.
8529 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8530   effect(USE_DEF dst, USE shift, KILL cr); // operand roles are stated explicitly since there is no match tree
8531 
8532   format %{ "ROL    $dst, $shift" %}
8533   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8534   ins_encode( RegOpcImm(dst, shift) );
8535   ins_pipe(ialu_reg);
8536 %}
8537 
// Expand-only helper (no match rule): rotate a register left by a variable
// count (D3 /0). The count is in the eCXRegI class and dst is drawn from
// ncxRegI, so the two operands are in different register classes.
8538 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8539   effect(USE_DEF dst, USE shift, KILL cr);
8540 
8541   format %{ "ROL    $dst, $shift" %}
8542   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8543   ins_encode(OpcP, RegOpc(dst));
8544   ins_pipe( ialu_reg_reg );
8545 %}
8546 // end of ROL expand
8547 
8548 // ROL 32bit by one once
// Recognizes the rotate-left-by-one idiom (dst << 1) | (dst >>> -1) and expands
// it to rolI_eReg_imm1. The rshift operand takes part only in matching; it is
// not passed on to the expanded instruction.
8549 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8550   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8551 
8552   expand %{
8553     rolI_eReg_imm1(dst, lshift, cr);
8554   %}
8555 %}
8556 
8557 // ROL 32bit var by imm8 once
// Recognizes (dst << lshift) | (dst >>> rshift) with constant counts and expands
// it to rolI_eReg_imm8. The predicate accepts the pattern only when the two
// counts sum to a multiple of 32, i.e. when the OR of the two shifts is a
// true 32-bit rotation.
8558 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8559   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // in(1)/in(2) are the two shift nodes; their in(2) is the count
8560   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8561 
8562   expand %{
8563     rolI_eReg_imm8(dst, lshift, cr);
8564   %}
8565 %}
8566 
8567 // ROL 32bit var by var once
// Recognizes the variable rotate-left idiom (dst << shift) | (dst >>> (0 - shift))
// and expands it to rolI_eReg_CL. The zero operand takes part only in matching.
8568 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8569   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8570 
8571   expand %{
8572     rolI_eReg_CL(dst, shift, cr);
8573   %}
8574 %}
8575 
8576 // ROL 32bit var by var once
// Recognizes the variable rotate-left idiom (dst << shift) | (dst >>> (32 - shift))
// and expands it to rolI_eReg_CL. The c32 operand takes part only in matching.
8577 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8578   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8579 
8580   expand %{
8581     rolI_eReg_CL(dst, shift, cr);
8582   %}
8583 %}
8584 
8585 // ROR expand
// Expand-only helper (no match rule): rotate a register right by one (D1 /1).
// It is instantiated from the expand block of rorI_eReg_i1.
8586 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8587   effect(USE_DEF dst, USE shift, KILL cr); // operand roles are stated explicitly since there is no match tree
8588 
8589   format %{ "ROR    $dst, $shift" %}
8590   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8591   ins_encode( OpcP, RegOpc( dst ) );
8592   ins_pipe( ialu_reg );
8593 %}
8594 
// Expand-only helper (no match rule): rotate a register right by an 8-bit
// immediate count (C1 /1 ib). It is instantiated from the expand block of
// rorI_eReg_i8.
8595 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8596   effect (USE_DEF dst, USE shift, KILL cr); // operand roles are stated explicitly since there is no match tree
8597 
8598   format %{ "ROR    $dst, $shift" %}
8599   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8600   ins_encode( RegOpcImm(dst, shift) );
8601   ins_pipe( ialu_reg );
8602 %}
8603 
// Expand-only helper (no match rule): rotate a register right by a variable
// count (D3 /1). The count is in the eCXRegI class and dst is drawn from
// ncxRegI, so the two operands are in different register classes.
8604 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8605   effect(USE_DEF dst, USE shift, KILL cr);
8606 
8607   format %{ "ROR    $dst, $shift" %}
8608   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8609   ins_encode(OpcP, RegOpc(dst));
8610   ins_pipe( ialu_reg_reg );
8611 %}
8612 // end of ROR expand
8613 
8614 // ROR right once
// Recognizes the rotate-right-by-one idiom (dst >>> 1) | (dst << -1) and expands
// it to rorI_eReg_imm1. The lshift operand takes part only in matching; it is
// not passed on to the expanded instruction.
8615 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8616   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8617 
8618   expand %{
8619     rorI_eReg_imm1(dst, rshift, cr);
8620   %}
8621 %}
8622 
8623 // ROR 32bit by immI8 once
// Recognizes (dst >>> rshift) | (dst << lshift) with constant counts and expands
// it to rorI_eReg_imm8. The predicate accepts the pattern only when the two
// counts sum to a multiple of 32, i.e. when the OR of the two shifts is a
// true 32-bit rotation.
8624 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8625   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); // in(1)/in(2) are the two shift nodes; their in(2) is the count
8626   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8627 
8628   expand %{
8629     rorI_eReg_imm8(dst, rshift, cr);
8630   %}
8631 %}
8632 
8633 // ROR 32bit var by var once
// Recognizes the variable rotate-right idiom (dst >>> shift) | (dst << (0 - shift))
// and expands it to rorI_eReg_CL. The zero operand takes part only in matching.
8634 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8635   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8636 
8637   expand %{
8638     rorI_eReg_CL(dst, shift, cr);
8639   %}
8640 %}
8641 
8642 // ROR 32bit var by var once
// Recognizes the variable rotate-right idiom (dst >>> shift) | (dst << (32 - shift))
// and expands it to rorI_eReg_CL. The c32 operand takes part only in matching.
8643 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8644   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8645 
8646   expand %{
8647     rorI_eReg_CL(dst, shift, cr);
8648   %}
8649 %}
8650 
8651 // Xor Instructions
8652 // Xor Register with Register
// 32-bit bitwise XOR, register with register; dst is both the first input and
// the destination (two-address form), flags are clobbered.
8653 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8654   match(Set dst (XorI dst src));
8655   effect(KILL cr);
8656 
8657   size(2);
8658   format %{ "XOR    $dst,$src" %}
8659   opcode(0x33);
8660   ins_encode( OpcP, RegReg( dst, src) );
8661   ins_pipe( ialu_reg_reg );
8662 %}
8663 
8664 // Xor Register with Immediate -1
// XOR of a register with the constant -1 is a bitwise complement, so this rule
// emits NOT via notl. Unlike the other XOR rules no flags operand is declared
// or killed (x86 NOT leaves EFLAGS unchanged).
8665 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8666   match(Set dst (XorI dst imm));
8667 
8668   size(2);
8669   format %{ "NOT    $dst" %}
8670   ins_encode %{
8671      __ notl($dst$$Register);
8672   %}
8673   ins_pipe( ialu_reg );
8674 %}
8675 
8676 // Xor Register with Immediate
// 32-bit bitwise XOR of a register with an integer constant (81 /6).
// No size() is given: the Con8or32 encoding name suggests the immediate is
// emitted as either 8 or 32 bits - confirm against the encoding classes.
8677 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8678   match(Set dst (XorI dst src));
8679   effect(KILL cr);
8680 
8681   format %{ "XOR    $dst,$src" %}
8682   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8683   // ins_encode( RegImm( dst, src) );
8684   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8685   ins_pipe( ialu_reg );
8686 %}
8687 
8688 // Xor Register with Memory
// 32-bit bitwise XOR of a register with a value loaded from memory; the LoadI
// is folded into the XOR's memory operand.
8689 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8690   match(Set dst (XorI dst (LoadI src)));
8691   effect(KILL cr);
8692 
8693   ins_cost(125);
8694   format %{ "XOR    $dst,$src" %}
8695   opcode(0x33);
8696   ins_encode( OpcP, RegMem(dst, src) );
8697   ins_pipe( ialu_reg_mem );
8698 %}
8699 
8700 // Xor Memory with Register
// Read-modify-write XOR: matches a StoreI to dst of ((LoadI dst) ^ src) and
// emits a single XOR with a memory destination (31 /r).
8701 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8702   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8703   effect(KILL cr);
8704 
8705   ins_cost(150);
8706   format %{ "XOR    $dst,$src" %}
8707   opcode(0x31);  /* Opcode 31 /r */
8708   ins_encode( OpcP, RegMem( src, dst ) ); // register operand first, memory operand second
8709   ins_pipe( ialu_mem_reg );
8710 %}
8711 
8712 // Xor Memory with Immediate
// Read-modify-write XOR with a constant: matches a StoreI to dst of
// ((LoadI dst) ^ src) and emits a single XOR on the memory operand (81 /6).
8713 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8714   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8715   effect(KILL cr);
8716 
8717   ins_cost(125);
8718   format %{ "XOR    $dst,$src" %}
8719   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8720   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8721   ins_pipe( ialu_mem_imm );
8722 %}
8723 
8724 //----------Convert Int to Boolean---------------------------------------------
8725 
// Expand-only helper (no match rule): plain int register-to-register move,
// used as the first step of convI2B's expansion. No flags operand is declared.
8726 instruct movI_nocopy(rRegI dst, rRegI src) %{
8727   effect( DEF dst, USE src );
8728   format %{ "MOV    $dst,$src" %}
8729   ins_encode( enc_Copy( dst, src) );
8730   ins_pipe( ialu_reg_reg );
8731 %}
8732 
// Expand-only helper (no match rule): second step of convI2B's expansion.
// With dst holding a copy of src on entry (as arranged by convI2B), NEG sets
// the carry flag iff the value is non-zero and leaves -src in dst; ADC then
// computes -src + src + carry, leaving 1 for non-zero input and 0 for zero.
8733 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8734   effect( USE_DEF dst, USE src, KILL cr );
8735 
8736   size(4); // two 2-byte instructions
8737   format %{ "NEG    $dst\n\t"
8738             "ADC    $dst,$src" %}
8739   ins_encode( neg_reg(dst),
8740               OpcRegReg(0x13,dst,src) );
8741   ins_pipe( ialu_reg_reg_long );
8742 %}
8743 
// Int-to-boolean conversion (Conv2B of an int register): expands into a copy of
// src into dst followed by the NEG/ADC pair in ci2b.
8744 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8745   match(Set dst (Conv2B src));
8746 
8747   expand %{
8748     movI_nocopy(dst,src);
8749     ci2b(dst,src,cr);
8750   %}
8751 %}
8752 
// Expand-only helper (no match rule): move a pointer register into an int
// register, used as the first step of convP2B's expansion.
8753 instruct movP_nocopy(rRegI dst, eRegP src) %{
8754   effect( DEF dst, USE src );
8755   format %{ "MOV    $dst,$src" %}
8756   ins_encode( enc_Copy( dst, src) );
8757   ins_pipe( ialu_reg_reg );
8758 %}
8759 
// Expand-only helper (no match rule): pointer flavour of ci2b, second step of
// convP2B's expansion. With dst holding a copy of src on entry, NEG sets the
// carry flag iff the value is non-zero and ADC computes -src + src + carry,
// leaving 1 for a non-null pointer and 0 for null.
// Unlike ci2b, no size() is declared here.
8760 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8761   effect( USE_DEF dst, USE src, KILL cr );
8762   format %{ "NEG    $dst\n\t"
8763             "ADC    $dst,$src" %}
8764   ins_encode( neg_reg(dst),
8765               OpcRegReg(0x13,dst,src) );
8766   ins_pipe( ialu_reg_reg_long );
8767 %}
8768 
// Pointer-to-boolean conversion (Conv2B of a pointer register): expands into a
// copy of src into dst followed by the NEG/ADC pair in cp2b.
8769 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8770   match(Set dst (Conv2B src));
8771 
8772   expand %{
8773     movP_nocopy(dst,src);
8774     cp2b(dst,src,cr);
8775   %}
8776 %}
8777 
// CmpLTMask: dst = (p < q, signed) ? -1 : 0, computed without a branch.
// dst is zeroed, CMP sets the flags, SETlt writes 0/1 into the low byte of dst
// and NEG turns 1 into all-ones. dst is fixed to the eCXRegI class and p/q are
// drawn from ncxRegI, so clearing dst before the compare cannot clobber an input.
8778 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8779   match(Set dst (CmpLTMask p q));
8780   effect(KILL cr);
8781   ins_cost(400);
8782 
8783   // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
8784   format %{ "XOR    $dst,$dst\n\t"
8785             "CMP    $p,$q\n\t"
8786             "SETlt  $dst\n\t"
8787             "NEG    $dst" %}
8788   ins_encode %{
8789     Register Rp = $p$$Register;
8790     Register Rq = $q$$Register;
8791     Register Rd = $dst$$Register;
8793     __ xorl(Rd, Rd);                 // clear the full register; SETcc writes only the low byte
8794     __ cmpl(Rp, Rq);
8795     __ setb(Assembler::less, Rd);    // Rd = (p < q) ? 1 : 0
8796     __ negl(Rd);                     // 1 -> 0xFFFFFFFF, 0 -> 0
8797   %}
8798 
8799   ins_pipe(pipe_slow);
8800 %}
8801 
// CmpLTMask against the constant zero: dst = (dst < 0) ? -1 : 0. An arithmetic
// right shift by 31 replicates the sign bit across the whole register, so one
// SAR replaces the compare/set/negate sequence used by cmpLTMask.
8802 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8803   match(Set dst (CmpLTMask dst zero));
8804   effect(DEF dst, KILL cr);
8805   ins_cost(100);
8806 
8807   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8808   ins_encode %{
8809   __ sarl($dst$$Register, 31);
8810   %}
8811   ins_pipe(ialu_reg);
8812 %}
8813 
8814 /* better to save a register than avoid a branch */
// Conditional add: matches p = ((p < q ? -1 : 0) & y) + (p - q), i.e.
// p = (p - q) + (p < q ? y : 0), and implements it with a subtract, a short
// branch and an add instead of materializing the mask.
8815 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8816   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8817   effect(KILL cr);
8818   ins_cost(400);
8819   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8820             "JGE    done\n\t"
8821             "ADD    $p,$y\n"
8822             "done:  " %}
8823   ins_encode %{
8824     Register Rp = $p$$Register;
8825     Register Rq = $q$$Register;
8826     Register Ry = $y$$Register;
8827     Label done;
8828     __ subl(Rp, Rq);                        // p -= q; the flags describe the signed comparison p ? q
8829     __ jccb(Assembler::greaterEqual, done); // p >= q: the mask would be 0, nothing to add
8830     __ addl(Rp, Ry);                        // p < q: the mask would be all-ones, add y
8831     __ bind(done);
8832   %}
8833 
8834   ins_pipe(pipe_cmplt);
8835 %}
8836 
8837 /* better to save a register than avoid a branch */
// Conditional clear: matches y = (p < q ? -1 : 0) & y, i.e. y is kept when
// p < q (signed) and zeroed otherwise, implemented with a compare, a short
// branch and an XOR instead of materializing the mask.
8838 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8839   match(Set y (AndI (CmpLTMask p q) y));
8840   effect(KILL cr);
8841 
8842   ins_cost(300);
8843 
8844   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8845             "JLT      done\n\t"
8846             "XORL     $y, $y\n"
8847             "done:  " %}
8848   ins_encode %{
8849     Register Rp = $p$$Register;
8850     Register Rq = $q$$Register;
8851     Register Ry = $y$$Register;
8852     Label done;
8853     __ cmpl(Rp, Rq);
8854     __ jccb(Assembler::less, done); // p < q: the mask would be all-ones, y is unchanged
8855     __ xorl(Ry, Ry);                // p >= q: the mask would be 0, clear y
8856     __ bind(done);
8857   %}
8858 
8859   ins_pipe(pipe_cmplt);
8860 %}
8861 
8862 /* If I enable this, I encourage spilling in the inner loop of compress.
8863 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8864   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8865 */
8866 //----------Overflow Math Instructions-----------------------------------------
8867 
// Overflow check for int addition: the result of this instruction is the flags
// register (Set cr ...). The ADD is performed only for its flag effects; it
// overwrites op1, which is therefore declared USE_KILL.
8868 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8869 %{
8870   match(Set cr (OverflowAddI op1 op2));
8871   effect(DEF cr, USE_KILL op1, USE op2);
8872 
8873   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8874 
8875   ins_encode %{
8876     __ addl($op1$$Register, $op2$$Register);
8877   %}
8878   ins_pipe(ialu_reg_reg);
8879 %}
8880 
// Overflow check for int addition with a constant second operand: the ADD is
// performed only for its flag effects and overwrites op1 (USE_KILL).
8881 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8882 %{
8883   match(Set cr (OverflowAddI op1 op2));
8884   effect(DEF cr, USE_KILL op1, USE op2);
8885 
8886   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8887 
8888   ins_encode %{
8889     __ addl($op1$$Register, $op2$$constant);
8890   %}
8891   ins_pipe(ialu_reg_reg);
8892 %}
8893 
// Overflow check for int subtraction: CMP sets the flags exactly as SUB would
// without writing a result, so neither operand is clobbered and no effect
// clause is needed.
8894 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8895 %{
8896   match(Set cr (OverflowSubI op1 op2));
8897 
8898   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8899   ins_encode %{
8900     __ cmpl($op1$$Register, $op2$$Register);
8901   %}
8902   ins_pipe(ialu_reg_reg);
8903 %}
8904 
// Overflow check for int subtraction of a constant: CMP against the immediate
// sets the flags as SUB would, without modifying op1.
8905 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8906 %{
8907   match(Set cr (OverflowSubI op1 op2));
8908 
8909   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8910   ins_encode %{
8911     __ cmpl($op1$$Register, $op2$$constant);
8912   %}
8913   ins_pipe(ialu_reg_reg);
8914 %}
8915 
// Overflow check for int negation, matched as OverflowSubI with a constant-zero
// first operand (0 - op2). NEG is performed only for its flag effects; it
// overwrites op2, which is therefore declared USE_KILL.
8916 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8917 %{
8918   match(Set cr (OverflowSubI zero op2));
8919   effect(DEF cr, USE_KILL op2);
8920 
8921   format %{ "NEG    $op2\t# overflow check int" %}
8922   ins_encode %{
8923     __ negl($op2$$Register);
8924   %}
8925   ins_pipe(ialu_reg_reg);
8926 %}
8927 
// Overflow check for int multiplication: the two-operand IMUL is performed only
// for its flag effects; it overwrites op1, which is therefore declared USE_KILL.
8928 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8929 %{
8930   match(Set cr (OverflowMulI op1 op2));
8931   effect(DEF cr, USE_KILL op1, USE op2);
8932 
8933   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8934   ins_encode %{
8935     __ imull($op1$$Register, $op2$$Register);
8936   %}
8937   ins_pipe(ialu_reg_reg_alu0);
8938 %}
8939 
// Overflow check for int multiplication by a constant: the three-operand IMUL
// writes its product into a scratch register (TEMP tmp), so op1 is only read
// and stays intact.
8940 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8941 %{
8942   match(Set cr (OverflowMulI op1 op2));
8943   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8944 
8945   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8946   ins_encode %{
8947     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8948   %}
8949   ins_pipe(ialu_reg_reg_alu0);
8950 %}
8951 
8952 //----------Long Instructions------------------------------------------------
8953 // Add Long Register with Register
// 64-bit add of two long register pairs: ADD on the low halves followed by ADC
// on the high halves, so the carry out of the low word propagates. The primary
// opcode (0x03) is used for the low half and the secondary (0x13) for the high.
8954 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8955   match(Set dst (AddL dst src));
8956   effect(KILL cr);
8957   ins_cost(200);
8958   format %{ "ADD    $dst.lo,$src.lo\n\t"
8959             "ADC    $dst.hi,$src.hi" %}
8960   opcode(0x03, 0x13);
8961   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8962   ins_pipe( ialu_reg_reg_long );
8963 %}
8964 
8965 // Add Long Register with Immediate
8966 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
       // 64-bit add of a long immediate to a register pair: opcode 0x81 with
       // /0 (ADD) for the low word and /2 (ADC) for the high word, as listed
       // in the opcode comment below. Clobbers the flags.
8967   match(Set dst (AddL dst src));
8968   effect(KILL cr);
8969   format %{ "ADD    $dst.lo,$src.lo\n\t"
8970             "ADC    $dst.hi,$src.hi" %}
8971   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8972   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8973   ins_pipe( ialu_reg_long );
8974 %}
8975 
8976 // Add Long Register with Memory
8977 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
       // 64-bit add of a long loaded from memory: the LoadL is folded into
       // the instruction. Per the format, the low word comes from $mem (ADD,
       // primary opcode) and the high word from $mem+4 (ADC, secondary opcode).
8978   match(Set dst (AddL dst (LoadL mem)));
8979   effect(KILL cr);
8980   ins_cost(125);
8981   format %{ "ADD    $dst.lo,$mem\n\t"
8982             "ADC    $dst.hi,$mem+4" %}
8983   opcode(0x03, 0x13);
8984   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8985   ins_pipe( ialu_reg_long_mem );
8986 %}
8987 
8988 // Subtract Long Register with Register.
8989 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
       // 64-bit subtract on a register pair: SUB (0x2B) on the low halves,
       // then SBB (0x1B) on the high halves so the borrow from the low
       // subtract is included. Clobbers the flags.
8990   match(Set dst (SubL dst src));
8991   effect(KILL cr);
8992   ins_cost(200);
8993   format %{ "SUB    $dst.lo,$src.lo\n\t"
8994             "SBB    $dst.hi,$src.hi" %}
8995   opcode(0x2B, 0x1B);
8996   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8997   ins_pipe( ialu_reg_reg_long );
8998 %}
8999 
9000 // Subtract Long Register with Immediate
9001 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
       // 64-bit subtract of a long immediate from a register pair: opcode
       // 0x81 with /5 (SUB) for the low word and /3 (SBB) for the high word,
       // as listed in the opcode comment below. Clobbers the flags.
9002   match(Set dst (SubL dst src));
9003   effect(KILL cr);
9004   format %{ "SUB    $dst.lo,$src.lo\n\t"
9005             "SBB    $dst.hi,$src.hi" %}
9006   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
9007   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9008   ins_pipe( ialu_reg_long );
9009 %}
9010 
9011 // Subtract Long Register with Memory
9012 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
       // 64-bit subtract of a long loaded from memory: the LoadL is folded
       // into the instruction. Per the format, SUB uses the word at $mem and
       // SBB the word at $mem+4.
9013   match(Set dst (SubL dst (LoadL mem)));
9014   effect(KILL cr);
9015   ins_cost(125);
9016   format %{ "SUB    $dst.lo,$mem\n\t"
9017             "SBB    $dst.hi,$mem+4" %}
9018   opcode(0x2B, 0x1B);
9019   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9020   ins_pipe( ialu_reg_long_mem );
9021 %}
9022 
9023 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
       // 64-bit negate in place: matches (0 - dst). Per the format the
       // sequence is NEG hi, NEG lo, SBB hi,0 -- presumably the SBB folds the
       // borrow from negating the low half into the high half (the actual
       // bytes come from the neg_long encoding, defined outside this chunk).
9024   match(Set dst (SubL zero dst));
9025   effect(KILL cr);
9026   ins_cost(300);
9027   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
9028   ins_encode( neg_long(dst) );
9029   ins_pipe( ialu_reg_reg_long );
9030 %}
9031 
9032 // And Long Register with Register
9033 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
       // 64-bit bitwise AND on a register pair: one AND (0x23) per 32-bit
       // half; the halves are independent. Clobbers the flags.
9034   match(Set dst (AndL dst src));
9035   effect(KILL cr);
9036   format %{ "AND    $dst.lo,$src.lo\n\t"
9037             "AND    $dst.hi,$src.hi" %}
9038   opcode(0x23,0x23);
9039   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9040   ins_pipe( ialu_reg_reg_long );
9041 %}
9042 
9043 // And Long Register with Immediate
9044 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
       // 64-bit bitwise AND with a long immediate: opcode 0x81 /4 (AND) is
       // used for both the low and the high word. Clobbers the flags.
9045   match(Set dst (AndL dst src));
9046   effect(KILL cr);
9047   format %{ "AND    $dst.lo,$src.lo\n\t"
9048             "AND    $dst.hi,$src.hi" %}
9049   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
9050   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9051   ins_pipe( ialu_reg_long );
9052 %}
9053 
9054 // And Long Register with Memory
9055 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
       // 64-bit bitwise AND with a long loaded from memory: the LoadL is
       // folded in; per the format the low half ANDs with $mem and the high
       // half with $mem+4.
9056   match(Set dst (AndL dst (LoadL mem)));
9057   effect(KILL cr);
9058   ins_cost(125);
9059   format %{ "AND    $dst.lo,$mem\n\t"
9060             "AND    $dst.hi,$mem+4" %}
9061   opcode(0x23, 0x23);
9062   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9063   ins_pipe( ialu_reg_long_mem );
9064 %}
9065 
9066 // BMI1 instructions
9067 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
       // BMI1 and-not on longs: matches (src1 ^ -1) & src2 and emits one
       // ANDNL per 32-bit half. Only selected when UseBMI1Instructions is set.
       // dst is also declared TEMP in addition to being the result; the
       // reason is not visible here (presumably to keep dst from sharing a
       // register with an input, since the low half is written before the
       // high halves of the sources are read -- TODO confirm).
9068   match(Set dst (AndL (XorL src1 minus_1) src2));
9069   predicate(UseBMI1Instructions);
9070   effect(KILL cr, TEMP dst);
9071 
9072   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
9073             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
9074          %}
9075 
9076   ins_encode %{
9077     Register Rdst = $dst$$Register;
9078     Register Rsrc1 = $src1$$Register;
9079     Register Rsrc2 = $src2$$Register;
         // Low halves first, then the high halves obtained via HIGH_FROM_LOW.
9080     __ andnl(Rdst, Rsrc1, Rsrc2);
9081     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
9082   %}
9083   ins_pipe(ialu_reg_reg_long);
9084 %}
9085 
9086 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
       // BMI1 and-not on longs with the second operand loaded from memory:
       // matches (src1 ^ -1) & LoadL(src2). Only selected when
       // UseBMI1Instructions is set.
9087   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
9088   predicate(UseBMI1Instructions);
9089   effect(KILL cr, TEMP dst);
9090 
9091   ins_cost(125);
9092   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
9093             "ANDNL  $dst.hi, $src1.hi, $src2+4"
9094          %}
9095 
9096   ins_encode %{
9097     Register Rdst = $dst$$Register;
9098     Register Rsrc1 = $src1$$Register;
         // Address of the upper 32-bit word: same base/index/scale, disp + 4.
9099     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
9100 
9101     __ andnl(Rdst, Rsrc1, $src2$$Address);
9102     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
9103   %}
9104   ins_pipe(ialu_reg_mem);
9105 %}
9106 
9107 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
       // BMI1 BLSI on longs: matches (0 - src) & src. Only selected when
       // UseBMI1Instructions is set. The 64-bit result is built from two
       // 32-bit BLSIL operations with a short branch in between.
9108   match(Set dst (AndL (SubL imm_zero src) src));
9109   predicate(UseBMI1Instructions);
9110   effect(KILL cr, TEMP dst);
9111 
9112   format %{ "MOVL   $dst.hi, 0\n\t"
9113             "BLSIL  $dst.lo, $src.lo\n\t"
9114             "JNZ    done\n\t"
9115             "BLSIL  $dst.hi, $src.hi\n"
9116             "done:"
9117          %}
9118 
9119   ins_encode %{
9120     Label done;
9121     Register Rdst = $dst$$Register;
9122     Register Rsrc = $src$$Register;
         // Pre-clear the high half; it stays zero if the low half yields a
         // non-zero result.
9123     __ movl(HIGH_FROM_LOW(Rdst), 0);
9124     __ blsil(Rdst, Rsrc);
         // Non-zero low result: skip the high-half BLSIL.
9125     __ jccb(Assembler::notZero, done);
9126     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9127     __ bind(done);
9128   %}
9129   ins_pipe(ialu_reg);
9130 %}
9131 
9132 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
       // BMI1 BLSI on a long loaded from memory: matches
       // (0 - LoadL(src)) & LoadL(src). Only selected when
       // UseBMI1Instructions is set. Same two-step sequence as the register
       // form, with the operands read from src and src+4.
9133   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9134   predicate(UseBMI1Instructions);
9135   effect(KILL cr, TEMP dst);
9136 
9137   ins_cost(125);
9138   format %{ "MOVL   $dst.hi, 0\n\t"
9139             "BLSIL  $dst.lo, $src\n\t"
9140             "JNZ    done\n\t"
9141             "BLSIL  $dst.hi, $src+4\n"
9142             "done:"
9143          %}
9144 
9145   ins_encode %{
9146     Label done;
9147     Register Rdst = $dst$$Register;
         // Address of the upper 32-bit word: same base/index/scale, disp + 4.
9148     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9149 
9150     __ movl(HIGH_FROM_LOW(Rdst), 0);
9151     __ blsil(Rdst, $src$$Address);
         // Non-zero low result: skip the high-half BLSIL.
9152     __ jccb(Assembler::notZero, done);
9153     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9154     __ bind(done);
9155   %}
9156   ins_pipe(ialu_reg_mem);
9157 %}
9158 
9159 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9160 %{
       // BMI1 BLSMSK on longs: matches (src + -1) ^ src. Only selected when
       // UseBMI1Instructions is set. Built from two 32-bit BLSMSKL operations;
       // the high half is processed only when the low-half operation leaves
       // the carry flag set.
9161   match(Set dst (XorL (AddL src minus_1) src));
9162   predicate(UseBMI1Instructions);
9163   effect(KILL cr, TEMP dst);
9164 
9165   format %{ "MOVL    $dst.hi, 0\n\t"
9166             "BLSMSKL $dst.lo, $src.lo\n\t"
9167             "JNC     done\n\t"
9168             "BLSMSKL $dst.hi, $src.hi\n"
9169             "done:"
9170          %}
9171 
9172   ins_encode %{
9173     Label done;
9174     Register Rdst = $dst$$Register;
9175     Register Rsrc = $src$$Register;
         // Pre-clear the high half; it stays zero when carry is clear.
9176     __ movl(HIGH_FROM_LOW(Rdst), 0);
9177     __ blsmskl(Rdst, Rsrc);
9178     __ jccb(Assembler::carryClear, done);
9179     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9180     __ bind(done);
9181   %}
9182 
9183   ins_pipe(ialu_reg);
9184 %}
9185 
9186 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9187 %{
       // BMI1 BLSMSK on a long loaded from memory: matches
       // (LoadL(src) + -1) ^ LoadL(src). Only selected when
       // UseBMI1Instructions is set. Same sequence as the register form, with
       // the operands read from src and src+4.
9188   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9189   predicate(UseBMI1Instructions);
9190   effect(KILL cr, TEMP dst);
9191 
9192   ins_cost(125);
9193   format %{ "MOVL    $dst.hi, 0\n\t"
9194             "BLSMSKL $dst.lo, $src\n\t"
9195             "JNC     done\n\t"
9196             "BLSMSKL $dst.hi, $src+4\n"
9197             "done:"
9198          %}
9199 
9200   ins_encode %{
9201     Label done;
9202     Register Rdst = $dst$$Register;
         // Address of the upper 32-bit word: same base/index/scale, disp + 4.
9203     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9204 
9205     __ movl(HIGH_FROM_LOW(Rdst), 0);
9206     __ blsmskl(Rdst, $src$$Address);
         // Carry clear: the high half keeps the zero written above.
9207     __ jccb(Assembler::carryClear, done);
9208     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9209     __ bind(done);
9210   %}
9211 
9212   ins_pipe(ialu_reg_mem);
9213 %}
9214 
9215 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9216 %{
       // BMI1 BLSR on longs: matches (src + -1) & src. Only selected when
       // UseBMI1Instructions is set. The high half of dst starts as a copy of
       // src.hi and is only replaced by BLSRL(src.hi) when the low-half
       // operation leaves the carry flag set.
9217   match(Set dst (AndL (AddL src minus_1) src) );
9218   predicate(UseBMI1Instructions);
9219   effect(KILL cr, TEMP dst);
9220 
9221   format %{ "MOVL   $dst.hi, $src.hi\n\t"
9222             "BLSRL  $dst.lo, $src.lo\n\t"
9223             "JNC    done\n\t"
9224             "BLSRL  $dst.hi, $src.hi\n"
9225             "done:"
9226   %}
9227 
9228   ins_encode %{
9229     Label done;
9230     Register Rdst = $dst$$Register;
9231     Register Rsrc = $src$$Register;
         // Default for the high half: unchanged copy of the source high half.
9232     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9233     __ blsrl(Rdst, Rsrc);
9234     __ jccb(Assembler::carryClear, done);
9235     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9236     __ bind(done);
9237   %}
9238 
9239   ins_pipe(ialu_reg);
9240 %}
9241 
9242 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9243 %{
       // BMI1 BLSR on a long loaded from memory: matches
       // (LoadL(src) + -1) & LoadL(src). Only selected when
       // UseBMI1Instructions is set. Same sequence as the register form, with
       // the operands read from src and src+4.
9244   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9245   predicate(UseBMI1Instructions);
9246   effect(KILL cr, TEMP dst);
9247 
9248   ins_cost(125);
9249   format %{ "MOVL   $dst.hi, $src+4\n\t"
9250             "BLSRL  $dst.lo, $src\n\t"
9251             "JNC    done\n\t"
9252             "BLSRL  $dst.hi, $src+4\n"
9253             "done:"
9254   %}
9255 
9256   ins_encode %{
9257     Label done;
9258     Register Rdst = $dst$$Register;
         // Address of the upper 32-bit word: same base/index/scale, disp + 4.
9259     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
         // Default for the high half: the unmodified upper word from memory.
9260     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9261     __ blsrl(Rdst, $src$$Address);
9262     __ jccb(Assembler::carryClear, done);
9263     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9264     __ bind(done);
9265   %}
9266 
9267   ins_pipe(ialu_reg_mem);
9268 %}
9269 
9270 // Or Long Register with Register
9271 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
       // 64-bit bitwise OR on a register pair: one OR (0x0B) per 32-bit half.
       // Clobbers the flags.
9272   match(Set dst (OrL dst src));
9273   effect(KILL cr);
9274   format %{ "OR     $dst.lo,$src.lo\n\t"
9275             "OR     $dst.hi,$src.hi" %}
9276   opcode(0x0B,0x0B);
9277   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9278   ins_pipe( ialu_reg_reg_long );
9279 %}
9280 
9281 // Or Long Register with Immediate
9282 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
       // 64-bit bitwise OR with a long immediate: opcode 0x81 /1 (OR) is used
       // for both the low and the high word. Clobbers the flags.
9283   match(Set dst (OrL dst src));
9284   effect(KILL cr);
9285   format %{ "OR     $dst.lo,$src.lo\n\t"
9286             "OR     $dst.hi,$src.hi" %}
9287   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9288   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9289   ins_pipe( ialu_reg_long );
9290 %}
9291 
9292 // Or Long Register with Memory
9293 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
       // 64-bit bitwise OR with a long loaded from memory: the LoadL is
       // folded in; per the format the low half ORs with $mem and the high
       // half with $mem+4.
9294   match(Set dst (OrL dst (LoadL mem)));
9295   effect(KILL cr);
9296   ins_cost(125);
9297   format %{ "OR     $dst.lo,$mem\n\t"
9298             "OR     $dst.hi,$mem+4" %}
9299   opcode(0x0B,0x0B);
9300   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9301   ins_pipe( ialu_reg_long_mem );
9302 %}
9303 
9304 // Xor Long Register with Register
9305 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
       // 64-bit bitwise XOR on a register pair: one XOR (0x33) per 32-bit
       // half. Clobbers the flags.
9306   match(Set dst (XorL dst src));
9307   effect(KILL cr);
9308   format %{ "XOR    $dst.lo,$src.lo\n\t"
9309             "XOR    $dst.hi,$src.hi" %}
9310   opcode(0x33,0x33);
9311   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9312   ins_pipe( ialu_reg_reg_long );
9313 %}
9314 
9315 // Xor Long Register with Immediate -1
9316 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
       // XOR of a long with the constant -1, i.e. bitwise complement: emits
       // NOT on each 32-bit half. Unlike the other XorL forms this one
       // declares no flags operand and no KILL effect.
9317   match(Set dst (XorL dst imm));
9318   format %{ "NOT    $dst.lo\n\t"
9319             "NOT    $dst.hi" %}
9320   ins_encode %{
9321      __ notl($dst$$Register);
9322      __ notl(HIGH_FROM_LOW($dst$$Register));
9323   %}
9324   ins_pipe( ialu_reg_long );
9325 %}
9326 
9327 // Xor Long Register with Immediate
9328 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
       // 64-bit bitwise XOR with a long immediate: opcode 0x81 /6 (XOR) is
       // used for both the low and the high word. Clobbers the flags.
9329   match(Set dst (XorL dst src));
9330   effect(KILL cr);
9331   format %{ "XOR    $dst.lo,$src.lo\n\t"
9332             "XOR    $dst.hi,$src.hi" %}
9333   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9334   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9335   ins_pipe( ialu_reg_long );
9336 %}
9337 
9338 // Xor Long Register with Memory
9339 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
       // 64-bit bitwise XOR with a long loaded from memory: the LoadL is
       // folded in; per the format the low half XORs with $mem and the high
       // half with $mem+4.
9340   match(Set dst (XorL dst (LoadL mem)));
9341   effect(KILL cr);
9342   ins_cost(125);
9343   format %{ "XOR    $dst.lo,$mem\n\t"
9344             "XOR    $dst.hi,$mem+4" %}
9345   opcode(0x33,0x33);
9346   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9347   ins_pipe( ialu_reg_long_mem );
9348 %}
9349 
9350 // Shift Left Long by 1
9351 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
       // Long shift left by the constant 1, implemented as dst + dst:
       // ADD lo,lo then ADC hi,hi so the bit shifted out of the low half
       // enters the high half. Only selected when UseNewLongLShift is set.
9352   predicate(UseNewLongLShift);
9353   match(Set dst (LShiftL dst cnt));
9354   effect(KILL cr);
9355   ins_cost(100);
9356   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9357             "ADC    $dst.hi,$dst.hi" %}
9358   ins_encode %{
9359     __ addl($dst$$Register,$dst$$Register);
9360     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9361   %}
9362   ins_pipe( ialu_reg_long );
9363 %}
9364 
9365 // Shift Left Long by 2
9366 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
       // Long shift left by the constant 2: the ADD lo,lo / ADC hi,hi
       // doubling pair is emitted twice. Only selected when UseNewLongLShift
       // is set.
9367   predicate(UseNewLongLShift);
9368   match(Set dst (LShiftL dst cnt));
9369   effect(KILL cr);
9370   ins_cost(100);
9371   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9372             "ADC    $dst.hi,$dst.hi\n\t"
9373             "ADD    $dst.lo,$dst.lo\n\t"
9374             "ADC    $dst.hi,$dst.hi" %}
9375   ins_encode %{
9376     __ addl($dst$$Register,$dst$$Register);
9377     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9378     __ addl($dst$$Register,$dst$$Register);
9379     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9380   %}
9381   ins_pipe( ialu_reg_long );
9382 %}
9383 
9384 // Shift Left Long by 3
9385 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
       // Long shift left by the constant 3: the ADD lo,lo / ADC hi,hi
       // doubling pair is emitted three times. Only selected when
       // UseNewLongLShift is set.
9386   predicate(UseNewLongLShift);
9387   match(Set dst (LShiftL dst cnt));
9388   effect(KILL cr);
9389   ins_cost(100);
9390   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9391             "ADC    $dst.hi,$dst.hi\n\t"
9392             "ADD    $dst.lo,$dst.lo\n\t"
9393             "ADC    $dst.hi,$dst.hi\n\t"
9394             "ADD    $dst.lo,$dst.lo\n\t"
9395             "ADC    $dst.hi,$dst.hi" %}
9396   ins_encode %{
9397     __ addl($dst$$Register,$dst$$Register);
9398     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9399     __ addl($dst$$Register,$dst$$Register);
9400     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9401     __ addl($dst$$Register,$dst$$Register);
9402     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9403   %}
9404   ins_pipe( ialu_reg_long );
9405 %}
9406 
9407 // Shift Left Long by 1-31
9408 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
       // Long shift left by a constant in 1..31: per the format, SHLD moves
       // the top bits of the low half into the high half, then SHL shifts the
       // low half. Bytes are produced by move_long_small_shift (defined
       // outside this chunk) from the opcode triple below.
9409   match(Set dst (LShiftL dst cnt));
9410   effect(KILL cr);
9411   ins_cost(200);
9412   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9413             "SHL    $dst.lo,$cnt" %}
9414   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9415   ins_encode( move_long_small_shift(dst,cnt) );
9416   ins_pipe( ialu_reg_long );
9417 %}
9418 
9419 // Shift Left Long by 32-63
9420 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
       // Long shift left by a constant in 32..63: per the format, the low
       // half is moved into the high half, shifted left by (cnt - 32), and
       // the low half is cleared with XOR.
9421   match(Set dst (LShiftL dst cnt));
9422   effect(KILL cr);
9423   ins_cost(300);
9424   format %{ "MOV    $dst.hi,$dst.lo\n"
9425           "\tSHL    $dst.hi,$cnt-32\n"
9426           "\tXOR    $dst.lo,$dst.lo" %}
9427   opcode(0xC1, 0x4);  /* C1 /4 ib */
9428   ins_encode( move_long_big_shift_clr(dst,cnt) );
9429   ins_pipe( ialu_reg_long );
9430 %}
9431 
9432 // Shift Left Long by variable
9433 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
       // Long shift left by a variable count held in the eCXRegI operand.
       // Per the format: if bit 5 of the count (value 32) is set, the low
       // half is first moved to the high half and cleared; then SHLD/SHL
       // perform the remaining shift. The instruction size is declared as
       // 17 bytes.
9434   match(Set dst (LShiftL dst shift));
9435   effect(KILL cr);
9436   ins_cost(500+200);
9437   size(17);
9438   format %{ "TEST   $shift,32\n\t"
9439             "JEQ,s  small\n\t"
9440             "MOV    $dst.hi,$dst.lo\n\t"
9441             "XOR    $dst.lo,$dst.lo\n"
9442     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9443             "SHL    $dst.lo,$shift" %}
9444   ins_encode( shift_left_long( dst, shift ) );
9445   ins_pipe( pipe_slow );
9446 %}
9447 
9448 // Shift Right Long by 1-31
9449 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
       // Long unsigned (logical) shift right by a constant in 1..31: per the
       // format, SHRD moves the low bits of the high half into the low half,
       // then SHR shifts the high half.
9450   match(Set dst (URShiftL dst cnt));
9451   effect(KILL cr);
9452   ins_cost(200);
9453   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9454             "SHR    $dst.hi,$cnt" %}
9455   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9456   ins_encode( move_long_small_shift(dst,cnt) );
9457   ins_pipe( ialu_reg_long );
9458 %}
9459 
9460 // Shift Right Long by 32-63
9461 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
       // Long unsigned (logical) shift right by a constant in 32..63: per the
       // format, the high half is moved into the low half, shifted right by
       // (cnt - 32), and the high half is cleared with XOR.
9462   match(Set dst (URShiftL dst cnt));
9463   effect(KILL cr);
9464   ins_cost(300);
9465   format %{ "MOV    $dst.lo,$dst.hi\n"
9466           "\tSHR    $dst.lo,$cnt-32\n"
9467           "\tXOR    $dst.hi,$dst.hi" %}
9468   opcode(0xC1, 0x5);  /* C1 /5 ib */
9469   ins_encode( move_long_big_shift_clr(dst,cnt) );
9470   ins_pipe( ialu_reg_long );
9471 %}
9472 
9473 // Shift Right Long by variable
9474 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
       // Long unsigned (logical) shift right by a variable count held in the
       // eCXRegI operand. Per the format: if bit 5 of the count (value 32)
       // is set, the high half is first moved to the low half and cleared;
       // then SHRD/SHR perform the remaining shift. The instruction size is
       // declared as 17 bytes.
9475   match(Set dst (URShiftL dst shift));
9476   effect(KILL cr);
9477   ins_cost(600);
9478   size(17);
9479   format %{ "TEST   $shift,32\n\t"
9480             "JEQ,s  small\n\t"
9481             "MOV    $dst.lo,$dst.hi\n\t"
9482             "XOR    $dst.hi,$dst.hi\n"
9483     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9484             "SHR    $dst.hi,$shift" %}
9485   ins_encode( shift_right_long( dst, shift ) );
9486   ins_pipe( pipe_slow );
9487 %}
9488 
9489 // Shift Right Arithmetic Long by 1-31
9490 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
       // Long signed (arithmetic) shift right by a constant in 1..31: per the
       // format, SHRD moves the low bits of the high half into the low half,
       // then SAR shifts the high half.
9491   match(Set dst (RShiftL dst cnt));
9492   effect(KILL cr);
9493   ins_cost(200);
9494   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9495             "SAR    $dst.hi,$cnt" %}
9496   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9497   ins_encode( move_long_small_shift(dst,cnt) );
9498   ins_pipe( ialu_reg_long );
9499 %}
9500 
9501 // Shift Right Arithmetic Long by 32-63
9502 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
       // Long signed (arithmetic) shift right by a constant in 32..63: per
       // the format, the high half is moved into the low half and shifted
       // right arithmetically by (cnt - 32); the high half is then filled
       // with the sign via SAR hi,31.
9503   match(Set dst (RShiftL dst cnt));
9504   effect(KILL cr);
9505   ins_cost(300);
9506   format %{ "MOV    $dst.lo,$dst.hi\n"
9507           "\tSAR    $dst.lo,$cnt-32\n"
9508           "\tSAR    $dst.hi,31" %}
9509   opcode(0xC1, 0x7);  /* C1 /7 ib */
9510   ins_encode( move_long_big_shift_sign(dst,cnt) );
9511   ins_pipe( ialu_reg_long );
9512 %}
9513 
9514 // Shift Right arithmetic Long by variable
9515 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
       // Long signed (arithmetic) shift right by a variable count held in the
       // eCXRegI operand. Per the format: if bit 5 of the count (value 32)
       // is set, the high half is first moved to the low half and replaced
       // by its sign (SAR hi,31); then SHRD/SAR perform the remaining shift.
       // The instruction size is declared as 18 bytes.
9516   match(Set dst (RShiftL dst shift));
9517   effect(KILL cr);
9518   ins_cost(600);
9519   size(18);
9520   format %{ "TEST   $shift,32\n\t"
9521             "JEQ,s  small\n\t"
9522             "MOV    $dst.lo,$dst.hi\n\t"
9523             "SAR    $dst.hi,31\n"
9524     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9525             "SAR    $dst.hi,$shift" %}
9526   ins_encode( shift_right_arith_long( dst, shift ) );
9527   ins_pipe( pipe_slow );
9528 %}
9529 
9530 
9531 //----------Double Instructions------------------------------------------------
9532 // Double Math
9533 
9534 // Compare & branch
9535 
9536 // P6 version of double compare (x87 FUCOMIP), sets condition codes in EFLAGS
9537 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
       // x87 double compare into the unsigned-flags register, used when the
       // CPU supports cmov and UseSSE <= 1. src1 is pushed on the FPU stack
       // and compared against src2 with FUCOMIP; per the format a fixup
       // follows that sets CF when a NaN was seen. The fixup goes through
       // AH/SAHF, which is why rax is killed.
9538   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9539   match(Set cr (CmpD src1 src2));
9540   effect(KILL rax);
9541   ins_cost(150);
9542   format %{ "FLD    $src1\n\t"
9543             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9544             "JNP    exit\n\t"
9545             "MOV    ah,1       // saw a NaN, set CF\n\t"
9546             "SAHF\n"
9547      "exit:\tNOP               // avoid branch to branch" %}
9548   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9549   ins_encode( Push_Reg_DPR(src1),
9550               OpcP, RegOpc(src2),
9551               cmpF_P6_fixup );
9552   ins_pipe( pipe_slow );
9553 %}
9554 
9555 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
       // Variant of the P6 x87 double compare that produces an eFlagsRegUCF
       // result: only FLD + FUCOMIP are emitted, with no NaN fixup and
       // therefore no rax kill.
9556   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9557   match(Set cr (CmpD src1 src2));
9558   ins_cost(150);
9559   format %{ "FLD    $src1\n\t"
9560             "FUCOMIP ST,$src2  // P6 instruction" %}
9561   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9562   ins_encode( Push_Reg_DPR(src1),
9563               OpcP, RegOpc(src2));
9564   ins_pipe( pipe_slow );
9565 %}
9566 
9567 // Compare & branch
9568 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
       // x87 double compare into the unsigned-flags register without the P6
       // FUCOMIP instruction (higher cost than the P6 forms). Per the format:
       // FCOMP, copy the FPU status word to AX, treat an unordered result as
       // "less than", then SAHF to load the flags. rax is killed because the
       // status word goes through AX.
9569   predicate(UseSSE<=1);
9570   match(Set cr (CmpD src1 src2));
9571   effect(KILL rax);
9572   ins_cost(200);
9573   format %{ "FLD    $src1\n\t"
9574             "FCOMp  $src2\n\t"
9575             "FNSTSW AX\n\t"
9576             "TEST   AX,0x400\n\t"
9577             "JZ,s   flags\n\t"
9578             "MOV    AH,1\t# unordered treat as LT\n"
9579     "flags:\tSAHF" %}
9580   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9581   ins_encode( Push_Reg_DPR(src1),
9582               OpcP, RegOpc(src2),
9583               fpu_flags);
9584   ins_pipe( pipe_slow );
9585 %}
9586 
9587 // Compare vs zero into -1,0,1
9588 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
       // x87 three-way compare (CmpD3) of a double against the constant
       // zero, yielding -1/0/1 in the integer register dst. src1 is pushed,
       // the two opcode bytes are emitted secondary-first (OpcS then OpcP,
       // i.e. 0xD9 0xE4), the FPU stack is popped, and CmpF_Result
       // materializes the integer result. Kills rax and the flags.
9589   predicate(UseSSE<=1);
9590   match(Set dst (CmpD3 src1 zero));
9591   effect(KILL cr, KILL rax);
9592   ins_cost(280);
9593   format %{ "FTSTD  $dst,$src1" %}
9594   opcode(0xE4, 0xD9);
9595   ins_encode( Push_Reg_DPR(src1),
9596               OpcS, OpcP, PopFPU,
9597               CmpF_Result(dst));
9598   ins_pipe( pipe_slow );
9599 %}
9600 
9601 // Compare into -1,0,1
9602 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
       // x87 three-way compare (CmpD3) of two doubles, yielding -1/0/1 in
       // the integer register dst. src1 is pushed and compared with src2
       // (D8 /3), then CmpF_Result materializes the integer result. Kills
       // rax and the flags.
9603   predicate(UseSSE<=1);
9604   match(Set dst (CmpD3 src1 src2));
9605   effect(KILL cr, KILL rax);
9606   ins_cost(300);
9607   format %{ "FCMPD  $dst,$src1,$src2" %}
9608   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9609   ins_encode( Push_Reg_DPR(src1),
9610               OpcP, RegOpc(src2),
9611               CmpF_Result(dst));
9612   ins_pipe( pipe_slow );
9613 %}
9614 
9615 // double compare and set condition codes in EFLAGS by XMM regs
9616 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
       // SSE2 double compare (UseSSE >= 2) into the unsigned-flags register:
       // UCOMISD followed by emit_cmpfp_fixup. Per the format the fixup only
       // acts when the parity flag signals a NaN, rewriting the flags through
       // PUSHF / AND [rsp] / POPF.
9617   predicate(UseSSE>=2);
9618   match(Set cr (CmpD src1 src2));
9619   ins_cost(145);
9620   format %{ "UCOMISD $src1,$src2\n\t"
9621             "JNP,s   exit\n\t"
9622             "PUSHF\t# saw NaN, set CF\n\t"
9623             "AND     [rsp], #0xffffff2b\n\t"
9624             "POPF\n"
9625     "exit:" %}
9626   ins_encode %{
9627     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9628     emit_cmpfp_fixup(_masm);
9629   %}
9630   ins_pipe( pipe_slow );
9631 %}
9632 
9633 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
       // SSE2 double compare producing an eFlagsRegUCF result: a bare
       // UCOMISD with no NaN fixup, at lower cost than the eFlagsRegU form.
9634   predicate(UseSSE>=2);
9635   match(Set cr (CmpD src1 src2));
9636   ins_cost(100);
9637   format %{ "UCOMISD $src1,$src2" %}
9638   ins_encode %{
9639     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9640   %}
9641   ins_pipe( pipe_slow );
9642 %}
9643 
9644 // double compare against memory and set condition codes in EFLAGS by XMM regs
9645 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
       // SSE2 double compare with the second operand loaded from memory
       // (LoadD folded into UCOMISD), followed by the same NaN fixup as the
       // register-register eFlagsRegU form.
9646   predicate(UseSSE>=2);
9647   match(Set cr (CmpD src1 (LoadD src2)));
9648   ins_cost(145);
9649   format %{ "UCOMISD $src1,$src2\n\t"
9650             "JNP,s   exit\n\t"
9651             "PUSHF\t# saw NaN, set CF\n\t"
9652             "AND     [rsp], #0xffffff2b\n\t"
9653             "POPF\n"
9654     "exit:" %}
9655   ins_encode %{
9656     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9657     emit_cmpfp_fixup(_masm);
9658   %}
9659   ins_pipe( pipe_slow );
9660 %}
9661 
9662 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
       // SSE2 double compare against a memory operand producing an
       // eFlagsRegUCF result: a bare UCOMISD with no NaN fixup.
9663   predicate(UseSSE>=2);
9664   match(Set cr (CmpD src1 (LoadD src2)));
9665   ins_cost(100);
9666   format %{ "UCOMISD $src1,$src2" %}
9667   ins_encode %{
9668     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9669   %}
9670   ins_pipe( pipe_slow );
9671 %}
9672 
9673 // Compare into -1,0,1 in XMM
9674 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
       // SSE2 three-way compare (CmpD3) of two doubles, yielding -1/0/1 in
       // dst. UCOMISD sets the flags and emit_cmpfp3 turns them into the
       // integer; per the format, dst is preset to -1 and kept for the
       // unordered (JP) and below (JB) cases, otherwise SETNE/MOVZB produce
       // 0 or 1. Kills the flags.
9675   predicate(UseSSE>=2);
9676   match(Set dst (CmpD3 src1 src2));
9677   effect(KILL cr);
9678   ins_cost(255);
9679   format %{ "UCOMISD $src1, $src2\n\t"
9680             "MOV     $dst, #-1\n\t"
9681             "JP,s    done\n\t"
9682             "JB,s    done\n\t"
9683             "SETNE   $dst\n\t"
9684             "MOVZB   $dst, $dst\n"
9685     "done:" %}
9686   ins_encode %{
9687     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9688     emit_cmpfp3(_masm, $dst$$Register);
9689   %}
9690   ins_pipe( pipe_slow );
9691 %}
9692 
9693 // Compare into -1,0,1 in XMM and memory
9694 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
       // SSE2 three-way compare (CmpD3) of a double register against a
       // double loaded from memory, yielding -1/0/1 in dst. Same sequence as
       // the register-register form, with the LoadD folded into UCOMISD.
9695   predicate(UseSSE>=2);
9696   match(Set dst (CmpD3 src1 (LoadD src2)));
9697   effect(KILL cr);
9698   ins_cost(275);
9699   format %{ "UCOMISD $src1, $src2\n\t"
9700             "MOV     $dst, #-1\n\t"
9701             "JP,s    done\n\t"
9702             "JB,s    done\n\t"
9703             "SETNE   $dst\n\t"
9704             "MOVZB   $dst, $dst\n"
9705     "done:" %}
9706   ins_encode %{
9707     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9708     emit_cmpfp3(_masm, $dst$$Register);
9709   %}
9710   ins_pipe( pipe_slow );
9711 %}
9712 
9713 
9714 instruct subDPR_reg(regDPR dst, regDPR src) %{
       // x87 double subtract, dst = dst - src (UseSSE <= 1): src is pushed
       // onto the FPU stack and the popping subtract (DE /5) combines it
       // into dst.
9715   predicate (UseSSE <=1);
9716   match(Set dst (SubD dst src));
9717 
9718   format %{ "FLD    $src\n\t"
9719             "DSUBp  $dst,ST" %}
9720   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9721   ins_cost(150);
9722   ins_encode( Push_Reg_DPR(src),
9723               OpcP, RegOpc(dst) );
9724   ins_pipe( fpu_reg_reg );
9725 %}
9726 
9727 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
       // x87 double subtract with rounding to double: matches
       // RoundDouble(src1 - src2). src2 is pushed, combined with src1 on the
       // stack top (D8 /5), and the result is stored-and-popped to the stack
       // slot dst, which performs the rounding ("D-round" in the format).
9728   predicate (UseSSE <=1);
9729   match(Set dst (RoundDouble (SubD src1 src2)));
9730   ins_cost(250);
9731 
9732   format %{ "FLD    $src2\n\t"
9733             "DSUB   ST,$src1\n\t"
9734             "FSTP_D $dst\t# D-round" %}
9735   opcode(0xD8, 0x5);
9736   ins_encode( Push_Reg_DPR(src2),
9737               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9738   ins_pipe( fpu_mem_reg_reg );
9739 %}
9740 
9741 
9742 instruct subDPR_reg_mem(regDPR dst, memory src) %{
       // x87 double subtract of a value loaded from memory, dst = dst -
       // LoadD(src): the tertiary opcode 0xDD with /0 loads the double onto
       // the FPU stack, then the popping subtract (DE /5) combines it into
       // dst.
9743   predicate (UseSSE <=1);
9744   match(Set dst (SubD dst (LoadD src)));
9745   ins_cost(150);
9746 
9747   format %{ "FLD    $src\n\t"
9748             "DSUBp  $dst,ST" %}
9749   opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
9750   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9751               OpcP, RegOpc(dst) );
9752   ins_pipe( fpu_reg_mem );
9753 %}
9754 
9755 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
       // x87 double absolute value (UseSSE <= 1). Both operands use the
       // regDPR1 class, so the operation is done in place; the two opcode
       // bytes are emitted secondary-first (OpcS then OpcP, i.e. 0xD9 0xE1).
9756   predicate (UseSSE<=1);
9757   match(Set dst (AbsD src));
9758   ins_cost(100);
9759   format %{ "FABS" %}
9760   opcode(0xE1, 0xD9);
9761   ins_encode( OpcS, OpcP );
9762   ins_pipe( fpu_reg_reg );
9763 %}
9764 
9765 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
       // x87 double negate (UseSSE <= 1). Both operands use the regDPR1
       // class, so the operation is done in place; the two opcode bytes are
       // emitted secondary-first (OpcS then OpcP, i.e. 0xD9 0xE0).
9766   predicate(UseSSE<=1);
9767   match(Set dst (NegD src));
9768   ins_cost(100);
9769   format %{ "FCHS" %}
9770   opcode(0xE0, 0xD9);
9771   ins_encode( OpcS, OpcP );
9772   ins_pipe( fpu_reg_reg );
9773 %}
9774 
9775 instruct addDPR_reg(regDPR dst, regDPR src) %{
       // x87 double add, dst = dst + src (UseSSE <= 1): src is pushed onto
       // the FPU stack and DE /0 combines it into dst. The instruction size
       // is declared as 4 bytes.
       // NOTE(review): the sibling sub/mul forms that use primary opcode 0xDE
       // print a popping mnemonic ("DSUBp"/"DMULp") while this format prints
       // "DADD" -- the listing text looks inconsistent; confirm before
       // changing the string.
9776   predicate(UseSSE<=1);
9777   match(Set dst (AddD dst src));
9778   format %{ "FLD    $src\n\t"
9779             "DADD   $dst,ST" %}
9780   size(4);
9781   ins_cost(150);
9782   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9783   ins_encode( Push_Reg_DPR(src),
9784               OpcP, RegOpc(dst) );
9785   ins_pipe( fpu_reg_reg );
9786 %}
9787 
9788 
9789 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
       // x87 double add with rounding to double: matches
       // RoundDouble(src1 + src2). src2 is pushed, src1 is added to the stack
       // top (D8 /0), and the result is stored-and-popped to the stack slot
       // dst, which performs the rounding ("D-round" in the format).
9790   predicate(UseSSE<=1);
9791   match(Set dst (RoundDouble (AddD src1 src2)));
9792   ins_cost(250);
9793 
9794   format %{ "FLD    $src2\n\t"
9795             "DADD   ST,$src1\n\t"
9796             "FSTP_D $dst\t# D-round" %}
9797   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9798   ins_encode( Push_Reg_DPR(src2),
9799               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9800   ins_pipe( fpu_mem_reg_reg );
9801 %}
9802 
9803 
9804 instruct addDPR_reg_mem(regDPR dst, memory src) %{
       // x87 double add of a value loaded from memory, dst = dst +
       // LoadD(src): the tertiary opcode 0xDD with /0 loads the double onto
       // the FPU stack, then the popping add (DE C0+i) combines it into dst.
9805   predicate(UseSSE<=1);
9806   match(Set dst (AddD dst (LoadD src)));
9807   ins_cost(150);
9808 
9809   format %{ "FLD    $src\n\t"
9810             "DADDp  $dst,ST" %}
9811   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9812   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9813               OpcP, RegOpc(dst) );
9814   ins_pipe( fpu_reg_mem );
9815 %}
9816 
9817 // add-to-memory
9818 instruct addDPR_mem_reg(memory dst, regDPR src) %{
       // x87 add-to-memory for doubles: matches a StoreD back to dst of
       // RoundDouble(LoadD(dst) + src). The encoding uses explicit opcode
       // bytes: DD /0 loads dst, D8 + RegOpc(src) adds src to the stack top,
       // and DD /3 stores the result back to dst. set_instruction_start is
       // emitted before the store (its purpose is defined outside this
       // chunk).
       // NOTE(review): the store is encoded as DD /3 while the format prints
       // "FST_D" (non-popping mnemonic) -- confirm the listing text.
9819   predicate(UseSSE<=1);
9820   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9821   ins_cost(150);
9822 
9823   format %{ "FLD_D  $dst\n\t"
9824             "DADD   ST,$src\n\t"
9825             "FST_D  $dst" %}
9826   opcode(0xDD, 0x0);
9827   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9828               Opcode(0xD8), RegOpc(src),
9829               set_instruction_start,
9830               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9831   ins_pipe( fpu_reg_mem );
9832 %}
9833 
9834 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
       // x87 double add of the constant 1.0 (immDPR1): FLD1 pushes the
       // constant without a memory load, then faddp adds it into dst and
       // pops. Cheaper (cost 125) than the generic constant-table form.
9835   predicate(UseSSE<=1);
9836   match(Set dst (AddD dst con));
9837   ins_cost(125);
9838   format %{ "FLD1\n\t"
9839             "DADDp  $dst,ST" %}
9840   ins_encode %{
9841     __ fld1();
9842     __ faddp($dst$$reg);
9843   %}
9844   ins_pipe(fpu_reg);
9845 %}
9846 
9847 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
       // x87 double add of an arbitrary double constant: the constant is
       // loaded from the constant table with fld_d, then faddp adds it into
       // dst and pops. The predicate inspects the constant operand
       // (_kids[1]) and rejects 0.0 and 1.0, leaving those values to other
       // rules.
9848   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9849   match(Set dst (AddD dst con));
9850   ins_cost(200);
9851   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9852             "DADDp  $dst,ST" %}
9853   ins_encode %{
9854     __ fld_d($constantaddress($con));
9855     __ faddp($dst$$reg);
9856   %}
9857   ins_pipe(fpu_reg_mem);
9858 %}
9859 
9860 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
       // x87 double add of a constant with rounding to double: matches
       // RoundDouble(src + con). The constant is loaded from the constant
       // table, src is added to the stack top, and the result is
       // stored-and-popped to the stack slot dst (addressed off rsp). The
       // predicate reaches the constant through _kids[0]->_kids[1] because
       // the AddD sits underneath the RoundDouble node; 0.0 and 1.0 are
       // rejected.
9861   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9862   match(Set dst (RoundDouble (AddD src con)));
9863   ins_cost(200);
9864   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9865             "DADD   ST,$src\n\t"
9866             "FSTP_D $dst\t# D-round" %}
9867   ins_encode %{
9868     __ fld_d($constantaddress($con));
9869     __ fadd($src$$reg);
9870     __ fstp_d(Address(rsp, $dst$$disp));
9871   %}
9872   ins_pipe(fpu_mem_reg_con);
9873 %}
9874 
// x87 double multiply, two-address form (UseSSE<=1): matches (MulD dst src).
// Per the format: push $src onto the FPU stack, then multiply it into $dst and
// pop (primary opcode 0xDE, secondary /1).
9875 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9876   predicate(UseSSE<=1);
9877   match(Set dst (MulD dst src));
9878   format %{ "FLD    $src\n\t"
9879             "DMULp  $dst,ST" %}
9880   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9881   ins_cost(150);
9882   ins_encode( Push_Reg_DPR(src),
9883               OpcP, RegOpc(dst) );
9884   ins_pipe( fpu_reg_reg );
9885 %}
9886 
9887 // Strict FP instruction biases argument before multiply then
9888 // biases result to avoid double rounding of subnormals.
9889 //
9890 // scale arg1 by multiplying arg1 by 2^(-15360)
9891 // load arg2
9892 // multiply scaled arg1 by arg2
9893 // rescale product by 2^(15360)
9894 //
// Only available on the x87 path (UseSSE<=1) when the current compilation has
// a method and that method is strict.  dst is restricted to regDPR1 and src to
// regnotDPR1.  The encoding brackets the ordinary push/multiply-pop sequence
// with strictfp_bias1 and strictfp_bias2 (enc_classes defined outside this
// section), matching the bias1 / multiply / bias2 steps in the format.
9895 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9896   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9897   match(Set dst (MulD dst src));
9898   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9899 
9900   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9901             "DMULp  $dst,ST\n\t"
9902             "FLD    $src\n\t"
9903             "DMULp  $dst,ST\n\t"
9904             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9905             "DMULp  $dst,ST\n\t" %}
9906   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9907   ins_encode( strictfp_bias1(dst),
9908               Push_Reg_DPR(src),
9909               OpcP, RegOpc(dst),
9910               strictfp_bias2(dst) );
9911   ins_pipe( fpu_reg_reg );
9912 %}
9913 
// x87 double multiply by a constant (UseSSE<=1): matches (MulD dst con).
// The predicate rejects the constants 0.0 and 1.0 (_kids[1] is the constant
// operand).  The constant is loaded from the constant table with FLD_D and
// multiplied into $dst with FMULP.
9914 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9915   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9916   match(Set dst (MulD dst con));
9917   ins_cost(200);
9918   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9919             "DMULp  $dst,ST" %}
9920   ins_encode %{
9921     __ fld_d($constantaddress($con));
9922     __ fmulp($dst$$reg);
9923   %}
9924   ins_pipe(fpu_reg_mem);
9925 %}
9926 
9927 
// x87 double multiply with a memory operand (UseSSE<=1): matches
// (MulD dst (LoadD src)).  The tertiary opcode 0xDD with /0 loads the double
// at $src onto the FPU stack; primary 0xDE with secondary /1 then multiplies
// it into $dst and pops, as the format string shows.
9928 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9929   predicate( UseSSE<=1 );
9930   match(Set dst (MulD dst (LoadD src)));
9931   ins_cost(200);
9932   format %{ "FLD_D  $src\n\t"
9933             "DMULp  $dst,ST" %}
9934   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9935   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9936               OpcP, RegOpc(dst) );
9937   ins_pipe( fpu_reg_mem );
9938 %}
9939 
9940 //
9941 // Cisc-alternate to reg-reg multiply
// Three-operand double multiply with a memory source on the x87 path
// (UseSSE<=1): matches (MulD src (LoadD mem)), so the register input is not
// tied to $dst.  Per the format: load the double at $mem onto the FPU stack,
// multiply ST by $src, and pop the product into $dst.
// The tertiary (load) opcode must be 0xDD: "DD /0" is FLD m64fp, whereas
// "D9 /0" is FLD m32fp and would read only a single-precision float from a
// location that holds a double.
9942 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9943   predicate( UseSSE<=1 );
9944   match(Set dst (MulD src (LoadD mem)));
9945   ins_cost(250);
9946   format %{ "FLD_D  $mem\n\t"
9947             "DMUL   ST,$src\n\t"
9948             "FSTP_D $dst" %}
9949   opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */  /* LoadD DD /0 */
9950   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9951               OpcReg_FPR(src),
9952               Pop_Reg_DPR(dst) );
9953   ins_pipe( fpu_reg_reg_mem );
9954 %}
9955 
9956 
9957 // MACRO3 -- addDPR a mulDPR
9958 // This instruction is a '2-address' instruction in that the result goes
9959 // back to src2.  This eliminates a move from the macro; possibly the
9960 // register allocator will have to add it back (and maybe not).
// Fused multiply-then-add pattern for the x87 double path (UseSSE<=1):
// matches (AddD (MulD src0 src1) src2) and writes the result back into src2.
// Per the format: push $src0, multiply ST by $src1, then add-and-pop into $src2.
9961 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9962   predicate( UseSSE<=1 );
9963   match(Set src2 (AddD (MulD src0 src1) src2));
9964   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9965             "DMUL   ST,$src1\n\t"
9966             "DADDp  $src2,ST" %}
9967   ins_cost(250);
9968   // NOTE(review): ins_encode below names no OpcP/OpcS/Opcode(...) step, so this
9969   // opcode() value looks unused -- confirm against the enc_class definitions.
9968   opcode(0xDD); /* LoadD DD /0 */
9969   ins_encode( Push_Reg_FPR(src0),
9970               FMul_ST_reg(src1),
9971               FAddP_reg_ST(src2) );
9972   ins_pipe( fpu_reg_reg_reg );
9973 %}
9974 
9975 
9976 // MACRO3 -- subDPR a mulDPR
// Fused multiply-then-subtract pattern for the x87 double path (UseSSE<=1):
// matches (SubD (MulD src0 src1) src2), i.e. src0*src1 - src2, with the result
// written back into src2.  The last encoding step emits 0xDE followed by
// 0xE0+$src2 (Opc_plus), which the format labels DSUBRp.
9977 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9978   predicate( UseSSE<=1 );
9979   match(Set src2 (SubD (MulD src0 src1) src2));
9980   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9981             "DMUL   ST,$src1\n\t"
9982             "DSUBRp $src2,ST" %}
9983   ins_cost(250);
9984   ins_encode( Push_Reg_FPR(src0),
9985               FMul_ST_reg(src1),
9986               Opcode(0xDE), Opc_plus(0xE0,src2));
9987   ins_pipe( fpu_reg_reg_reg );
9988 %}
9989 
9990 
// x87 double divide, two-address form (UseSSE<=1): matches (DivD dst src).
// Per the format: push $src, then divide $dst by ST and pop (primary opcode
// 0xDE, secondary /7).
9991 instruct divDPR_reg(regDPR dst, regDPR src) %{
9992   predicate( UseSSE<=1 );
9993   match(Set dst (DivD dst src));
9994 
9995   format %{ "FLD    $src\n\t"
9996             "FDIVp  $dst,ST" %}
9997   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9998   ins_cost(150);
9999   ins_encode( Push_Reg_DPR(src),
10000               OpcP, RegOpc(dst) );
10001   ins_pipe( fpu_reg_reg );
10002 %}
10003 
10004 // Strict FP instruction biases argument before division then
10005 // biases result, to avoid double rounding of subnormals.
10006 //
10007 // scale dividend by multiplying dividend by 2^(-15360)
10008 // load divisor
10009 // divide scaled dividend by divisor
10010 // rescale quotient by 2^(15360)
10011 //
// Only available on the x87 path (UseSSE<=1) when the current compilation has
// a method and that method is strict.  dst is restricted to regDPR1 and src to
// regnotDPR1.  The rule carries exactly one predicate: an earlier, weaker
// "UseSSE<=1" clause was redundant because the strict-FP predicate already
// includes that test, and two predicate clauses on one instruct are misleading.
// The encoding brackets the ordinary push/divide-pop sequence with
// strictfp_bias1 and strictfp_bias2 (enc_classes defined outside this section).
10012 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
10013   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10014   match(Set dst (DivD dst src));
10016   ins_cost(1);   // Select this instruction for all strict FP double divides
10017 
10018   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
10019             "DMULp  $dst,ST\n\t"
10020             "FLD    $src\n\t"
10021             "FDIVp  $dst,ST\n\t"
10022             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
10023             "DMULp  $dst,ST\n\t" %}
10024   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10025   ins_encode( strictfp_bias1(dst),
10026               Push_Reg_DPR(src),
10027               OpcP, RegOpc(dst),
10028               strictfp_bias2(dst) );
10029   ins_pipe( fpu_reg_reg );
10030 %}
10031 
// Non-strict double divide whose result is rounded through memory: matches
// RoundDouble(DivD src1 src2) with the result in a double stack slot.
// The predicate negates the strict-FP check, so this rule applies only when
// there is no current method or the method is not strict.
// Per the format: FLD $src1, FDIV ST,$src2, then FSTP_D into $dst.
10032 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
10033   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10034   match(Set dst (RoundDouble (DivD src1 src2)));
10035 
10036   format %{ "FLD    $src1\n\t"
10037             "FDIV   ST,$src2\n\t"
10038             "FSTP_D $dst\t# D-round" %}
10039   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10040   ins_encode( Push_Reg_DPR(src1),
10041               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
10042   ins_pipe( fpu_mem_reg_reg );
10043 %}
10044 
10045 
// x87 double remainder, two-address form (UseSSE<=1): matches (ModD dst src).
// EAX and EFLAGS are declared killed because emitModDPR() uses them.
// The encoding chains Push_Reg_Mod_DPR, emitModDPR, Push_Result_Mod_DPR and
// Pop_Reg_DPR; those enc_classes are defined outside this section.
10046 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
10047   predicate(UseSSE<=1);
10048   match(Set dst (ModD dst src));
10049   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10050 
10051   format %{ "DMOD   $dst,$src" %}
10052   ins_cost(250);
10053   ins_encode(Push_Reg_Mod_DPR(dst, src),
10054               emitModDPR(),
10055               Push_Result_Mod_DPR(src),
10056               Pop_Reg_DPR(dst));
10057   ins_pipe( pipe_slow );
10058 %}
10059 
// Double remainder for XMM operands (UseSSE>=2): matches (ModD src0 src1).
// As the format shows, both operands are bounced through an 8-byte stack
// temporary onto the x87 stack, FPREM is repeated while the parity flag is set
// (status fetched via FNSTSW AX / SAHF, hence the EAX and EFLAGS kills), the
// result is moved back into the XMM $dst, and the remaining x87 entry is
// popped to restore the FPU stack.
10060 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
10061   predicate(UseSSE>=2);
10062   match(Set dst (ModD src0 src1));
10063   effect(KILL rax, KILL cr);
10064 
10065   format %{ "SUB    ESP,8\t # DMOD\n"
10066           "\tMOVSD  [ESP+0],$src1\n"
10067           "\tFLD_D  [ESP+0]\n"
10068           "\tMOVSD  [ESP+0],$src0\n"
10069           "\tFLD_D  [ESP+0]\n"
10070      "loop:\tFPREM\n"
10071           "\tFWAIT\n"
10072           "\tFNSTSW AX\n"
10073           "\tSAHF\n"
10074           "\tJP     loop\n"
10075           "\tFSTP_D [ESP+0]\n"
10076           "\tMOVSD  $dst,[ESP+0]\n"
10077           "\tADD    ESP,8\n"
10078           "\tFSTP   ST0\t # Restore FPU Stack"
10079     %}
10080   ins_cost(250);
10081   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10082   ins_pipe( pipe_slow );
10083 %}
10084 
// x87 two-operand arctangent, two-address form (UseSSE<=1): matches
// (AtanD dst src).  The encoding pushes $src, emits the opcode bytes
// 0xD9 0xF3 (OpcP then OpcS), and finishes with RegOpc(dst).
10085 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10086   predicate (UseSSE<=1);
10087   match(Set dst(AtanD dst src));
10088   format %{ "DATA   $dst,$src" %}
10089   opcode(0xD9, 0xF3);
10090   ins_encode( Push_Reg_DPR(src),
10091               OpcP, OpcS, RegOpc(dst) );
10092   ins_pipe( pipe_slow );
10093 %}
10094 
// Two-operand arctangent for XMM operands (UseSSE>=2): matches
// (AtanD dst src).  The operand is transferred with Push_SrcD, the opcode
// bytes 0xD9 0xF3 are emitted (OpcP then OpcS), and Push_ResultD moves the
// result into $dst.  EFLAGS is killed because the Push_* helpers adjust ESP
// with SUB/ADD.
10095 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10096   predicate (UseSSE>=2);
10097   match(Set dst(AtanD dst src));
10098   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10099   format %{ "DATA   $dst,$src" %}
10100   opcode(0xD9, 0xF3);
10101   ins_encode( Push_SrcD(src),
10102               OpcP, OpcS, Push_ResultD(dst) );
10103   ins_pipe( pipe_slow );
10104 %}
10105 
// x87 double square root (UseSSE<=1): matches (SqrtD src).
// opcode() lists the two bytes reversed (0xFA primary, 0xD9 secondary) and
// ins_encode emits OpcS before OpcP, so -- assuming OpcS/OpcP emit the
// secondary/primary bytes -- the code stream gets 0xD9 0xFA.  $src is pushed
// first and the result is popped into $dst.
10106 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10107   predicate (UseSSE<=1);
10108   match(Set dst (SqrtD src));
10109   format %{ "DSQRT  $dst,$src" %}
10110   opcode(0xFA, 0xD9);
10111   ins_encode( Push_Reg_DPR(src),
10112               OpcS, OpcP, Pop_Reg_DPR(dst) );
10113   ins_pipe( pipe_slow );
10114 %}
10115 
10116 //-------------Float Instructions-------------------------------
10117 // Float Math
10118 
10119 // Code for float compare:
10120 //     fcompp();
10121 //     fwait(); fnstsw_ax();
10122 //     sahf();
10123 //     movl(dst, unordered_result);
10124 //     jcc(Assembler::parity, exit);
10125 //     movl(dst, less_result);
10126 //     jcc(Assembler::below, exit);
10127 //     movl(dst, equal_result);
10128 //     jcc(Assembler::equal, exit);
10129 //     movl(dst, greater_result);
10130 //   exit:
10131 
10132 // P6 version of float compare, sets condition codes in EFLAGS
// Requires CMOV support and UseSSE == 0.  Matches (CmpF src1 src2) into
// eFlagsRegU.  Per the format: push $src1, FUCOMIP against $src2, and when the
// parity flag reports a NaN, set CF through AH/SAHF so that unordered is
// treated as "less than".  EAX is killed by that fixup (cmpF_P6_fixup).
10133 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10134   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10135   match(Set cr (CmpF src1 src2));
10136   effect(KILL rax);
10137   ins_cost(150);
10138   format %{ "FLD    $src1\n\t"
10139             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10140             "JNP    exit\n\t"
10141             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10142             "SAHF\n"
10143      "exit:\tNOP               // avoid branch to branch" %}
10144   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10145   ins_encode( Push_Reg_DPR(src1),
10146               OpcP, RegOpc(src2),
10147               cmpF_P6_fixup );
10148   ins_pipe( pipe_slow );
10149 %}
10150 
// Cheaper P6 float compare producing eFlagsRegUCF (CMOV support required,
// UseSSE == 0).  Same FLD / FUCOMIP sequence as cmpFPR_cc_P6 but with no NaN
// fixup step, so EAX is not killed and the cost drops to 100.
10151 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10152   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10153   match(Set cr (CmpF src1 src2));
10154   ins_cost(100);
10155   format %{ "FLD    $src1\n\t"
10156             "FUCOMIP ST,$src2  // P6 instruction" %}
10157   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10158   ins_encode( Push_Reg_DPR(src1),
10159               OpcP, RegOpc(src2));
10160   ins_pipe( pipe_slow );
10161 %}
10162 
10163 
10164 // Compare & branch
// Generic x87 float compare that sets EFLAGS (UseSSE == 0, no CMOV
// requirement): matches (CmpF src1 src2).  Per the format: FLD / FCOMp, copy
// the FPU status word to AX, and when bit 0x400 is set load AH with 1 so the
// unordered case is treated as "less than" before SAHF transfers the flags.
// EAX is killed by this sequence.
10165 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10166   predicate(UseSSE == 0);
10167   match(Set cr (CmpF src1 src2));
10168   effect(KILL rax);
10169   ins_cost(200);
10170   format %{ "FLD    $src1\n\t"
10171             "FCOMp  $src2\n\t"
10172             "FNSTSW AX\n\t"
10173             "TEST   AX,0x400\n\t"
10174             "JZ,s   flags\n\t"
10175             "MOV    AH,1\t# unordered treat as LT\n"
10176     "flags:\tSAHF" %}
10177   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10178   ins_encode( Push_Reg_DPR(src1),
10179               OpcP, RegOpc(src2),
10180               fpu_flags);
10181   ins_pipe( pipe_slow );
10182 %}
10183 
10184 // Compare vs zero into -1,0,1
// Three-way x87 float compare against a zero constant (UseSSE == 0): matches
// (CmpF3 src1 zero) with the result in an integer register.
// The encoding pushes $src1, emits the bytes 0xD9 0xE4 (OpcS then OpcP, given
// the reversed opcode() order), pops the FPU stack, and lets CmpF_Result write
// $dst.  EAX and EFLAGS are killed.
10185 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10186   predicate(UseSSE == 0);
10187   match(Set dst (CmpF3 src1 zero));
10188   effect(KILL cr, KILL rax);
10189   ins_cost(280);
10190   format %{ "FTSTF  $dst,$src1" %}
10191   opcode(0xE4, 0xD9);
10192   ins_encode( Push_Reg_DPR(src1),
10193               OpcS, OpcP, PopFPU,
10194               CmpF_Result(dst));
10195   ins_pipe( pipe_slow );
10196 %}
10197 
10198 // Compare into -1,0,1
// Three-way x87 float compare of two registers (UseSSE == 0): matches
// (CmpF3 src1 src2) with the result in an integer register.
// The encoding pushes $src1, compares against $src2 (primary 0xD8, secondary
// /3), and lets CmpF_Result write $dst.  EAX and EFLAGS are killed.
10199 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10200   predicate(UseSSE == 0);
10201   match(Set dst (CmpF3 src1 src2));
10202   effect(KILL cr, KILL rax);
10203   ins_cost(300);
10204   format %{ "FCMPF  $dst,$src1,$src2" %}
10205   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10206   ins_encode( Push_Reg_DPR(src1),
10207               OpcP, RegOpc(src2),
10208               CmpF_Result(dst));
10209   ins_pipe( pipe_slow );
10210 %}
10211 
10212 // float compare and set condition codes in EFLAGS by XMM regs
// Requires UseSSE>=1.  Matches (CmpF src1 src2) into eFlagsRegU using UCOMISS
// on two XMM registers, followed by emit_cmpfp_fixup().  That helper is
// defined outside this section; per the format it is the NaN fixup (skip when
// parity is clear, otherwise rewrite the saved flags via PUSHF/AND/POPF).
10213 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10214   predicate(UseSSE>=1);
10215   match(Set cr (CmpF src1 src2));
10216   ins_cost(145);
10217   format %{ "UCOMISS $src1,$src2\n\t"
10218             "JNP,s   exit\n\t"
10219             "PUSHF\t# saw NaN, set CF\n\t"
10220             "AND     [rsp], #0xffffff2b\n\t"
10221             "POPF\n"
10222     "exit:" %}
10223   ins_encode %{
10224     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10225     emit_cmpfp_fixup(_masm);
10226   %}
10227   ins_pipe( pipe_slow );
10228 %}
10229 
// Cheaper XMM float compare producing eFlagsRegUCF (UseSSE>=1): matches
// (CmpF src1 src2) and emits only UCOMISS, with no NaN fixup.
10230 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10231   predicate(UseSSE>=1);
10232   match(Set cr (CmpF src1 src2));
10233   ins_cost(100);
10234   format %{ "UCOMISS $src1,$src2" %}
10235   ins_encode %{
10236     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10237   %}
10238   ins_pipe( pipe_slow );
10239 %}
10240 
10241 // float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form (UseSSE>=1): matches (CmpF src1 (LoadF src2)) into
// eFlagsRegU, folding the load into UCOMISS.  emit_cmpfp_fixup() (defined
// outside this section) follows; the format shows it as the NaN fixup.
10242 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10243   predicate(UseSSE>=1);
10244   match(Set cr (CmpF src1 (LoadF src2)));
10245   ins_cost(165);
10246   format %{ "UCOMISS $src1,$src2\n\t"
10247             "JNP,s   exit\n\t"
10248             "PUSHF\t# saw NaN, set CF\n\t"
10249             "AND     [rsp], #0xffffff2b\n\t"
10250             "POPF\n"
10251     "exit:" %}
10252   ins_encode %{
10253     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10254     emit_cmpfp_fixup(_masm);
10255   %}
10256   ins_pipe( pipe_slow );
10257 %}
10258 
// Cheaper memory-operand float compare producing eFlagsRegUCF (UseSSE>=1):
// matches (CmpF src1 (LoadF src2)) and emits only UCOMISS with the memory
// operand, with no NaN fixup.
10259 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10260   predicate(UseSSE>=1);
10261   match(Set cr (CmpF src1 (LoadF src2)));
10262   ins_cost(100);
10263   format %{ "UCOMISS $src1,$src2" %}
10264   ins_encode %{
10265     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10266   %}
10267   ins_pipe( pipe_slow );
10268 %}
10269 
10270 // Compare into -1,0,1 in XMM
// Requires UseSSE>=1.  Matches (CmpF3 src1 src2) with the integer result in
// $dst.  UCOMISS is followed by emit_cmpfp3() (defined outside this section);
// per the format, $dst starts at -1 and is kept for the parity (unordered) and
// below cases, otherwise SETNE/MOVZB produce 0 or 1.  EFLAGS is killed.
10271 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10272   predicate(UseSSE>=1);
10273   match(Set dst (CmpF3 src1 src2));
10274   effect(KILL cr);
10275   ins_cost(255);
10276   format %{ "UCOMISS $src1, $src2\n\t"
10277             "MOV     $dst, #-1\n\t"
10278             "JP,s    done\n\t"
10279             "JB,s    done\n\t"
10280             "SETNE   $dst\n\t"
10281             "MOVZB   $dst, $dst\n"
10282     "done:" %}
10283   ins_encode %{
10284     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10285     emit_cmpfp3(_masm, $dst$$Register);
10286   %}
10287   ins_pipe( pipe_slow );
10288 %}
10289 
10290 // Compare into -1,0,1 in XMM and memory
// Memory-operand form (UseSSE>=1): matches (CmpF3 src1 (LoadF src2)), folding
// the load into UCOMISS.  emit_cmpfp3() (defined outside this section) then
// materializes the integer result in $dst as laid out in the format string.
// EFLAGS is killed.
10291 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10292   predicate(UseSSE>=1);
10293   match(Set dst (CmpF3 src1 (LoadF src2)));
10294   effect(KILL cr);
10295   ins_cost(275);
10296   format %{ "UCOMISS $src1, $src2\n\t"
10297             "MOV     $dst, #-1\n\t"
10298             "JP,s    done\n\t"
10299             "JB,s    done\n\t"
10300             "SETNE   $dst\n\t"
10301             "MOVZB   $dst, $dst\n"
10302     "done:" %}
10303   ins_encode %{
10304     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10305     emit_cmpfp3(_masm, $dst$$Register);
10306   %}
10307   ins_pipe( pipe_slow );
10308 %}
10309 
10310 // Spill to obtain 24-bit precision
// x87 float subtract used when UseSSE==0 and select_24_bit_instr() is true:
// matches (SubF src1 src2) with the result in a float stack slot.
// The encoding pushes $src1, subtracts $src2 (primary 0xD8, secondary /4,
// result on top of stack), and pops the result to $dst in memory.
10311 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10312   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10313   match(Set dst (SubF src1 src2));
10314 
10315   format %{ "FSUB   $dst,$src1 - $src2" %}
10316   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10317   ins_encode( Push_Reg_FPR(src1),
10318               OpcReg_FPR(src2),
10319               Pop_Mem_FPR(dst) );
10320   ins_pipe( fpu_mem_reg_reg );
10321 %}
10322 //
10323 // This instruction does not round to 24-bits
// x87 float subtract, two-address form, used when UseSSE==0 and
// select_24_bit_instr() is false: matches (SubF dst src).
// The encoding pushes $src and then subtracts it from $dst with the popping
// form (primary 0xDE, secondary /5).
10324 instruct subFPR_reg(regFPR dst, regFPR src) %{
10325   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10326   match(Set dst (SubF dst src));
10327 
10328   format %{ "FSUB   $dst,$src" %}
10329   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10330   ins_encode( Push_Reg_FPR(src),
10331               OpcP, RegOpc(dst) );
10332   ins_pipe( fpu_reg_reg );
10333 %}
10334 
10335 // Spill to obtain 24-bit precision
// x87 float add used when UseSSE==0 and select_24_bit_instr() is true:
// matches (AddF src1 src2) with the result in a float stack slot.
// The encoding pushes $src2, adds $src1 (primary 0xD8, secondary /0), and pops
// the sum to $dst in memory.
10336 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10337   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10338   match(Set dst (AddF src1 src2));
10339 
10340   format %{ "FADD   $dst,$src1,$src2" %}
10341   opcode(0xD8, 0x0); /* D8 C0+i */
10342   ins_encode( Push_Reg_FPR(src2),
10343               OpcReg_FPR(src1),
10344               Pop_Mem_FPR(dst) );
10345   ins_pipe( fpu_mem_reg_reg );
10346 %}
10347 //
10348 // This instruction does not round to 24-bits
// x87 float add, two-address form, used when UseSSE==0 and
// select_24_bit_instr() is false: matches (AddF dst src).
// Per the format: push $src, then add it into $dst and pop (primary 0xDE,
// secondary /0).
10349 instruct addFPR_reg(regFPR dst, regFPR src) %{
10350   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10351   match(Set dst (AddF dst src));
10352 
10353   format %{ "FLD    $src\n\t"
10354             "FADDp  $dst,ST" %}
10355   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10356   ins_encode( Push_Reg_FPR(src),
10357               OpcP, RegOpc(dst) );
10358   ins_pipe( fpu_reg_reg );
10359 %}
10360 
// x87 float absolute value (UseSSE==0): matches (AbsF src).
// Both operands are of class regFPR1, and the encoding is just the two opcode
// bytes 0xD9 0xE1 (OpcS then OpcP, given the reversed opcode() order), with no
// push or pop around them.
10361 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10362   predicate(UseSSE==0);
10363   match(Set dst (AbsF src));
10364   ins_cost(100);
10365   format %{ "FABS" %}
10366   opcode(0xE1, 0xD9);
10367   ins_encode( OpcS, OpcP );
10368   ins_pipe( fpu_reg_reg );
10369 %}
10370 
// x87 float negate (UseSSE==0): matches (NegF src).
// Both operands are of class regFPR1, and the encoding is just the two opcode
// bytes 0xD9 0xE0 (OpcS then OpcP, given the reversed opcode() order), with no
// push or pop around them.
10371 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10372   predicate(UseSSE==0);
10373   match(Set dst (NegF src));
10374   ins_cost(100);
10375   format %{ "FCHS" %}
10376   opcode(0xE0, 0xD9);
10377   ins_encode( OpcS, OpcP );
10378   ins_pipe( fpu_reg_reg );
10379 %}
10380 
10381 // Cisc-alternate to addFPR_reg
10382 // Spill to obtain 24-bit precision
// Used when UseSSE==0 and select_24_bit_instr() is true: matches
// (AddF src1 (LoadF src2)) with the result in a float stack slot.
// Per the format: load the float at $src2 (tertiary 0xD9 /0), add $src1 into
// ST (primary 0xD8, secondary /0), and store-and-pop to $dst with FSTP_S.
10383 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10384   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10385   match(Set dst (AddF src1 (LoadF src2)));
10386 
10387   format %{ "FLD    $src2\n\t"
10388             "FADD   ST,$src1\n\t"
10389             "FSTP_S $dst" %}
10390   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10391   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10392               OpcReg_FPR(src1),
10393               Pop_Mem_FPR(dst) );
10394   ins_pipe( fpu_mem_reg_mem );
10395 %}
10396 //
10397 // Cisc-alternate to addFPR_reg
10398 // This instruction does not round to 24-bits
// Used when UseSSE==0 and select_24_bit_instr() is false: matches
// (AddF dst (LoadF src)), two-address form.
// The encoding loads the float at $src (tertiary 0xD9 /0) and then adds it
// into $dst with the popping form (primary 0xDE, secondary /0).
10399 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10400   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10401   match(Set dst (AddF dst (LoadF src)));
10402 
10403   format %{ "FADD   $dst,$src" %}
10404   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10405   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10406               OpcP, RegOpc(dst) );
10407   ins_pipe( fpu_reg_mem );
10408 %}
10409 
10410 // Following two instructions for _222_mpegaudio
10411 // Spill to obtain 24-bit precision
// Used when UseSSE==0 and select_24_bit_instr() is true: matches
// (AddF src1 src2) where src1 is a memory operand and src2 an x87 register,
// with the result in a float stack slot.
// The encoding loads the float at $src1 (tertiary 0xD9 /0), adds $src2
// (primary 0xD8, secondary /0), and pops the sum to $dst in memory.
// NOTE(review): src1 is matched as a bare memory operand, without the LoadF
// wrapper that addFPR24_reg_mem uses -- confirm this is intended.
10412 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10413   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10414   match(Set dst (AddF src1 src2));
10415 
10416   format %{ "FADD   $dst,$src1,$src2" %}
10417   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10418   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10419               OpcReg_FPR(src2),
10420               Pop_Mem_FPR(dst) );
10421   ins_pipe( fpu_mem_reg_mem );
10422 %}
10423 
10424 // Cisc-spill variant
10425 // Spill to obtain 24-bit precision
// Used when UseSSE==0 and select_24_bit_instr() is true: matches
// (AddF src1 (LoadF src2)) with both inputs in memory and the result in a
// float stack slot.
// The encoding loads the float at $src2 (tertiary 0xD9 /0), marks a new
// instruction start, adds the memory operand $src1 (primary 0xD8 with the
// secondary value 0 as the /digit), and pops the sum to $dst.
10426 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10427   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10428   match(Set dst (AddF src1 (LoadF src2)));
10429 
10430   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10431   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10432   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10433               set_instruction_start,
10434               OpcP, RMopc_Mem(secondary,src1),
10435               Pop_Mem_FPR(dst) );
10436   ins_pipe( fpu_mem_mem_mem );
10437 %}
10438 
10439 // Spill to obtain 24-bit precision
// Used when UseSSE==0 and select_24_bit_instr() is true: matches
// (AddF src1 src2) with both inputs given as memory operands and the result in
// a float stack slot.
// The encoding loads the float at $src2 (tertiary 0xD9 /0), marks a new
// instruction start, adds the memory operand $src1 (primary 0xD8, /0), and
// pops the sum to $dst.
10440 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10441   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10442   match(Set dst (AddF src1 src2));
10443 
10444   format %{ "FADD   $dst,$src1,$src2" %}
10445   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10446   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10447               set_instruction_start,
10448               OpcP, RMopc_Mem(secondary,src1),
10449               Pop_Mem_FPR(dst) );
10450   ins_pipe( fpu_mem_mem_mem );
10451 %}
10452 
10453 
10454 // Spill to obtain 24-bit precision
// Used when UseSSE==0 and select_24_bit_instr() is true: matches
// (AddF src con) with the result in a float stack slot.
// Pushes $src, adds the float constant straight from the constant table with
// FADD_S, and stores-and-pops the sum to [rsp + $dst$$disp] with FSTP_S.
10455 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10456   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10457   match(Set dst (AddF src con));
10458   format %{ "FLD    $src\n\t"
10459             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10460             "FSTP_S $dst"  %}
10461   ins_encode %{
10462     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10463     __ fadd_s($constantaddress($con));
10464     __ fstp_s(Address(rsp, $dst$$disp));
10465   %}
10466   ins_pipe(fpu_mem_reg_con);
10467 %}
10468 //
10469 // This instruction does not round to 24-bits
// Used when UseSSE==0 and select_24_bit_instr() is false: matches
// (AddF src con) with the result in an x87 register.
// Pushes $src, adds the float constant straight from the constant table with
// FADD_S, and pops the sum into the register $dst (the format shows a plain
// FSTP; the assembler call is the register form of fstp_d).
10470 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10471   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10472   match(Set dst (AddF src con));
10473   format %{ "FLD    $src\n\t"
10474             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10475             "FSTP   $dst"  %}
10476   ins_encode %{
10477     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10478     __ fadd_s($constantaddress($con));
10479     __ fstp_d($dst$$reg);
10480   %}
10481   ins_pipe(fpu_reg_reg_con);
10482 %}
10483 
10484 // Spill to obtain 24-bit precision
// x87 float multiply used when UseSSE==0 and select_24_bit_instr() is true:
// matches (MulF src1 src2) with the result in a float stack slot.
// Per the format: push $src1, multiply by $src2 (primary 0xD8, secondary /1,
// result on top of stack), and store-and-pop to $dst with FSTP_S.
10485 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10486   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10487   match(Set dst (MulF src1 src2));
10488 
10489   format %{ "FLD    $src1\n\t"
10490             "FMUL   $src2\n\t"
10491             "FSTP_S $dst"  %}
10492   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10493   ins_encode( Push_Reg_FPR(src1),
10494               OpcReg_FPR(src2),
10495               Pop_Mem_FPR(dst) );
10496   ins_pipe( fpu_mem_reg_reg );
10497 %}
10498 //
10499 // This instruction does not round to 24-bits
// x87 float multiply used when UseSSE==0 and select_24_bit_instr() is false:
// matches (MulF src1 src2) with the result in an x87 register.
// The encoding pushes $src2, multiplies ST by $src1 (primary 0xD8, secondary
// /1), and pops the product into the register $dst.  The format string mirrors
// that order and uses the register-pop mnemonic FSTP (as the other
// non-24-bit rules in this file do) rather than the memory store FSTP_S.
10500 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10501   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10502   match(Set dst (MulF src1 src2));
10503 
10504   format %{ "FLD    $src2\n\t"
10505             "FMUL   $src1\n\t"
10506             "FSTP   $dst"  %}
10507   opcode(0xD8, 0x1); /* D8 C8+i */
10508   ins_encode( Push_Reg_FPR(src2),
10509               OpcReg_FPR(src1),
10510               Pop_Reg_FPR(dst) );
10511   ins_pipe( fpu_reg_reg_reg );
10512 %}
10513 
10514 
10515 // Spill to obtain 24-bit precision
10516 // Cisc-alternate to reg-reg multiply
// Used when UseSSE==0 and select_24_bit_instr() is true: matches
// (MulF src1 (LoadF src2)) with the result in a float stack slot.
// Per the format: load the float at $src2 (tertiary 0xD9 /0), multiply by
// $src1 (primary 0xD8, secondary /1), and store-and-pop to $dst with FSTP_S.
10517 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10518   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10519   match(Set dst (MulF src1 (LoadF src2)));
10520 
10521   format %{ "FLD_S  $src2\n\t"
10522             "FMUL   $src1\n\t"
10523             "FSTP_S $dst"  %}
10524   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10525   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10526               OpcReg_FPR(src1),
10527               Pop_Mem_FPR(dst) );
10528   ins_pipe( fpu_mem_reg_mem );
10529 %}
10530 //
10531 // This instruction does not round to 24-bits
10532 // Cisc-alternate to reg-reg multiply
// Used when UseSSE==0 and select_24_bit_instr() is false: matches
// (MulF src1 (LoadF src2)) with the result in an x87 register.
// The encoding loads the float at $src2 (tertiary 0xD9 /0), multiplies by
// $src1 (primary 0xD8, secondary /1), and pops the product into $dst.
10533 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10534   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10535   match(Set dst (MulF src1 (LoadF src2)));
10536 
10537   format %{ "FMUL   $dst,$src1,$src2" %}
10538   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10539   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10540               OpcReg_FPR(src1),
10541               Pop_Reg_FPR(dst) );
10542   ins_pipe( fpu_reg_reg_mem );
10543 %}
10544 
10545 // Spill to obtain 24-bit precision
// Used when UseSSE==0 and select_24_bit_instr() is true: matches
// (MulF src1 src2) with both inputs given as memory operands and the result in
// a float stack slot.
// The encoding loads the float at $src2 (tertiary 0xD9 /0), marks a new
// instruction start, multiplies by the memory operand $src1 (primary 0xD8,
// /1), and pops the product to $dst.
10546 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10547   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10548   match(Set dst (MulF src1 src2));
10549 
10550   format %{ "FMUL   $dst,$src1,$src2" %}
10551   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10552   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10553               set_instruction_start,
10554               OpcP, RMopc_Mem(secondary,src1),
10555               Pop_Mem_FPR(dst) );
10556   ins_pipe( fpu_mem_mem_mem );
10557 %}
10558 
10559 // Spill to obtain 24-bit precision
// Used when UseSSE==0 and select_24_bit_instr() is true: matches
// (MulF src con) with the result in a float stack slot.
// Pushes $src, multiplies by the float constant straight from the constant
// table with FMUL_S, and stores-and-pops to [rsp + $dst$$disp] with FSTP_S.
10560 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10561   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10562   match(Set dst (MulF src con));
10563 
10564   format %{ "FLD    $src\n\t"
10565             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10566             "FSTP_S $dst"  %}
10567   ins_encode %{
10568     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10569     __ fmul_s($constantaddress($con));
10570     __ fstp_s(Address(rsp, $dst$$disp));
10571   %}
10572   ins_pipe(fpu_mem_reg_con);
10573 %}
10574 //
10575 // This instruction does not round to 24-bits
// Used when UseSSE==0 and select_24_bit_instr() is false: matches
// (MulF src con) with the result in an x87 register.
// Pushes $src, multiplies by the float constant straight from the constant
// table with FMUL_S, and pops the product into the register $dst (the format
// shows a plain FSTP; the assembler call is the register form of fstp_d).
10576 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10577   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10578   match(Set dst (MulF src con));
10579 
10580   format %{ "FLD    $src\n\t"
10581             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10582             "FSTP   $dst"  %}
10583   ins_encode %{
10584     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10585     __ fmul_s($constantaddress($con));
10586     __ fstp_d($dst$$reg);
10587   %}
10588   ins_pipe(fpu_reg_reg_con);
10589 %}
10590 
10591 
10592 //
10593 // MACRO1 -- subsume unshared load into mulFPR
10594 // This instruction does not round to 24-bits
// Used when UseSSE==0 and select_24_bit_instr() is false: matches
// (MulF (LoadF mem1) src), i.e. the load is the left input of the multiply.
// Per the format: load the float at $mem1 (tertiary 0xD9 /0), multiply ST by
// $src (primary 0xD8, secondary /1), and pop the product into $dst.
10595 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10596   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10597   match(Set dst (MulF (LoadF mem1) src));
10598 
10599   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10600             "FMUL   ST,$src\n\t"
10601             "FSTP   $dst" %}
10602   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10603   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10604               OpcReg_FPR(src),
10605               Pop_Reg_FPR(dst) );
10606   ins_pipe( fpu_reg_reg_mem );
10607 %}
10608 //
10609 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10610 // This instruction does not round to 24-bits
// Used when UseSSE==0 and select_24_bit_instr() is false: matches
// (AddF (MulF (LoadF mem1) src1) src2), i.e. mem1*src1 + src2, with the
// result in an x87 register.  The explicit cost of 95 is set on this rule.
// Per the format: load the float at $mem1 (primary 0xD9 /0), multiply ST by
// $src1, add $src2 to ST, and pop the result into $dst.
10611 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10612   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10613   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10614   ins_cost(95);
10615 
10616   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10617             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10618             "FADD   ST,$src2\n\t"
10619             "FSTP   $dst" %}
10620   opcode(0xD9); /* LoadF D9 /0 */
10621   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10622               FMul_ST_reg(src1),
10623               FAdd_ST_reg(src2),
10624               Pop_Reg_FPR(dst) );
10625   ins_pipe( fpu_reg_mem_reg_reg );
10626 %}
10627 
10628 // MACRO3 -- addFPR a mulFPR
10629 // This instruction does not round to 24-bits.  It is a '2-address'
10630 // instruction in that the result goes back to src2.  This eliminates
10631 // a move from the macro; possibly the register allocator will have
10632 // to add it back (and maybe not).
// Used when UseSSE==0 and select_24_bit_instr() is false: matches
// (AddF (MulF src0 src1) src2) and writes the result back into src2.
// Per the format: push $src0, multiply ST by $src1, then add-and-pop into $src2.
10633 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10634   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10635   match(Set src2 (AddF (MulF src0 src1) src2));
10636 
10637   format %{ "FLD    $src0     ===MACRO3===\n\t"
10638             "FMUL   ST,$src1\n\t"
10639             "FADDP  $src2,ST" %}
10640   // NOTE(review): ins_encode below names no OpcP/OpcS/Opcode(...) step, so this
10641   // opcode() value looks unused -- confirm against the enc_class definitions.
10640   opcode(0xD9); /* LoadF D9 /0 */
10641   ins_encode( Push_Reg_FPR(src0),
10642               FMul_ST_reg(src1),
10643               FAddP_reg_ST(src2) );
10644   ins_pipe( fpu_reg_reg_reg );
10645 %}
10646 
10647 // MACRO4 -- divFPR subFPR
10648 // This instruction does not round to 24-bits
// Used when UseSSE==0 and select_24_bit_instr() is false: matches
// (DivF (SubF src2 src1) src3), i.e. (src2 - src1) / src3, with the result in
// an x87 register.
// Per the format: push $src2, subtract $src1 from ST, divide ST by $src3, and
// pop the quotient into $dst.  The subtract and divide are emitted by the
// combined enc_class subFPR_divFPR_encode (defined outside this section).
10649 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10650   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10651   match(Set dst (DivF (SubF src2 src1) src3));
10652 
10653   format %{ "FLD    $src2   ===MACRO4===\n\t"
10654             "FSUB   ST,$src1\n\t"
10655             "FDIV   ST,$src3\n\t"
10656             "FSTP  $dst" %}
10657   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10658   ins_encode( Push_Reg_FPR(src2),
10659               subFPR_divFPR_encode(src1,src3),
10660               Pop_Reg_FPR(dst) );
10661   ins_pipe( fpu_reg_reg_reg_reg );
10662 %}
10663 
10664 // Spill to obtain 24-bit precision
// x87 float divide used when UseSSE==0 and select_24_bit_instr() is true:
// matches (DivF src1 src2) with the result in a float stack slot.
// The encoding pushes $src1, divides by $src2 (primary 0xD8, secondary /6),
// and pops the quotient to $dst in memory.
10665 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10666   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10667   match(Set dst (DivF src1 src2));
10668 
10669   format %{ "FDIV   $dst,$src1,$src2" %}
10670   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10671   ins_encode( Push_Reg_FPR(src1),
10672               OpcReg_FPR(src2),
10673               Pop_Mem_FPR(dst) );
10674   ins_pipe( fpu_mem_reg_reg );
10675 %}
10676 //
10677 // This instruction does not round to 24-bits
// x87 float divide, two-address form, used when UseSSE==0 and
// select_24_bit_instr() is false: matches (DivF dst src).
// The encoding pushes $src and then divides $dst by it with the popping form
// (primary 0xDE, secondary /7).
10678 instruct divFPR_reg(regFPR dst, regFPR src) %{
10679   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10680   match(Set dst (DivF dst src));
10681 
10682   format %{ "FDIV   $dst,$src" %}
10683   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10684   ins_encode( Push_Reg_FPR(src),
10685               OpcP, RegOpc(dst) );
10686   ins_pipe( fpu_reg_reg );
10687 %}
10688 
10689 
10690 // Spill to obtain 24-bit precision
// x87 float remainder used when UseSSE==0 and select_24_bit_instr() is true:
// matches (ModF src1 src2) with the result in a float stack slot.
// It reuses the double-precision remainder helpers (Push_Reg_Mod_DPR,
// emitModDPR, Push_Result_Mod_DPR) and then pops the result to $dst in memory
// with Pop_Mem_FPR.  EAX and EFLAGS are killed because emitModDPR() uses them.
10691 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10692   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10693   match(Set dst (ModF src1 src2));
10694   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10695 
10696   format %{ "FMOD   $dst,$src1,$src2" %}
10697   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10698               emitModDPR(),
10699               Push_Result_Mod_DPR(src2),
10700               Pop_Mem_FPR(dst));
10701   ins_pipe( pipe_slow );
10702 %}
10703 //
10704 // This instruction does not round to 24-bits
// x87 float remainder, two-address form, used when UseSSE==0 and
// select_24_bit_instr() is false: matches (ModF dst src).
// It reuses the double-precision remainder helpers (Push_Reg_Mod_DPR,
// emitModDPR, Push_Result_Mod_DPR) and then pops the result into the register
// $dst with Pop_Reg_FPR.  EAX and EFLAGS are killed because emitModDPR() uses
// them.
10705 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10706   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10707   match(Set dst (ModF dst src));
10708   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10709 
10710   format %{ "FMOD   $dst,$src" %}
10711   ins_encode(Push_Reg_Mod_DPR(dst, src),
10712               emitModDPR(),
10713               Push_Result_Mod_DPR(src),
10714               Pop_Reg_FPR(dst));
10715   ins_pipe( pipe_slow );
10716 %}
10717 
// Float remainder for XMM float registers (UseSSE>=1).  As the format text
// shows, both operands are bounced through a 4-byte stack temporary onto the
// x87 stack, FPREM is repeated while the JP branch is taken, and the result is
// moved back into $dst before the FPU stack is restored.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  // The listed sequence uses FNSTSW AX and SAHF, hence the EAX and EFLAGS kills.
  effect(KILL rax, KILL cr);
  format %{ "SUB    ESP,4\t # FMOD\n"
          "\tMOVSS  [ESP+0],$src1\n"
          "\tFLD_S  [ESP+0]\n"
          "\tMOVSS  [ESP+0],$src0\n"
          "\tFLD_S  [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS  $dst,[ESP+0]\n"
          "\tADD    ESP,4\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}
10741 
10742 
10743 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all sorted alphabetically.  Please keep it that way!
10745 
// RoundFloat with SSE off: the x87 register is stored to a float stack slot
// (FST_S), which is what performs the rounding.  Also used as the expansion
// target of convDPR2FPR_reg.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S  $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
10754 
// RoundDouble while doubles live on the x87 stack (UseSSE<=1): the register is
// stored to a double stack slot (FST_D), which is what performs the rounding.
// Also used as the expansion target of convFPR2D_reg.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D  $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
10763 
// Force rounding to single precision (24-bit significand, 8-bit exponent)
// ConvD2F with SSE off: no code of its own; it expands to roundFloat_mem_reg,
// so the narrowing is done by storing the x87 value to a float stack slot.
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S  $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}
10773 
// Force rounding to single precision (24-bit significand, 8-bit exponent)
// ConvD2F for UseSSE==1: the double source is an x87 register while the float
// result must land in an XMM register, so the value is written as a 32-bit
// float to a temporary stack word and picked up again with MOVSS.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB    ESP,4\n\t"
            "FST_S  [ESP],$src\t# F-round\n\t"
            "MOVSS  $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    const int     src_enc = $src$$reg;
    const Address scratch(rsp, 0);

    // Reserve a 4-byte scratch word on the CPU stack.
    __ subptr(rsp, 4);
    if (src_enc == FPR1L_enc) {
      // Source has the FPR1L encoding (presumably the FPU top of stack):
      // store it without popping.
      __ fst_s(scratch);
    } else {
      // Any other FPU register: push a copy, then store-and-pop that copy.
      __ fld_s(src_enc - 1);
      __ fstp_s(scratch);
    }
    __ movflt($dst$$XMMRegister, scratch);
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}
10796 
10797 // Force rounding double precision to single precision
// ConvD2F with SSE2 or later: both values live in XMM registers, so a single
// CVTSD2SS does the narrowing.
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
10807 
// ConvF2D with SSE off: source and result are both x87 registers.  Emitted via
// the Pop_Reg_Reg_DPR encoding class (defined elsewhere in this file).
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S  $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}
10815 
// ConvF2D for UseSSE==1 with an x87 float source: no code of its own; it
// expands to roundDouble_mem_reg, which stores the value to a double stack slot.
instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D  $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}
10824 
// ConvF2D for UseSSE==1 with an XMM float source and an x87 double result.
// The float is spilled to a temporary stack word, loaded onto the FPU stack
// (which widens it), and popped into the destination FPU register.
// EFLAGS is declared killed because ESP is adjusted with SUB/ADD.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  // Listing text only: the MOVSS line now carries the comma between operands,
  // matching the other MOVSS format lines in this file.
  format %{ "SUB    ESP,4\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "ADD    ESP,4\n\t"
            "FSTP   $dst\t# D-round" %}
  ins_encode %{
    // Reserve a 4-byte scratch word and bounce the float through it.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    // Pop the loaded value from the FPU top of stack into the destination.
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}
10843 
// ConvF2D with SSE2 or later: both values live in XMM registers, so a single
// CVTSS2SD does the widening.
instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
10853 
10854 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
// ConvD2I for an x87 double source (UseSSE<=1).  Per the format text: the value
// is converted with FISTP under the truncating control word, the control word
// is restored, and the result is popped into EAX.  A result of 0x80000000 sends
// the code to the d2i_wrapper stub with the source reloaded on the FPU stack.
// The result is pinned to EAX; EDX (tmp) and EFLAGS are declared killed.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD    $src\t# Convert double to int \n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,4\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "CMP    EAX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "FLD_D  $src\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}
10873 
10874 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
// ConvD2I for an XMM double source (UseSSE>=2).  CVTTSD2SI handles the common
// case inline; only when it yields 0x80000000 is the source pushed onto the x87
// stack and the d2i_wrapper stub called.  The result is pinned to EAX; EDX
// (tmp) and EFLAGS are declared killed.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP    $dst,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP, 8\n\t"
            "MOVSD  [ESP], $src\n\t"
            "FLD_D  [ESP]\n\t"
            "ADD    ESP, 8\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    const Register    result = $dst$$Register;
    const XMMRegister value  = $src$$XMMRegister;
    const Address     scratch(rsp, 0);
    Label done;

    // Fast path: truncating conversion straight into the result register.
    __ cvttsd2sil(result, value);
    __ cmpl(result, 0x80000000);
    __ jccb(Assembler::notEqual, done);
    // Slow path: pass the double to the stub on the x87 stack, going through
    // an 8-byte scratch slot that is released before the call.
    __ subptr(rsp, 8);
    __ movdbl(scratch, value);
    __ fld_d(scratch);
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}
10902 
// ConvD2L for an x87 double source (UseSSE<=1).  Per the format text: the value
// is converted with a 64-bit FISTP under the truncating control word, then
// popped into EAX (low) and EDX (high).  Only the pattern EDX==0x80000000 with
// EAX==0 takes the slow path through the d2l_wrapper stub.
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD    $src\t# Convert double to long\n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,8\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
10924 
10925 // XMM lacks a float/double->long conversion, so use the old FPU stack.
// ConvD2L for an XMM double source (UseSSE>=2).  The value is moved to the x87
// stack through a stack temporary, converted with a 64-bit FISTP under the
// truncating control word, and popped into EDX:EAX.  The d2l_wrapper stub is
// called only when the result is 0x80000000:00000000.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB    ESP,8\t# Convert double to long\n\t"
            "MOVSD  [ESP],$src\n\t"
            "FLD_D  [ESP]\n\t"
            "FLDCW  trunc mode\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP,8\n\t"
            "MOVSD  [ESP],$src\n\t"
            "FLD_D  [ESP]\n\t"
            "ADD    ESP,8\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    const XMMRegister value = $src$$XMMRegister;
    const Address     scratch(rsp, 0);
    Label done;

    // Bounce the double onto the x87 stack through an 8-byte scratch slot.
    __ subptr(rsp, 8);
    __ movdbl(scratch, value);
    __ fld_d(scratch);
    // Convert under the truncating control word; the 64-bit integer
    // overwrites the scratch slot.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(scratch);
    // Reinstate the control word this compilation normally runs with.
    if (!Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
    // Popping both halves also gives the scratch slot back.
    __ pop(rax);
    __ pop(rdx);
    // Anything other than EDX:EAX == 0x80000000:00000000 is final.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, done);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, done);
    // Slow path: reload the source onto the x87 stack and call the stub.
    __ subptr(rsp, 8);
    __ movdbl(scratch, value);
    __ fld_d(scratch);
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}
10977 
// Convert a float to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
// ConvF2I for an x87 float source (UseSSE==0).  Same sequence as the x87
// double-to-int rule: FISTP under the truncating control word, result popped
// into EAX, and the d2i_wrapper stub called when the result is 0x80000000.
// The result is pinned to EAX; EDX (tmp) and EFLAGS are declared killed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD    $src\t# Convert float to int \n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,4\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "CMP    EAX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}
11003 
11004 // Convert a float in xmm to an int reg.
// ConvF2I for an XMM float source (UseSSE>=1).  CVTTSS2SI handles the common
// case inline; only when it yields 0x80000000 is the source pushed onto the x87
// stack and the d2i_wrapper stub called.  The result is pinned to EAX; EDX
// (tmp) and EFLAGS are declared killed.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP    $dst,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP, 4\n\t"
            "MOVSS  [ESP], $src\n\t"
            "FLD    [ESP]\n\t"
            "ADD    ESP, 4\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    const Register    result = $dst$$Register;
    const XMMRegister value  = $src$$XMMRegister;
    const Address     scratch(rsp, 0);
    Label done;

    // Fast path: truncating conversion straight into the result register.
    __ cvttss2sil(result, value);
    __ cmpl(result, 0x80000000);
    __ jccb(Assembler::notEqual, done);
    // Slow path: pass the float to the stub on the x87 stack, going through
    // a 4-byte scratch word that is released before the call.
    __ subptr(rsp, 4);
    __ movflt(scratch, value);
    __ fld_s(scratch);
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}
11032 
// ConvF2L for an x87 float source (UseSSE==0).  Same sequence as the x87
// double-to-long rule: a 64-bit FISTP under the truncating control word, the
// result popped into EDX:EAX, and the d2l_wrapper stub called only for the
// pattern EDX==0x80000000 with EAX==0.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD    $src\t# Convert float to long\n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,8\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
11055 
11056 // XMM lacks a float/double->long conversion, so use the old FPU stack.
// ConvF2L for an XMM float source (UseSSE>=1).  The value is moved to the x87
// stack through a stack temporary, converted with a 64-bit FISTP under the
// truncating control word, and popped into EDX:EAX.  The d2l_wrapper stub is
// called only when the result is 0x80000000:00000000.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB    ESP,8\t# Convert float to long\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "FLDCW  trunc mode\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP,4\t# Convert float to long\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "ADD    ESP,4\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    const XMMRegister value = $src$$XMMRegister;
    const Address     scratch(rsp, 0);
    Label done;

    // Reserve 8 bytes: the float goes in first, and the 64-bit integer
    // result later overwrites the same slot.
    __ subptr(rsp, 8);
    __ movflt(scratch, value);
    __ fld_s(scratch);
    // Convert under the truncating control word.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(scratch);
    // Reinstate the control word this compilation normally runs with.
    if (!Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
    // Popping both halves also gives the scratch slot back.
    __ pop(rax);
    __ pop(rdx);
    // Anything other than EDX:EAX == 0x80000000:00000000 is final.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, done);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, done);
    // Slow path: reload the source onto the x87 stack and call the stub.
    __ subptr(rsp, 4);
    __ movflt(scratch, value);
    __ fld_s(scratch);
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}
11108 
// ConvI2D with an x87 double result (UseSSE<=1): the int is read from a stack
// slot with FILD and popped into the destination FPU register.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11118 
// ConvI2D from a general register into an XMM register with CVTSI2SD.
// Selected for SSE2 or later when UseXmmI2D is off; convXI2D_reg covers the
// case where that flag is on.
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11128 
// ConvI2D with the int load folded in: matches (ConvI2D (LoadI mem)) and
// converts directly from the memory operand with CVTSI2SD (SSE2 or later).
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
11138 
// ConvI2D variant chosen when UseXmmI2D is on (SSE2 or later): the int is first
// moved into the destination XMM register with MOVD and then converted in
// place with CVTDQ2PD.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD  $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    const XMMRegister result = $dst$$XMMRegister;

    // Move the integer bits into the XMM register, then convert in place.
    __ movdl(result, $src$$Register);
    __ cvtdq2pd(result, result);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11152 
// ConvI2D with the int load folded in and an x87 double result: FILD straight
// from the memory operand, then pop into the destination FPU register.
// Restricted to UseSSE<=1 compiles that do not select 24-bit instructions.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD   $mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11163 
11164 // Convert a byte to a float; no rounding step needed.
// ConvI2F with SSE off for an input known to be masked to a byte.  The
// predicate accepts only an input node of the form (AndI x 255), whose value
// always fits in 8 bits, so the result can go straight to an FPU register
// without a store-to-memory rounding step.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11175 
11176 // In 24-bit mode, force exponent rounding by storing back out
// ConvI2F with SSE off in 24-bit mode: FILD the int from its stack slot and pop
// the value into a float stack slot (FSTP_S), which forces the rounding.
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD   $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}
11188 
11189 // In 24-bit mode, force exponent rounding by storing back out
// Same as convI2FPR_SSF, but with the int load folded in: matches
// (ConvI2F (LoadI mem)) and FILDs directly from the memory operand.
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD   $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}
11201 
11202 // This instruction does not round to 24-bits
// ConvI2F with SSE off when 24-bit instructions are not selected: FILD the int
// from its stack slot and pop it into an FPU register, with no store to memory.
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11213 
11214 // This instruction does not round to 24-bits
// Same as convI2FPR_reg, but with the int load folded in: matches
// (ConvI2F (LoadI mem)) and FILDs directly from the memory operand.
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD   $mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}
11225 
11226 // Convert an int to a float in xmm; no rounding step needed.
// ConvI2F from a general register into an XMM register with CVTSI2SS.
instruct convI2F_reg(regF dst, rRegI src) %{
  // && binds tighter than ||: always used for UseSSE==1, and for UseSSE>=2
  // only when UseXmmI2F is off (convXI2F_reg covers the other case).
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11236 
11237  instruct convXI2F_reg(regF dst, rRegI src)
11238 %{
11239   predicate( UseSSE>=2 && UseXmmI2F );
11240   match(Set dst (ConvI2F src));
11241 
11242   format %{ "MOVD  $dst,$src\n\t"
11243             "CVTDQ2PS $dst,$dst\t# i2f" %}
11244   ins_encode %{
11245     __ movdl($dst$$XMMRegister, $src$$Register);
11246     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11247   %}
11248   ins_pipe(pipe_slow); // XXX
11249 %}
11250 
// Sign-extending ConvI2L into a 32-bit register pair.  Per the format text, the
// source is copied into both halves and the high half is arithmetically
// shifted right by 31.  The shift is why EFLAGS is declared killed.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src\n\t"
            "SAR    $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}
11261 
11262 // Zero-extend convert int to long
// Matches (AndL (ConvI2L src) mask) with an immL_32bits mask and implements it
// as a zero extension: copy the int into the low half and clear the high half
// with XOR.  Cheaper (ins_cost 250) than sign-extending and then masking.
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV    $dst.lo,$src\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
11273 
11274 // Zero-extend long
// Matches (AndL src mask) with an immL_32bits mask: keep the low half of the
// long and clear the high half with XOR, using the same encoding sequence as
// convI2L_reg_zex.
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
11285 
// ConvL2D with doubles on the x87 stack (UseSSE<=1).  Per the format text: both
// halves of the long are pushed, loaded as a 64-bit integer with FILD, the
// stack space is released, and the value is popped into a double stack slot.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}
11299 
// ConvL2D with an XMM double result (SSE2 or later).  Per the format text: the
// long is pushed, loaded with FILD, stored back to the same stack space as a
// double, moved into $dst with MOVSD, and the stack space is then released.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}
11314 
// ConvL2F with an XMM float result (UseSSE>=1).  Per the format text: the long
// is pushed, loaded with FILD, stored back as a float, moved into $dst with
// MOVSS, and the 8 bytes of stack are then released (the 0x8 passed to
// Push_ResultF).
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}
11329 
// ConvL2F with the result popped into a float stack slot.  Per the format
// text: the long is pushed, loaded with FILD, the stack space is released, and
// the value is stored with FSTP_S.
// NOTE(review): unlike the neighbouring ConvL2F/ConvL2D rules this one has no
// UseSSE predicate -- confirm that is intentional.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
11342 
// ConvL2I: truncation is just a copy of the low half of the long register pair.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV    $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}
11350 
// MoveF2I (raw bit move, no conversion) from a float stack slot into a general
// register: a plain 32-bit load from the slot's ESP-relative address.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}
11361 
// MoveF2I from an x87 float register into an int stack slot (UseSSE==0): the
// value is stored to the slot as a 32-bit float (FST_S).
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
11372 
// MoveF2I from an XMM float register into an int stack slot (UseSSE>=1): a
// MOVSS store to the slot's ESP-relative address.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11385 
// MoveF2I directly from an XMM register to a general register with MOVD (SSE2
// or later).  It has the lowest ins_cost (85) of the MoveF2I rules.
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11397 
// MoveI2F (raw bit move, no conversion) from a general register into a float
// stack slot: a plain 32-bit store to the slot's ESP-relative address.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}
11409 
11410 
// MoveI2F from an int stack slot into an x87 float register (UseSSE==0): the
// slot is loaded as a 32-bit float (FLD_S) and popped into the destination.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
11424 
// MoveI2F from an int stack slot into an XMM float register (UseSSE>=1): a
// MOVSS load from the slot's ESP-relative address.
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11437 
// MoveI2F directly from a general register to an XMM register with MOVD (SSE2
// or later).  It has the lowest ins_cost (85) of the MoveI2F rules.
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11450 
// MoveD2L (raw bit move) from a double stack slot into a long register pair:
// two 32-bit loads, low word from $src and high word from $src+4.  Both the
// primary and secondary opcode are 0x8B, one per load.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}
11462 
// MoveD2L from an x87 double register into a long stack slot (UseSSE<=1): the
// value is stored to the slot as a 64-bit double (FST_D).
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
11473 
// MoveD2L from an XMM double register into a long stack slot (SSE2 or later):
// a MOVSD store to the slot's ESP-relative address.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11485 
// MoveD2L directly from an XMM register into a long register pair (SSE2 or
// later).  The low word is read with MOVD; the high word is first brought down
// into a temporary XMM register with PSHUFLW 0x4E and then read with MOVD.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD   $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    const Register    lo       = $dst$$Register;
    const XMMRegister value    = $src$$XMMRegister;
    const XMMRegister shuffled = $tmp$$XMMRegister;

    // Low 32 bits come straight out of the source register.
    __ movdl(lo, value);
    // Rearrange the low four words into the temp so the upper 32 bits of the
    // double sit in its low dword, then read them into the high register.
    __ pshuflw(shuffled, value, 0x4e);
    __ movdl(HIGH_FROM_LOW(lo), shuffled);
  %}
  ins_pipe( pipe_slow );
%}
11501 
// MoveL2D (raw bit move) from a long register pair into a double stack slot:
// two 32-bit stores, low word to $dst and high word to $dst+4.  Both the
// primary and secondary opcode are 0x89, one per store.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}
11513 
11514 
// MoveL2D from a long stack slot into an x87 double register (UseSSE<=1): the
// slot is loaded as a 64-bit double (FLD_D) and popped into the destination.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}
11528 
11529 
// MoveL2D from a long stack slot into an XMM double register, for SSE2 or later
// when UseXmmLoadAndClearUpper is on.  The format text lists the load as MOVSD.
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11542 
// Counterpart of MoveL2D_stack_reg_sse for when UseXmmLoadAndClearUpper is off.
// The format text lists the load as MOVLPD, but the encoding makes the same
// movdbl() call -- presumably movdbl() picks the instruction from that flag
// (verify in the MacroAssembler).
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}
11555 
// MoveL2D directly from a long register pair into an XMM register (SSE2 or
// later).  Each 32-bit half is moved into its own XMM register with MOVD and
// the two are then interleaved with PUNPCKLDQ so that $dst holds hi:lo.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD   $dst,$src.lo\n\t"
            "MOVD   $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    const XMMRegister result  = $dst$$XMMRegister;
    const XMMRegister hi_bits = $tmp$$XMMRegister;
    const Register    lo      = $src$$Register;

    // Low word into the result, high word into the temp.
    __ movdl(result, lo);
    __ movdl(hi_bits, HIGH_FROM_LOW(lo));
    // Interleave the low dwords: result = hi_bits:result.
    __ punpckldq(result, hi_bits);
  %}
  ins_pipe( pipe_slow );
%}
11571 
11572 
11573 // =======================================================================
// fast clearing of an array
//
// Matches ClearArray nodes for which ClearArrayNode::is_large() is false.
// Operand classes: cnt is eCXRegI, base is eDIRegP, zero is eAXRegI (shown as
// ECX/EDI/EAX in the listing) and tmp is an XMM temp. cnt and base are
// USE_KILL; zero and the flags are KILL.
// The format template only describes the expected code shape for printing,
// switching on UseFastStosb / UseXMMForObjInit; the code itself is emitted by
// the clear_mem call in ins_encode.
// NOTE(review): the UseXMMForObjInit listing and the InitArrayShortSize compare
// use 64-bit register names (RDI/RAX/RCX/rcx, JMPQ) although the operands here
// are 32-bit classes -- confirm against clear_mem for this port.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
    $$emit$$"JG     LARGE\n\t"
    $$emit$$"SHL    ECX, 1\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JS     DONE\t# Zero length\n\t"
    $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JGE    LOOP\n\t"
    $$emit$$"JMP    DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV     RDI,RAX\n\t"
       $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD     0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB     0x8,RCX\n\t"
       $$emit$$"JGE     L_loop\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JL      L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD     0x20,RAX\n\t"
       $$emit$$"SUB     0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JLE     L_end\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
       $$emit$$"ADD     0x8,RAX\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"JGE     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Last argument is false here; rep_stos_large passes true (presumably the
    // is_large flag -- confirm against clear_mem's signature).
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false);
  %}
  ins_pipe( pipe_slow );
%}
11633 
// Array clearing for ClearArray nodes for which ClearArrayNode::is_large() is
// true. Same operands and effects as rep_stos; the listing omits the
// short-length prologue (the InitArrayShortSize compare and store loop) that
// rep_stos prints. The code itself is emitted by the clear_mem call in
// ins_encode; the format template is only a printed description.
// NOTE(review): the UseXMMForObjInit listing uses 64-bit register names
// (RDI/RAX/RCX, JMPQ) although the operands here are 32-bit classes -- confirm
// against clear_mem for this port.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
       $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD     0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB     0x8,RCX\n\t"
       $$emit$$"JGE     L_loop\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JL      L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD     0x20,RAX\n\t"
       $$emit$$"SUB     0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JLE     L_end\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
       $$emit$$"ADD     0x8,RAX\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"JGE     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Last argument is true here; rep_stos passes false (presumably the
    // is_large flag -- confirm against clear_mem's signature).
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true);
  %}
  ins_pipe( pipe_slow );
%}
11682 
// String compare intrinsic for StrComp nodes whose encoding() is
// StrIntrinsicNode::LL. Inputs use fixed operand classes (str1 eDIRegP,
// cnt1 eCXRegI, str2 eSIRegP, cnt2 eDXRegI) and are all USE_KILL; the result
// is produced in an eAXRegI operand. One XMM temp is required and the flags
// are killed.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // Argument order is (str1, str2, cnt1, cnt2, result, xmm temp, encoding).
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
11697 
// String compare intrinsic for StrComp nodes whose encoding() is
// StrIntrinsicNode::UU. Same operand classes and effects as string_compareL;
// only the predicate, the listing text (char[]) and the encoding constant
// passed to string_compare differ.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // Argument order is (str1, str2, cnt1, cnt2, result, xmm temp, encoding).
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
11712 
// String compare intrinsic for StrComp nodes whose encoding() is
// StrIntrinsicNode::LU. Same operand classes and effects as string_compareL;
// the listing reuses the "byte[]" wording of string_compareL.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // Argument order is (str1, str2, cnt1, cnt2, result, xmm temp, encoding).
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe( pipe_slow );
%}
11727 
// String compare intrinsic for StrComp nodes whose encoding() is
// StrIntrinsicNode::UL. Unlike the LL/UU/LU rules, the operand classes of the
// two strings are exchanged here (str1 eSIRegP / cnt1 eDXRegI, str2 eDIRegP /
// cnt2 eCXRegI), and string_compare is called with str2/cnt2 first.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    // Note the swapped order: (str2, str1, cnt2, cnt1, ...).
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
11742 
// fast string equals
//
// Matches StrEquals on two string pointers and a shared count. str1, str2 and
// cnt are USE_KILL; tmp3 (eBXRegI) and the flags are KILL; two XMM temps are
// required. The result is produced in an eAXRegI operand.
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // First argument is false here, whereas the array_equals* rules pass true
    // and supply a scratch register instead of an explicit count.
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}

  ins_pipe( pipe_slow );
%}
11758 
// fast search of substring with known size.
//
// StrIndexOf with a compile-time constant substring length (immI int_cnt2),
// for encoding() == StrIntrinsicNode::LL and UseSSE42Intrinsics. cnt2
// (eAXRegI) and tmp (eCXRegI) are not match inputs; they are declared KILL
// and handed to the assembler routine as registers. The result is produced in
// an eBXRegI operand.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    // The constant length selects the code generator: 16 elements is the
    // threshold for this LL rule (the UU and UL rules use 8).
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}
11786 
// fast search of substring with known size.
//
// StrIndexOf with a compile-time constant substring length (immI int_cnt2),
// for encoding() == StrIntrinsicNode::UU and UseSSE42Intrinsics. cnt2
// (eAXRegI) and tmp (eCXRegI) are not match inputs; they are declared KILL
// and handed to the assembler routine as registers. The result is produced in
// an eBXRegI operand.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    // The constant length selects the code generator: 8 elements is the
    // threshold for this UU rule.
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}
11814 
// fast search of substring with known size.
//
// StrIndexOf with a compile-time constant substring length (immI int_cnt2),
// for encoding() == StrIntrinsicNode::UL and UseSSE42Intrinsics. cnt2
// (eAXRegI) and tmp (eCXRegI) are not match inputs; they are declared KILL
// and handed to the assembler routine as registers. The result is produced in
// an eBXRegI operand.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    // The constant length selects the code generator: 8 elements is the
    // threshold for this UL rule.
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}
11842 
// StrIndexOf with a substring length held in a register (cnt2, eAXRegI), for
// encoding() == StrIntrinsicNode::LL and UseSSE42Intrinsics. All four inputs
// are USE_KILL; tmp (eCXRegI) and the flags are KILL; one XMM temp is needed.
// The result is produced in an eBXRegI operand.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    // (-1) is passed where the constant-length rules pass icnt2, i.e. no
    // compile-time length is available here.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}
11858 
// StrIndexOf with a substring length held in a register (cnt2, eAXRegI), for
// encoding() == StrIntrinsicNode::UU and UseSSE42Intrinsics. All four inputs
// are USE_KILL; tmp (eCXRegI) and the flags are KILL; one XMM temp is needed.
// The result is produced in an eBXRegI operand.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    // (-1) is passed where the constant-length rules pass icnt2, i.e. no
    // compile-time length is available here.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}
11874 
// StrIndexOf with a substring length held in a register (cnt2, eAXRegI), for
// encoding() == StrIntrinsicNode::UL and UseSSE42Intrinsics. All four inputs
// are USE_KILL; tmp (eCXRegI) and the flags are KILL; one XMM temp is needed.
// The result is produced in an eBXRegI operand.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    // (-1) is passed where the constant-length rules pass icnt2, i.e. no
    // compile-time length is available here.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}
11890 
// StrIndexOfChar: search a string (str1/cnt1) for a single character held in
// ch (eAXRegI). Only enabled with UseSSE42Intrinsics. str1, cnt1 and ch are
// USE_KILL; three XMM temps plus tmp (eCXRegI) are declared TEMP; the flags
// are KILL. The result is produced in an eBXRegI operand.
instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    // Argument order is (str1, cnt1, ch, result, three xmm temps, gpr temp).
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11903 
// fast array equals
//
// AryEq for encoding() == StrIntrinsicNode::LL (listed as byte[]). Both array
// pointers are USE_KILL; tmp3 (eCXRegI), tmp4 (eBXRegI) and the flags are
// KILL; two XMM temps are required. The result is produced in an eAXRegI
// operand.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // First argument is true (string_equals passes false); tmp3 takes the
    // slot where string_equals passes its count register. The trailing
    // false is the "char" flag.
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
  %}
  ins_pipe( pipe_slow );
%}
11921 
// AryEq for encoding() == StrIntrinsicNode::UU (listed as char[]). Same
// operands and effects as array_equalsB; the only encoding difference is that
// the trailing "char" flag passed to arrays_equals is true.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // First argument is true (string_equals passes false); tmp3 takes the
    // slot where string_equals passes its count register.
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
  %}
  ins_pipe( pipe_slow );
%}
11938 
// HasNegatives intrinsic over a byte[] (per the listing text) given as
// pointer ary1 (eSIRegP) and length len (eCXRegI). Both inputs are USE_KILL;
// tmp3 (eBXRegI) and the flags are KILL; two XMM temps are required. The
// result is produced in an eAXRegI operand.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                      regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // Argument order is (ary1, len, result, gpr temp, two xmm temps).
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
11953 
// fast char[] to byte[] compression
//
// Matches StrCompressedCopy. src (eSIRegP), dst (eDIRegP) and len (eDXRegI)
// are USE_KILL; tmp5 (eCXRegI) and the flags are KILL; four XMM temps are
// required. The result is produced in an eAXRegI operand.
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  // The kill list uses 32-bit register names and mirrors the effect() clause
  // above, in the same form as the encode_iso_array listing (EAX is the
  // result, not a kill).
  format %{ "String Compress $src,$dst -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI" %}
  ins_encode %{
    // Argument order is (src, dst, len, four xmm temps, gpr temp, result).
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11968 
// fast byte[] to char[] inflation
//
// Matches StrInflatedCopy; the rule sets a Universe "dummy" operand rather
// than a register result. src (eSIRegP), dst (eDIRegP) and len (eDXRegI) are
// USE_KILL; tmp1 (XMM) and tmp2 (eCXRegI) are TEMP; the flags are KILL.
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    // Argument order is (src, dst, len, xmm temp, gpr temp).
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11982 
// encode char[] to byte[] in ISO_8859_1
//
// Matches EncodeISOArray. src (eSIRegP), dst (eDIRegP) and len (eDXRegI) are
// USE_KILL; tmp5 (eCXRegI) and the flags are KILL; four XMM temps are
// required. The result is produced in an eAXRegI operand.
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    // Argument order is (src, dst, len, four xmm temps, gpr temp, result).
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11998 
11999 
12000 //----------Control Flow Instructions------------------------------------------
12001 // Signed compare Instructions
// Signed int compare of two registers (CmpI op1 op2); defines the signed
// flags operand cr. Listed as CMP and encoded with primary opcode 0x3B plus a
// register-register operand byte.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}
12010 
// Signed int compare of a register against an int immediate (CmpI op1 op2);
// defines the signed flags operand cr. Uses opcode 0x81 /7 with the
// OpcSErm + Con8or32 encoding classes (by their names, these choose between
// an 8-bit and a 32-bit immediate form -- confirm in the encode section).
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12020 
// Cisc-spilled version of compI_eReg: the second operand is loaded directly
// from memory (CmpI op1 (LoadI op2)). Same opcode 0x3B as the register form,
// but with a register-memory operand and a much higher ins_cost (500).
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP    $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}
12031 
// Signed int compare of a register against the constant zero (immI0).
// Instead of CMP with an immediate, this is listed as TEST $src,$src
// (opcode 0x85 with $src in both register fields).
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12041 
// Folds (src & con) compared with zero into a single TEST of a register
// against an immediate: matches CmpI (AndI src con) zero and sets the signed
// flags operand. Uses opcode 0xF7 /0 followed by a 32-bit constant (Con32).
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST   $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}
12050 
// TEST of a register against a memory operand, intended to fold an AND whose
// result is only compared with zero. Uses opcode 0x85 with a register-memory
// operand.
// NOTE(review): the match tree uses the memory operand directly as an AndI
// input, without a LoadI around it (compare compI_eReg_mem, which matches
// (LoadI op2)). Confirm whether this rule can actually match.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST   $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}
12059 
// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
//
// Register-register form: matches CmpU op1 op2 and uses the same opcode
// (0x3B) and encoding classes as compI_eReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}
12070 
// Unsigned int compare of a register against an int immediate (CmpU op1 op2);
// produces an eFlagsRegU. Same opcode (0x81 /7) and encoding classes as
// compI_eReg_imm.
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12079 
// Cisc-spilled version of compU_eReg: the second operand is loaded directly
// from memory (CmpU op1 (LoadI op2)). Same opcode 0x3B with a register-memory
// operand and ins_cost 500.
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}
12090 
12091 // // Cisc-spilled version of cmpU_eReg
12092 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12093 //  match(Set cr (CmpU (LoadI op1) op2));
12094 //
12095 //  format %{ "CMPu   $op1,$op2" %}
12096 //  ins_cost(500);
12097 //  opcode(0x39);  /* Opcode 39 /r */
12098 //  ins_encode( OpcP, RegMem( op1, op2) );
12099 //%}
12100 
// Unsigned int compare of a register against the constant zero (immI0);
// produces an eFlagsRegU. Listed as TESTu $src,$src (opcode 0x85 with $src in
// both register fields), like testI_reg.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu  $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12109 
// Unsigned pointer compare Instructions
//
// Register-register form: matches CmpP op1 op2 on two pointer registers and
// produces an eFlagsRegU. Same opcode (0x3B) and encoding classes as the int
// register-register compares.
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}
12119 
// Pointer compare of a register against a pointer immediate (immP);
// produces an eFlagsRegU. Uses opcode 0x81 /7 with the same OpcSErm +
// Con8or32 encoding classes as the int immediate compares.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12128 
// Cisc-spilled version of compP_eReg: the second operand is loaded directly
// from memory (CmpP op1 (LoadP op2)). Same opcode 0x3B with a register-memory
// operand and ins_cost 500. compP_mem_eReg matches the same tree without the
// extra cost, but only under its raw-pointer predicate.
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}
12139 
12140 // // Cisc-spilled version of cmpP_eReg
12141 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12142 //  match(Set cr (CmpP (LoadP op1) op2));
12143 //
12144 //  format %{ "CMPu   $op1,$op2" %}
12145 //  ins_cost(500);
12146 //  opcode(0x39);  /* Opcode 39 /r */
12147 //  ins_encode( OpcP, RegMem( op1, op2) );
12148 //%}
12149 
// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
//
// Matches the same tree as compP_eReg_mem (CmpP op1 (LoadP op2)) but without
// the ins_cost(500) penalty. The predicate restricts it to loads whose type
// has no relocation: n->in(2) is the LoadP and its in(2) is checked for
// reloc() == relocInfo::none.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}
12162 
12163 //
12164 // This will generate a signed flags result. This should be ok
12165 // since any compare to a zero should be eq/neq.
12166 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12167   match(Set cr (CmpP src zero));
12168 
12169   format %{ "TEST   $src,$src" %}
12170   opcode(0x85);
12171   ins_encode( OpcP, RegReg( src, src ) );
12172   ins_pipe( ialu_cr_reg_imm );
12173 %}
12174 
// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
//
// Tests a pointer in memory against zero by TESTing the memory word with the
// all-ones mask 0xFFFFFFFF (opcode 0xF7 /0 followed by a 32-bit constant).
// NOTE(review): the zero operand is declared immI0 although it is the second
// input of a CmpP, while testP_reg declares it immP0. Confirm whether this
// rule can match / pair with testP_reg as intended.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST   $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);               /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}
12187 
12188 // Yanked all unsigned pointer compare operations.
12189 // Pointer compares are done with CmpP which is already unsigned.
12190 
12191 //----------Max and Min--------------------------------------------------------
12192 // Min Instructions
12193 ////
12194 //   *** Min and Max using the conditional move are slower than the
12195 //   *** branch version on a Pentium III.
12196 // // Conditional move for min
12197 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12198 //  effect( USE_DEF op2, USE op1, USE cr );
12199 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12200 //  opcode(0x4C,0x0F);
12201 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12202 //  ins_pipe( pipe_cmov_reg );
12203 //%}
12204 //
12205 //// Min Register with Register (P6 version)
12206 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12207 //  predicate(VM_Version::supports_cmov() );
12208 //  match(Set op2 (MinI op1 op2));
12209 //  ins_cost(200);
12210 //  expand %{
12211 //    eFlagsReg cr;
12212 //    compI_eReg(cr,op1,op2);
12213 //    cmovI_reg_lt(op2,op1,cr);
12214 //  %}
12215 //%}
12216 
// Min Register with Register (generic version)
//
// Two-address form: dst is both an input and the result of MinI. The flags
// are killed. The instruction bytes come from the min_enc encoding class
// (defined outside this chunk).
// NOTE(review): opcode(0xCC) is declared, but the ins_encode list contains
// no OpcP; presumably it is a placeholder not used by min_enc -- confirm.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN    $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}
12228 
12229 // Max Register with Register
12230 //   *** Min and Max using the conditional move are slower than the
12231 //   *** branch version on a Pentium III.
12232 // // Conditional move for max
12233 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12234 //  effect( USE_DEF op2, USE op1, USE cr );
12235 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12236 //  opcode(0x4F,0x0F);
12237 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12238 //  ins_pipe( pipe_cmov_reg );
12239 //%}
12240 //
12241 // // Max Register with Register (P6 version)
12242 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12243 //  predicate(VM_Version::supports_cmov() );
12244 //  match(Set op2 (MaxI op1 op2));
12245 //  ins_cost(200);
12246 //  expand %{
12247 //    eFlagsReg cr;
12248 //    compI_eReg(cr,op1,op2);
12249 //    cmovI_reg_gt(op2,op1,cr);
12250 //  %}
12251 //%}
12252 
// Max Register with Register (generic version)
//
// Two-address form: dst is both an input and the result of MaxI. The flags
// are killed. The instruction bytes come from the max_enc encoding class
// (defined outside this chunk).
// NOTE(review): opcode(0xCC) is declared, but the ins_encode list contains
// no OpcP; presumably it is a placeholder not used by max_enc -- confirm.
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX    $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}
12264 
12265 // ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
//
// Computes, per the format string,
//   limit = init + stride * ((limit - init + stride - 1) / stride)
// for a constant stride other than 1 and -1 (asserted below). limit is both
// an input and the result (eAXRegI); limit_hi (eDXRegI) holds the upper half
// of the 64-bit intermediate; tmp holds the sign extension of init and later
// the divisor/multiplier. The intermediate is kept in 64 bits so that
// limit - init + stride - 1 cannot overflow before the division.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      // Negative stride: add (stride + 1) with sign-extended carry, then
      // negate the 64-bit value so the division below uses a positive
      // dividend sign convention and the positive divisor -stride.
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign: tmp becomes the original (negative) stride again
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}
12311 
12312 // ============================================================================
12313 // Branch Instructions
12314 // Jump Table
12315 instruct jumpXtnd(rRegI switch_val) %{
        // Jump-table dispatch: performs an indirect jump through the table at
        // $constantaddress, indexed by switch_val with a scale factor of 1.
        // NOTE(review): scale times_1 implies switch_val is already a byte offset
        // into the table -- confirm against the code that builds the Jump node.
12316   match(Jump switch_val);
12317   ins_cost(350);
12318   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12319   ins_encode %{
12320     // Jump to Address(table_base + switch_reg)
12321     Address index(noreg, $switch_val$$Register, Address::times_1);
12322     __ jump(ArrayAddress($constantaddress, index));
12323   %}
12324   ins_pipe(pipe_jmp);
12325 %}
12326 
12327 // Jump Direct - Label defines a relative address from JMP+1
12328 instruct jmpDir(label labl) %{
        // Unconditional branch for Goto.  Always emitted in the long form (the
        // `false` argument to jmp below), so the declared size(5) has to match
        // that encoding exactly.
12329   match(Goto);
12330   effect(USE labl);
12331 
12332   ins_cost(300);
12333   format %{ "JMP    $labl" %}
12334   size(5);
12335   ins_encode %{
12336     Label* L = $labl$$label;
12337     __ jmp(*L, false); // Always long jump
12338   %}
12339   ins_pipe( pipe_jmp );
12340 %}
12341 
12342 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12343 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
        // Conditional branch for an If node on signed-compare flags.  The
        // condition code comes from the cmpOp operand; the branch is always
        // emitted in the long form, matching the declared size(6).
12344   match(If cop cr);
12345   effect(USE labl);
12346 
12347   ins_cost(300);
12348   format %{ "J$cop    $labl" %}
12349   size(6);
12350   ins_encode %{
12351     Label* L = $labl$$label;
12352     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12353   %}
12354   ins_pipe( pipe_jcc );
12355 %}
12356 
12357 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12358 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
        // Back-branch of a counted loop (CountedLoopEnd) on signed-compare flags.
        // Only selected when the node has no vector mask set; the masked case is
        // handled by jmpLoopEnd_and_restoreMask.  Long-form Jcc, size(6).
12359   predicate(!n->has_vector_mask_set());
12360   match(CountedLoopEnd cop cr);
12361   effect(USE labl);
12362 
12363   ins_cost(300);
12364   format %{ "J$cop    $labl\t# Loop end" %}
12365   size(6);
12366   ins_encode %{
12367     Label* L = $labl$$label;
12368     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12369   %}
12370   ins_pipe( pipe_jcc );
12371 %}
12372 
12373 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12374 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
        // Counted-loop back-branch on unsigned-compare flags (cmpOpU/eFlagsRegU).
        // Only selected when the node has no vector mask set.  Long-form Jcc,
        // size(6).
12375   predicate(!n->has_vector_mask_set());
12376   match(CountedLoopEnd cop cmp);
12377   effect(USE labl);
12378 
12379   ins_cost(300);
12380   format %{ "J$cop,u  $labl\t# Loop end" %}
12381   size(6);
12382   ins_encode %{
12383     Label* L = $labl$$label;
12384     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12385   %}
12386   ins_pipe( pipe_jcc );
12387 %}
12388 
12389 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // Counted-loop back-branch for the cmpOpUCF/eFlagsRegUCF operand pair.
        // Same encoding as jmpLoopEndU but with a lower ins_cost (200 vs 300).
        // Only selected when the node has no vector mask set.
12390   predicate(!n->has_vector_mask_set());
12391   match(CountedLoopEnd cop cmp);
12392   effect(USE labl);
12393 
12394   ins_cost(200);
12395   format %{ "J$cop,u  $labl\t# Loop end" %}
12396   size(6);
12397   ins_encode %{
12398     Label* L = $labl$$label;
12399     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12400   %}
12401   ins_pipe( pipe_jcc );
12402 %}
12403 
12404 // mask version
12405 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12406 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
        // Counted-loop back-branch for nodes that have a vector mask set.  The
        // long-form Jcc is followed by restorevectmask, so the mask restore runs
        // only on the fall-through (loop exit) path.
        // NOTE(review): size(10) presumably is 6 bytes of Jcc plus 4 bytes for
        // restorevectmask -- confirm against the assembler.
12407   predicate(n->has_vector_mask_set());
12408   match(CountedLoopEnd cop cr);
12409   effect(USE labl);
12410 
12411   ins_cost(400);
12412   format %{ "J$cop    $labl\t# Loop end\n\t"
12413             "restorevectmask \t# vector mask restore for loops" %}
12414   size(10);
12415   ins_encode %{
12416     Label* L = $labl$$label;
12417     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12418     __ restorevectmask();
12419   %}
12420   ins_pipe( pipe_jcc );
12421 %}
12422 
12423 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12424 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
        // Unsigned-flags counterpart of jmpLoopEnd_and_restoreMask: selected when
        // the node has a vector mask set; emits a long-form Jcc followed by
        // restorevectmask on the fall-through path.
12425   predicate(n->has_vector_mask_set());
12426   match(CountedLoopEnd cop cmp);
12427   effect(USE labl);
12428 
12429   ins_cost(400);
12430   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12431             "restorevectmask \t# vector mask restore for loops" %}
12432   size(10);
12433   ins_encode %{
12434     Label* L = $labl$$label;
12435     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12436     __ restorevectmask();
12437   %}
12438   ins_pipe( pipe_jcc );
12439 %}
12440 
12441 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // cmpOpUCF/eFlagsRegUCF counterpart of jmpLoopEnd_and_restoreMask, with a
        // lower ins_cost (300 vs 400).  Selected when the node has a vector mask
        // set; emits a long-form Jcc followed by restorevectmask.
12442   predicate(n->has_vector_mask_set());
12443   match(CountedLoopEnd cop cmp);
12444   effect(USE labl);
12445 
12446   ins_cost(300);
12447   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12448             "restorevectmask \t# vector mask restore for loops" %}
12449   size(10);
12450   ins_encode %{
12451     Label* L = $labl$$label;
12452     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12453     __ restorevectmask();
12454   %}
12455   ins_pipe( pipe_jcc );
12456 %}
12457 
12458 // Jump Direct Conditional - using unsigned comparison
12459 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
        // Conditional branch for an If node on unsigned-compare flags
        // (cmpOpU/eFlagsRegU).  Long-form Jcc, matching the declared size(6).
12460   match(If cop cmp);
12461   effect(USE labl);
12462 
12463   ins_cost(300);
12464   format %{ "J$cop,u  $labl" %}
12465   size(6);
12466   ins_encode %{
12467     Label* L = $labl$$label;
12468     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12469   %}
12470   ins_pipe(pipe_jcc);
12471 %}
12472 
12473 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // Conditional branch for an If node using the cmpOpUCF/eFlagsRegUCF
        // operand pair.  Same encoding as jmpConU but with a lower ins_cost
        // (200 vs 300).
12474   match(If cop cmp);
12475   effect(USE labl);
12476 
12477   ins_cost(200);
12478   format %{ "J$cop,u  $labl" %}
12479   size(6);
12480   ins_encode %{
12481     Label* L = $labl$$label;
12482     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12483   %}
12484   ins_pipe(pipe_jcc);
12485 %}
12486 
12487 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
        // Two-branch conditional jump for cmpOpUCF2, which only supports the
        // equal and notEqual conditions (anything else hits ShouldNotReachHere):
        //   notEqual: branch to labl if parity is set OR the result is not equal.
        //   equal:    skip the branch if parity is set, else branch on equal.
        // NOTE(review): the parity flag presumably signals an unordered (NaN)
        // floating-point compare -- confirm against the flag-producing compare.
        // No size() is declared here, unlike the single-branch variants.
12488   match(If cop cmp);
12489   effect(USE labl);
12490 
12491   ins_cost(200);
12492   format %{ $$template
12493     if ($cop$$cmpcode == Assembler::notEqual) {
12494       $$emit$$"JP,u   $labl\n\t"
12495       $$emit$$"J$cop,u   $labl"
12496     } else {
12497       $$emit$$"JP,u   done\n\t"
12498       $$emit$$"J$cop,u   $labl\n\t"
12499       $$emit$$"done:"
12500     }
12501   %}
12502   ins_encode %{
12503     Label* l = $labl$$label;
12504     if ($cop$$cmpcode == Assembler::notEqual) {
12505       __ jcc(Assembler::parity, *l, false);
12506       __ jcc(Assembler::notEqual, *l, false);
12507     } else if ($cop$$cmpcode == Assembler::equal) {
12508       Label done;
            // The skip over the following branch uses the short form.
12509       __ jccb(Assembler::parity, done);
12510       __ jcc(Assembler::equal, *l, false);
12511       __ bind(done);
12512     } else {
12513        ShouldNotReachHere();
12514     }
12515   %}
12516   ins_pipe(pipe_jcc);
12517 %}
12518 
12519 // ============================================================================
12520 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12521 // array for an instance of the superklass.  Set a hidden internal cache on a
12522 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12523 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12524 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
        // Value-producing form of the secondary-supers scan.  The operands are
        // pinned to fixed registers (result=EDI, sub=ESI, super=EAX, count=ECX)
        // as required by the REPNE SCASD sequence shown in the format string.
        // ECX and the flags are declared clobbered.  opcode(0x1) is the switch
        // that makes the shared encoding zero EDI on a hit.
12525   match(Set result (PartialSubtypeCheck sub super));
12526   effect( KILL rcx, KILL cr );
12527 
12528   ins_cost(1100);  // slightly larger than the next version
12529   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12530             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12531             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12532             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12533             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12534             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12535             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12536      "miss:\t" %}
12537 
12538   opcode(0x1); // Force a XOR of EDI
12539   ins_encode( enc_PartialSubtypeCheck() );
12540   ins_pipe( pipe_slow );
12541 %}
12542 
12543 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
        // Flags-producing form: matches a PartialSubtypeCheck whose result is
        // immediately compared against null, so only the condition codes are
        // defined.  EDI (result) is merely clobbered here, which is why
        // opcode(0x0) tells the shared encoding to skip the final XOR.  Slightly
        // cheaper (1000) than the value-producing form (1100).
12544   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12545   effect( KILL rcx, KILL result );
12546 
12547   ins_cost(1000);
12548   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12549             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12550             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12551             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12552             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12553             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12554      "miss:\t" %}
12555 
12556   opcode(0x0);  // No need to XOR EDI
12557   ins_encode( enc_PartialSubtypeCheck() );
12558   ins_pipe( pipe_slow );
12559 %}
12560 
12561 // ============================================================================
12562 // Branch Instructions -- short offset versions
12563 //
12564 // These instructions are used to replace jumps of a long offset (the default
12565 // match) with jumps of a shorter offset.  These instructions are all tagged
12566 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12567 // match rules in general matching.  Instead, the ADLC generates a conversion
12568 // method in the MachNode which can be used to do in-place replacement of the
12569 // long variant with the shorter variant.  The compiler will determine if a
12570 // branch can be taken by the is_short_branch_offset() predicate in the machine
12571 // specific code section of the file.
12572 
12573 // Jump Direct - Label defines a relative address from JMP+1
12574 instruct jmpDir_short(label labl) %{
        // Short-offset replacement for jmpDir: emits the short jump form (jmpb),
        // declared size(2).  Tagged with ins_short_branch so it is used only as
        // an in-place replacement, not in general matching.
12575   match(Goto);
12576   effect(USE labl);
12577 
12578   ins_cost(300);
12579   format %{ "JMP,s  $labl" %}
12580   size(2);
12581   ins_encode %{
12582     Label* L = $labl$$label;
12583     __ jmpb(*L);
12584   %}
12585   ins_pipe( pipe_jmp );
12586   ins_short_branch(1);
12587 %}
12588 
12589 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12590 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
        // Short-offset replacement for jmpCon: emits the short conditional jump
        // form (jccb), declared size(2).  Tagged with ins_short_branch.
12591   match(If cop cr);
12592   effect(USE labl);
12593 
12594   ins_cost(300);
12595   format %{ "J$cop,s  $labl" %}
12596   size(2);
12597   ins_encode %{
12598     Label* L = $labl$$label;
12599     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12600   %}
12601   ins_pipe( pipe_jcc );
12602   ins_short_branch(1);
12603 %}
12604 
12605 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12606 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
        // Short-offset replacement for the signed counted-loop back-branch:
        // jccb, declared size(2).  Unlike the long variants, this rule carries no
        // has_vector_mask_set() predicate.
12607   match(CountedLoopEnd cop cr);
12608   effect(USE labl);
12609 
12610   ins_cost(300);
12611   format %{ "J$cop,s  $labl\t# Loop end" %}
12612   size(2);
12613   ins_encode %{
12614     Label* L = $labl$$label;
12615     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12616   %}
12617   ins_pipe( pipe_jcc );
12618   ins_short_branch(1);
12619 %}
12620 
12621 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12622 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
        // Short-offset replacement for the unsigned counted-loop back-branch:
        // jccb, declared size(2).  Carries no has_vector_mask_set() predicate.
12623   match(CountedLoopEnd cop cmp);
12624   effect(USE labl);
12625 
12626   ins_cost(300);
12627   format %{ "J$cop,us $labl\t# Loop end" %}
12628   size(2);
12629   ins_encode %{
12630     Label* L = $labl$$label;
12631     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12632   %}
12633   ins_pipe( pipe_jcc );
12634   ins_short_branch(1);
12635 %}
12636 
12637 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // Short-offset replacement for the cmpOpUCF counted-loop back-branch:
        // jccb, declared size(2).  Carries no has_vector_mask_set() predicate.
12638   match(CountedLoopEnd cop cmp);
12639   effect(USE labl);
12640 
12641   ins_cost(300);
12642   format %{ "J$cop,us $labl\t# Loop end" %}
12643   size(2);
12644   ins_encode %{
12645     Label* L = $labl$$label;
12646     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12647   %}
12648   ins_pipe( pipe_jcc );
12649   ins_short_branch(1);
12650 %}
12651 
12652 // Jump Direct Conditional - using unsigned comparison
12653 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
        // Short-offset replacement for jmpConU: jccb on unsigned-compare flags,
        // declared size(2).  Tagged with ins_short_branch.
12654   match(If cop cmp);
12655   effect(USE labl);
12656 
12657   ins_cost(300);
12658   format %{ "J$cop,us $labl" %}
12659   size(2);
12660   ins_encode %{
12661     Label* L = $labl$$label;
12662     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12663   %}
12664   ins_pipe( pipe_jcc );
12665   ins_short_branch(1);
12666 %}
12667 
12668 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // Short-offset replacement for jmpConUCF: jccb, declared size(2).
        // Tagged with ins_short_branch.
12669   match(If cop cmp);
12670   effect(USE labl);
12671 
12672   ins_cost(300);
12673   format %{ "J$cop,us $labl" %}
12674   size(2);
12675   ins_encode %{
12676     Label* L = $labl$$label;
12677     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12678   %}
12679   ins_pipe( pipe_jcc );
12680   ins_short_branch(1);
12681 %}
12682 
12683 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
        // Short-offset replacement for jmpConUCF2.  Both conditions emit exactly
        // two short branches (jccb), hence the declared size(4):
        //   notEqual: branch to labl on parity OR on not-equal.
        //   equal:    skip on parity, otherwise branch to labl on equal.
        // Any other condition code hits ShouldNotReachHere.
12684   match(If cop cmp);
12685   effect(USE labl);
12686 
12687   ins_cost(300);
12688   format %{ $$template
12689     if ($cop$$cmpcode == Assembler::notEqual) {
12690       $$emit$$"JP,u,s   $labl\n\t"
12691       $$emit$$"J$cop,u,s   $labl"
12692     } else {
12693       $$emit$$"JP,u,s   done\n\t"
12694       $$emit$$"J$cop,u,s  $labl\n\t"
12695       $$emit$$"done:"
12696     }
12697   %}
12698   size(4);
12699   ins_encode %{
12700     Label* l = $labl$$label;
12701     if ($cop$$cmpcode == Assembler::notEqual) {
12702       __ jccb(Assembler::parity, *l);
12703       __ jccb(Assembler::notEqual, *l);
12704     } else if ($cop$$cmpcode == Assembler::equal) {
12705       Label done;
12706       __ jccb(Assembler::parity, done);
12707       __ jccb(Assembler::equal, *l);
12708       __ bind(done);
12709     } else {
12710        ShouldNotReachHere();
12711     }
12712   %}
12713   ins_pipe(pipe_jcc);
12714   ins_short_branch(1);
12715 %}
12716 
12717 // ============================================================================
12718 // Long Compare
12719 //
12720 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12721 // is tricky.  The flavor of compare used depends on whether we are testing
12722 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12723 // The GE test is the negated LT test.  The LE test can be had by commuting
12724 // the operands (yielding a GE test) and then negating; negate again for the
12725 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12726 // NE test is negated from that.
12727 
12728 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12729 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12730 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12731 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12732 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12733 // foo match ends up with the wrong leaf.  One fix is to not match both
12734 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12735 // both forms beat the trinary form of long-compare and both are very useful
12736 // on Intel which has so few registers.
12737 
12738 // Manifest a CmpL result in an integer register.  Very painful.
12739 // This is the test to avoid.
12740 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
        // Three-way long compare: leaves -1, 0 or +1 in dst depending on whether
        // src1 is less than, equal to, or greater than src2.  The high halves are
        // compared as signed values; only if they are equal are the low halves
        // compared, and those as unsigned values.  Flags are clobbered.
12741   match(Set dst (CmpL3 src1 src2));
12742   effect( KILL flags );
12743   ins_cost(1000);
12744   format %{ "XOR    $dst,$dst\n\t"
12745             "CMP    $src1.hi,$src2.hi\n\t"
12746             "JLT,s  m_one\n\t"
12747             "JGT,s  p_one\n\t"
12748             "CMP    $src1.lo,$src2.lo\n\t"
12749             "JB,s   m_one\n\t"
12750             "JEQ,s  done\n"
12751     "p_one:\tINC    $dst\n\t"
12752             "JMP,s  done\n"
12753     "m_one:\tDEC    $dst\n"
12754      "done:" %}
12755   ins_encode %{
12756     Label p_one, m_one, done;
          // Start from 0; the paths below adjust it to +1 or -1.
12757     __ xorptr($dst$$Register, $dst$$Register);
          // Signed compare of the high words decides unless they are equal.
12758     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12759     __ jccb(Assembler::less,    m_one);
12760     __ jccb(Assembler::greater, p_one);
          // High words equal: unsigned compare of the low words.
12761     __ cmpl($src1$$Register, $src2$$Register);
12762     __ jccb(Assembler::below,   m_one);
12763     __ jccb(Assembler::equal,   done);
12764     __ bind(p_one);
12765     __ incrementl($dst$$Register);
12766     __ jmpb(done);
12767     __ bind(m_one);
12768     __ decrementl($dst$$Register);
12769     __ bind(done);
12770   %}
12771   ins_pipe( pipe_slow );
12772 %}
12773 
12774 //======
12775 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12776 // compares.  Can be used for LE or GT compares by reversing arguments.
12777 // NOT GOOD FOR EQ/NE tests.
12778 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
        // Long compare against zero for LT/GE consumers: only the high word is
        // tested against itself (TEST hi,hi), so just the sign of the long is
        // reflected in the flags.
12779   match( Set flags (CmpL src zero ));
12780   ins_cost(100);
12781   format %{ "TEST   $src.hi,$src.hi" %}
12782   opcode(0x85);
12783   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12784   ins_pipe( ialu_cr_reg_reg );
12785 %}
12786 
12787 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12788 // compares.  Can be used for LE or GT compares by reversing arguments.
12789 // NOT GOOD FOR EQ/NE tests.
12790 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
        // Long register-register compare for LT/GE consumers.  Per the format
        // string: compare the low words, then subtract-with-borrow the high words
        // in a scratch register (tmp) so src1 itself is left unmodified.  The
        // actual bytes come from the long_cmp_flags2 encoding class.
12791   match( Set flags (CmpL src1 src2 ));
12792   effect( TEMP tmp );
12793   ins_cost(300);
12794   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12795             "MOV    $tmp,$src1.hi\n\t"
12796             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12797   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12798   ins_pipe( ialu_cr_reg_reg );
12799 %}
12800 
12801 // Long compares reg < zero/reg OR reg >= zero/reg.
12802 // Just a wrapper for a normal branch, plus the predicate test.
12803 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
        // Branch on flags produced by one of the LT/GE long compares.  The
        // predicate accepts only Bool tests lt and ge (read from the Bool node
        // reached through _kids[0]); the rule then expands to a plain jmpCon.
12804   match(If cmp flags);
12805   effect(USE labl);
12806   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12807   expand %{
12808     jmpCon(cmp,flags,labl);    // JLT or JGE...
12809   %}
12810 %}
12811 
12812 //======
12813 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12814 // compares.  Can be used for LE or GT compares by reversing arguments.
12815 // NOT GOOD FOR EQ/NE tests.
12816 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
        // Unsigned long compare against zero for LT/GE consumers.  Uses the same
        // TEST hi,hi encoding as cmpL_zero_flags_LTGE but produces the
        // flagsReg_ulong_LTGE flags class.
12817   match(Set flags (CmpUL src zero));
12818   ins_cost(100);
12819   format %{ "TEST   $src.hi,$src.hi" %}
12820   opcode(0x85);
12821   ins_encode(OpcP, RegReg_Hi2(src, src));
12822   ins_pipe(ialu_cr_reg_reg);
12823 %}
12824 
12825 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12826 // compares.  Can be used for LE or GT compares by reversing arguments.
12827 // NOT GOOD FOR EQ/NE tests.
12828 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
        // Unsigned long register-register compare for LT/GE consumers.  Shares
        // the long_cmp_flags2 encoding with the signed form (CMP low words, then
        // SBB of the high words in tmp) but produces flagsReg_ulong_LTGE.
12829   match(Set flags (CmpUL src1 src2));
12830   effect(TEMP tmp);
12831   ins_cost(300);
12832   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12833             "MOV    $tmp,$src1.hi\n\t"
12834             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12835   ins_encode(long_cmp_flags2(src1, src2, tmp));
12836   ins_pipe(ialu_cr_reg_reg);
12837 %}
12838 
12839 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12840 // Just a wrapper for a normal branch, plus the predicate test.
12841 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
        // Branch on flags produced by one of the LT/GE unsigned long compares.
        // The predicate accepts only Bool tests lt and ge; the rule expands to
        // jmpCon, with the condition supplied by the unsigned cmpOpU operand.
12842   match(If cmp flags);
12843   effect(USE labl);
12844   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12845   expand %{
12846     jmpCon(cmp, flags, labl);    // JLT or JGE...
12847   %}
12848 %}
12849 
12850 // Compare 2 longs and CMOVE longs.
12851 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
        // Conditional move of a long register pair, selected by LT/GE long-compare
        // flags.  Requires CMOV support and a Bool test of lt or ge.  Two CMOVs
        // are emitted with the same condition: one for the low halves and one for
        // the high halves.
12852   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12853   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12854   ins_cost(400);
12855   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12856             "CMOV$cmp $dst.hi,$src.hi" %}
12857   opcode(0x0F,0x40);
12858   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12859   ins_pipe( pipe_cmov_reg_long );
12860 %}
12861 
12862 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
        // Conditional move of a long loaded from memory (LoadL folded into the
        // match), selected by LT/GE long-compare flags.  Requires CMOV support
        // and a Bool test of lt or ge.  Two CMOVs are emitted: RegMem for the low
        // word and RegMem_Hi for the high word.
12863   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12864   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12865   ins_cost(500);
12866   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12867             "CMOV$cmp $dst.hi,$src.hi" %}
12868   opcode(0x0F,0x40);
12869   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12870   ins_pipe( pipe_cmov_reg_long );
12871 %}
12872 
12873 // Compare 2 longs and CMOVE ints.
12874 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
        // Conditional move of an int register, selected by LT/GE long-compare
        // flags.  Requires CMOV support and a Bool test of lt or ge.
12875   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12876   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12877   ins_cost(200);
12878   format %{ "CMOV$cmp $dst,$src" %}
12879   opcode(0x0F,0x40);
12880   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12881   ins_pipe( pipe_cmov_reg );
12882 %}
12883 
12884 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
        // Conditional move of an int loaded from memory (LoadI folded into the
        // match), selected by LT/GE long-compare flags.  Requires CMOV support
        // and a Bool test of lt or ge.
12885   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12886   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12887   ins_cost(250);
12888   format %{ "CMOV$cmp $dst,$src" %}
12889   opcode(0x0F,0x40);
12890   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12891   ins_pipe( pipe_cmov_mem );
12892 %}
12893 
12894 // Compare 2 longs and CMOVE pointers.
12895 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
        // Conditional move of a pointer register (CMoveP), selected by LT/GE
        // long-compare flags.  Requires CMOV support and a Bool test of lt or ge.
12896   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12897   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12898   ins_cost(200);
12899   format %{ "CMOV$cmp $dst,$src" %}
12900   opcode(0x0F,0x40);
12901   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12902   ins_pipe( pipe_cmov_reg );
12903 %}
12904 
12905 // Compare 2 longs and CMOVE doubles
12906 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
        // Conditional move of a double held in a regDPR register, selected by
        // LT/GE long-compare flags.  Expands to fcmovDPR_regS.
        // The lt/ge alternatives are parenthesized so the UseSSE<=1 guard covers
        // both of them: '&&' binds tighter than '||', so without the parentheses
        // a GE test would satisfy the predicate regardless of UseSSE.
12907   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12908   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12909   ins_cost(200);
12910   expand %{
12911     fcmovDPR_regS(cmp,flags,dst,src);
12912   %}
12913 %}
12914 
12915 // Compare 2 longs and CMOVE doubles
12916 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
        // Conditional move of a double held in a regD register, selected by
        // LT/GE long-compare flags.  Expands to fcmovD_regS.
        // The lt/ge alternatives are parenthesized so the UseSSE>=2 guard covers
        // both of them: '&&' binds tighter than '||', so without the parentheses
        // a GE test would satisfy the predicate regardless of UseSSE.
12917   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12918   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12919   ins_cost(200);
12920   expand %{
12921     fcmovD_regS(cmp,flags,dst,src);
12922   %}
12923 %}
12924 
12925 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
        // Conditional move of a float held in a regFPR register, selected by
        // LT/GE long-compare flags.  Expands to fcmovFPR_regS.
        // The lt/ge alternatives are parenthesized so the UseSSE==0 guard covers
        // both of them: '&&' binds tighter than '||', so without the parentheses
        // a GE test would satisfy the predicate regardless of UseSSE.
12926   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12927   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12928   ins_cost(200);
12929   expand %{
12930     fcmovFPR_regS(cmp,flags,dst,src);
12931   %}
12932 %}
12933 
12934 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
        // Conditional move of a float held in a regF register, selected by
        // LT/GE long-compare flags.  Expands to fcmovF_regS.
        // The lt/ge alternatives are parenthesized so the UseSSE>=1 guard covers
        // both of them: '&&' binds tighter than '||', so without the parentheses
        // a GE test would satisfy the predicate regardless of UseSSE.
12935   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12936   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12937   ins_cost(200);
12938   expand %{
12939     fcmovF_regS(cmp,flags,dst,src);
12940   %}
12941 %}
12942 
12943 //======
12944 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12945 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
        // Long compare against zero for EQ/NE consumers.  Per the format string,
        // the low word is copied to a scratch register and OR'ed with the high
        // word, so the zero flag reflects whether the whole 64-bit value is zero.
        // The bytes come from the long_cmp_flags0 encoding class.
12946   match( Set flags (CmpL src zero ));
12947   effect(TEMP tmp);
12948   ins_cost(200);
12949   format %{ "MOV    $tmp,$src.lo\n\t"
12950             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12951   ins_encode( long_cmp_flags0( src, tmp ) );
12952   ins_pipe( ialu_reg_reg_long );
12953 %}
12954 
12955 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12956 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
        // Long register-register compare for EQ/NE consumers.  Per the format
        // string, the low words are compared first and the high-word compare is
        // skipped when they already differ.  The bytes come from the
        // long_cmp_flags1 encoding class.
12957   match( Set flags (CmpL src1 src2 ));
12958   ins_cost(200+300);
12959   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12960             "JNE,s  skip\n\t"
12961             "CMP    $src1.hi,$src2.hi\n\t"
12962      "skip:\t" %}
12963   ins_encode( long_cmp_flags1( src1, src2 ) );
12964   ins_pipe( ialu_cr_reg_reg );
12965 %}
12966 
12967 // Long compare reg == zero/reg OR reg != zero/reg
12968 // Just a wrapper for a normal branch, plus the predicate test.
12969 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
        // Branch on flags produced by one of the EQ/NE long compares.  The
        // predicate accepts only Bool tests eq and ne (read from the Bool node
        // reached through _kids[0]); the rule then expands to a plain jmpCon.
12970   match(If cmp flags);
12971   effect(USE labl);
12972   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12973   expand %{
12974     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12975   %}
12976 %}
12977 
12978 //======
12979 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12980 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
        // Unsigned long compare against zero for EQ/NE consumers.  Shares the
        // long_cmp_flags0 encoding with the signed form (MOV low word to tmp, OR
        // in the high word) but produces the flagsReg_ulong_EQNE flags class.
12981   match(Set flags (CmpUL src zero));
12982   effect(TEMP tmp);
12983   ins_cost(200);
12984   format %{ "MOV    $tmp,$src.lo\n\t"
12985             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
12986   ins_encode(long_cmp_flags0(src, tmp));
12987   ins_pipe(ialu_reg_reg_long);
12988 %}
12989 
12990 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12991 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
        // Unsigned long register-register compare for EQ/NE consumers.  Shares
        // the long_cmp_flags1 encoding with the signed form (compare low words,
        // skip the high-word compare if they differ) but produces
        // flagsReg_ulong_EQNE.
12992   match(Set flags (CmpUL src1 src2));
12993   ins_cost(200+300);
12994   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12995             "JNE,s  skip\n\t"
12996             "CMP    $src1.hi,$src2.hi\n\t"
12997      "skip:\t" %}
12998   ins_encode(long_cmp_flags1(src1, src2));
12999   ins_pipe(ialu_cr_reg_reg);
13000 %}
13001 
13002 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13003 // Just a wrapper for a normal branch, plus the predicate test.
13004 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
        // Branch on flags produced by one of the EQ/NE unsigned long compares.
        // The predicate accepts only Bool tests eq and ne; the rule expands to
        // jmpCon, with the condition supplied by the unsigned cmpOpU operand.
13005   match(If cmp flags);
13006   effect(USE labl);
13007   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13008   expand %{
13009     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13010   %}
13011 %}
13012 
13013 // Compare 2 longs and CMOVE longs.
13014 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
        // Conditional move of a long register pair, selected by EQ/NE long-compare
        // flags.  Requires CMOV support and a Bool test of eq or ne.  Two CMOVs
        // are emitted with the same condition: one for the low halves and one for
        // the high halves.
13015   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13016   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13017   ins_cost(400);
13018   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13019             "CMOV$cmp $dst.hi,$src.hi" %}
13020   opcode(0x0F,0x40);
13021   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13022   ins_pipe( pipe_cmov_reg_long );
13023 %}
13024 
13025 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
        // Conditional move of a long loaded from memory (LoadL folded into the
        // match), selected by EQ/NE long-compare flags.  Requires CMOV support
        // and a Bool test of eq or ne.  Two CMOVs are emitted: RegMem for the low
        // word and RegMem_Hi for the high word.
13026   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13027   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13028   ins_cost(500);
13029   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13030             "CMOV$cmp $dst.hi,$src.hi" %}
13031   opcode(0x0F,0x40);
13032   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13033   ins_pipe( pipe_cmov_reg_long );
13034 %}
13035 
13036 // Compare 2 longs and CMOVE ints.
13037 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
        // Conditional move of an int register, selected by EQ/NE long-compare
        // flags.  Requires CMOV support and a Bool test of eq or ne.
13038   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13039   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13040   ins_cost(200);
13041   format %{ "CMOV$cmp $dst,$src" %}
13042   opcode(0x0F,0x40);
13043   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13044   ins_pipe( pipe_cmov_reg );
13045 %}
13046 
13047 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
        // Conditional move of an int loaded from memory (LoadI folded into the
        // match), selected by EQ/NE long-compare flags.  Requires CMOV support
        // and a Bool test of eq or ne.
13048   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13049   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13050   ins_cost(250);
13051   format %{ "CMOV$cmp $dst,$src" %}
13052   opcode(0x0F,0x40);
13053   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13054   ins_pipe( pipe_cmov_mem );
13055 %}
13056 
// Compare 2 longs and CMOVE ptrs.
// Eligible only when CMOV is supported and the Bool test is eq or ne.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  match( Set dst (CMoveP (Binary cmp flags) (Binary dst src)) );
  predicate( VM_Version::supports_cmov() &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp),
              RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13067 
// Compare 2 longs and CMOVE doubles (regDPR operands).
// Expands to fcmovDPR_regS.  Eligible only when UseSSE<=1 and the Bool test
// is eq or ne.
// NOTE: the eq/ne disjunction must be parenthesized.  && binds tighter than
// ||, so without the parentheses the UseSSE guard applied to the eq test
// only and an ne test satisfied the predicate at any SSE level.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13077 
// Compare 2 longs and CMOVE doubles (regD operands).
// Expands to fcmovD_regS.  Eligible only when UseSSE>=2 and the Bool test
// is eq or ne.
// NOTE: the eq/ne disjunction must be parenthesized.  && binds tighter than
// ||, so without the parentheses the UseSSE guard applied to the eq test
// only and an ne test satisfied the predicate at any SSE level.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13087 
// Compare 2 longs and CMOVE floats (regFPR operands).
// Expands to fcmovFPR_regS.  Eligible only when UseSSE==0 and the Bool test
// is eq or ne.
// NOTE: the eq/ne disjunction must be parenthesized.  && binds tighter than
// ||, so without the parentheses the UseSSE guard applied to the eq test
// only and an ne test satisfied the predicate at any SSE level.
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13096 
// Compare 2 longs and CMOVE floats (regF operands).
// Expands to fcmovF_regS.  Eligible only when UseSSE>=1 and the Bool test
// is eq or ne.
// NOTE: the eq/ne disjunction must be parenthesized.  && binds tighter than
// ||, so without the parentheses the UseSSE guard applied to the eq test
// only and an ne test satisfied the predicate at any SSE level.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13105 
13106 //======
// Manifest the result of a CmpL against the constant zero in the normal
// flags.  Only good for LE or GT compares.
// Same idea as cmpL_reg_flags_LEGT, except that the source is negated: per
// the format text, flags are computed for (0 - src) using a zeroed temp, and
// the consumer must use the commuted test.
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  effect( TEMP tmp );
  match( Set flags (CmpL src zero) );
  ins_cost(300);
  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}
13119 
// Manifest the result of a register/register CmpL in the normal flags.
// Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE but with the operands swapped (src2 is passed
// first to the encoding), so the consumer has to use a commuted test to get
// the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  effect( TEMP tmp );
  match( Set flags (CmpL src1 src2) );
  ins_cost(300);
  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
  // Note the operand order: src2 before src1.
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13133 
// Branch on a long compare whose Bool test is gt or le.
// Just a wrapper for a normal branch (jmpCon), plus the predicate test.
// The condition operand is a cmpOp_commute, matching the swapped-operand
// flag producers for flagsReg_long_LEGT.
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  match( If cmp flags );
  effect( USE labl );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}
13145 
13146 //======
// Manifest the result of a CmpUL against the constant zero in the normal
// flags.  Only good for LE or GT compares.
// Same idea as cmpUL_reg_flags_LEGT, except that the source is negated: per
// the format text, flags are computed for (0 - src) using a zeroed temp, and
// the consumer must use the commuted test.
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  effect( TEMP tmp );
  match( Set flags (CmpUL src zero) );
  ins_cost(300);
  format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}
13159 
// Manifest the result of a register/register CmpUL in the normal flags.
// Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE but with the operands swapped (src2 is passed
// first to the encoding), so the consumer has to use a commuted test to get
// the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  effect( TEMP tmp );
  match( Set flags (CmpUL src1 src2) );
  ins_cost(300);
  format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  // Note the operand order: src2 before src1.
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13173 
// Branch on an unsigned long compare whose Bool test is gt or le.
// Just a wrapper for a normal branch (jmpCon), plus the predicate test.
// The condition operand is a cmpOpU_commute, matching the swapped-operand
// flag producers for flagsReg_ulong_LEGT.
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ||
             _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  match( If cmp flags );
  effect( USE labl );
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}
13185 
// Compare 2 longs and CMOVE longs (register source).
// Eligible only when CMOV is supported and the Bool test is le or gt.  Uses
// a commuted condition operand (cmpOp_commute) and emits one CMOV per
// 32-bit half of the long register pair.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  predicate( VM_Version::supports_cmov() &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match( Set dst (CMoveL (Binary cmp flags) (Binary dst src)) );
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  // Low half first, then high half.
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ),
              enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13197 
// Compare 2 longs and CMOVE a long loaded from memory (LoadL).
// Eligible only when CMOV is supported and the Bool test is le or gt.  Uses
// a commuted condition operand (cmpOp_commute).
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  predicate( VM_Version::supports_cmov() &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match( Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))) );
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  // One CMOV per 32-bit half: RegMem for the first, RegMem_Hi for the second.
  ins_encode( enc_cmov(cmp), RegMem(dst, src),
              enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13208 
// Compare 2 longs and CMOVE ints (register source).
// Eligible only when CMOV is supported and the Bool test is le or gt.  Uses
// a commuted condition operand (cmpOp_commute).
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  match( Set dst (CMoveI (Binary cmp flags) (Binary dst src)) );
  predicate( VM_Version::supports_cmov() &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp),
              RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13219 
// Compare 2 longs and CMOVE an int loaded from memory (LoadI).
// Eligible only when CMOV is supported and the Bool test is le or gt.  Uses
// a commuted condition operand (cmpOp_commute).
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  match( Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))) );
  predicate( VM_Version::supports_cmov() &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp),
              RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13229 
// Compare 2 longs and CMOVE ptrs.
// Eligible only when CMOV is supported and the Bool test is le or gt.  Uses
// a commuted condition operand (cmpOp_commute).
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  match( Set dst (CMoveP (Binary cmp flags) (Binary dst src)) );
  predicate( VM_Version::supports_cmov() &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp),
              RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13240 
// Compare 2 longs and CMOVE doubles (regDPR operands, commuted condition).
// Expands to fcmovDPR_regS.  Eligible only when UseSSE<=1 and the Bool test
// is le or gt.
// NOTE: the le/gt disjunction must be parenthesized.  && binds tighter than
// ||, so without the parentheses the UseSSE guard applied to the le test
// only and a gt test satisfied the predicate at any SSE level.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13250 
// Compare 2 longs and CMOVE doubles (regD operands, commuted condition).
// Expands to fcmovD_regS.  Eligible only when UseSSE>=2 and the Bool test
// is le or gt.
// NOTE: the le/gt disjunction must be parenthesized.  && binds tighter than
// ||, so without the parentheses the UseSSE guard applied to the le test
// only and a gt test satisfied the predicate at any SSE level.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13260 
// Compare 2 longs and CMOVE floats (regFPR operands, commuted condition).
// Expands to fcmovFPR_regS.  Eligible only when UseSSE==0 and the Bool test
// is le or gt.
// NOTE: the le/gt disjunction must be parenthesized.  && binds tighter than
// ||, so without the parentheses the UseSSE guard applied to the le test
// only and a gt test satisfied the predicate at any SSE level.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13269 
13270 
// Compare 2 longs and CMOVE floats (regF operands, commuted condition).
// Expands to fcmovF_regS.  Eligible only when UseSSE>=1 and the Bool test
// is le or gt.
// NOTE: the le/gt disjunction must be parenthesized.  && binds tighter than
// ||, so without the parentheses the UseSSE guard applied to the le test
// only and a gt test satisfied the predicate at any SSE level.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 &&
             ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ||
               _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13279 
13280 
13281 // ============================================================================
13282 // Procedure Call/Return Instructions
// Call Java Static Instruction
// Matches CallStaticJava and is encoded as pre_call_resets, the static call
// itself, call_epilog and post_call_FPU, in that order.
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match( CallStaticJava );
  effect( USE meth );

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets, Java_Static_Call( meth ), call_epilog, post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}
13300 
// Call Java Dynamic Instruction
// Matches CallDynamicJava and is encoded as pre_call_resets, the dynamic
// call itself, call_epilog and post_call_FPU, in that order.
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match( CallDynamicJava );
  effect( USE meth );

  ins_cost(300);
  format %{ "MOV    EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets, Java_Dynamic_Call( meth ), call_epilog, post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}
13319 
// Call Runtime Instruction
// Matches CallRuntime.  The encoding runs pre_call_resets, frees the float
// stack, performs the Java-to-runtime call and finishes with post_call_FPU.
instruct CallRuntimeDirect(method meth) %{
  match( CallRuntime );
  effect( USE meth );

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets, FFree_Float_Stack_All, Java_To_Runtime( meth ), post_call_FPU );
  ins_pipe( pipe_slow );
%}
13335 
// Call runtime without safepoint
// Matches CallLeaf.  Same encoding sequence as CallRuntimeDirect, with an
// additional Verify_FPU_For_Leaf step between the call and post_call_FPU.
instruct CallLeafDirect(method meth) %{
  match( CallLeaf );
  effect( USE meth );

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf,
              post_call_FPU );
  ins_pipe( pipe_slow );
%}
13350 
// Leaf runtime call for CallLeafNoFP.  Unlike CallLeafDirect, the encoding
// consists only of pre_call_resets and the Java-to-runtime call; none of the
// FPU-related encoding steps are included.
instruct CallLeafNoFPDirect(method meth) %{
  match( CallLeafNoFP );
  effect( USE meth );

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_To_Runtime( meth ) );
  ins_pipe( pipe_slow );
%}
13361 
13362 
// Return Instruction
// Remove the return address & jump to it.  Emitted as the single primary
// opcode byte 0xC3.
instruct Ret() %{
  match( Return );
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode( OpcP );
  ins_pipe( pipe_jmp );
%}
13372 
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// The return address is left in place here; TailJump below is the variant
// that removes it.  method_oop is constrained to EBX (eBXRegP) and the jump
// target may not be EBP (eRegP_no_EBP).
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match( TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP    $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4);  /* Opcode FF /4 */
  ins_encode( OpcP,
              RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13385 
13386 
// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// The return address is popped into EDX as a dummy destination before the
// indirect jump; ex_oop is constrained to EAX (eAXRegP).
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP    EDX\t# pop return address into dummy\n\t"
            "JMP    $jump_target " %}
  opcode(0xFF, 0x4);  /* Opcode FF /4 */
  ins_encode( enc_pop_rdx, OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13399 
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted: the rule has
// size(0) and an empty encoding; it only pins ex_oop to EAX (eAXRegP).
instruct CreateException( eAXRegP ex_oop ) %{
  match( Set ex_oop (CreateEx) );

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}
13413 
13414 
// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match( Rethrow );

  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  ins_encode( enc_rethrow );
  ins_pipe( pipe_jmp );
%}
13427 
13428 // inlined locking and unlocking
13429 
// Inlined fast-path monitor enter, RTM variant.  Selected only when the
// current compilation uses RTM.  Sets the flags register from
// FastLock(object, box); box is used and killed, and tmp (EAX), scr (EDX),
// cx1 and cx2 are temporaries.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  match( Set cr (FastLock object box) );
  predicate( Compile::current()->use_rtm() );
  effect( TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box );
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    // Compilation that owns this node; supplies the method data and the
    // RTM profiling switch passed to fast_lock.
    Compile* const comp = ra_->C;
    __ fast_lock($object$$Register,
                 $box$$Register,
                 $tmp$$Register,
                 $scr$$Register,
                 $cx1$$Register,
                 $cx2$$Register,
                 _counters,
                 _rtm_counters,
                 _stack_rtm_counters,
                 ((Method*)(comp->method()->constant_encoding()))->method_data(),
                 true,
                 comp->profile_rtm());
  %}
  ins_pipe( pipe_slow );
%}
13445 
// Inlined fast-path monitor enter, non-RTM variant.  Selected only when the
// current compilation does not use RTM.  Sets the flags register from
// FastLock(object, box); box is used and killed, tmp (EAX) and scr are
// temporaries.  The RTM-related arguments of fast_lock are passed as
// noreg / NULL / false.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  match( Set cr (FastLock object box) );
  predicate( !Compile::current()->use_rtm() );
  effect( TEMP tmp, TEMP scr, USE_KILL box );
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register,
                 $box$$Register,
                 $tmp$$Register,
                 $scr$$Register,
                 noreg,
                 noreg,
                 _counters,
                 NULL,
                 NULL,
                 NULL,
                 false,
                 false);
  %}
  ins_pipe( pipe_slow );
%}
13458 
// Inlined fast-path monitor exit.  Sets the flags register from
// FastUnlock(object, box); box (EAX) is used and killed and tmp is a
// temporary.  Whether the RTM path is used is taken from the compilation
// at encode time.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  effect( TEMP tmp, USE_KILL box );
  match( Set cr (FastUnlock object box) );
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register,
                   $box$$Register,
                   $tmp$$Register,
                   ra_->C->use_rtm());
  %}
  ins_pipe( pipe_slow );
%}
13469 
13470 
13471 
13472 // ============================================================================
13473 // Safepoint Instruction
// Safepoint poll against the global polling page.  Selected only when the
// safepoint mechanism uses the global page poll.  Kills the flags register
// and declares a fixed size of 6 bytes.
instruct safePoint_poll(eFlagsReg cr) %{
  match( SafePoint );
  predicate( SafepointMechanism::uses_global_page_poll() );
  effect( KILL cr );

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  ins_cost(125);
  format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
  size(6);
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}
13493 
// Safepoint poll through a thread-local polling address held in $poll.
// Selected only when the safepoint mechanism uses the thread-local poll.
// Kills the flags register.  The poll register may not be EBP
// (eRegP_no_EBP), which is what allows the fixed size of 2 bytes below.
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  predicate(SafepointMechanism::uses_thread_local_poll());
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    // Tag the poll instruction with a poll_type relocation.
    __ relocate(relocInfo::poll_type);
    // Remember where the instruction starts so its first byte can be checked.
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    // The first emitted byte must be opcode 0x85.
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}
13512 
13513 
13514 // ============================================================================
// Load the current thread pointer into $dst (matches ThreadLocal).
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
// The flags register is declared killed.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match( Set dst (ThreadLocal) );
  effect( DEF dst, KILL cr );

  format %{ "MOV    $dst, Thread::current()" %}
  ins_encode %{
    __ get_thread(as_Register($dst$$reg));
  %}
  ins_pipe( ialu_reg_fat );
%}
13529 
13530 
13531 
13532 //----------PEEPHOLE RULES-----------------------------------------------------
13533 // These must follow all instruction definitions as they use the names
13534 // defined in the instructions definitions.
13535 //
13536 // peepmatch ( root_instr_name [preceding_instruction]* );
13537 //
13538 // peepconstraint %{
13539 // (instruction_number.operand_name relational_op instruction_number.operand_name
13540 //  [, ...] );
13541 // // instruction numbers are zero-based using left to right order in peepmatch
13542 //
13543 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13544 // // provide an instruction_number.operand_name for each operand that appears
13545 // // in the replacement instruction's match rule
13546 //
13547 // ---------VM FLAGS---------------------------------------------------------
13548 //
13549 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13550 //
13551 // Each peephole rule is given an identifying number starting with zero and
13552 // increasing by one in the order seen by the parser.  An individual peephole
13553 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13554 // on the command-line.
13555 //
13556 // ---------CURRENT LIMITATIONS----------------------------------------------
13557 //
13558 // Only match adjacent instructions in same basic block
13559 // Only equality constraints
13560 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13561 // Only one replacement instruction
13562 //
13563 // ---------EXAMPLE----------------------------------------------------------
13564 //
13565 // // pertinent parts of existing instructions in architecture description
13566 // instruct movI(rRegI dst, rRegI src) %{
13567 //   match(Set dst (CopyI src));
13568 // %}
13569 //
13570 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13571 //   match(Set dst (AddI dst src));
13572 //   effect(KILL cr);
13573 // %}
13574 //
13575 // // Change (inc mov) to lea
13576 // peephole %{
//   // increment preceded by register-register move
13578 //   peepmatch ( incI_eReg movI );
13579 //   // require that the destination register of the increment
13580 //   // match the destination register of the move
13581 //   peepconstraint ( 0.dst == 1.dst );
13582 //   // construct a replacement instruction that sets
13583 //   // the destination to ( move's source register + one )
13584 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13585 // %}
13586 //
13587 // Implementation no longer uses movX instructions since
13588 // machine-independent system no longer uses CopyX nodes.
13589 //
13590 // peephole %{
13591 //   peepmatch ( incI_eReg movI );
13592 //   peepconstraint ( 0.dst == 1.dst );
13593 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13594 // %}
13595 //
13596 // peephole %{
13597 //   peepmatch ( decI_eReg movI );
13598 //   peepconstraint ( 0.dst == 1.dst );
13599 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13600 // %}
13601 //
13602 // peephole %{
13603 //   peepmatch ( addI_eReg_imm movI );
13604 //   peepconstraint ( 0.dst == 1.dst );
13605 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13606 // %}
13607 //
13608 // peephole %{
13609 //   peepmatch ( addP_eReg_imm movP );
13610 //   peepconstraint ( 0.dst == 1.dst );
13611 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13612 // %}
13613 
13614 // // Change load of spilled value to only a spill
13615 // instruct storeI(memory mem, rRegI src) %{
13616 //   match(Set mem (StoreI mem src));
13617 // %}
13618 //
13619 // instruct loadI(rRegI dst, memory mem) %{
13620 //   match(Set dst (LoadI mem));
13621 // %}
13622 //
// Change load of spilled value to only a spill.
// Instruction 0 is the root (loadI) and instruction 1 is the storeI that
// immediately precedes it.  When the store's source register is the load's
// destination and both use the same memory operand, the pair is replaced by
// the store alone.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst,
                   1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}
13628 
13629 //----------SMARTSPILL RULES---------------------------------------------------
13630 // These must follow all instruction definitions as they use the names
13631 // defined in the instructions definitions.