1 //
   2 // Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // Previously, EBX, ESI, and EDI were set as save-on-entry for Java code,
  64 // but SOE was turned off in Java code due to frequent use of uncommon traps.
  65 // Now that the allocator is better, ESI and EDI are once again SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
  78 // allocator and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  81 // OK, so here's the trick: FPR1 is really st(0), except in the midst
  82 // of emitting assembly for a machnode. During emission the FPU stack
  83 // is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
  84 // the stack will not have this element, so FPR1 == st(0) from the
  85 // oopMap viewpoint. This same numbering weirdness forces the
  86 // instruction encoding to play games with the register encode to
  87 // correct for the 0/1 issue. See MachSpillCopyNode::implementation,
  88 // where it does flt->flt moves, for an example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
 108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
 109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
 110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
 111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
 112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
 113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
 114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
 133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 134 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 135 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 136 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between the register classes
 146 // any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg_with_ebp and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg_with_ebp and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg_with_ebp and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI).
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg_with_ebp and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (and neither EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg_with_ebp and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
 228 // FPR0 is never allocated; we use clever encodings to fake
 229 // 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
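     //
     // Illustrative note (not part of the original comments): with the integer
     // encodings defined above (EAX=0, ECX=1, EDX=2, EBX=3, EBP=5, EDI=7), the
     // long pairs EDX:EAX, EBX:ECX and EDI:EBP all satisfy high == low + 2, e.g.
     //   HIGH_FROM_LOW(0 /*EAX*/) == 2 /*EDX*/
     //   HIGH_FROM_LOW(5 /*EBP*/) == 7 /*EDI*/
     // which is exactly the relationship this macro relies on.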
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
 266 // Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 268   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
 269   // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 271   // Store the value into the 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
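     // Illustrative example (assumed addresses): fp_signmask_pool below is only
     // guaranteed 8-byte alignment, so an entry address such as 0x...38 is rounded
     // down by (adr)&(~0xF) to the 16-byte aligned slot 0x...30; the extra 128 bits
     // reserved in the pool keep every rounded-down slot inside the buffer.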
 276 
 277 // Buffer for the 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
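     // Usage sketch (illustrative, not the only consumers): AbsF/AbsD can be done
     // by ANDing an XMM register with a signmask pool entry (clearing the sign bit,
     // 0x7FFFFFFF per float lane), while NegF/NegD XOR with a signflip pool entry
     // (toggling the sign bit, 0x80000000 per float lane).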
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (VM_Version::supports_vzeroupper()) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
 299 // !!!!! Special hack to get all types of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
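     // Worked example (values depend on VM flags and CPU): if the method was
     // compiled in 24-bit FP mode and the CPU supports vzeroupper, then
     // pre_call_resets_size() is 6 + 3 = 9, so the return address lies 9 + 5 = 14
     // bytes past the start of a static call sequence and 9 + 10 = 19 bytes past
     // the start of a dynamic (inline-cache) call sequence.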
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 // Indicate if the safepoint node needs the polling page as an input.
 318 // Since x86 has absolute addressing, it doesn't, unless thread-local safepoint polling is in use.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return SafepointMechanism::uses_thread_local_poll();
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return align_up(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return align_up(current_offset, alignment_required()) - current_offset;
 342 }
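     // Worked example (assuming alignment_required() == 4, as the comments above
     // indicate): with no pre-call resets and current_offset == 10 for the static
     // call above, the call opcode would land at 10 and its 32-bit displacement
     // would start at 11; align_up(11, 4) == 12, so compute_padding() returns 1 and
     // the padded displacement ends up 4-byte aligned at offset 12.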
 343 
 344 // EMIT_RM()
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
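     // Example: emit_rm(cbuf, 0x3, 0x0, ESP_enc) packs (3 << 6) | (0 << 3) | 4 and
     // emits 0xC4 -- the ModRM byte used by the "ADD ESP, #imm" forms in the
     // epilog code further below.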
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 412   }
 413 }
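     // Example byte sequence (illustrative): store_to_stackslot(cbuf, 0xD9, 0x3, 8)
     // emits D9 5C 24 08, i.e. FSTP dword ptr [ESP+8]: opcode, ModRM (mod=01,
     // reg=3, rm=ESP), SIB (no index, base=ESP), then the 8-bit displacement.
     // A displacement outside [-128,127] takes the 7-byte form with a 32-bit disp.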
 414 
 415    // rRegI ereg, memory mem) %{    // emit_reg_mem
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 417   // There is no index & no scale, use form without SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
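     // Example (illustrative): encode_RegMem(cbuf, EAX_enc, ECX_enc, 0x4 /*no index*/,
     // 0, 16, relocInfo::none) takes the no-SIB, 8-bit-displacement path and emits
     // ModRM 0x41 followed by disp8 0x10; together with a preceding 0x8B opcode this
     // forms MOV EAX,[ECX+16].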
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
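     // Example: encode_Copy(cbuf, ECX_enc, EAX_enc) emits 8B C8 (MOV ECX,EAX),
     // while a self-copy such as encode_Copy(cbuf, EAX_enc, EAX_enc) emits nothing.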
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
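     // Net effect (reading the code above): after an unordered FP compare, dst is
     // -1 when the operands are unordered (PF set) or the first is below the second
     // (CF set), 0 when they are equal, and 1 otherwise.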
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   __ verified_entry(C);
 620 
 621   C->set_frame_complete(cbuf.insts_size());
 622 
 623   if (C->has_mach_constant_base_node()) {
 624   // NOTE: We set the table base offset here because users of the constant
 625   // table might be emitted before MachConstantBaseNode.
 626     Compile::ConstantTable& constant_table = C->constant_table();
 627     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 628   }
 629 }
 630 
 631 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 632   return MachNode::size(ra_); // too many variables; just compute it the hard way
 633 }
 634 
 635 int MachPrologNode::reloc() const {
 636   return 0; // a large enough number
 637 }
 638 
 639 //=============================================================================
 640 #ifndef PRODUCT
 641 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 642   Compile *C = ra_->C;
 643   int framesize = C->frame_size_in_bytes();
 644   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 645   // Remove two words for return addr and rbp.
 646   framesize -= 2*wordSize;
 647 
 648   if (C->max_vector_size() > 16) {
 649     st->print("VZEROUPPER");
 650     st->cr(); st->print("\t");
 651   }
 652   if (C->in_24_bit_fp_mode()) {
 653     st->print("FLDCW  standard control word");
 654     st->cr(); st->print("\t");
 655   }
 656   if (framesize) {
 657     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 658     st->cr(); st->print("\t");
 659   }
 660   st->print_cr("POPL   EBP"); st->print("\t");
 661   if (do_polling() && C->is_method_compilation()) {
 662     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 663     st->cr(); st->print("\t");
 664   }
 665 }
 666 #endif
 667 
 668 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 669   Compile *C = ra_->C;
 670   MacroAssembler _masm(&cbuf);
 671 
 672   if (C->max_vector_size() > 16) {
 673     // Clear upper bits of YMM registers when current compiled code uses
 674     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 675     _masm.vzeroupper();
 676   }
 677   // If method set FPU control word, restore to standard control word
 678   if (C->in_24_bit_fp_mode()) {
 679     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 680   }
 681 
 682   int framesize = C->frame_size_in_bytes();
 683   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 684   // Remove two words for return addr and rbp.
 685   framesize -= 2*wordSize;
 686 
 687   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 688 
 689   if (framesize >= 128) {
 690     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 691     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 692     emit_d32(cbuf, framesize);
 693   } else if (framesize) {
 694     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 695     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 696     emit_d8(cbuf, framesize);
 697   }
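       // Example encodings: a 24-byte frame is torn down with 83 C4 18 (ADD ESP,24),
       // while a 512-byte frame needs the long form 81 C4 00 02 00 00; the
       // 0x58 | EBP_enc byte emitted next is 0x5D, i.e. POP EBP.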
 698 
 699   emit_opcode(cbuf, 0x58 | EBP_enc);
 700 
 701   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 702     __ reserved_stack_check();
 703   }
 704 
 705   if (do_polling() && C->is_method_compilation()) {
 706     if (SafepointMechanism::uses_thread_local_poll()) {
 707       Register pollReg = as_Register(EBX_enc);
 708       MacroAssembler masm(&cbuf);
 709       masm.get_thread(pollReg);
 710       masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
 711       masm.relocate(relocInfo::poll_return_type);
 712       masm.testl(rax, Address(pollReg, 0));
 713     } else {
 714       cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 715       emit_opcode(cbuf,0x85);
 716       emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 717       emit_d32(cbuf, (intptr_t)os::get_polling_page());
 718     }
 719   }
 720 }
 721 
 722 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 723   return MachNode::size(ra_); // too many variables; just compute it
 724                               // the hard way
 725 }
 726 
 727 int MachEpilogNode::reloc() const {
 728   return 0; // a large enough number
 729 }
 730 
 731 const Pipeline * MachEpilogNode::pipeline() const {
 732   return MachNode::pipeline_class();
 733 }
 734 
 735 int MachEpilogNode::safepoint_offset() const { return 0; }
 736 
 737 //=============================================================================
 738 
 739 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 740 static enum RC rc_class( OptoReg::Name reg ) {
 741 
 742   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 743   if (OptoReg::is_stack(reg)) return rc_stack;
 744 
 745   VMReg r = OptoReg::as_VMReg(reg);
 746   if (r->is_Register()) return rc_int;
 747   if (r->is_FloatRegister()) {
 748     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 749     return rc_float;
 750   }
 751   assert(r->is_XMMRegister(), "must be");
 752   return rc_xmm;
 753 }
 754 
 755 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 756                         int opcode, const char *op_str, int size, outputStream* st ) {
 757   if( cbuf ) {
 758     emit_opcode  (*cbuf, opcode );
 759     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 760 #ifndef PRODUCT
 761   } else if( !do_size ) {
 762     if( size != 0 ) st->print("\n\t");
 763     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 764       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 765       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 766     } else { // FLD, FST, PUSH, POP
 767       st->print("%s [ESP + #%d]",op_str,offset);
 768     }
 769 #endif
 770   }
 771   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 772   return size+3+offset_size;
 773 }
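     // Size example: a spill store "MOV [ESP+8],EAX" is 89 44 24 08, i.e. 3 bytes
     // (opcode + ModRM + SIB) plus a 1-byte displacement, matching the
     // size + 3 + offset_size computed above; offset 0 drops the displacement byte
     // and offsets above 127 take 4 displacement bytes instead.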
 774 
 775 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 776 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 777                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 778   int in_size_in_bits = Assembler::EVEX_32bit;
 779   int evex_encoding = 0;
 780   if (reg_lo+1 == reg_hi) {
 781     in_size_in_bits = Assembler::EVEX_64bit;
 782     evex_encoding = Assembler::VEX_W;
 783   }
 784   if (cbuf) {
 785     MacroAssembler _masm(cbuf);
 786     // EVEX spills remain EVEX: compressed displacement is better than AVX for spill memory operations,
 787     //                          since it maps more cases to a single-byte displacement.
 788     _masm.set_managed();
 789     if (reg_lo+1 == reg_hi) { // double move?
 790       if (is_load) {
 791         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 792       } else {
 793         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 794       }
 795     } else {
 796       if (is_load) {
 797         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 798       } else {
 799         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 800       }
 801     }
 802 #ifndef PRODUCT
 803   } else if (!do_size) {
 804     if (size != 0) st->print("\n\t");
 805     if (reg_lo+1 == reg_hi) { // double move?
 806       if (is_load) st->print("%s %s,[ESP + #%d]",
 807                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 808                               Matcher::regName[reg_lo], offset);
 809       else         st->print("MOVSD  [ESP + #%d],%s",
 810                               offset, Matcher::regName[reg_lo]);
 811     } else {
 812       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 813                               Matcher::regName[reg_lo], offset);
 814       else         st->print("MOVSS  [ESP + #%d],%s",
 815                               offset, Matcher::regName[reg_lo]);
 816     }
 817 #endif
 818   }
 819   bool is_single_byte = false;
 820   if ((UseAVX > 2) && (offset != 0)) {
 821     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 822   }
 823   int offset_size = 0;
 824   if (UseAVX > 2 ) {
 825     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 826   } else {
 827     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 828   }
 829   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 830   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 831   return size+5+offset_size;
 832 }
 833 
 834 
 835 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 836                             int src_hi, int dst_hi, int size, outputStream* st ) {
 837   if (cbuf) {
 838     MacroAssembler _masm(cbuf);
 839     // EVEX spills remain EVEX: the logic for mixing full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
 840     _masm.set_managed();
 841     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 842       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 843                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 844     } else {
 845       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 846                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 847     }
 848 #ifndef PRODUCT
 849   } else if (!do_size) {
 850     if (size != 0) st->print("\n\t");
 851     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 852       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 853         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 854       } else {
 855         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 856       }
 857     } else {
 858       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 859         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 860       } else {
 861         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 862       }
 863     }
 864 #endif
 865   }
 866   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 867   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 868   int sz = (UseAVX > 2) ? 6 : 4;
 869   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 870       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 871   return size + sz;
 872 }
 873 
 874 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 875                             int src_hi, int dst_hi, int size, outputStream* st ) {
 876   // 32-bit
 877   if (cbuf) {
 878     MacroAssembler _masm(cbuf);
 879     // EVEX spills remain EVEX: the logic for mixing full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
 880     _masm.set_managed();
 881     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 882              as_Register(Matcher::_regEncode[src_lo]));
 883 #ifndef PRODUCT
 884   } else if (!do_size) {
 885     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 886 #endif
 887   }
 888   return (UseAVX> 2) ? 6 : 4;
 889 }
 890 
 891 
 892 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 893                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 894   // 32-bit
 895   if (cbuf) {
 896     MacroAssembler _masm(cbuf);
 897     // EVEX spills remain EVEX: the logic for mixing full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
 898     _masm.set_managed();
 899     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 900              as_XMMRegister(Matcher::_regEncode[src_lo]));
 901 #ifndef PRODUCT
 902   } else if (!do_size) {
 903     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 904 #endif
 905   }
 906   return (UseAVX> 2) ? 6 : 4;
 907 }
 908 
 909 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 910   if( cbuf ) {
 911     emit_opcode(*cbuf, 0x8B );
 912     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 913 #ifndef PRODUCT
 914   } else if( !do_size ) {
 915     if( size != 0 ) st->print("\n\t");
 916     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 917 #endif
 918   }
 919   return size+2;
 920 }
 921 
 922 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 923                                  int offset, int size, outputStream* st ) {
 924   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 925     if( cbuf ) {
 926       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 927       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 928 #ifndef PRODUCT
 929     } else if( !do_size ) {
 930       if( size != 0 ) st->print("\n\t");
 931       st->print("FLD    %s",Matcher::regName[src_lo]);
 932 #endif
 933     }
 934     size += 2;
 935   }
 936 
 937   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 938   const char *op_str;
 939   int op;
 940   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 941     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 942     op = 0xDD;
 943   } else {                   // 32-bit store
 944     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 945     op = 0xD9;
 946     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 947   }
 948 
 949   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 950 }
 951 
 952 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 953 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 954                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 955 
 956 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 957                             int stack_offset, int reg, uint ireg, outputStream* st);
 958 
 959 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 960                                      int dst_offset, uint ireg, outputStream* st) {
 961   int calc_size = 0;
 962   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 963   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 964   switch (ireg) {
 965   case Op_VecS:
 966     calc_size = 3+src_offset_size + 3+dst_offset_size;
 967     break;
 968   case Op_VecD: {
 969     calc_size = 3+src_offset_size + 3+dst_offset_size;
 970     int tmp_src_offset = src_offset + 4;
 971     int tmp_dst_offset = dst_offset + 4;
 972     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 973     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 974     calc_size += 3+src_offset_size + 3+dst_offset_size;
 975     break;
 976   }
 977   case Op_VecX:
 978   case Op_VecY:
 979   case Op_VecZ:
 980     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 981     break;
 982   default:
 983     ShouldNotReachHere();
 984   }
 985   if (cbuf) {
 986     MacroAssembler _masm(cbuf);
 987     int offset = __ offset();
 988     switch (ireg) {
 989     case Op_VecS:
 990       __ pushl(Address(rsp, src_offset));
 991       __ popl (Address(rsp, dst_offset));
 992       break;
 993     case Op_VecD:
 994       __ pushl(Address(rsp, src_offset));
 995       __ popl (Address(rsp, dst_offset));
 996       __ pushl(Address(rsp, src_offset+4));
 997       __ popl (Address(rsp, dst_offset+4));
 998       break;
 999     case Op_VecX:
1000       __ movdqu(Address(rsp, -16), xmm0);
1001       __ movdqu(xmm0, Address(rsp, src_offset));
1002       __ movdqu(Address(rsp, dst_offset), xmm0);
1003       __ movdqu(xmm0, Address(rsp, -16));
1004       break;
1005     case Op_VecY:
1006       __ vmovdqu(Address(rsp, -32), xmm0);
1007       __ vmovdqu(xmm0, Address(rsp, src_offset));
1008       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1009       __ vmovdqu(xmm0, Address(rsp, -32));
1010       break;
1011     case Op_VecZ:
1012       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1013       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1014       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1015       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1016       break;
1017     default:
1018       ShouldNotReachHere();
1019     }
1020     int size = __ offset() - offset;
1021     assert(size == calc_size, "incorrect size calculation");
1022     return size;
1023 #ifndef PRODUCT
1024   } else if (!do_size) {
1025     switch (ireg) {
1026     case Op_VecS:
1027       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1028                 "popl    [rsp + #%d]",
1029                 src_offset, dst_offset);
1030       break;
1031     case Op_VecD:
1032       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1033                 "popq    [rsp + #%d]\n\t"
1034                 "pushl   [rsp + #%d]\n\t"
1035                 "popq    [rsp + #%d]",
1036                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1037       break;
1038      case Op_VecX:
1039       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1040                 "movdqu  xmm0, [rsp + #%d]\n\t"
1041                 "movdqu  [rsp + #%d], xmm0\n\t"
1042                 "movdqu  xmm0, [rsp - #16]",
1043                 src_offset, dst_offset);
1044       break;
1045     case Op_VecY:
1046       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1047                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1048                 "vmovdqu [rsp + #%d], xmm0\n\t"
1049                 "vmovdqu xmm0, [rsp - #32]",
1050                 src_offset, dst_offset);
1051       break;
1052     case Op_VecZ:
1053       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1054                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1055                 "vmovdqu [rsp + #%d], xmm0\n\t"
1056                 "vmovdqu xmm0, [rsp - #64]",
1057                 src_offset, dst_offset);
1058       break;
1059     default:
1060       ShouldNotReachHere();
1061     }
1062 #endif
1063   }
1064   return calc_size;
1065 }
1066 
1067 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1068   // Get registers to move
1069   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1070   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1071   OptoReg::Name dst_second = ra_->get_reg_second(this );
1072   OptoReg::Name dst_first = ra_->get_reg_first(this );
1073 
1074   enum RC src_second_rc = rc_class(src_second);
1075   enum RC src_first_rc = rc_class(src_first);
1076   enum RC dst_second_rc = rc_class(dst_second);
1077   enum RC dst_first_rc = rc_class(dst_first);
1078 
1079   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1080 
1081   // Generate spill code!
1082   int size = 0;
1083 
1084   if( src_first == dst_first && src_second == dst_second )
1085     return size;            // Self copy, no move
1086 
1087   if (bottom_type()->isa_vect() != NULL) {
1088     uint ireg = ideal_reg();
1089     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1090     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1091     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1092     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1093       // mem -> mem
1094       int src_offset = ra_->reg2offset(src_first);
1095       int dst_offset = ra_->reg2offset(dst_first);
1096       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1097     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1098       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1099     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1100       int stack_offset = ra_->reg2offset(dst_first);
1101       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1102     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1103       int stack_offset = ra_->reg2offset(src_first);
1104       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1105     } else {
1106       ShouldNotReachHere();
1107     }
1108   }
1109 
1110   // --------------------------------------
1111   // Check for mem-mem move.  push/pop to move.
1112   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1113     if( src_second == dst_first ) { // overlapping stack copy ranges
1114       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1115       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1116       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1117       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1118     }
1119     // move low bits
1120     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1121     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1122     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1123       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1124       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1125     }
1126     return size;
1127   }
1128 
1129   // --------------------------------------
1130   // Check for integer reg-reg copy
1131   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1132     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1133 
1134   // Check for integer store
1135   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1136     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1137 
1138   // Check for integer load
1139   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1140     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1141 
1142   // Check for integer reg-xmm reg copy
1143   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1144     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1145             "no 64 bit integer-float reg moves" );
1146     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1147   }
1148   // --------------------------------------
1149   // Check for float reg-reg copy
1150   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1151     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1152             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1153     if( cbuf ) {
1154 
1155       // Note the mucking with the register encode to compensate for the 0/1
1156       // indexing issue mentioned in a comment in the reg_def sections
1157       // for FPR registers many lines above here.
1158 
1159       if( src_first != FPR1L_num ) {
1160         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1161         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1162         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1163         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1164      } else {
1165         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1166         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1167      }
1168 #ifndef PRODUCT
1169     } else if( !do_size ) {
1170       if( size != 0 ) st->print("\n\t");
1171       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1172       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1173 #endif
1174     }
1175     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1176   }
1177 
1178   // Check for float store
1179   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1180     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1181   }
1182 
1183   // Check for float load
1184   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1185     int offset = ra_->reg2offset(src_first);
1186     const char *op_str;
1187     int op;
1188     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1189       op_str = "FLD_D";
1190       op = 0xDD;
1191     } else {                   // 32-bit load
1192       op_str = "FLD_S";
1193       op = 0xD9;
1194       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1195     }
1196     if( cbuf ) {
1197       emit_opcode  (*cbuf, op );
1198       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1199       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1200       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1201 #ifndef PRODUCT
1202     } else if( !do_size ) {
1203       if( size != 0 ) st->print("\n\t");
1204       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1205 #endif
1206     }
1207     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1208     return size + 3+offset_size+2;
1209   }
1210 
1211   // Check for xmm reg-reg copy
1212   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1213     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1214             (src_first+1 == src_second && dst_first+1 == dst_second),
1215             "no non-adjacent float-moves" );
1216     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1217   }
1218 
1219   // Check for xmm reg-integer reg copy
1220   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1221     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1222             "no 64 bit float-integer reg moves" );
1223     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1224   }
1225 
1226   // Check for xmm store
1227   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1228     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1229   }
1230 
1231   // Check for float xmm load
1232   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1233     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1234   }
1235 
1236   // Copy from float reg to xmm reg
1237   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1238     // copy to the top of stack from floating point reg
1239     // and use LEA to preserve flags
1240     if( cbuf ) {
1241       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1242       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1243       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1244       emit_d8(*cbuf,0xF8);
1245 #ifndef PRODUCT
1246     } else if( !do_size ) {
1247       if( size != 0 ) st->print("\n\t");
1248       st->print("LEA    ESP,[ESP-8]");
1249 #endif
1250     }
1251     size += 4;
1252 
1253     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1254 
1255     // Copy from the temp memory to the xmm reg.
1256     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1257 
1258     if( cbuf ) {
1259       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1260       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1261       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1262       emit_d8(*cbuf,0x08);
1263 #ifndef PRODUCT
1264     } else if( !do_size ) {
1265       if( size != 0 ) st->print("\n\t");
1266       st->print("LEA    ESP,[ESP+8]");
1267 #endif
1268     }
1269     size += 4;
1270     return size;
1271   }
1272 
1273   assert( size > 0, "missed a case" );
1274 
1275   // --------------------------------------------------------------------
1276   // Check for second bits still needing moving.
1277   if( src_second == dst_second )
1278     return size;               // Self copy; no move
1279   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1280 
1281   // Check for second word int-int move
1282   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1283     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1284 
1285   // Check for second word integer store
1286   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1287     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1288 
1289   // Check for second word integer load
1290   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1291     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1292 
1293 
1294   Unimplemented();
1295   return 0; // Mute compiler
1296 }
1297 
1298 #ifndef PRODUCT
1299 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1300   implementation( NULL, ra_, false, st );
1301 }
1302 #endif
1303 
1304 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1305   implementation( &cbuf, ra_, false, NULL );
1306 }
1307 
1308 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1309   return MachNode::size(ra_);
1310 }
1311 
1312 
1313 //=============================================================================
1314 #ifndef PRODUCT
1315 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1316   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1317   int reg = ra_->get_reg_first(this);
1318   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1319 }
1320 #endif
1321 
1322 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1323   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1324   int reg = ra_->get_encode(this);
1325   if( offset >= 128 ) {
1326     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1327     emit_rm(cbuf, 0x2, reg, 0x04);
1328     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1329     emit_d32(cbuf, offset);
1330   }
1331   else {
1332     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1333     emit_rm(cbuf, 0x1, reg, 0x04);
1334     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1335     emit_d8(cbuf, offset);
1336   }
1337 }
1338 
1339 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1340   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
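  // LEA reg,[ESP+disp] is 7 bytes with a 32-bit displacement
  // (opcode + ModRM + SIB + disp32) and 4 bytes with an 8-bit displacement.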
1341   if( offset >= 128 ) {
1342     return 7;
1343   }
1344   else {
1345     return 4;
1346   }
1347 }
1348 
1349 //=============================================================================
1350 #ifndef PRODUCT
1351 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1352   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1353   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1354   st->print_cr("\tNOP");
1355   st->print_cr("\tNOP");
1356   if( !OptoBreakpoint )
1357     st->print_cr("\tNOP");
1358 }
1359 #endif
1360 
1361 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1362   MacroAssembler masm(&cbuf);
1363 #ifdef ASSERT
1364   uint insts_size = cbuf.insts_size();
1365 #endif
1366   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1367   masm.jump_cc(Assembler::notEqual,
1368                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1369   /* WARNING these NOPs are critical so that verified entry point is properly
1370      aligned for patching by NativeJump::patch_verified_entry() */
1371   int nops_cnt = 2;
1372   if( !OptoBreakpoint ) // Leave space for int3
1373      nops_cnt += 1;
1374   masm.nop(nops_cnt);
1375 
1376   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1377 }
1378 
1379 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
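  // CMP EAX,[ECX+4] is 3 bytes, JNE rel32 is 6 bytes, plus 2 or 3 one-byte NOPs.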
1380   return OptoBreakpoint ? 11 : 12;
1381 }
1382 
1383 
1384 //=============================================================================
1385 
1386 int Matcher::regnum_to_fpu_offset(int regnum) {
1387   return regnum - 32; // The FP registers are in the second chunk
1388 }
1389 
// This is UltraSparc-specific; on x86, returning true just means we have a fast l2f conversion
1391 const bool Matcher::convL2FSupported(void) {
1392   return true;
1393 }
1394 
1395 // Is this branch offset short enough that a short branch can be used?
1396 //
1397 // NOTE: If the platform does not provide any short branch variants, then
1398 //       this method should return false for offset 0.
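//
// Illustrative example (numbers not taken from this file): a short Jcc is 2
// bytes, so with br_size == 2 an incoming offset of 129 is adjusted to 127 and
// still fits the signed 8-bit displacement, while 130 would not.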
1399 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1403   offset -= br_size;
1404 
1405   // the short version of jmpConUCF2 contains multiple branches,
1406   // making the reach slightly less
1407   if (rule == jmpConUCF2_rule)
1408     return (-126 <= offset && offset <= 125);
1409   return (-128 <= offset && offset <= 127);
1410 }
1411 
1412 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1414   return false;
1415 }
1416 
1417 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1418 const bool Matcher::init_array_count_is_in_bytes = false;
1419 
// Needs 2 CMOVs for longs.
1421 const int Matcher::long_cmove_cost() { return 1; }
1422 
1423 // No CMOVF/CMOVD with SSE/SSE2
1424 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1425 
1426 // Does the CPU require late expand (see block.cpp for description of late expand)?
1427 const bool Matcher::require_postalloc_expand = false;
1428 
1429 // Do we need to mask the count passed to shift instructions or does
1430 // the cpu only look at the lower 5/6 bits anyway?
1431 const bool Matcher::need_masked_shift_count = false;
1432 
1433 bool Matcher::narrow_oop_use_complex_address() {
1434   ShouldNotCallThis();
1435   return true;
1436 }
1437 
1438 bool Matcher::narrow_klass_use_complex_address() {
1439   ShouldNotCallThis();
1440   return true;
1441 }
1442 
1443 bool Matcher::const_oop_prefer_decode() {
1444   ShouldNotCallThis();
1445   return true;
1446 }
1447 
1448 bool Matcher::const_klass_prefer_decode() {
1449   ShouldNotCallThis();
1450   return true;
1451 }
1452 
1453 // Is it better to copy float constants, or load them directly from memory?
1454 // Intel can load a float constant from a direct address, requiring no
1455 // extra registers.  Most RISCs will have to materialize an address into a
1456 // register first, so they would do better to copy the constant from stack.
1457 const bool Matcher::rematerialize_float_constants = true;
1458 
1459 // If CPU can load and store mis-aligned doubles directly then no fixup is
1460 // needed.  Else we split the double into 2 integer pieces and move it
1461 // piece-by-piece.  Only happens when passing doubles into C code as the
1462 // Java calling convention forces doubles to be aligned.
1463 const bool Matcher::misaligned_doubles_ok = true;
1464 
1465 
1466 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1467   // Get the memory operand from the node
1468   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1469   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1470   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1471   uint opcnt     = 1;                 // First operand
1472   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1473   while( idx >= skipped+num_edges ) {
1474     skipped += num_edges;
1475     opcnt++;                          // Bump operand count
1476     assert( opcnt < numopnds, "Accessing non-existent operand" );
1477     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1478   }
1479 
1480   MachOper *memory = node->_opnds[opcnt];
1481   MachOper *new_memory = NULL;
1482   switch (memory->opcode()) {
1483   case DIRECT:
1484   case INDOFFSET32X:
1485     // No transformation necessary.
1486     return;
1487   case INDIRECT:
1488     new_memory = new indirect_win95_safeOper( );
1489     break;
1490   case INDOFFSET8:
1491     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1492     break;
1493   case INDOFFSET32:
1494     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1495     break;
1496   case INDINDEXOFFSET:
1497     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1498     break;
1499   case INDINDEXSCALE:
1500     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1501     break;
1502   case INDINDEXSCALEOFFSET:
1503     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1504     break;
1505   case LOAD_LONG_INDIRECT:
1506   case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as the address register; uses one of { EDX, EBX, EDI, ESI }
1508     return;
1509   default:
1510     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1511     return;
1512   }
1513   node->_opnds[opcnt] = new_memory;
1514 }
1515 
1516 // Advertise here if the CPU requires explicit rounding operations
1517 // to implement the UseStrictFP mode.
1518 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1519 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On 32-bit x86 they are stored with conversion only when the FPU is used for floats.
1522 bool Matcher::float_in_double() { return (UseSSE == 0); }
1523 
1524 // Do ints take an entire long register or just half?
1525 const bool Matcher::int_in_long = false;
1526 
1527 // Return whether or not this register is ever used as an argument.  This
1528 // function is used on startup to build the trampoline stubs in generateOptoStub.
1529 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1531 bool Matcher::can_be_java_arg( int reg ) {
1532   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1533   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1534   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1535   return false;
1536 }
1537 
1538 bool Matcher::is_spillable_arg( int reg ) {
1539   return can_be_java_arg(reg);
1540 }
1541 
1542 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code that uses a multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded because negating it does not
  // yield a correct positive 32-bit value).
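  // For example (illustrative values only): divisor == 7 qualifies, while
  // divisor == min_jint is rejected because its magnitude cannot be
  // represented as a positive 32-bit value.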
1548   return VM_Version::has_fast_idiv() &&
1549          (divisor == (int)divisor && divisor != min_jint);
1550 }
1551 
1552 // Register for DIVI projection of divmodI
1553 RegMask Matcher::divI_proj_mask() {
1554   return EAX_REG_mask();
1555 }
1556 
1557 // Register for MODI projection of divmodI
1558 RegMask Matcher::modI_proj_mask() {
1559   return EDX_REG_mask();
1560 }
1561 
1562 // Register for DIVL projection of divmodL
1563 RegMask Matcher::divL_proj_mask() {
1564   ShouldNotReachHere();
1565   return RegMask();
1566 }
1567 
1568 // Register for MODL projection of divmodL
1569 RegMask Matcher::modL_proj_mask() {
1570   ShouldNotReachHere();
1571   return RegMask();
1572 }
1573 
1574 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1575   return NO_REG_mask();
1576 }
1577 
// Returns true if the high 32 bits of the value are known to be zero.
1579 bool is_operand_hi32_zero(Node* n) {
1580   int opc = n->Opcode();
1581   if (opc == Op_AndL) {
1582     Node* o2 = n->in(2);
1583     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1584       return true;
1585     }
1586   }
1587   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1588     return true;
1589   }
1590   return false;
1591 }
1592 
1593 %}
1594 
1595 //----------ENCODING BLOCK-----------------------------------------------------
1596 // This block specifies the encoding classes used by the compiler to output
1597 // byte streams.  Encoding classes generate functions which are called by
1598 // Machine Instruction Nodes in order to generate the bit encoding of the
1599 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1602 // operand to generate a function which returns its register number when
1603 // queried.   CONST_INTER causes an operand to generate a function which
1604 // returns the value of the constant when queried.  MEMORY_INTER causes an
1605 // operand to generate four functions which return the Base Register, the
1606 // Index Register, the Scale Value, and the Offset Value of the operand when
1607 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1609 // associated with each basic boolean condition for a conditional instruction.
1610 // Instructions specify two basic values for encoding.  They use the
1611 // ins_encode keyword to specify their encoding class (which must be one of
1612 // the class names specified in the encoding block), and they use the
1613 // opcode keyword to specify, in order, their primary, secondary, and
1614 // tertiary opcode.  Only the opcode sections which a particular instruction
1615 // needs for encoding need to be specified.
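//
// Illustrative sketch only (not a rule defined in this block): an ADD-immediate
// instruction could tie into the encoding classes defined below roughly as
//
//   instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     opcode(0x81, 0x00);                            // primary opcode, /0 extension
//     ins_encode(OpcSErm(dst, src), Con8or32(src));  // opcode+ModRM, then imm8 or imm32
//     ins_pipe(ialu_reg);
//   %}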
1616 encode %{
  // Build emit functions for each basic byte or larger field in the Intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In the future, we can generalize this by
1621   // adding a syntax that specifies the sizes of fields in an order,
1622   // so that the adlc can build the emit functions automagically
1623 
1624   // Emit primary opcode
1625   enc_class OpcP %{
1626     emit_opcode(cbuf, $primary);
1627   %}
1628 
1629   // Emit secondary opcode
1630   enc_class OpcS %{
1631     emit_opcode(cbuf, $secondary);
1632   %}
1633 
1634   // Emit opcode directly
1635   enc_class Opcode(immI d8) %{
1636     emit_opcode(cbuf, $d8$$constant);
1637   %}
1638 
1639   enc_class SizePrefix %{
1640     emit_opcode(cbuf,0x66);
1641   %}
1642 
1643   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1644     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1645   %}
1646 
1647   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1648     emit_opcode(cbuf,$opcode$$constant);
1649     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1650   %}
1651 
1652   enc_class mov_r32_imm0( rRegI dst ) %{
1653     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1654     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1655   %}
1656 
1657   enc_class cdq_enc %{
1658     // Full implementation of Java idiv and irem; checks for
1659     // special case as described in JVM spec., p.243 & p.271.
1660     //
1661     //         normal case                           special case
1662     //
    // input : rax: dividend                          min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
    //
    //  Code sequence:
1670     //
1671     //  81 F8 00 00 00 80    cmp         rax,80000000h
1672     //  0F 85 0B 00 00 00    jne         normal_case
1673     //  33 D2                xor         rdx,edx
1674     //  83 F9 FF             cmp         rcx,0FFh
1675     //  0F 84 03 00 00 00    je          done
1676     //                  normal_case:
1677     //  99                   cdq
1678     //  F7 F9                idiv        rax,ecx
1679     //                  done:
1680     //
1681     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1682     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1683     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1684     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1685     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1686     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1687     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1688     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1689     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1690     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1691     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1692     // normal_case:
1693     emit_opcode(cbuf,0x99);                                         // cdq
1694     // idiv (note: must be emitted by the user of this rule)
1695     // normal:
1696   %}
1697 
1698   // Dense encoding for older common ops
1699   enc_class Opc_plus(immI opcode, rRegI reg) %{
1700     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1701   %}
1702 
1703 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1705   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1706     // Check for 8-bit immediate, and set sign extend bit in opcode
1707     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1708       emit_opcode(cbuf, $primary | 0x02);
1709     }
1710     else {                          // If 32-bit immediate
1711       emit_opcode(cbuf, $primary);
1712     }
1713   %}
1714 
1715   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1716     // Emit primary opcode and set sign-extend bit
1717     // Check for 8-bit immediate, and set sign extend bit in opcode
1718     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
1721       emit_opcode(cbuf, $primary);
1722     }
1723     // Emit r/m byte with secondary opcode, after primary opcode.
1724     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1725   %}
1726 
1727   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1728     // Check for 8-bit immediate, and set sign extend bit in opcode
1729     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1730       $$$emit8$imm$$constant;
1731     }
1732     else {                          // If 32-bit immediate
1733       // Output immediate
1734       $$$emit32$imm$$constant;
1735     }
1736   %}
1737 
1738   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1739     // Emit primary opcode and set sign-extend bit
1740     // Check for 8-bit immediate, and set sign extend bit in opcode
1741     int con = (int)$imm$$constant; // Throw away top bits
1742     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1743     // Emit r/m byte with secondary opcode, after primary opcode.
1744     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1745     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1746     else                               emit_d32(cbuf,con);
1747   %}
1748 
1749   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1750     // Emit primary opcode and set sign-extend bit
1751     // Check for 8-bit immediate, and set sign extend bit in opcode
1752     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1753     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1754     // Emit r/m byte with tertiary opcode, after primary opcode.
1755     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1756     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1757     else                               emit_d32(cbuf,con);
1758   %}
1759 
1760   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1761     emit_cc(cbuf, $secondary, $dst$$reg );
1762   %}
1763 
1764   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1765     int destlo = $dst$$reg;
1766     int desthi = HIGH_FROM_LOW(destlo);
1767     // bswap lo
1768     emit_opcode(cbuf, 0x0F);
1769     emit_cc(cbuf, 0xC8, destlo);
1770     // bswap hi
1771     emit_opcode(cbuf, 0x0F);
1772     emit_cc(cbuf, 0xC8, desthi);
1773     // xchg lo and hi
1774     emit_opcode(cbuf, 0x87);
1775     emit_rm(cbuf, 0x3, destlo, desthi);
1776   %}
1777 
1778   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1779     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1780   %}
1781 
1782   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1783     $$$emit8$primary;
1784     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1785   %}
1786 
1787   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1788     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1789     emit_d8(cbuf, op >> 8 );
1790     emit_d8(cbuf, op & 255);
1791   %}
1792 
1793   // emulate a CMOV with a conditional branch around a MOV
1794   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1795     // Invert sense of branch from sense of CMOV
1796     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1797     emit_d8( cbuf, $brOffs$$constant );
1798   %}
1799 
1800   enc_class enc_PartialSubtypeCheck( ) %{
1801     Register Redi = as_Register(EDI_enc); // result register
1802     Register Reax = as_Register(EAX_enc); // super class
1803     Register Recx = as_Register(ECX_enc); // killed
1804     Register Resi = as_Register(ESI_enc); // sub class
1805     Label miss;
1806 
1807     MacroAssembler _masm(&cbuf);
1808     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1809                                      NULL, &miss,
1810                                      /*set_cond_codes:*/ true);
1811     if ($primary) {
1812       __ xorptr(Redi, Redi);
1813     }
1814     __ bind(miss);
1815   %}
1816 
1817   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1818     MacroAssembler masm(&cbuf);
1819     int start = masm.offset();
1820     if (UseSSE >= 2) {
1821       if (VerifyFPU) {
1822         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1823       }
1824     } else {
1825       // External c_calling_convention expects the FPU stack to be 'clean'.
1826       // Compiled code leaves it dirty.  Do cleanup now.
1827       masm.empty_FPU_stack();
1828     }
1829     if (sizeof_FFree_Float_Stack_All == -1) {
1830       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1831     } else {
1832       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1833     }
1834   %}
1835 
1836   enc_class Verify_FPU_For_Leaf %{
1837     if( VerifyFPU ) {
1838       MacroAssembler masm(&cbuf);
1839       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1840     }
1841   %}
1842 
1843   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1844     // This is the instruction starting address for relocation info.
1845     cbuf.set_insts_mark();
1846     $$$emit8$primary;
1847     // CALL directly to the runtime
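    // The 32-bit displacement is relative to the end of the 5-byte CALL:
    // insts_end() is already past the opcode byte, and the extra -4 accounts
    // for the displacement bytes themselves.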
1848     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1849                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1850 
1851     if (UseSSE >= 2) {
1852       MacroAssembler _masm(&cbuf);
1853       BasicType rt = tf()->return_type();
1854 
1855       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1856         // A C runtime call where the return value is unused.  In SSE2+
1857         // mode the result needs to be removed from the FPU stack.  It's
1858         // likely that this function call could be removed by the
1859         // optimizer if the C function is a pure function.
1860         __ ffree(0);
1861       } else if (rt == T_FLOAT) {
1862         __ lea(rsp, Address(rsp, -4));
1863         __ fstp_s(Address(rsp, 0));
1864         __ movflt(xmm0, Address(rsp, 0));
1865         __ lea(rsp, Address(rsp,  4));
1866       } else if (rt == T_DOUBLE) {
1867         __ lea(rsp, Address(rsp, -8));
1868         __ fstp_d(Address(rsp, 0));
1869         __ movdbl(xmm0, Address(rsp, 0));
1870         __ lea(rsp, Address(rsp,  8));
1871       }
1872     }
1873   %}
1874 
1875   enc_class pre_call_resets %{
1876     // If method sets FPU control word restore it here
1877     debug_only(int off0 = cbuf.insts_size());
1878     if (ra_->C->in_24_bit_fp_mode()) {
1879       MacroAssembler _masm(&cbuf);
1880       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1881     }
1882     // Clear upper bits of YMM registers when current compiled code uses
1883     // wide vectors to avoid AVX <-> SSE transition penalty during call.
1884     MacroAssembler _masm(&cbuf);
1885     __ vzeroupper();
1886     debug_only(int off1 = cbuf.insts_size());
1887     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1888   %}
1889 
1890   enc_class post_call_FPU %{
1891     // If method sets FPU control word do it here also
1892     if (Compile::current()->in_24_bit_fp_mode()) {
1893       MacroAssembler masm(&cbuf);
1894       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1895     }
1896   %}
1897 
1898   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1899     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1900     // who we intended to call.
1901     cbuf.set_insts_mark();
1902     $$$emit8$primary;
1903 
1904     if (!_method) {
1905       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1906                      runtime_call_Relocation::spec(),
1907                      RELOC_IMM32);
1908     } else {
1909       int method_index = resolved_method_index(cbuf);
1910       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1911                                                   : static_call_Relocation::spec(method_index);
1912       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1913                      rspec, RELOC_DISP32);
1914       // Emit stubs for static call.
1915       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1916       if (stub == NULL) {
1917         ciEnv::current()->record_failure("CodeCache is full");
1918         return;
1919       }
1920     }
1921   %}
1922 
1923   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1924     MacroAssembler _masm(&cbuf);
1925     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1926   %}
1927 
1928   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1929     int disp = in_bytes(Method::from_compiled_offset());
1930     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1931 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1933     cbuf.set_insts_mark();
1934     $$$emit8$primary;
1935     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1936     emit_d8(cbuf, disp);             // Displacement
1937 
1938   %}
1939 
1940 //   Following encoding is no longer used, but may be restored if calling
1941 //   convention changes significantly.
1942 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1943 //
1944 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1945 //     // int ic_reg     = Matcher::inline_cache_reg();
1946 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1947 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1948 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1949 //
1950 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1951 //     // // so we load it immediately before the call
1952 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1953 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1954 //
1955 //     // xor rbp,ebp
1956 //     emit_opcode(cbuf, 0x33);
1957 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1958 //
1959 //     // CALL to interpreter.
1960 //     cbuf.set_insts_mark();
1961 //     $$$emit8$primary;
1962 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1963 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1964 //   %}
1965 
1966   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1967     $$$emit8$primary;
1968     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1969     $$$emit8$shift$$constant;
1970   %}
1971 
1972   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1973     // Load immediate does not have a zero or sign extended version
1974     // for 8-bit immediates
1975     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1976     $$$emit32$src$$constant;
1977   %}
1978 
1979   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1980     // Load immediate does not have a zero or sign extended version
1981     // for 8-bit immediates
1982     emit_opcode(cbuf, $primary + $dst$$reg);
1983     $$$emit32$src$$constant;
1984   %}
1985 
1986   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1987     // Load immediate does not have a zero or sign extended version
1988     // for 8-bit immediates
1989     int dst_enc = $dst$$reg;
1990     int src_con = $src$$constant & 0x0FFFFFFFFL;
1991     if (src_con == 0) {
1992       // xor dst, dst
1993       emit_opcode(cbuf, 0x33);
1994       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1995     } else {
1996       emit_opcode(cbuf, $primary + dst_enc);
1997       emit_d32(cbuf, src_con);
1998     }
1999   %}
2000 
2001   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2002     // Load immediate does not have a zero or sign extended version
2003     // for 8-bit immediates
2004     int dst_enc = $dst$$reg + 2;
2005     int src_con = ((julong)($src$$constant)) >> 32;
2006     if (src_con == 0) {
2007       // xor dst, dst
2008       emit_opcode(cbuf, 0x33);
2009       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2010     } else {
2011       emit_opcode(cbuf, $primary + dst_enc);
2012       emit_d32(cbuf, src_con);
2013     }
2014   %}
2015 
2016 
2017   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2018   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2019     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2020   %}
2021 
2022   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2023     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2024   %}
2025 
2026   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2027     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2028   %}
2029 
2030   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2031     $$$emit8$primary;
2032     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2033   %}
2034 
2035   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2036     $$$emit8$secondary;
2037     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2038   %}
2039 
2040   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2041     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2042   %}
2043 
2044   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2045     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2046   %}
2047 
2048   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2049     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2050   %}
2051 
2052   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2053     // Output immediate
2054     $$$emit32$src$$constant;
2055   %}
2056 
2057   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2058     // Output Float immediate bits
2059     jfloat jf = $src$$constant;
2060     int    jf_as_bits = jint_cast( jf );
2061     emit_d32(cbuf, jf_as_bits);
2062   %}
2063 
2064   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2065     // Output Float immediate bits
2066     jfloat jf = $src$$constant;
2067     int    jf_as_bits = jint_cast( jf );
2068     emit_d32(cbuf, jf_as_bits);
2069   %}
2070 
2071   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2072     // Output immediate
2073     $$$emit16$src$$constant;
2074   %}
2075 
2076   enc_class Con_d32(immI src) %{
2077     emit_d32(cbuf,$src$$constant);
2078   %}
2079 
2080   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2081     // Output immediate memory reference
2082     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2083     emit_d32(cbuf, 0x00);
2084   %}
2085 
2086   enc_class lock_prefix( ) %{
2087     emit_opcode(cbuf,0xF0);         // [Lock]
2088   %}
2089 
2090   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
2095   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2096 
2097     // XCHG  rbx,ecx
2098     emit_opcode(cbuf,0x87);
2099     emit_opcode(cbuf,0xD9);
2100     // [Lock]
2101     emit_opcode(cbuf,0xF0);
2102     // CMPXCHG8 [Eptr]
2103     emit_opcode(cbuf,0x0F);
2104     emit_opcode(cbuf,0xC7);
2105     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2106     // XCHG  rbx,ecx
2107     emit_opcode(cbuf,0x87);
2108     emit_opcode(cbuf,0xD9);
2109   %}
2110 
2111   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2112     // [Lock]
2113     emit_opcode(cbuf,0xF0);
2114 
2115     // CMPXCHG [Eptr]
2116     emit_opcode(cbuf,0x0F);
2117     emit_opcode(cbuf,0xB1);
2118     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2119   %}
2120 
2121   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2122     // [Lock]
2123     emit_opcode(cbuf,0xF0);
2124 
2125     // CMPXCHGB [Eptr]
2126     emit_opcode(cbuf,0x0F);
2127     emit_opcode(cbuf,0xB0);
2128     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2129   %}
2130 
2131   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2132     // [Lock]
2133     emit_opcode(cbuf,0xF0);
2134 
2135     // 16-bit mode
2136     emit_opcode(cbuf, 0x66);
2137 
2138     // CMPXCHGW [Eptr]
2139     emit_opcode(cbuf,0x0F);
2140     emit_opcode(cbuf,0xB1);
2141     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2142   %}
2143 
2144   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2145     int res_encoding = $res$$reg;
2146 
2147     // MOV  res,0
2148     emit_opcode( cbuf, 0xB8 + res_encoding);
2149     emit_d32( cbuf, 0 );
2150     // JNE,s  fail
2151     emit_opcode(cbuf,0x75);
2152     emit_d8(cbuf, 5 );
2153     // MOV  res,1
2154     emit_opcode( cbuf, 0xB8 + res_encoding);
2155     emit_d32( cbuf, 1 );
2156     // fail:
2157   %}
2158 
2159   enc_class set_instruction_start( ) %{
2160     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2161   %}
2162 
2163   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2164     int reg_encoding = $ereg$$reg;
2165     int base  = $mem$$base;
2166     int index = $mem$$index;
2167     int scale = $mem$$scale;
2168     int displace = $mem$$disp;
2169     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2170     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2171   %}
2172 
2173   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2174     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2175     int base  = $mem$$base;
2176     int index = $mem$$index;
2177     int scale = $mem$$scale;
2178     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2179     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2180     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2181   %}
2182 
2183   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2184     int r1, r2;
2185     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2186     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2187     emit_opcode(cbuf,0x0F);
2188     emit_opcode(cbuf,$tertiary);
2189     emit_rm(cbuf, 0x3, r1, r2);
2190     emit_d8(cbuf,$cnt$$constant);
2191     emit_d8(cbuf,$primary);
2192     emit_rm(cbuf, 0x3, $secondary, r1);
2193     emit_d8(cbuf,$cnt$$constant);
2194   %}
2195 
2196   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2197     emit_opcode( cbuf, 0x8B ); // Move
2198     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2199     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2200       emit_d8(cbuf,$primary);
2201       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2202       emit_d8(cbuf,$cnt$$constant-32);
2203     }
2204     emit_d8(cbuf,$primary);
2205     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2206     emit_d8(cbuf,31);
2207   %}
2208 
2209   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2210     int r1, r2;
2211     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2212     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2213 
2214     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2215     emit_rm(cbuf, 0x3, r1, r2);
2216     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2217       emit_opcode(cbuf,$primary);
2218       emit_rm(cbuf, 0x3, $secondary, r1);
2219       emit_d8(cbuf,$cnt$$constant-32);
2220     }
2221     emit_opcode(cbuf,0x33);  // XOR r2,r2
2222     emit_rm(cbuf, 0x3, r2, r2);
2223   %}
2224 
2225   // Clone of RegMem but accepts an extra parameter to access each
2226   // half of a double in memory; it never needs relocation info.
2227   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2228     emit_opcode(cbuf,$opcode$$constant);
2229     int reg_encoding = $rm_reg$$reg;
2230     int base     = $mem$$base;
2231     int index    = $mem$$index;
2232     int scale    = $mem$$scale;
2233     int displace = $mem$$disp + $disp_for_half$$constant;
2234     relocInfo::relocType disp_reloc = relocInfo::none;
2235     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2236   %}
2237 
2238   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2239   //
2240   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2241   // and it never needs relocation information.
2242   // Frequently used to move data between FPU's Stack Top and memory.
2243   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2244     int rm_byte_opcode = $rm_opcode$$constant;
2245     int base     = $mem$$base;
2246     int index    = $mem$$index;
2247     int scale    = $mem$$scale;
2248     int displace = $mem$$disp;
2249     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2250     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2251   %}
2252 
2253   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2254     int rm_byte_opcode = $rm_opcode$$constant;
2255     int base     = $mem$$base;
2256     int index    = $mem$$index;
2257     int scale    = $mem$$scale;
2258     int displace = $mem$$disp;
2259     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2260     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2261   %}
2262 
2263   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2264     int reg_encoding = $dst$$reg;
2265     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2266     int index        = 0x04;            // 0x04 indicates no index
2267     int scale        = 0x00;            // 0x00 indicates no scale
2268     int displace     = $src1$$constant; // 0x00 indicates no displacement
2269     relocInfo::relocType disp_reloc = relocInfo::none;
2270     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2271   %}
2272 
2273   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2274     // Compare dst,src
2275     emit_opcode(cbuf,0x3B);
2276     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2277     // jmp dst < src around move
2278     emit_opcode(cbuf,0x7C);
2279     emit_d8(cbuf,2);
2280     // move dst,src
2281     emit_opcode(cbuf,0x8B);
2282     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2283   %}
2284 
2285   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2286     // Compare dst,src
2287     emit_opcode(cbuf,0x3B);
2288     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2289     // jmp dst > src around move
2290     emit_opcode(cbuf,0x7F);
2291     emit_d8(cbuf,2);
2292     // move dst,src
2293     emit_opcode(cbuf,0x8B);
2294     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2295   %}
2296 
2297   enc_class enc_FPR_store(memory mem, regDPR src) %{
2298     // If src is FPR1, we can just FST to store it.
2299     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2300     int reg_encoding = 0x2; // Just store
2301     int base  = $mem$$base;
2302     int index = $mem$$index;
2303     int scale = $mem$$scale;
2304     int displace = $mem$$disp;
2305     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2306     if( $src$$reg != FPR1L_enc ) {
2307       reg_encoding = 0x3;  // Store & pop
2308       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2309       emit_d8( cbuf, 0xC0-1+$src$$reg );
2310     }
2311     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2312     emit_opcode(cbuf,$primary);
2313     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2314   %}
2315 
2316   enc_class neg_reg(rRegI dst) %{
2317     // NEG $dst
2318     emit_opcode(cbuf,0xF7);
2319     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2320   %}
2321 
2322   enc_class setLT_reg(eCXRegI dst) %{
2323     // SETLT $dst
2324     emit_opcode(cbuf,0x0F);
2325     emit_opcode(cbuf,0x9C);
2326     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2327   %}
2328 
2329   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2330     int tmpReg = $tmp$$reg;
2331 
2332     // SUB $p,$q
2333     emit_opcode(cbuf,0x2B);
2334     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2335     // SBB $tmp,$tmp
2336     emit_opcode(cbuf,0x1B);
2337     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2338     // AND $tmp,$y
2339     emit_opcode(cbuf,0x23);
2340     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2341     // ADD $p,$tmp
2342     emit_opcode(cbuf,0x03);
2343     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2344   %}
2345 
2346   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2347     // TEST shift,32
2348     emit_opcode(cbuf,0xF7);
2349     emit_rm(cbuf, 0x3, 0, ECX_enc);
2350     emit_d32(cbuf,0x20);
2351     // JEQ,s small
2352     emit_opcode(cbuf, 0x74);
2353     emit_d8(cbuf, 0x04);
2354     // MOV    $dst.hi,$dst.lo
2355     emit_opcode( cbuf, 0x8B );
2356     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2357     // CLR    $dst.lo
2358     emit_opcode(cbuf, 0x33);
2359     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2360 // small:
2361     // SHLD   $dst.hi,$dst.lo,$shift
2362     emit_opcode(cbuf,0x0F);
2363     emit_opcode(cbuf,0xA5);
2364     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift
2366     emit_opcode(cbuf,0xD3);
2367     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2368   %}
2369 
2370   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2371     // TEST shift,32
2372     emit_opcode(cbuf,0xF7);
2373     emit_rm(cbuf, 0x3, 0, ECX_enc);
2374     emit_d32(cbuf,0x20);
2375     // JEQ,s small
2376     emit_opcode(cbuf, 0x74);
2377     emit_d8(cbuf, 0x04);
2378     // MOV    $dst.lo,$dst.hi
2379     emit_opcode( cbuf, 0x8B );
2380     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2381     // CLR    $dst.hi
2382     emit_opcode(cbuf, 0x33);
2383     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2384 // small:
2385     // SHRD   $dst.lo,$dst.hi,$shift
2386     emit_opcode(cbuf,0x0F);
2387     emit_opcode(cbuf,0xAD);
2388     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
2390     emit_opcode(cbuf,0xD3);
2391     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2392   %}
2393 
2394   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2395     // TEST shift,32
2396     emit_opcode(cbuf,0xF7);
2397     emit_rm(cbuf, 0x3, 0, ECX_enc);
2398     emit_d32(cbuf,0x20);
2399     // JEQ,s small
2400     emit_opcode(cbuf, 0x74);
2401     emit_d8(cbuf, 0x05);
2402     // MOV    $dst.lo,$dst.hi
2403     emit_opcode( cbuf, 0x8B );
2404     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2405     // SAR    $dst.hi,31
2406     emit_opcode(cbuf, 0xC1);
2407     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2408     emit_d8(cbuf, 0x1F );
2409 // small:
2410     // SHRD   $dst.lo,$dst.hi,$shift
2411     emit_opcode(cbuf,0x0F);
2412     emit_opcode(cbuf,0xAD);
2413     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
2415     emit_opcode(cbuf,0xD3);
2416     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2417   %}
2418 
2419 
2420   // ----------------- Encodings for floating point unit -----------------
2421   // May leave result in FPU-TOS or FPU reg depending on opcodes
2422   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2423     $$$emit8$primary;
2424     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2425   %}
2426 
2427   // Pop argument in FPR0 with FSTP ST(0)
2428   enc_class PopFPU() %{
2429     emit_opcode( cbuf, 0xDD );
2430     emit_d8( cbuf, 0xD8 );
2431   %}
2432 
2433   // !!!!! equivalent to Pop_Reg_F
2434   enc_class Pop_Reg_DPR( regDPR dst ) %{
2435     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2436     emit_d8( cbuf, 0xD8+$dst$$reg );
2437   %}
2438 
2439   enc_class Push_Reg_DPR( regDPR dst ) %{
2440     emit_opcode( cbuf, 0xD9 );
2441     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2442   %}
2443 
2444   enc_class strictfp_bias1( regDPR dst ) %{
2445     emit_opcode( cbuf, 0xDB );           // FLD m80real
2446     emit_opcode( cbuf, 0x2D );
2447     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2448     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2449     emit_opcode( cbuf, 0xC8+$dst$$reg );
2450   %}
2451 
2452   enc_class strictfp_bias2( regDPR dst ) %{
2453     emit_opcode( cbuf, 0xDB );           // FLD m80real
2454     emit_opcode( cbuf, 0x2D );
2455     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2456     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2457     emit_opcode( cbuf, 0xC8+$dst$$reg );
2458   %}
2459 
2460   // Special case for moving an integer register to a stack slot.
2461   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2462     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2463   %}
2464 
2465   // Special case for moving a register to a stack slot.
2466   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2467     // Opcode already emitted
2468     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2469     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2470     emit_d32(cbuf, $dst$$disp);   // Displacement
2471   %}
2472 
2473   // Push the integer in stackSlot 'src' onto FP-stack
2474   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2475     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2476   %}
2477 
2478   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2479   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2480     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2481   %}
2482 
2483   // Same as Pop_Mem_F except for opcode
2484   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2485   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2486     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2487   %}
2488 
2489   enc_class Pop_Reg_FPR( regFPR dst ) %{
2490     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2491     emit_d8( cbuf, 0xD8+$dst$$reg );
2492   %}
2493 
2494   enc_class Push_Reg_FPR( regFPR dst ) %{
2495     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2496     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2497   %}
2498 
2499   // Push FPU's float to a stack-slot, and pop FPU-stack
2500   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2501     int pop = 0x02;
2502     if ($src$$reg != FPR1L_enc) {
2503       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2504       emit_d8( cbuf, 0xC0-1+$src$$reg );
2505       pop = 0x03;
2506     }
2507     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2508   %}
2509 
2510   // Push FPU's double to a stack-slot, and pop FPU-stack
2511   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2512     int pop = 0x02;
2513     if ($src$$reg != FPR1L_enc) {
2514       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2515       emit_d8( cbuf, 0xC0-1+$src$$reg );
2516       pop = 0x03;
2517     }
2518     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2519   %}
2520 
2521   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2522   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2523     int pop = 0xD0 - 1; // -1 since we skip FLD
2524     if ($src$$reg != FPR1L_enc) {
2525       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2526       emit_d8( cbuf, 0xC0-1+$src$$reg );
2527       pop = 0xD8;
2528     }
2529     emit_opcode( cbuf, 0xDD );
2530     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2531   %}
2532 
2533 
2534   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2535     // load dst in FPR0
2536     emit_opcode( cbuf, 0xD9 );
2537     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2538     if ($src$$reg != FPR1L_enc) {
2539       // fincstp
2540       emit_opcode (cbuf, 0xD9);
2541       emit_opcode (cbuf, 0xF7);
2542       // swap src with FPR1:
2543       // FXCH FPR1 with src
2544       emit_opcode(cbuf, 0xD9);
2545       emit_d8(cbuf, 0xC8-1+$src$$reg );
2546       // fdecstp
2547       emit_opcode (cbuf, 0xD9);
2548       emit_opcode (cbuf, 0xF6);
2549     }
2550   %}
2551 
2552   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2553     MacroAssembler _masm(&cbuf);
2554     __ subptr(rsp, 8);
2555     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2556     __ fld_d(Address(rsp, 0));
2557     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2558     __ fld_d(Address(rsp, 0));
2559   %}
2560 
2561   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2562     MacroAssembler _masm(&cbuf);
2563     __ subptr(rsp, 4);
2564     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2565     __ fld_s(Address(rsp, 0));
2566     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2567     __ fld_s(Address(rsp, 0));
2568   %}
2569 
2570   enc_class Push_ResultD(regD dst) %{
2571     MacroAssembler _masm(&cbuf);
2572     __ fstp_d(Address(rsp, 0));
2573     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2574     __ addptr(rsp, 8);
2575   %}
2576 
2577   enc_class Push_ResultF(regF dst, immI d8) %{
2578     MacroAssembler _masm(&cbuf);
2579     __ fstp_s(Address(rsp, 0));
2580     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2581     __ addptr(rsp, $d8$$constant);
2582   %}
2583 
2584   enc_class Push_SrcD(regD src) %{
2585     MacroAssembler _masm(&cbuf);
2586     __ subptr(rsp, 8);
2587     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2588     __ fld_d(Address(rsp, 0));
2589   %}
2590 
2591   enc_class push_stack_temp_qword() %{
2592     MacroAssembler _masm(&cbuf);
2593     __ subptr(rsp, 8);
2594   %}
2595 
2596   enc_class pop_stack_temp_qword() %{
2597     MacroAssembler _masm(&cbuf);
2598     __ addptr(rsp, 8);
2599   %}
2600 
2601   enc_class push_xmm_to_fpr1(regD src) %{
2602     MacroAssembler _masm(&cbuf);
2603     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2604     __ fld_d(Address(rsp, 0));
2605   %}
2606 
2607   enc_class Push_Result_Mod_DPR( regDPR src) %{
2608     if ($src$$reg != FPR1L_enc) {
2609       // fincstp
2610       emit_opcode (cbuf, 0xD9);
2611       emit_opcode (cbuf, 0xF7);
2612       // FXCH FPR1 with src
2613       emit_opcode(cbuf, 0xD9);
2614       emit_d8(cbuf, 0xC8-1+$src$$reg );
2615       // fdecstp
2616       emit_opcode (cbuf, 0xD9);
2617       emit_opcode (cbuf, 0xF6);
2618     }
2619     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2620     // // FSTP   FPR$dst$$reg
2621     // emit_opcode( cbuf, 0xDD );
2622     // emit_d8( cbuf, 0xD8+$dst$$reg );
2623   %}
2624 
2625   enc_class fnstsw_sahf_skip_parity() %{
2626     // fnstsw ax
2627     emit_opcode( cbuf, 0xDF );
2628     emit_opcode( cbuf, 0xE0 );
2629     // sahf
2630     emit_opcode( cbuf, 0x9E );
2631     // jnp  ::skip
2632     emit_opcode( cbuf, 0x7B );
2633     emit_opcode( cbuf, 0x05 );
2634   %}
2635 
2636   enc_class emitModDPR() %{
2637     // fprem must be iterative
2638     // :: loop
2639     // fprem
2640     emit_opcode( cbuf, 0xD9 );
2641     emit_opcode( cbuf, 0xF8 );
2642     // wait
2643     emit_opcode( cbuf, 0x9b );
2644     // fnstsw ax
2645     emit_opcode( cbuf, 0xDF );
2646     emit_opcode( cbuf, 0xE0 );
2647     // sahf
2648     emit_opcode( cbuf, 0x9E );
2649     // jp  ::loop
2650     emit_opcode( cbuf, 0x0F );
2651     emit_opcode( cbuf, 0x8A );
2652     emit_opcode( cbuf, 0xF4 );
2653     emit_opcode( cbuf, 0xFF );
2654     emit_opcode( cbuf, 0xFF );
2655     emit_opcode( cbuf, 0xFF );
2656   %}
2657 
2658   enc_class fpu_flags() %{
2659     // fnstsw_ax
2660     emit_opcode( cbuf, 0xDF);
2661     emit_opcode( cbuf, 0xE0);
2662     // test ax,0x0400
2663     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2664     emit_opcode( cbuf, 0xA9 );
2665     emit_d16   ( cbuf, 0x0400 );
2666     // // // This sequence works, but stalls for 12-16 cycles on PPro
2667     // // test rax,0x0400
2668     // emit_opcode( cbuf, 0xA9 );
2669     // emit_d32   ( cbuf, 0x00000400 );
2670     //
2671     // jz exit (no unordered comparison)
2672     emit_opcode( cbuf, 0x74 );
2673     emit_d8    ( cbuf, 0x02 );
2674     // mov ah,1 - treat as LT case (set carry flag)
2675     emit_opcode( cbuf, 0xB4 );
2676     emit_d8    ( cbuf, 0x01 );
2677     // sahf
2678     emit_opcode( cbuf, 0x9E);
2679   %}
2680 
2681   enc_class cmpF_P6_fixup() %{
2682     // Fixup the integer flags in case comparison involved a NaN
2683     //
2684     // JNP exit (no unordered comparison, P-flag is set by NaN)
2685     emit_opcode( cbuf, 0x7B );
2686     emit_d8    ( cbuf, 0x03 );
2687     // MOV AH,1 - treat as LT case (set carry flag)
2688     emit_opcode( cbuf, 0xB4 );
2689     emit_d8    ( cbuf, 0x01 );
2690     // SAHF
2691     emit_opcode( cbuf, 0x9E);
2692     // NOP     // target for branch to avoid branch to branch
2693     emit_opcode( cbuf, 0x90);
2694   %}
2695 
2696 //     fnstsw_ax();
2697 //     sahf();
2698 //     movl(dst, nan_result);
2699 //     jcc(Assembler::parity, exit);
2700 //     movl(dst, less_result);
2701 //     jcc(Assembler::below, exit);
2702 //     movl(dst, equal_result);
2703 //     jcc(Assembler::equal, exit);
2704 //     movl(dst, greater_result);
2705 
2706 // less_result     =  1;
2707 // greater_result  = -1;
2708 // equal_result    = 0;
2709 // nan_result      = -1;
2710 
2711   enc_class CmpF_Result(rRegI dst) %{
2712     // fnstsw_ax();
2713     emit_opcode( cbuf, 0xDF);
2714     emit_opcode( cbuf, 0xE0);
2715     // sahf
2716     emit_opcode( cbuf, 0x9E);
2717     // movl(dst, nan_result);
2718     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2719     emit_d32( cbuf, -1 );
2720     // jcc(Assembler::parity, exit);
2721     emit_opcode( cbuf, 0x7A );
2722     emit_d8    ( cbuf, 0x13 );
2723     // movl(dst, less_result);
2724     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2725     emit_d32( cbuf, -1 );
2726     // jcc(Assembler::below, exit);
2727     emit_opcode( cbuf, 0x72 );
2728     emit_d8    ( cbuf, 0x0C );
2729     // movl(dst, equal_result);
2730     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2731     emit_d32( cbuf, 0 );
2732     // jcc(Assembler::equal, exit);
2733     emit_opcode( cbuf, 0x74 );
2734     emit_d8    ( cbuf, 0x05 );
2735     // movl(dst, greater_result);
2736     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2737     emit_d32( cbuf, 1 );
2738   %}
2739 
2740 
2741   // Compare the longs and set flags
2742   // BROKEN!  Do Not use as-is
2743   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2744     // CMP    $src1.hi,$src2.hi
2745     emit_opcode( cbuf, 0x3B );
2746     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2747     // JNE,s  done
2748     emit_opcode(cbuf,0x75);
2749     emit_d8(cbuf, 2 );
2750     // CMP    $src1.lo,$src2.lo
2751     emit_opcode( cbuf, 0x3B );
2752     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2753 // done:
2754   %}
2755 
2756   enc_class convert_int_long( regL dst, rRegI src ) %{
2757     // mov $dst.lo,$src
2758     int dst_encoding = $dst$$reg;
2759     int src_encoding = $src$$reg;
2760     encode_Copy( cbuf, dst_encoding  , src_encoding );
2761     // mov $dst.hi,$src
2762     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2763     // sar $dst.hi,31
2764     emit_opcode( cbuf, 0xC1 );
2765     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2766     emit_d8(cbuf, 0x1F );
2767   %}
2768 
2769   enc_class convert_long_double( eRegL src ) %{
2770     // push $src.hi
2771     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2772     // push $src.lo
2773     emit_opcode(cbuf, 0x50+$src$$reg  );
2774     // fild 64-bits at [SP]
2775     emit_opcode(cbuf,0xdf);
2776     emit_d8(cbuf, 0x6C);
2777     emit_d8(cbuf, 0x24);
2778     emit_d8(cbuf, 0x00);
2779     // pop stack
2780     emit_opcode(cbuf, 0x83); // add  SP, #8
2781     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2782     emit_d8(cbuf, 0x8);
2783   %}
2784 
2785   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2786     // IMUL   EDX:EAX,$src1
2787     emit_opcode( cbuf, 0xF7 );
2788     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2789     // SAR    EDX,$cnt-32
2790     int shift_count = ((int)$cnt$$constant) - 32;
2791     if (shift_count > 0) {
2792       emit_opcode(cbuf, 0xC1);
2793       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2794       emit_d8(cbuf, shift_count);
2795     }
2796   %}
2797 
2798   // this version doesn't have add sp, 8
2799   enc_class convert_long_double2( eRegL src ) %{
2800     // push $src.hi
2801     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2802     // push $src.lo
2803     emit_opcode(cbuf, 0x50+$src$$reg  );
2804     // fild 64-bits at [SP]
2805     emit_opcode(cbuf,0xdf);
2806     emit_d8(cbuf, 0x6C);
2807     emit_d8(cbuf, 0x24);
2808     emit_d8(cbuf, 0x00);
2809   %}
2810 
2811   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2812     // Basic idea: long = (long)int * (long)int
2813     // IMUL EDX:EAX, src
2814     emit_opcode( cbuf, 0xF7 );
2815     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2816   %}
2817 
2818   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2819     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2820     // MUL EDX:EAX, src
2821     emit_opcode( cbuf, 0xF7 );
2822     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2823   %}
2824 
2825   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2826     // Basic idea: lo(result) = lo(x_lo * y_lo)
2827     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2828     // MOV    $tmp,$src.lo
2829     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2830     // IMUL   $tmp,EDX
2831     emit_opcode( cbuf, 0x0F );
2832     emit_opcode( cbuf, 0xAF );
2833     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2834     // MOV    EDX,$src.hi
2835     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2836     // IMUL   EDX,EAX
2837     emit_opcode( cbuf, 0x0F );
2838     emit_opcode( cbuf, 0xAF );
2839     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2840     // ADD    $tmp,EDX
2841     emit_opcode( cbuf, 0x03 );
2842     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2843     // MUL   EDX:EAX,$src.lo
2844     emit_opcode( cbuf, 0xF7 );
2845     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
2847     emit_opcode( cbuf, 0x03 );
2848     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2849   %}
2850 
2851   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2852     // Basic idea: lo(result) = lo(src * y_lo)
2853     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2854     // IMUL   $tmp,EDX,$src
2855     emit_opcode( cbuf, 0x6B );
2856     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2857     emit_d8( cbuf, (int)$src$$constant );
2858     // MOV    EDX,$src
2859     emit_opcode(cbuf, 0xB8 + EDX_enc);
2860     emit_d32( cbuf, (int)$src$$constant );
2861     // MUL   EDX:EAX,EDX
2862     emit_opcode( cbuf, 0xF7 );
2863     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
2865     emit_opcode( cbuf, 0x03 );
2866     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2867   %}
2868 
2869   enc_class long_div( eRegL src1, eRegL src2 ) %{
2870     // PUSH src1.hi
2871     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2872     // PUSH src1.lo
2873     emit_opcode(cbuf,               0x50+$src1$$reg  );
2874     // PUSH src2.hi
2875     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2876     // PUSH src2.lo
2877     emit_opcode(cbuf,               0x50+$src2$$reg  );
2878     // CALL directly to the runtime
2879     cbuf.set_insts_mark();
2880     emit_opcode(cbuf,0xE8);       // Call into runtime
2881     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2882     // Restore stack
2883     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2884     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2885     emit_d8(cbuf, 4*4);
2886   %}
2887 
2888   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2889     // PUSH src1.hi
2890     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2891     // PUSH src1.lo
2892     emit_opcode(cbuf,               0x50+$src1$$reg  );
2893     // PUSH src2.hi
2894     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2895     // PUSH src2.lo
2896     emit_opcode(cbuf,               0x50+$src2$$reg  );
2897     // CALL directly to the runtime
2898     cbuf.set_insts_mark();
2899     emit_opcode(cbuf,0xE8);       // Call into runtime
2900     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2901     // Restore stack
2902     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2903     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2904     emit_d8(cbuf, 4*4);
2905   %}
2906 
2907   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2908     // MOV   $tmp,$src.lo
2909     emit_opcode(cbuf, 0x8B);
2910     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2911     // OR    $tmp,$src.hi
2912     emit_opcode(cbuf, 0x0B);
2913     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2914   %}
2915 
2916   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2917     // CMP    $src1.lo,$src2.lo
2918     emit_opcode( cbuf, 0x3B );
2919     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2920     // JNE,s  skip
2921     emit_cc(cbuf, 0x70, 0x5);
2922     emit_d8(cbuf,2);
2923     // CMP    $src1.hi,$src2.hi
2924     emit_opcode( cbuf, 0x3B );
2925     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2926   %}
2927 
2928   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2929     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2930     emit_opcode( cbuf, 0x3B );
2931     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2932     // MOV    $tmp,$src1.hi
2933     emit_opcode( cbuf, 0x8B );
2934     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2935     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2936     emit_opcode( cbuf, 0x1B );
2937     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2938   %}
2939 
2940   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2941     // XOR    $tmp,$tmp
2942     emit_opcode(cbuf,0x33);  // XOR
2943     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2944     // CMP    $tmp,$src.lo
2945     emit_opcode( cbuf, 0x3B );
2946     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2947     // SBB    $tmp,$src.hi
2948     emit_opcode( cbuf, 0x1B );
2949     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2950   %}
2951 
2952  // Sniff, sniff... smells like Gnu Superoptimizer
2953   enc_class neg_long( eRegL dst ) %{
2954     emit_opcode(cbuf,0xF7);    // NEG hi
2955     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2956     emit_opcode(cbuf,0xF7);    // NEG lo
2957     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2958     emit_opcode(cbuf,0x83);    // SBB hi,0
2959     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2960     emit_d8    (cbuf,0 );
2961   %}
2962 
2963   enc_class enc_pop_rdx() %{
2964     emit_opcode(cbuf,0x5A);
2965   %}
2966 
2967   enc_class enc_rethrow() %{
2968     cbuf.set_insts_mark();
2969     emit_opcode(cbuf, 0xE9);        // jmp    entry
2970     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2971                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2972   %}
2973 
2974 
  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  If the hardware produces the 'integer
  // indefinite' value (0x80000000) instead, a runtime stub is called to
  // compute the Java-correct result.
2980   enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value would
    // throw an exception (but normal values get converted at full speed).
2984     // However, I2C adapters and other float-stack manglers leave pending
2985     // invalid-op exceptions hanging.  We would have to clear them before
2986     // enabling them and that is more expensive than just testing for the
2987     // invalid value Intel stores down in the corner cases.
2988     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2989     emit_opcode(cbuf,0x2D);
2990     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2991     // Allocate a word
2992     emit_opcode(cbuf,0x83);            // SUB ESP,4
2993     emit_opcode(cbuf,0xEC);
2994     emit_d8(cbuf,0x04);
2995     // Encoding assumes a double has been pushed into FPR0.
2996     // Store down the double as an int, popping the FPU stack
2997     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2998     emit_opcode(cbuf,0x1C);
2999     emit_d8(cbuf,0x24);
3000     // Restore the rounding mode; mask the exception
3001     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3002     emit_opcode(cbuf,0x2D);
3003     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3004         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3005         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3006 
3007     // Load the converted int; adjust CPU stack
3008     emit_opcode(cbuf,0x58);       // POP EAX
3009     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3010     emit_d32   (cbuf,0x80000000); //         0x80000000
3011     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3012     emit_d8    (cbuf,0x07);       // Size of slow_call
3013     // Push src onto stack slow-path
3014     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3015     emit_d8    (cbuf,0xC0-1+$src$$reg );
3016     // CALL directly to the runtime
3017     cbuf.set_insts_mark();
3018     emit_opcode(cbuf,0xE8);       // Call into runtime
3019     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3020     // Carry on here...
3021   %}
3022 
3023   enc_class DPR2L_encoding( regDPR src ) %{
3024     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3025     emit_opcode(cbuf,0x2D);
3026     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3027     // Allocate a word
3028     emit_opcode(cbuf,0x83);            // SUB ESP,8
3029     emit_opcode(cbuf,0xEC);
3030     emit_d8(cbuf,0x08);
3031     // Encoding assumes a double has been pushed into FPR0.
3032     // Store down the double as a long, popping the FPU stack
3033     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3034     emit_opcode(cbuf,0x3C);
3035     emit_d8(cbuf,0x24);
3036     // Restore the rounding mode; mask the exception
3037     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3038     emit_opcode(cbuf,0x2D);
3039     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3040         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3041         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3042 
    // Load the converted long; adjust CPU stack
3044     emit_opcode(cbuf,0x58);       // POP EAX
3045     emit_opcode(cbuf,0x5A);       // POP EDX
3046     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3047     emit_d8    (cbuf,0xFA);       // rdx
3048     emit_d32   (cbuf,0x80000000); //         0x80000000
3049     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3050     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3051     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // ModRM: EAX,EAX
3053     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3054     emit_d8    (cbuf,0x07);       // Size of slow_call
3055     // Push src onto stack slow-path
3056     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3057     emit_d8    (cbuf,0xC0-1+$src$$reg );
3058     // CALL directly to the runtime
3059     cbuf.set_insts_mark();
3060     emit_opcode(cbuf,0xE8);       // Call into runtime
3061     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3062     // Carry on here...
3063   %}
3064 
3065   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3066     // Operand was loaded from memory into fp ST (stack top)
3067     // FMUL   ST,$src  /* D8 C8+i */
3068     emit_opcode(cbuf, 0xD8);
3069     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3070   %}
3071 
3072   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3073     // FADDP  ST,src2  /* D8 C0+i */
3074     emit_opcode(cbuf, 0xD8);
3075     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3076     //could use FADDP  src2,fpST  /* DE C0+i */
3077   %}
3078 
3079   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3080     // FADDP  src2,ST  /* DE C0+i */
3081     emit_opcode(cbuf, 0xDE);
3082     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3083   %}
3084 
3085   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3086     // Operand has been loaded into fp ST (stack top)
3087       // FSUB   ST,$src1
3088       emit_opcode(cbuf, 0xD8);
3089       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3090 
3091       // FDIV
3092       emit_opcode(cbuf, 0xD8);
3093       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3094   %}
3095 
3096   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3097     // Operand was loaded from memory into fp ST (stack top)
3098     // FADD   ST,$src  /* D8 C0+i */
3099     emit_opcode(cbuf, 0xD8);
3100     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3101 
3102     // FMUL  ST,src2  /* D8 C*+i */
3103     emit_opcode(cbuf, 0xD8);
3104     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3105   %}
3106 
3107 
3108   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3109     // Operand was loaded from memory into fp ST (stack top)
3110     // FADD   ST,$src  /* D8 C0+i */
3111     emit_opcode(cbuf, 0xD8);
3112     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3113 
3114     // FMULP  src2,ST  /* DE C8+i */
3115     emit_opcode(cbuf, 0xDE);
3116     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3117   %}
3118 
3119   // Atomically load the volatile long
3120   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3121     emit_opcode(cbuf,0xDF);
3122     int rm_byte_opcode = 0x05;
3123     int base     = $mem$$base;
3124     int index    = $mem$$index;
3125     int scale    = $mem$$scale;
3126     int displace = $mem$$disp;
3127     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3128     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3129     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3130   %}
3131 
3132   // Volatile Store Long.  Must be atomic, so move it into
3133   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3134   // target address before the store (for null-ptr checks)
3135   // so the memory operand is used twice in the encoding.
3136   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3137     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3138     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3139     emit_opcode(cbuf,0xDF);
3140     int rm_byte_opcode = 0x07;
3141     int base     = $mem$$base;
3142     int index    = $mem$$index;
3143     int scale    = $mem$$scale;
3144     int displace = $mem$$disp;
3145     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3146     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3147   %}
3148 
  // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable.  Unfortunately, it kills the condition
  // code in the process.
  // We currently use TESTL [spp],EDI.
3153   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
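  // The bytes emitted below decode as "TEST dword ptr [polling_page], EDI"
  // (opcode 0x85 with mod=00, r/m=101 selecting a 32-bit absolute address);
  // the TEST result itself is ignored, only the faulting load matters once
  // the VM protects the polling page.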
3154 
3155   enc_class Safepoint_Poll() %{
3156     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3157     emit_opcode(cbuf,0x85);
3158     emit_rm (cbuf, 0x0, 0x7, 0x5);
3159     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3160   %}
3161 %}
3162 
3163 
3164 //----------FRAME--------------------------------------------------------------
3165 // Definition of frame structure and management information.
3166 //
3167 //  S T A C K   L A Y O U T    Allocators stack-slot number
3168 //                             |   (to get allocators register number
3169 //  G  Owned by    |        |  v    add OptoReg::stack0())
3170 //  r   CALLER     |        |
3171 //  o     |        +--------+      pad to even-align allocators stack-slot
3172 //  w     V        |  pad0  |        numbers; owned by CALLER
3173 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3174 //  h     ^        |   in   |  5
3175 //        |        |  args  |  4   Holes in incoming args owned by SELF
3176 //  |     |        |        |  3
3177 //  |     |        +--------+
3178 //  V     |        | old out|      Empty on Intel, window on Sparc
3179 //        |    old |preserve|      Must be even aligned.
3180 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3181 //        |        |   in   |  3   area for Intel ret address
3182 //     Owned by    |preserve|      Empty on Sparc.
3183 //       SELF      +--------+
3184 //        |        |  pad2  |  2   pad to align old SP
3185 //        |        +--------+  1
3186 //        |        | locks  |  0
3187 //        |        +--------+----> OptoReg::stack0(), even aligned
3188 //        |        |  pad1  | 11   pad to align new SP
3189 //        |        +--------+
3190 //        |        |        | 10
3191 //        |        | spills |  9   spills
3192 //        V        |        |  8   (pad0 slot for callee)
3193 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3194 //        ^        |  out   |  7
3195 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3196 //     Owned by    +--------+
3197 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3198 //        |    new |preserve|      Must be even-aligned.
3199 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3200 //        |        |        |
3201 //
3202 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3203 //         known from SELF's arguments and the Java calling convention.
3204 //         Region 6-7 is determined per call site.
3205 // Note 2: If the calling convention leaves holes in the incoming argument
3206 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3208 //         incoming area, as the Java calling convention is completely under
3209 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3211 //         varargs C calling conventions.
3212 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3213 //         even aligned with pad0 as needed.
3214 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3215 //         region 6-11 is even aligned; it may be padded out more so that
3216 //         the region from SP to FP meets the minimum stack alignment.
3217 
3218 frame %{
3219   // What direction does stack grow in (assumed to be same for C & Java)
3220   stack_direction(TOWARDS_LOW);
3221 
3222   // These three registers define part of the calling convention
3223   // between compiled code and the interpreter.
3224   inline_cache_reg(EAX);                // Inline Cache Register
3225   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3226 
3227   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3228   cisc_spilling_operand_name(indOffset32);
3229 
3230   // Number of stack slots consumed by locking an object
3231   sync_stack_slots(1);
3232 
3233   // Compiled code's Frame Pointer
3234   frame_pointer(ESP);
3235   // Interpreter stores its frame pointer in a register which is
3236   // stored to the stack by I2CAdaptors.
3237   // I2CAdaptors convert from interpreted java to compiled java.
3238   interpreter_frame_pointer(EBP);
3239 
3240   // Stack alignment requirement
3241   // Alignment size in bytes (128-bit -> 16 bytes)
3242   stack_alignment(StackAlignmentInBytes);
3243 
3244   // Number of stack slots between incoming argument block and the start of
3245   // a new frame.  The PROLOG must add this many slots to the stack.  The
3246   // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp (must save rbp).
3248   in_preserve_stack_slots(2+VerifyStackAtCalls);
3249 
3250   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3251   // for calls to C.  Supports the var-args backing area for register parms.
3252   varargs_C_out_slots_killed(0);
3253 
3254   // The after-PROLOG location of the return address.  Location of
3255   // return address specifies a type (REG or STACK) and a number
3256   // representing the register number (i.e. - use a register name) or
3257   // stack slot.
3258   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3259   // Otherwise, it is above the locks and verification slot and alignment word
3260   return_addr(STACK - 1 +
3261               align_up((Compile::current()->in_preserve_stack_slots() +
3262                         Compile::current()->fixed_slots()),
3263                        stack_alignment_in_slots()));
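
  // For example (illustrative arithmetic only): with 2 in-preserve slots, no
  // fixed slots, and a 16-byte (4-slot) stack alignment, this evaluates to
  // STACK - 1 + align_up(2, 4) = STACK + 3.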
3264 
3265   // Body of function which returns an integer array locating
3266   // arguments either in registers or in stack slots.  Passed an array
3267   // of ideal registers called "sig" and a "length" count.  Stack-slot
3268   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3269   // arguments for a CALLEE.  Incoming stack arguments are
3270   // automatically biased by the preserve_stack_slots field above.
3271   calling_convention %{
    // No difference between incoming/outgoing, so just pass false
3273     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3274   %}
3275 
3276 
3277   // Body of function which returns an integer array locating
3278   // arguments either in registers or in stack slots.  Passed an array
3279   // of ideal registers called "sig" and a "length" count.  Stack-slot
3280   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3281   // arguments for a CALLEE.  Incoming stack arguments are
3282   // automatically biased by the preserve_stack_slots field above.
3283   c_calling_convention %{
3284     // This is obviously always outgoing
3285     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3286   %}
3287 
3288   // Location of C & interpreter return values
3289   c_return_value %{
3290     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3291     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3292     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3293 
3294     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3295     // that C functions return float and double results in XMM0.
3296     if( ideal_reg == Op_RegD && UseSSE>=2 )
3297       return OptoRegPair(XMM0b_num,XMM0_num);
3298     if( ideal_reg == Op_RegF && UseSSE>=2 )
3299       return OptoRegPair(OptoReg::Bad,XMM0_num);
3300 
3301     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3302   %}
3303 
3304   // Location of return values
3305   return_value %{
3306     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3307     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3308     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3309     if( ideal_reg == Op_RegD && UseSSE>=2 )
3310       return OptoRegPair(XMM0b_num,XMM0_num);
3311     if( ideal_reg == Op_RegF && UseSSE>=1 )
3312       return OptoRegPair(OptoReg::Bad,XMM0_num);
3313     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3314   %}
3315 
3316 %}
3317 
3318 //----------ATTRIBUTES---------------------------------------------------------
3319 //----------Operand Attributes-------------------------------------------------
3320 op_attrib op_cost(0);        // Required cost attribute
3321 
3322 //----------Instruction Attributes---------------------------------------------
3323 ins_attrib ins_cost(100);       // Required cost attribute
3324 ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
3328 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3329                                 // specifies the alignment that some part of the instruction (not
3330                                 // necessarily the start) requires.  If > 1, a compute_padding()
3331                                 // function must be provided for the instruction
3332 
3333 //----------OPERANDS-----------------------------------------------------------
3334 // Operand definitions must precede instruction definitions for correct parsing
3335 // in the ADLC because operands constitute user defined types which are used in
3336 // instruction definitions.
3337 
3338 //----------Simple Operands----------------------------------------------------
3339 // Immediate Operands
3340 // Integer Immediate
3341 operand immI() %{
3342   match(ConI);
3343 
3344   op_cost(10);
3345   format %{ %}
3346   interface(CONST_INTER);
3347 %}
3348 
3349 // Constant for test vs zero
3350 operand immI0() %{
3351   predicate(n->get_int() == 0);
3352   match(ConI);
3353 
3354   op_cost(0);
3355   format %{ %}
3356   interface(CONST_INTER);
3357 %}
3358 
3359 // Constant for increment
3360 operand immI1() %{
3361   predicate(n->get_int() == 1);
3362   match(ConI);
3363 
3364   op_cost(0);
3365   format %{ %}
3366   interface(CONST_INTER);
3367 %}
3368 
3369 // Constant for decrement
3370 operand immI_M1() %{
3371   predicate(n->get_int() == -1);
3372   match(ConI);
3373 
3374   op_cost(0);
3375   format %{ %}
3376   interface(CONST_INTER);
3377 %}
3378 
3379 // Valid scale values for addressing modes
3380 operand immI2() %{
3381   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3382   match(ConI);
3383 
3384   format %{ %}
3385   interface(CONST_INTER);
3386 %}
3387 
3388 operand immI8() %{
3389   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3390   match(ConI);
3391 
3392   op_cost(5);
3393   format %{ %}
3394   interface(CONST_INTER);
3395 %}
3396 
3397 operand immI16() %{
3398   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3399   match(ConI);
3400 
3401   op_cost(10);
3402   format %{ %}
3403   interface(CONST_INTER);
3404 %}
3405 
3406 // Int Immediate non-negative
3407 operand immU31()
3408 %{
3409   predicate(n->get_int() >= 0);
3410   match(ConI);
3411 
3412   op_cost(0);
3413   format %{ %}
3414   interface(CONST_INTER);
3415 %}
3416 
3417 // Constant for long shifts
3418 operand immI_32() %{
3419   predicate( n->get_int() == 32 );
3420   match(ConI);
3421 
3422   op_cost(0);
3423   format %{ %}
3424   interface(CONST_INTER);
3425 %}
3426 
3427 operand immI_1_31() %{
3428   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3429   match(ConI);
3430 
3431   op_cost(0);
3432   format %{ %}
3433   interface(CONST_INTER);
3434 %}
3435 
3436 operand immI_32_63() %{
3437   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3438   match(ConI);
3439   op_cost(0);
3440 
3441   format %{ %}
3442   interface(CONST_INTER);
3443 %}
3444 
3445 operand immI_1() %{
3446   predicate( n->get_int() == 1 );
3447   match(ConI);
3448 
3449   op_cost(0);
3450   format %{ %}
3451   interface(CONST_INTER);
3452 %}
3453 
3454 operand immI_2() %{
3455   predicate( n->get_int() == 2 );
3456   match(ConI);
3457 
3458   op_cost(0);
3459   format %{ %}
3460   interface(CONST_INTER);
3461 %}
3462 
3463 operand immI_3() %{
3464   predicate( n->get_int() == 3 );
3465   match(ConI);
3466 
3467   op_cost(0);
3468   format %{ %}
3469   interface(CONST_INTER);
3470 %}
3471 
3472 // Pointer Immediate
3473 operand immP() %{
3474   match(ConP);
3475 
3476   op_cost(10);
3477   format %{ %}
3478   interface(CONST_INTER);
3479 %}
3480 
3481 // NULL Pointer Immediate
3482 operand immP0() %{
3483   predicate( n->get_ptr() == 0 );
3484   match(ConP);
3485   op_cost(0);
3486 
3487   format %{ %}
3488   interface(CONST_INTER);
3489 %}
3490 
3491 // Long Immediate
3492 operand immL() %{
3493   match(ConL);
3494 
3495   op_cost(20);
3496   format %{ %}
3497   interface(CONST_INTER);
3498 %}
3499 
3500 // Long Immediate zero
3501 operand immL0() %{
3502   predicate( n->get_long() == 0L );
3503   match(ConL);
3504   op_cost(0);
3505 
3506   format %{ %}
3507   interface(CONST_INTER);
3508 %}
3509 
// Long Immediate -1
3511 operand immL_M1() %{
3512   predicate( n->get_long() == -1L );
3513   match(ConL);
3514   op_cost(0);
3515 
3516   format %{ %}
3517   interface(CONST_INTER);
3518 %}
3519 
3520 // Long immediate from 0 to 127.
3521 // Used for a shorter form of long mul by 10.
3522 operand immL_127() %{
3523   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3524   match(ConL);
3525   op_cost(0);
3526 
3527   format %{ %}
3528   interface(CONST_INTER);
3529 %}
3530 
3531 // Long Immediate: low 32-bit mask
3532 operand immL_32bits() %{
3533   predicate(n->get_long() == 0xFFFFFFFFL);
3534   match(ConL);
3535   op_cost(0);
3536 
3537   format %{ %}
3538   interface(CONST_INTER);
3539 %}
3540 
// Long Immediate: value fits in 32 signed bits
3542 operand immL32() %{
3543   predicate(n->get_long() == (int)(n->get_long()));
3544   match(ConL);
3545   op_cost(20);
3546 
3547   format %{ %}
3548   interface(CONST_INTER);
3549 %}
3550 
// Double Immediate zero
3552 operand immDPR0() %{
3553   // Do additional (and counter-intuitive) test against NaN to work around VC++
3554   // bug that generates code such that NaNs compare equal to 0.0
3555   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3556   match(ConD);
3557 
3558   op_cost(5);
3559   format %{ %}
3560   interface(CONST_INTER);
3561 %}
3562 
3563 // Double Immediate one
3564 operand immDPR1() %{
3565   predicate( UseSSE<=1 && n->getd() == 1.0 );
3566   match(ConD);
3567 
3568   op_cost(5);
3569   format %{ %}
3570   interface(CONST_INTER);
3571 %}
3572 
3573 // Double Immediate
3574 operand immDPR() %{
3575   predicate(UseSSE<=1);
3576   match(ConD);
3577 
3578   op_cost(5);
3579   format %{ %}
3580   interface(CONST_INTER);
3581 %}
3582 
3583 operand immD() %{
3584   predicate(UseSSE>=2);
3585   match(ConD);
3586 
3587   op_cost(5);
3588   format %{ %}
3589   interface(CONST_INTER);
3590 %}
3591 
3592 // Double Immediate zero
3593 operand immD0() %{
3594   // Do additional (and counter-intuitive) test against NaN to work around VC++
3595   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3596   // compare equal to -0.0.
3597   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3598   match(ConD);
3599 
3600   format %{ %}
3601   interface(CONST_INTER);
3602 %}
3603 
3604 // Float Immediate zero
3605 operand immFPR0() %{
3606   predicate(UseSSE == 0 && n->getf() == 0.0F);
3607   match(ConF);
3608 
3609   op_cost(5);
3610   format %{ %}
3611   interface(CONST_INTER);
3612 %}
3613 
3614 // Float Immediate one
3615 operand immFPR1() %{
3616   predicate(UseSSE == 0 && n->getf() == 1.0F);
3617   match(ConF);
3618 
3619   op_cost(5);
3620   format %{ %}
3621   interface(CONST_INTER);
3622 %}
3623 
3624 // Float Immediate
3625 operand immFPR() %{
3626   predicate( UseSSE == 0 );
3627   match(ConF);
3628 
3629   op_cost(5);
3630   format %{ %}
3631   interface(CONST_INTER);
3632 %}
3633 
3634 // Float Immediate
3635 operand immF() %{
3636   predicate(UseSSE >= 1);
3637   match(ConF);
3638 
3639   op_cost(5);
3640   format %{ %}
3641   interface(CONST_INTER);
3642 %}
3643 
3644 // Float Immediate zero.  Zero and not -0.0
3645 operand immF0() %{
3646   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3647   match(ConF);
3648 
3649   op_cost(5);
3650   format %{ %}
3651   interface(CONST_INTER);
3652 %}
3653 
3654 // Immediates for special shifts (sign extend)
3655 
3656 // Constants for increment
3657 operand immI_16() %{
3658   predicate( n->get_int() == 16 );
3659   match(ConI);
3660 
3661   format %{ %}
3662   interface(CONST_INTER);
3663 %}
3664 
3665 operand immI_24() %{
3666   predicate( n->get_int() == 24 );
3667   match(ConI);
3668 
3669   format %{ %}
3670   interface(CONST_INTER);
3671 %}
3672 
3673 // Constant for byte-wide masking
3674 operand immI_255() %{
3675   predicate( n->get_int() == 255 );
3676   match(ConI);
3677 
3678   format %{ %}
3679   interface(CONST_INTER);
3680 %}
3681 
3682 // Constant for short-wide masking
3683 operand immI_65535() %{
3684   predicate(n->get_int() == 65535);
3685   match(ConI);
3686 
3687   format %{ %}
3688   interface(CONST_INTER);
3689 %}
3690 
3691 // Register Operands
3692 // Integer Register
3693 operand rRegI() %{
3694   constraint(ALLOC_IN_RC(int_reg));
3695   match(RegI);
3696   match(xRegI);
3697   match(eAXRegI);
3698   match(eBXRegI);
3699   match(eCXRegI);
3700   match(eDXRegI);
3701   match(eDIRegI);
3702   match(eSIRegI);
3703 
3704   format %{ %}
3705   interface(REG_INTER);
3706 %}
3707 
3708 // Subset of Integer Register
3709 operand xRegI(rRegI reg) %{
3710   constraint(ALLOC_IN_RC(int_x_reg));
3711   match(reg);
3712   match(eAXRegI);
3713   match(eBXRegI);
3714   match(eCXRegI);
3715   match(eDXRegI);
3716 
3717   format %{ %}
3718   interface(REG_INTER);
3719 %}
3720 
3721 // Special Registers
3722 operand eAXRegI(xRegI reg) %{
3723   constraint(ALLOC_IN_RC(eax_reg));
3724   match(reg);
3725   match(rRegI);
3726 
3727   format %{ "EAX" %}
3728   interface(REG_INTER);
3729 %}
3730 
3731 // Special Registers
3732 operand eBXRegI(xRegI reg) %{
3733   constraint(ALLOC_IN_RC(ebx_reg));
3734   match(reg);
3735   match(rRegI);
3736 
3737   format %{ "EBX" %}
3738   interface(REG_INTER);
3739 %}
3740 
3741 operand eCXRegI(xRegI reg) %{
3742   constraint(ALLOC_IN_RC(ecx_reg));
3743   match(reg);
3744   match(rRegI);
3745 
3746   format %{ "ECX" %}
3747   interface(REG_INTER);
3748 %}
3749 
3750 operand eDXRegI(xRegI reg) %{
3751   constraint(ALLOC_IN_RC(edx_reg));
3752   match(reg);
3753   match(rRegI);
3754 
3755   format %{ "EDX" %}
3756   interface(REG_INTER);
3757 %}
3758 
3759 operand eDIRegI(xRegI reg) %{
3760   constraint(ALLOC_IN_RC(edi_reg));
3761   match(reg);
3762   match(rRegI);
3763 
3764   format %{ "EDI" %}
3765   interface(REG_INTER);
3766 %}
3767 
3768 operand naxRegI() %{
3769   constraint(ALLOC_IN_RC(nax_reg));
3770   match(RegI);
3771   match(eCXRegI);
3772   match(eDXRegI);
3773   match(eSIRegI);
3774   match(eDIRegI);
3775 
3776   format %{ %}
3777   interface(REG_INTER);
3778 %}
3779 
3780 operand nadxRegI() %{
3781   constraint(ALLOC_IN_RC(nadx_reg));
3782   match(RegI);
3783   match(eBXRegI);
3784   match(eCXRegI);
3785   match(eSIRegI);
3786   match(eDIRegI);
3787 
3788   format %{ %}
3789   interface(REG_INTER);
3790 %}
3791 
3792 operand ncxRegI() %{
3793   constraint(ALLOC_IN_RC(ncx_reg));
3794   match(RegI);
3795   match(eAXRegI);
3796   match(eDXRegI);
3797   match(eSIRegI);
3798   match(eDIRegI);
3799 
3800   format %{ %}
3801   interface(REG_INTER);
3802 %}
3803 
// This operand was used by cmpFastUnlock, but conflicted with the 'object' reg.
3806 operand eSIRegI(xRegI reg) %{
3807    constraint(ALLOC_IN_RC(esi_reg));
3808    match(reg);
3809    match(rRegI);
3810 
3811    format %{ "ESI" %}
3812    interface(REG_INTER);
3813 %}
3814 
3815 // Pointer Register
3816 operand anyRegP() %{
3817   constraint(ALLOC_IN_RC(any_reg));
3818   match(RegP);
3819   match(eAXRegP);
3820   match(eBXRegP);
3821   match(eCXRegP);
3822   match(eDIRegP);
3823   match(eRegP);
3824 
3825   format %{ %}
3826   interface(REG_INTER);
3827 %}
3828 
3829 operand eRegP() %{
3830   constraint(ALLOC_IN_RC(int_reg));
3831   match(RegP);
3832   match(eAXRegP);
3833   match(eBXRegP);
3834   match(eCXRegP);
3835   match(eDIRegP);
3836 
3837   format %{ %}
3838   interface(REG_INTER);
3839 %}
3840 
3841 // On windows95, EBP is not safe to use for implicit null tests.
3842 operand eRegP_no_EBP() %{
3843   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3844   match(RegP);
3845   match(eAXRegP);
3846   match(eBXRegP);
3847   match(eCXRegP);
3848   match(eDIRegP);
3849 
3850   op_cost(100);
3851   format %{ %}
3852   interface(REG_INTER);
3853 %}
3854 
3855 operand naxRegP() %{
3856   constraint(ALLOC_IN_RC(nax_reg));
3857   match(RegP);
3858   match(eBXRegP);
3859   match(eDXRegP);
3860   match(eCXRegP);
3861   match(eSIRegP);
3862   match(eDIRegP);
3863 
3864   format %{ %}
3865   interface(REG_INTER);
3866 %}
3867 
3868 operand nabxRegP() %{
3869   constraint(ALLOC_IN_RC(nabx_reg));
3870   match(RegP);
3871   match(eCXRegP);
3872   match(eDXRegP);
3873   match(eSIRegP);
3874   match(eDIRegP);
3875 
3876   format %{ %}
3877   interface(REG_INTER);
3878 %}
3879 
3880 operand pRegP() %{
3881   constraint(ALLOC_IN_RC(p_reg));
3882   match(RegP);
3883   match(eBXRegP);
3884   match(eDXRegP);
3885   match(eSIRegP);
3886   match(eDIRegP);
3887 
3888   format %{ %}
3889   interface(REG_INTER);
3890 %}
3891 
3892 // Special Registers
3893 // Return a pointer value
3894 operand eAXRegP(eRegP reg) %{
3895   constraint(ALLOC_IN_RC(eax_reg));
3896   match(reg);
3897   format %{ "EAX" %}
3898   interface(REG_INTER);
3899 %}
3900 
3901 // Used in AtomicAdd
3902 operand eBXRegP(eRegP reg) %{
3903   constraint(ALLOC_IN_RC(ebx_reg));
3904   match(reg);
3905   format %{ "EBX" %}
3906   interface(REG_INTER);
3907 %}
3908 
3909 // Tail-call (interprocedural jump) to interpreter
3910 operand eCXRegP(eRegP reg) %{
3911   constraint(ALLOC_IN_RC(ecx_reg));
3912   match(reg);
3913   format %{ "ECX" %}
3914   interface(REG_INTER);
3915 %}
3916 
3917 operand eSIRegP(eRegP reg) %{
3918   constraint(ALLOC_IN_RC(esi_reg));
3919   match(reg);
3920   format %{ "ESI" %}
3921   interface(REG_INTER);
3922 %}
3923 
3924 // Used in rep stosw
3925 operand eDIRegP(eRegP reg) %{
3926   constraint(ALLOC_IN_RC(edi_reg));
3927   match(reg);
3928   format %{ "EDI" %}
3929   interface(REG_INTER);
3930 %}
3931 
3932 operand eRegL() %{
3933   constraint(ALLOC_IN_RC(long_reg));
3934   match(RegL);
3935   match(eADXRegL);
3936 
3937   format %{ %}
3938   interface(REG_INTER);
3939 %}
3940 
3941 operand eADXRegL( eRegL reg ) %{
3942   constraint(ALLOC_IN_RC(eadx_reg));
3943   match(reg);
3944 
3945   format %{ "EDX:EAX" %}
3946   interface(REG_INTER);
3947 %}
3948 
3949 operand eBCXRegL( eRegL reg ) %{
3950   constraint(ALLOC_IN_RC(ebcx_reg));
3951   match(reg);
3952 
3953   format %{ "EBX:ECX" %}
3954   interface(REG_INTER);
3955 %}
3956 
3957 // Special case for integer high multiply
3958 operand eADXRegL_low_only() %{
3959   constraint(ALLOC_IN_RC(eadx_reg));
3960   match(RegL);
3961 
3962   format %{ "EAX" %}
3963   interface(REG_INTER);
3964 %}
3965 
3966 // Flags register, used as output of compare instructions
3967 operand eFlagsReg() %{
3968   constraint(ALLOC_IN_RC(int_flags));
3969   match(RegFlags);
3970 
3971   format %{ "EFLAGS" %}
3972   interface(REG_INTER);
3973 %}
3974 
3975 // Flags register, used as output of FLOATING POINT compare instructions
3976 operand eFlagsRegU() %{
3977   constraint(ALLOC_IN_RC(int_flags));
3978   match(RegFlags);
3979 
3980   format %{ "EFLAGS_U" %}
3981   interface(REG_INTER);
3982 %}
3983 
3984 operand eFlagsRegUCF() %{
3985   constraint(ALLOC_IN_RC(int_flags));
3986   match(RegFlags);
3987   predicate(false);
3988 
3989   format %{ "EFLAGS_U_CF" %}
3990   interface(REG_INTER);
3991 %}
3992 
3993 // Condition Code Register used by long compare
3994 operand flagsReg_long_LTGE() %{
3995   constraint(ALLOC_IN_RC(int_flags));
3996   match(RegFlags);
3997   format %{ "FLAGS_LTGE" %}
3998   interface(REG_INTER);
3999 %}
4000 operand flagsReg_long_EQNE() %{
4001   constraint(ALLOC_IN_RC(int_flags));
4002   match(RegFlags);
4003   format %{ "FLAGS_EQNE" %}
4004   interface(REG_INTER);
4005 %}
4006 operand flagsReg_long_LEGT() %{
4007   constraint(ALLOC_IN_RC(int_flags));
4008   match(RegFlags);
4009   format %{ "FLAGS_LEGT" %}
4010   interface(REG_INTER);
4011 %}
4012 
4013 // Condition Code Register used by unsigned long compare
4014 operand flagsReg_ulong_LTGE() %{
4015   constraint(ALLOC_IN_RC(int_flags));
4016   match(RegFlags);
4017   format %{ "FLAGS_U_LTGE" %}
4018   interface(REG_INTER);
4019 %}
4020 operand flagsReg_ulong_EQNE() %{
4021   constraint(ALLOC_IN_RC(int_flags));
4022   match(RegFlags);
4023   format %{ "FLAGS_U_EQNE" %}
4024   interface(REG_INTER);
4025 %}
4026 operand flagsReg_ulong_LEGT() %{
4027   constraint(ALLOC_IN_RC(int_flags));
4028   match(RegFlags);
4029   format %{ "FLAGS_U_LEGT" %}
4030   interface(REG_INTER);
4031 %}
4032 
4033 // Float register operands
4034 operand regDPR() %{
4035   predicate( UseSSE < 2 );
4036   constraint(ALLOC_IN_RC(fp_dbl_reg));
4037   match(RegD);
4038   match(regDPR1);
4039   match(regDPR2);
4040   format %{ %}
4041   interface(REG_INTER);
4042 %}
4043 
4044 operand regDPR1(regDPR reg) %{
4045   predicate( UseSSE < 2 );
4046   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4047   match(reg);
4048   format %{ "FPR1" %}
4049   interface(REG_INTER);
4050 %}
4051 
4052 operand regDPR2(regDPR reg) %{
4053   predicate( UseSSE < 2 );
4054   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4055   match(reg);
4056   format %{ "FPR2" %}
4057   interface(REG_INTER);
4058 %}
4059 
4060 operand regnotDPR1(regDPR reg) %{
4061   predicate( UseSSE < 2 );
4062   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4063   match(reg);
4064   format %{ %}
4065   interface(REG_INTER);
4066 %}
4067 
4068 // Float register operands
4069 operand regFPR() %{
4070   predicate( UseSSE < 2 );
4071   constraint(ALLOC_IN_RC(fp_flt_reg));
4072   match(RegF);
4073   match(regFPR1);
4074   format %{ %}
4075   interface(REG_INTER);
4076 %}
4077 
4078 // Float register operands
4079 operand regFPR1(regFPR reg) %{
4080   predicate( UseSSE < 2 );
4081   constraint(ALLOC_IN_RC(fp_flt_reg0));
4082   match(reg);
4083   format %{ "FPR1" %}
4084   interface(REG_INTER);
4085 %}
4086 
4087 // XMM Float register operands
4088 operand regF() %{
4089   predicate( UseSSE>=1 );
4090   constraint(ALLOC_IN_RC(float_reg_legacy));
4091   match(RegF);
4092   format %{ %}
4093   interface(REG_INTER);
4094 %}
4095 
4096 // Float register operands
4097 operand vlRegF() %{
4098    constraint(ALLOC_IN_RC(float_reg_vl));
4099    match(RegF);
4100 
4101    format %{ %}
4102    interface(REG_INTER);
4103 %}
4104 
4105 // XMM Double register operands
4106 operand regD() %{
4107   predicate( UseSSE>=2 );
4108   constraint(ALLOC_IN_RC(double_reg_legacy));
4109   match(RegD);
4110   format %{ %}
4111   interface(REG_INTER);
4112 %}
4113 
4114 // Double register operands
4115 operand vlRegD() %{
4116    constraint(ALLOC_IN_RC(double_reg_vl));
4117    match(RegD);
4118 
4119    format %{ %}
4120    interface(REG_INTER);
4121 %}
4122 
// Vectors: note that we use legacy registers to avoid extra (unneeded in the
// 32-bit VM) runtime code generation via reg_class_dynamic.
4125 operand vecS() %{
4126   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4127   match(VecS);
4128 
4129   format %{ %}
4130   interface(REG_INTER);
4131 %}
4132 
4133 operand legVecS() %{
4134   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4135   match(VecS);
4136 
4137   format %{ %}
4138   interface(REG_INTER);
4139 %}
4140 
4141 operand vecD() %{
4142   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4143   match(VecD);
4144 
4145   format %{ %}
4146   interface(REG_INTER);
4147 %}
4148 
4149 operand legVecD() %{
4150   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4151   match(VecD);
4152 
4153   format %{ %}
4154   interface(REG_INTER);
4155 %}
4156 
4157 operand vecX() %{
4158   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4159   match(VecX);
4160 
4161   format %{ %}
4162   interface(REG_INTER);
4163 %}
4164 
4165 operand legVecX() %{
4166   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4167   match(VecX);
4168 
4169   format %{ %}
4170   interface(REG_INTER);
4171 %}
4172 
4173 operand vecY() %{
4174   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4175   match(VecY);
4176 
4177   format %{ %}
4178   interface(REG_INTER);
4179 %}
4180 
4181 operand legVecY() %{
4182   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4183   match(VecY);
4184 
4185   format %{ %}
4186   interface(REG_INTER);
4187 %}
4188 
4189 //----------Memory Operands----------------------------------------------------
4190 // Direct Memory Operand
4191 operand direct(immP addr) %{
4192   match(addr);
4193 
4194   format %{ "[$addr]" %}
4195   interface(MEMORY_INTER) %{
4196     base(0xFFFFFFFF);
4197     index(0x4);
4198     scale(0x0);
4199     disp($addr);
4200   %}
4201 %}
4202 
4203 // Indirect Memory Operand
4204 operand indirect(eRegP reg) %{
4205   constraint(ALLOC_IN_RC(int_reg));
4206   match(reg);
4207 
4208   format %{ "[$reg]" %}
4209   interface(MEMORY_INTER) %{
4210     base($reg);
4211     index(0x4);
4212     scale(0x0);
4213     disp(0x0);
4214   %}
4215 %}
4216 
4217 // Indirect Memory Plus Short Offset Operand
4218 operand indOffset8(eRegP reg, immI8 off) %{
4219   match(AddP reg off);
4220 
4221   format %{ "[$reg + $off]" %}
4222   interface(MEMORY_INTER) %{
4223     base($reg);
4224     index(0x4);
4225     scale(0x0);
4226     disp($off);
4227   %}
4228 %}
4229 
4230 // Indirect Memory Plus Long Offset Operand
4231 operand indOffset32(eRegP reg, immI off) %{
4232   match(AddP reg off);
4233 
4234   format %{ "[$reg + $off]" %}
4235   interface(MEMORY_INTER) %{
4236     base($reg);
4237     index(0x4);
4238     scale(0x0);
4239     disp($off);
4240   %}
4241 %}
4242 
4243 // Indirect Memory Plus Long Offset Operand
4244 operand indOffset32X(rRegI reg, immP off) %{
4245   match(AddP off reg);
4246 
4247   format %{ "[$reg + $off]" %}
4248   interface(MEMORY_INTER) %{
4249     base($reg);
4250     index(0x4);
4251     scale(0x0);
4252     disp($off);
4253   %}
4254 %}
4255 
4256 // Indirect Memory Plus Index Register Plus Offset Operand
4257 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4258   match(AddP (AddP reg ireg) off);
4259 
4260   op_cost(10);
4261   format %{"[$reg + $off + $ireg]" %}
4262   interface(MEMORY_INTER) %{
4263     base($reg);
4264     index($ireg);
4265     scale(0x0);
4266     disp($off);
4267   %}
4268 %}
4269 
4270 // Indirect Memory Plus Index Register Plus Offset Operand
4271 operand indIndex(eRegP reg, rRegI ireg) %{
4272   match(AddP reg ireg);
4273 
4274   op_cost(10);
4275   format %{"[$reg + $ireg]" %}
4276   interface(MEMORY_INTER) %{
4277     base($reg);
4278     index($ireg);
4279     scale(0x0);
4280     disp(0x0);
4281   %}
4282 %}
4283 
4284 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
4286 // // -------------------------------------------------------------------------
4287 // // Scaled Memory Operands
4288 // // Indirect Memory Times Scale Plus Offset Operand
4289 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4290 //   match(AddP off (LShiftI ireg scale));
4291 //
4292 //   op_cost(10);
4293 //   format %{"[$off + $ireg << $scale]" %}
4294 //   interface(MEMORY_INTER) %{
4295 //     base(0x4);
4296 //     index($ireg);
4297 //     scale($scale);
4298 //     disp($off);
4299 //   %}
4300 // %}
4301 
4302 // Indirect Memory Times Scale Plus Index Register
4303 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4304   match(AddP reg (LShiftI ireg scale));
4305 
4306   op_cost(10);
4307   format %{"[$reg + $ireg << $scale]" %}
4308   interface(MEMORY_INTER) %{
4309     base($reg);
4310     index($ireg);
4311     scale($scale);
4312     disp(0x0);
4313   %}
4314 %}
4315 
4316 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4317 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4318   match(AddP (AddP reg (LShiftI ireg scale)) off);
4319 
4320   op_cost(10);
4321   format %{"[$reg + $off + $ireg << $scale]" %}
4322   interface(MEMORY_INTER) %{
4323     base($reg);
4324     index($ireg);
4325     scale($scale);
4326     disp($off);
4327   %}
4328 %}
4329 
4330 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4332 // the first word of the long.  If the load-long destination overlaps with
4333 // registers used in the addressing expression, the 2nd half will be loaded
4334 // from a clobbered address.  Fix this by requiring that load-long use
4335 // address registers that do not overlap with the load-long target.
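// For example, a long destined for EDX:EAX but addressed through EAX would
// clobber its own base register when the first word arrives, so the second
// word would then be fetched from a garbage address.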
4336 
4337 // load-long support
4338 operand load_long_RegP() %{
4339   constraint(ALLOC_IN_RC(esi_reg));
4340   match(RegP);
4341   match(eSIRegP);
4342   op_cost(100);
4343   format %{  %}
4344   interface(REG_INTER);
4345 %}
4346 
4347 // Indirect Memory Operand Long
4348 operand load_long_indirect(load_long_RegP reg) %{
4349   constraint(ALLOC_IN_RC(esi_reg));
4350   match(reg);
4351 
4352   format %{ "[$reg]" %}
4353   interface(MEMORY_INTER) %{
4354     base($reg);
4355     index(0x4);
4356     scale(0x0);
4357     disp(0x0);
4358   %}
4359 %}
4360 
4361 // Indirect Memory Plus Long Offset Operand
4362 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4363   match(AddP reg off);
4364 
4365   format %{ "[$reg + $off]" %}
4366   interface(MEMORY_INTER) %{
4367     base($reg);
4368     index(0x4);
4369     scale(0x0);
4370     disp($off);
4371   %}
4372 %}
4373 
4374 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4375 
4376 
4377 //----------Special Memory Operands--------------------------------------------
4378 // Stack Slot Operand - This operand is used for loading and storing temporary
4379 //                      values on the stack where a match requires a value to
4380 //                      flow through memory.
4381 operand stackSlotP(sRegP reg) %{
4382   constraint(ALLOC_IN_RC(stack_slots));
4383   // No match rule because this operand is only generated in matching
4384   format %{ "[$reg]" %}
4385   interface(MEMORY_INTER) %{
4386     base(0x4);   // ESP
4387     index(0x4);  // No Index
4388     scale(0x0);  // No Scale
4389     disp($reg);  // Stack Offset
4390   %}
4391 %}
4392 
4393 operand stackSlotI(sRegI reg) %{
4394   constraint(ALLOC_IN_RC(stack_slots));
4395   // No match rule because this operand is only generated in matching
4396   format %{ "[$reg]" %}
4397   interface(MEMORY_INTER) %{
4398     base(0x4);   // ESP
4399     index(0x4);  // No Index
4400     scale(0x0);  // No Scale
4401     disp($reg);  // Stack Offset
4402   %}
4403 %}
4404 
4405 operand stackSlotF(sRegF reg) %{
4406   constraint(ALLOC_IN_RC(stack_slots));
4407   // No match rule because this operand is only generated in matching
4408   format %{ "[$reg]" %}
4409   interface(MEMORY_INTER) %{
4410     base(0x4);   // ESP
4411     index(0x4);  // No Index
4412     scale(0x0);  // No Scale
4413     disp($reg);  // Stack Offset
4414   %}
4415 %}
4416 
4417 operand stackSlotD(sRegD reg) %{
4418   constraint(ALLOC_IN_RC(stack_slots));
4419   // No match rule because this operand is only generated in matching
4420   format %{ "[$reg]" %}
4421   interface(MEMORY_INTER) %{
4422     base(0x4);   // ESP
4423     index(0x4);  // No Index
4424     scale(0x0);  // No Scale
4425     disp($reg);  // Stack Offset
4426   %}
4427 %}
4428 
4429 operand stackSlotL(sRegL reg) %{
4430   constraint(ALLOC_IN_RC(stack_slots));
4431   // No match rule because this operand is only generated in matching
4432   format %{ "[$reg]" %}
4433   interface(MEMORY_INTER) %{
4434     base(0x4);   // ESP
4435     index(0x4);  // No Index
4436     scale(0x0);  // No Scale
4437     disp($reg);  // Stack Offset
4438   %}
4439 %}
4440 
4441 //----------Memory Operands - Win95 Implicit Null Variants----------------
4442 // Indirect Memory Operand
4443 operand indirect_win95_safe(eRegP_no_EBP reg)
4444 %{
4445   constraint(ALLOC_IN_RC(int_reg));
4446   match(reg);
4447 
4448   op_cost(100);
4449   format %{ "[$reg]" %}
4450   interface(MEMORY_INTER) %{
4451     base($reg);
4452     index(0x4);
4453     scale(0x0);
4454     disp(0x0);
4455   %}
4456 %}
4457 
4458 // Indirect Memory Plus Short Offset Operand
4459 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4460 %{
4461   match(AddP reg off);
4462 
4463   op_cost(100);
4464   format %{ "[$reg + $off]" %}
4465   interface(MEMORY_INTER) %{
4466     base($reg);
4467     index(0x4);
4468     scale(0x0);
4469     disp($off);
4470   %}
4471 %}
4472 
4473 // Indirect Memory Plus Long Offset Operand
4474 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4475 %{
4476   match(AddP reg off);
4477 
4478   op_cost(100);
4479   format %{ "[$reg + $off]" %}
4480   interface(MEMORY_INTER) %{
4481     base($reg);
4482     index(0x4);
4483     scale(0x0);
4484     disp($off);
4485   %}
4486 %}
4487 
4488 // Indirect Memory Plus Index Register Plus Offset Operand
4489 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4490 %{
4491   match(AddP (AddP reg ireg) off);
4492 
4493   op_cost(100);
4494   format %{"[$reg + $off + $ireg]" %}
4495   interface(MEMORY_INTER) %{
4496     base($reg);
4497     index($ireg);
4498     scale(0x0);
4499     disp($off);
4500   %}
4501 %}
4502 
4503 // Indirect Memory Times Scale Plus Index Register
4504 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4505 %{
4506   match(AddP reg (LShiftI ireg scale));
4507 
4508   op_cost(100);
4509   format %{"[$reg + $ireg << $scale]" %}
4510   interface(MEMORY_INTER) %{
4511     base($reg);
4512     index($ireg);
4513     scale($scale);
4514     disp(0x0);
4515   %}
4516 %}
4517 
4518 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4519 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4520 %{
4521   match(AddP (AddP reg (LShiftI ireg scale)) off);
4522 
4523   op_cost(100);
4524   format %{"[$reg + $off + $ireg << $scale]" %}
4525   interface(MEMORY_INTER) %{
4526     base($reg);
4527     index($ireg);
4528     scale($scale);
4529     disp($off);
4530   %}
4531 %}
4532 
4533 //----------Conditional Branch Operands----------------------------------------
4534 // Comparison Op  - This is the operation of the comparison, and is limited to
4535 //                  the following set of codes:
4536 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4537 //
4538 // Other attributes of the comparison, such as unsignedness, are specified
4539 // by the comparison instruction that sets a condition code flags register.
4540 // That result is represented by a flags operand whose subtype is appropriate
4541 // to the unsignedness (etc.) of the comparison.
4542 //
4543 // Later, the instruction which matches both the Comparison Op (a Bool) and
4544 // the flags (produced by the Cmp) specifies the coding of the comparison op
4545 // by matching a specific subtype of Bool operand below, such as cmpOpU.
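     //
     // The hex values in the COND_INTER tables of the Jcc-style operands below
     // (cmpOp, cmpOpU, and their variants) are the x86 condition-code ("tttn")
     // encodings; they are added to the base opcode of Jcc/SETcc/CMOVcc, so
     // for example equal (0x4) gives 0x0F 0x84 for the near form of JE.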
4546 
4547 // Comparison Code
4548 operand cmpOp() %{
4549   match(Bool);
4550 
4551   format %{ "" %}
4552   interface(COND_INTER) %{
4553     equal(0x4, "e");
4554     not_equal(0x5, "ne");
4555     less(0xC, "l");
4556     greater_equal(0xD, "ge");
4557     less_equal(0xE, "le");
4558     greater(0xF, "g");
4559     overflow(0x0, "o");
4560     no_overflow(0x1, "no");
4561   %}
4562 %}
4563 
4564 // Comparison Code, unsigned compare.  Used by FP also, with
4565 // C2 (unordered) turned into GT or LT already.  The other bits
4566 // C0 and C3 are turned into Carry & Zero flags.
4567 operand cmpOpU() %{
4568   match(Bool);
4569 
4570   format %{ "" %}
4571   interface(COND_INTER) %{
4572     equal(0x4, "e");
4573     not_equal(0x5, "ne");
4574     less(0x2, "b");
4575     greater_equal(0x3, "nb");
4576     less_equal(0x6, "be");
4577     greater(0x7, "nbe");
4578     overflow(0x0, "o");
4579     no_overflow(0x1, "no");
4580   %}
4581 %}
4582 
4583 // Floating comparisons that don't require any fixup for the unordered case
4584 operand cmpOpUCF() %{
4585   match(Bool);
4586   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4587             n->as_Bool()->_test._test == BoolTest::ge ||
4588             n->as_Bool()->_test._test == BoolTest::le ||
4589             n->as_Bool()->_test._test == BoolTest::gt);
4590   format %{ "" %}
4591   interface(COND_INTER) %{
4592     equal(0x4, "e");
4593     not_equal(0x5, "ne");
4594     less(0x2, "b");
4595     greater_equal(0x3, "nb");
4596     less_equal(0x6, "be");
4597     greater(0x7, "nbe");
4598     overflow(0x0, "o");
4599     no_overflow(0x1, "no");
4600   %}
4601 %}
4602 
4603 
4604 // Floating comparisons that can be fixed up with extra conditional jumps
4605 operand cmpOpUCF2() %{
4606   match(Bool);
4607   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4608             n->as_Bool()->_test._test == BoolTest::eq);
4609   format %{ "" %}
4610   interface(COND_INTER) %{
4611     equal(0x4, "e");
4612     not_equal(0x5, "ne");
4613     less(0x2, "b");
4614     greater_equal(0x3, "nb");
4615     less_equal(0x6, "be");
4616     greater(0x7, "nbe");
4617     overflow(0x0, "o");
4618     no_overflow(0x1, "no");
4619   %}
4620 %}
4621 
4622 // Comparison Code for FP conditional move
4623 operand cmpOp_fcmov() %{
4624   match(Bool);
4625 
4626   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4627             n->as_Bool()->_test._test != BoolTest::no_overflow);
4628   format %{ "" %}
4629   interface(COND_INTER) %{
4630     equal        (0x0C8);
4631     not_equal    (0x1C8);
4632     less         (0x0C0);
4633     greater_equal(0x1C0);
4634     less_equal   (0x0D0);
4635     greater      (0x1D0);
4636     overflow(0x0, "o"); // not really supported by the instruction
4637     no_overflow(0x1, "no"); // not really supported by the instruction
4638   %}
4639 %}
4640 
4641 // Comparison Code used in long compares
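     // The encodings below are deliberately mirrored relative to cmpOp
     // (less -> "g", greater -> "l", and so on): this operand is used where
     // the long-compare code has commuted (swapped) the operand order.
     // cmpOpU_commute below is the unsigned counterpart.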
4642 operand cmpOp_commute() %{
4643   match(Bool);
4644 
4645   format %{ "" %}
4646   interface(COND_INTER) %{
4647     equal(0x4, "e");
4648     not_equal(0x5, "ne");
4649     less(0xF, "g");
4650     greater_equal(0xE, "le");
4651     less_equal(0xD, "ge");
4652     greater(0xC, "l");
4653     overflow(0x0, "o");
4654     no_overflow(0x1, "no");
4655   %}
4656 %}
4657 
4658 // Comparison Code used in unsigned long compares
4659 operand cmpOpU_commute() %{
4660   match(Bool);
4661 
4662   format %{ "" %}
4663   interface(COND_INTER) %{
4664     equal(0x4, "e");
4665     not_equal(0x5, "ne");
4666     less(0x7, "nbe");
4667     greater_equal(0x6, "be");
4668     less_equal(0x3, "nb");
4669     greater(0x2, "b");
4670     overflow(0x0, "o");
4671     no_overflow(0x1, "no");
4672   %}
4673 %}
4674 
4675 //----------OPERAND CLASSES----------------------------------------------------
4676 // Operand Classes are groups of operands that are used to simplify
4677 // instruction definitions by not requiring the AD writer to specify separate
4678 // instructions for every form of operand when the instruction accepts
4679 // multiple operand types with the same basic encoding and format.  The classic
4680 // case of this is memory operands.
4681 
4682 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4683                indIndex, indIndexScale, indIndexScaleOffset);
4684 
4685 // Long memory operations are encoded as two instructions, the second using
4686 // a +4 offset.  This means an offset is always required and you cannot use
4687 // an oop as the offset (as is done when addressing static globals).
4688 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4689                     indIndex, indIndexScale, indIndexScaleOffset);
4690 
4691 
4692 //----------PIPELINE-----------------------------------------------------------
4693 // Rules which define the behavior of the target architecture's pipeline.
4694 pipeline %{
4695 
4696 //----------ATTRIBUTES---------------------------------------------------------
4697 attributes %{
4698   variable_size_instructions;        // Instructions are variable size
4699   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4700   instruction_unit_size = 1;         // An instruction is 1 byte long
4701   instruction_fetch_unit_size = 16;  // The processor fetches one line
4702   instruction_fetch_units = 1;       // of 16 bytes
4703 
4704   // List of nop instructions
4705   nops( MachNop );
4706 %}
4707 
4708 //----------RESOURCES----------------------------------------------------------
4709 // Resources are the functional units available to the machine
4710 
4711 // Generic P2/P3 pipeline
4712 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4713 // 3 instructions decoded per cycle.
4714 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4715 // 2 ALU ops; only ALU0 handles mul/div instructions.
4716 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4717            MS0, MS1, MEM = MS0 | MS1,
4718            BR, FPU,
4719            ALU0, ALU1, ALU = ALU0 | ALU1 );
4720 
4721 //----------PIPELINE DESCRIPTION-----------------------------------------------
4722 // Pipeline Description specifies the stages in the machine's pipeline
4723 
4724 // Generic P2/P3 pipeline
4725 pipe_desc(S0, S1, S2, S3, S4, S5);
4726 
4727 //----------PIPELINE CLASSES---------------------------------------------------
4728 // Pipeline Classes describe the stages in which input and output are
4729 // referenced by the hardware pipeline.
4730 
4731 // Naming convention: ialu or fpu
4732 // Then: _reg
4733 // Then: _reg if there is a 2nd register
4734 // Then: _long if it's a pair of instructions implementing a long
4735 // Then: _fat if it requires the big decoder
4736 //   Or: _mem if it requires the big decoder and a memory unit.
4737 
4738 // Integer ALU reg operation
4739 pipe_class ialu_reg(rRegI dst) %{
4740     single_instruction;
4741     dst    : S4(write);
4742     dst    : S3(read);
4743     DECODE : S0;        // any decoder
4744     ALU    : S3;        // any alu
4745 %}
4746 
4747 // Long ALU reg operation
4748 pipe_class ialu_reg_long(eRegL dst) %{
4749     instruction_count(2);
4750     dst    : S4(write);
4751     dst    : S3(read);
4752     DECODE : S0(2);     // any 2 decoders
4753     ALU    : S3(2);     // both alus
4754 %}
4755 
4756 // Integer ALU reg operation using big decoder
4757 pipe_class ialu_reg_fat(rRegI dst) %{
4758     single_instruction;
4759     dst    : S4(write);
4760     dst    : S3(read);
4761     D0     : S0;        // big decoder only
4762     ALU    : S3;        // any alu
4763 %}
4764 
4765 // Long ALU reg operation using big decoder
4766 pipe_class ialu_reg_long_fat(eRegL dst) %{
4767     instruction_count(2);
4768     dst    : S4(write);
4769     dst    : S3(read);
4770     D0     : S0(2);     // big decoder only; twice
4771     ALU    : S3(2);     // any 2 alus
4772 %}
4773 
4774 // Integer ALU reg-reg operation
4775 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4776     single_instruction;
4777     dst    : S4(write);
4778     src    : S3(read);
4779     DECODE : S0;        // any decoder
4780     ALU    : S3;        // any alu
4781 %}
4782 
4783 // Long ALU reg-reg operation
4784 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4785     instruction_count(2);
4786     dst    : S4(write);
4787     src    : S3(read);
4788     DECODE : S0(2);     // any 2 decoders
4789     ALU    : S3(2);     // both alus
4790 %}
4791 
4792 // Integer ALU reg-reg operation
4793 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4794     single_instruction;
4795     dst    : S4(write);
4796     src    : S3(read);
4797     D0     : S0;        // big decoder only
4798     ALU    : S3;        // any alu
4799 %}
4800 
4801 // Long ALU reg-reg operation
4802 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4803     instruction_count(2);
4804     dst    : S4(write);
4805     src    : S3(read);
4806     D0     : S0(2);     // big decoder only; twice
4807     ALU    : S3(2);     // both alus
4808 %}
4809 
4810 // Integer ALU reg-mem operation
4811 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4812     single_instruction;
4813     dst    : S5(write);
4814     mem    : S3(read);
4815     D0     : S0;        // big decoder only
4816     ALU    : S4;        // any alu
4817     MEM    : S3;        // any mem
4818 %}
4819 
4820 // Long ALU reg-mem operation
4821 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4822     instruction_count(2);
4823     dst    : S5(write);
4824     mem    : S3(read);
4825     D0     : S0(2);     // big decoder only; twice
4826     ALU    : S4(2);     // any 2 alus
4827     MEM    : S3(2);     // both mems
4828 %}
4829 
4830 // Integer mem operation (prefetch)
4831 pipe_class ialu_mem(memory mem)
4832 %{
4833     single_instruction;
4834     mem    : S3(read);
4835     D0     : S0;        // big decoder only
4836     MEM    : S3;        // any mem
4837 %}
4838 
4839 // Integer Store to Memory
4840 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4841     single_instruction;
4842     mem    : S3(read);
4843     src    : S5(read);
4844     D0     : S0;        // big decoder only
4845     ALU    : S4;        // any alu
4846     MEM    : S3;
4847 %}
4848 
4849 // Long Store to Memory
4850 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4851     instruction_count(2);
4852     mem    : S3(read);
4853     src    : S5(read);
4854     D0     : S0(2);     // big decoder only; twice
4855     ALU    : S4(2);     // any 2 alus
4856     MEM    : S3(2);     // Both mems
4857 %}
4858 
4859 // Integer Store to Memory
4860 pipe_class ialu_mem_imm(memory mem) %{
4861     single_instruction;
4862     mem    : S3(read);
4863     D0     : S0;        // big decoder only
4864     ALU    : S4;        // any alu
4865     MEM    : S3;
4866 %}
4867 
4868 // Integer ALU0 reg-reg operation
4869 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4870     single_instruction;
4871     dst    : S4(write);
4872     src    : S3(read);
4873     D0     : S0;        // Big decoder only
4874     ALU0   : S3;        // only alu0
4875 %}
4876 
4877 // Integer ALU0 reg-mem operation
4878 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4879     single_instruction;
4880     dst    : S5(write);
4881     mem    : S3(read);
4882     D0     : S0;        // big decoder only
4883     ALU0   : S4;        // ALU0 only
4884     MEM    : S3;        // any mem
4885 %}
4886 
4887 // Integer ALU reg-reg operation
4888 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4889     single_instruction;
4890     cr     : S4(write);
4891     src1   : S3(read);
4892     src2   : S3(read);
4893     DECODE : S0;        // any decoder
4894     ALU    : S3;        // any alu
4895 %}
4896 
4897 // Integer ALU reg-imm operation
4898 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4899     single_instruction;
4900     cr     : S4(write);
4901     src1   : S3(read);
4902     DECODE : S0;        // any decoder
4903     ALU    : S3;        // any alu
4904 %}
4905 
4906 // Integer ALU reg-mem operation
4907 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4908     single_instruction;
4909     cr     : S4(write);
4910     src1   : S3(read);
4911     src2   : S3(read);
4912     D0     : S0;        // big decoder only
4913     ALU    : S4;        // any alu
4914     MEM    : S3;
4915 %}
4916 
4917 // Conditional move reg-reg
4918 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4919     instruction_count(4);
4920     y      : S4(read);
4921     q      : S3(read);
4922     p      : S3(read);
4923     DECODE : S0(4);     // any decoder
4924 %}
4925 
4926 // Conditional move reg-reg
4927 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4928     single_instruction;
4929     dst    : S4(write);
4930     src    : S3(read);
4931     cr     : S3(read);
4932     DECODE : S0;        // any decoder
4933 %}
4934 
4935 // Conditional move reg-mem
4936 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4937     single_instruction;
4938     dst    : S4(write);
4939     src    : S3(read);
4940     cr     : S3(read);
4941     DECODE : S0;        // any decoder
4942     MEM    : S3;
4943 %}
4944 
4945 // Conditional move reg-reg long
4946 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4947     single_instruction;
4948     dst    : S4(write);
4949     src    : S3(read);
4950     cr     : S3(read);
4951     DECODE : S0(2);     // any 2 decoders
4952 %}
4953 
4954 // Conditional move double reg-reg
4955 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4956     single_instruction;
4957     dst    : S4(write);
4958     src    : S3(read);
4959     cr     : S3(read);
4960     DECODE : S0;        // any decoder
4961 %}
4962 
4963 // Float reg-reg operation
4964 pipe_class fpu_reg(regDPR dst) %{
4965     instruction_count(2);
4966     dst    : S3(read);
4967     DECODE : S0(2);     // any 2 decoders
4968     FPU    : S3;
4969 %}
4970 
4971 // Float reg-reg operation
4972 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4973     instruction_count(2);
4974     dst    : S4(write);
4975     src    : S3(read);
4976     DECODE : S0(2);     // any 2 decoders
4977     FPU    : S3;
4978 %}
4979 
4980 // Float reg-reg operation
4981 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4982     instruction_count(3);
4983     dst    : S4(write);
4984     src1   : S3(read);
4985     src2   : S3(read);
4986     DECODE : S0(3);     // any 3 decoders
4987     FPU    : S3(2);
4988 %}
4989 
4990 // Float reg-reg operation
4991 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4992     instruction_count(4);
4993     dst    : S4(write);
4994     src1   : S3(read);
4995     src2   : S3(read);
4996     src3   : S3(read);
4997     DECODE : S0(4);     // any 4 decoders
4998     FPU    : S3(2);
4999 %}
5000 
5001 // Float reg-reg operation
5002 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
5003     instruction_count(4);
5004     dst    : S4(write);
5005     src1   : S3(read);
5006     src2   : S3(read);
5007     src3   : S3(read);
5008     DECODE : S1(3);     // any 3 decoders
5009     D0     : S0;        // Big decoder only
5010     FPU    : S3(2);
5011     MEM    : S3;
5012 %}
5013 
5014 // Float reg-mem operation
5015 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
5016     instruction_count(2);
5017     dst    : S5(write);
5018     mem    : S3(read);
5019     D0     : S0;        // big decoder only
5020     DECODE : S1;        // any decoder for FPU POP
5021     FPU    : S4;
5022     MEM    : S3;        // any mem
5023 %}
5024 
5025 // Float reg-mem operation
5026 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
5027     instruction_count(3);
5028     dst    : S5(write);
5029     src1   : S3(read);
5030     mem    : S3(read);
5031     D0     : S0;        // big decoder only
5032     DECODE : S1(2);     // any decoder for FPU POP
5033     FPU    : S4;
5034     MEM    : S3;        // any mem
5035 %}
5036 
5037 // Float mem-reg operation
5038 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
5039     instruction_count(2);
5040     src    : S5(read);
5041     mem    : S3(read);
5042     DECODE : S0;        // any decoder for FPU PUSH
5043     D0     : S1;        // big decoder only
5044     FPU    : S4;
5045     MEM    : S3;        // any mem
5046 %}
5047 
5048 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5049     instruction_count(3);
5050     src1   : S3(read);
5051     src2   : S3(read);
5052     mem    : S3(read);
5053     DECODE : S0(2);     // any decoder for FPU PUSH
5054     D0     : S1;        // big decoder only
5055     FPU    : S4;
5056     MEM    : S3;        // any mem
5057 %}
5058 
5059 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5060     instruction_count(3);
5061     src1   : S3(read);
5062     src2   : S3(read);
5063     mem    : S4(read);
5064     DECODE : S0;        // any decoder for FPU PUSH
5065     D0     : S0(2);     // big decoder only
5066     FPU    : S4;
5067     MEM    : S3(2);     // any mem
5068 %}
5069 
5070 pipe_class fpu_mem_mem(memory dst, memory src1) %{
5071     instruction_count(2);
5072     src1   : S3(read);
5073     dst    : S4(read);
5074     D0     : S0(2);     // big decoder only
5075     MEM    : S3(2);     // any mem
5076 %}
5077 
5078 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5079     instruction_count(3);
5080     src1   : S3(read);
5081     src2   : S3(read);
5082     dst    : S4(read);
5083     D0     : S0(3);     // big decoder only
5084     FPU    : S4;
5085     MEM    : S3(3);     // any mem
5086 %}
5087 
5088 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5089     instruction_count(3);
5090     src1   : S4(read);
5091     mem    : S4(read);
5092     DECODE : S0;        // any decoder for FPU PUSH
5093     D0     : S0(2);     // big decoder only
5094     FPU    : S4;
5095     MEM    : S3(2);     // any mem
5096 %}
5097 
5098 // Float load constant
5099 pipe_class fpu_reg_con(regDPR dst) %{
5100     instruction_count(2);
5101     dst    : S5(write);
5102     D0     : S0;        // big decoder only for the load
5103     DECODE : S1;        // any decoder for FPU POP
5104     FPU    : S4;
5105     MEM    : S3;        // any mem
5106 %}
5107 
5108 // Float load constant
5109 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5110     instruction_count(3);
5111     dst    : S5(write);
5112     src    : S3(read);
5113     D0     : S0;        // big decoder only for the load
5114     DECODE : S1(2);     // any decoder for FPU POP
5115     FPU    : S4;
5116     MEM    : S3;        // any mem
5117 %}
5118 
5119 // UnConditional branch
5120 pipe_class pipe_jmp( label labl ) %{
5121     single_instruction;
5122     BR   : S3;
5123 %}
5124 
5125 // Conditional branch
5126 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5127     single_instruction;
5128     cr    : S1(read);
5129     BR    : S3;
5130 %}
5131 
5132 // Allocation idiom
5133 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5134     instruction_count(1); force_serialization;
5135     fixed_latency(6);
5136     heap_ptr : S3(read);
5137     DECODE   : S0(3);
5138     D0       : S2;
5139     MEM      : S3;
5140     ALU      : S3(2);
5141     dst      : S5(write);
5142     BR       : S5;
5143 %}
5144 
5145 // Generic big/slow expanded idiom
5146 pipe_class pipe_slow(  ) %{
5147     instruction_count(10); multiple_bundles; force_serialization;
5148     fixed_latency(100);
5149     D0  : S0(2);
5150     MEM : S3(2);
5151 %}
5152 
5153 // The real do-nothing guy
5154 pipe_class empty( ) %{
5155     instruction_count(0);
5156 %}
5157 
5158 // Define the class for the Nop node
5159 define %{
5160    MachNop = empty;
5161 %}
5162 
5163 %}
5164 
5165 //----------INSTRUCTIONS-------------------------------------------------------
5166 //
5167 // match      -- States which machine-independent subtree may be replaced
5168 //               by this instruction.
5169 // ins_cost   -- The estimated cost of this instruction is used by instruction
5170 //               selection to identify a minimum cost tree of machine
5171 //               instructions that matches a tree of machine-independent
5172 //               instructions.
5173 // format     -- A string providing the disassembly for this instruction.
5174 //               The value of an instruction's operand may be inserted
5175 //               by referring to it with a '$' prefix.
5176 // opcode     -- Three instruction opcodes may be provided.  These are referred
5177 //               to within an encode class as $primary, $secondary, and $tertiary
5178 //               respectively.  The primary opcode is commonly used to
5179 //               indicate the type of machine instruction, while secondary
5180 //               and tertiary are often used for prefix options or addressing
5181 //               modes.
5182 // ins_encode -- A list of encode classes with parameters. The encode class
5183 //               name must have been defined in an 'enc_class' specification
5184 //               in the encode section of the architecture description.
5185 
5186 //----------BSWAP-Instruction--------------------------------------------------
5187 instruct bytes_reverse_int(rRegI dst) %{
5188   match(Set dst (ReverseBytesI dst));
5189 
5190   format %{ "BSWAP  $dst" %}
5191   opcode(0x0F, 0xC8);
5192   ins_encode( OpcP, OpcSReg(dst) );
5193   ins_pipe( ialu_reg );
5194 %}
5195 
5196 instruct bytes_reverse_long(eRegL dst) %{
5197   match(Set dst (ReverseBytesL dst));
5198 
5199   format %{ "BSWAP  $dst.lo\n\t"
5200             "BSWAP  $dst.hi\n\t"
5201             "XCHG   $dst.lo $dst.hi" %}
5202 
5203   ins_cost(125);
5204   ins_encode( bswap_long_bytes(dst) );
5205   ins_pipe( ialu_reg_reg);
5206 %}
5207 
5208 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5209   match(Set dst (ReverseBytesUS dst));
5210   effect(KILL cr);
5211 
5212   format %{ "BSWAP  $dst\n\t"
5213             "SHR    $dst,16\n\t" %}
5214   ins_encode %{
5215     __ bswapl($dst$$Register);
5216     __ shrl($dst$$Register, 16);
5217   %}
5218   ins_pipe( ialu_reg );
5219 %}
5220 
5221 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5222   match(Set dst (ReverseBytesS dst));
5223   effect(KILL cr);
5224 
5225   format %{ "BSWAP  $dst\n\t"
5226             "SAR    $dst,16\n\t" %}
5227   ins_encode %{
5228     __ bswapl($dst$$Register);
5229     __ sarl($dst$$Register, 16);
5230   %}
5231   ins_pipe( ialu_reg );
5232 %}
5233 
5234 
5235 //---------- Zeros Count Instructions ------------------------------------------
5236 
5237 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5238   predicate(UseCountLeadingZerosInstruction);
5239   match(Set dst (CountLeadingZerosI src));
5240   effect(KILL cr);
5241 
5242   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5243   ins_encode %{
5244     __ lzcntl($dst$$Register, $src$$Register);
5245   %}
5246   ins_pipe(ialu_reg);
5247 %}
5248 
5249 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5250   predicate(!UseCountLeadingZerosInstruction);
5251   match(Set dst (CountLeadingZerosI src));
5252   effect(KILL cr);
5253 
5254   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5255             "JNZ    skip\n\t"
5256             "MOV    $dst, -1\n"
5257       "skip:\n\t"
5258             "NEG    $dst\n\t"
5259             "ADD    $dst, 31" %}
5260   ins_encode %{
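         // BSR returns the index of the highest set bit but leaves the
         // destination undefined when the source is zero, so substitute -1 in
         // that case.  The result is CLZ = 31 - index; 31 - (-1) = 32 covers
         // a zero input.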
5261     Register Rdst = $dst$$Register;
5262     Register Rsrc = $src$$Register;
5263     Label skip;
5264     __ bsrl(Rdst, Rsrc);
5265     __ jccb(Assembler::notZero, skip);
5266     __ movl(Rdst, -1);
5267     __ bind(skip);
5268     __ negl(Rdst);
5269     __ addl(Rdst, BitsPerInt - 1);
5270   %}
5271   ins_pipe(ialu_reg);
5272 %}
5273 
5274 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5275   predicate(UseCountLeadingZerosInstruction);
5276   match(Set dst (CountLeadingZerosL src));
5277   effect(TEMP dst, KILL cr);
5278 
5279   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5280             "JNC    done\n\t"
5281             "LZCNT  $dst, $src.lo\n\t"
5282             "ADD    $dst, 32\n"
5283       "done:" %}
5284   ins_encode %{
5285     Register Rdst = $dst$$Register;
5286     Register Rsrc = $src$$Register;
5287     Label done;
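         // LZCNT sets CF when its source is zero.  If the high word is
         // non-zero (carry clear) its count is the answer; otherwise count
         // the low word and add 32.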
5288     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5289     __ jccb(Assembler::carryClear, done);
5290     __ lzcntl(Rdst, Rsrc);
5291     __ addl(Rdst, BitsPerInt);
5292     __ bind(done);
5293   %}
5294   ins_pipe(ialu_reg);
5295 %}
5296 
5297 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5298   predicate(!UseCountLeadingZerosInstruction);
5299   match(Set dst (CountLeadingZerosL src));
5300   effect(TEMP dst, KILL cr);
5301 
5302   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5303             "JZ     msw_is_zero\n\t"
5304             "ADD    $dst, 32\n\t"
5305             "JMP    not_zero\n"
5306       "msw_is_zero:\n\t"
5307             "BSR    $dst, $src.lo\n\t"
5308             "JNZ    not_zero\n\t"
5309             "MOV    $dst, -1\n"
5310       "not_zero:\n\t"
5311             "NEG    $dst\n\t"
5312             "ADD    $dst, 63\n" %}
5313  ins_encode %{
5314     Register Rdst = $dst$$Register;
5315     Register Rsrc = $src$$Register;
5316     Label msw_is_zero;
5317     Label not_zero;
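         // Compute 63 minus the index of the highest set bit of the 64-bit
         // value.  When both words are zero the substituted -1 gives
         // 63 - (-1) = 64.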
5318     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5319     __ jccb(Assembler::zero, msw_is_zero);
5320     __ addl(Rdst, BitsPerInt);
5321     __ jmpb(not_zero);
5322     __ bind(msw_is_zero);
5323     __ bsrl(Rdst, Rsrc);
5324     __ jccb(Assembler::notZero, not_zero);
5325     __ movl(Rdst, -1);
5326     __ bind(not_zero);
5327     __ negl(Rdst);
5328     __ addl(Rdst, BitsPerLong - 1);
5329   %}
5330   ins_pipe(ialu_reg);
5331 %}
5332 
5333 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5334   predicate(UseCountTrailingZerosInstruction);
5335   match(Set dst (CountTrailingZerosI src));
5336   effect(KILL cr);
5337 
5338   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5339   ins_encode %{
5340     __ tzcntl($dst$$Register, $src$$Register);
5341   %}
5342   ins_pipe(ialu_reg);
5343 %}
5344 
5345 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5346   predicate(!UseCountTrailingZerosInstruction);
5347   match(Set dst (CountTrailingZerosI src));
5348   effect(KILL cr);
5349 
5350   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5351             "JNZ    done\n\t"
5352             "MOV    $dst, 32\n"
5353       "done:" %}
5354   ins_encode %{
5355     Register Rdst = $dst$$Register;
5356     Label done;
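         // BSF leaves the destination undefined when the source is zero, so
         // substitute the answer for a zero input (32) explicitly.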
5357     __ bsfl(Rdst, $src$$Register);
5358     __ jccb(Assembler::notZero, done);
5359     __ movl(Rdst, BitsPerInt);
5360     __ bind(done);
5361   %}
5362   ins_pipe(ialu_reg);
5363 %}
5364 
5365 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5366   predicate(UseCountTrailingZerosInstruction);
5367   match(Set dst (CountTrailingZerosL src));
5368   effect(TEMP dst, KILL cr);
5369 
5370   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5371             "JNC    done\n\t"
5372             "TZCNT  $dst, $src.hi\n\t"
5373             "ADD    $dst, 32\n"
5374             "done:" %}
5375   ins_encode %{
5376     Register Rdst = $dst$$Register;
5377     Register Rsrc = $src$$Register;
5378     Label done;
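         // TZCNT sets CF when its source is zero.  If the low word is
         // non-zero (carry clear) its count is the answer; otherwise count
         // the high word and add 32.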
5379     __ tzcntl(Rdst, Rsrc);
5380     __ jccb(Assembler::carryClear, done);
5381     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5382     __ addl(Rdst, BitsPerInt);
5383     __ bind(done);
5384   %}
5385   ins_pipe(ialu_reg);
5386 %}
5387 
5388 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5389   predicate(!UseCountTrailingZerosInstruction);
5390   match(Set dst (CountTrailingZerosL src));
5391   effect(TEMP dst, KILL cr);
5392 
5393   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5394             "JNZ    done\n\t"
5395             "BSF    $dst, $src.hi\n\t"
5396             "JNZ    msw_not_zero\n\t"
5397             "MOV    $dst, 32\n"
5398       "msw_not_zero:\n\t"
5399             "ADD    $dst, 32\n"
5400       "done:" %}
5401   ins_encode %{
5402     Register Rdst = $dst$$Register;
5403     Register Rsrc = $src$$Register;
5404     Label msw_not_zero;
5405     Label done;
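         // If the low word is non-zero its BSF result is the answer.
         // Otherwise count the high word and add 32; when both words are
         // zero the substituted 32 plus the final ADD yields 64.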
5406     __ bsfl(Rdst, Rsrc);
5407     __ jccb(Assembler::notZero, done);
5408     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5409     __ jccb(Assembler::notZero, msw_not_zero);
5410     __ movl(Rdst, BitsPerInt);
5411     __ bind(msw_not_zero);
5412     __ addl(Rdst, BitsPerInt);
5413     __ bind(done);
5414   %}
5415   ins_pipe(ialu_reg);
5416 %}
5417 
5418 
5419 //---------- Population Count Instructions -------------------------------------
5420 
5421 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5422   predicate(UsePopCountInstruction);
5423   match(Set dst (PopCountI src));
5424   effect(KILL cr);
5425 
5426   format %{ "POPCNT $dst, $src" %}
5427   ins_encode %{
5428     __ popcntl($dst$$Register, $src$$Register);
5429   %}
5430   ins_pipe(ialu_reg);
5431 %}
5432 
5433 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5434   predicate(UsePopCountInstruction);
5435   match(Set dst (PopCountI (LoadI mem)));
5436   effect(KILL cr);
5437 
5438   format %{ "POPCNT $dst, $mem" %}
5439   ins_encode %{
5440     __ popcntl($dst$$Register, $mem$$Address);
5441   %}
5442   ins_pipe(ialu_reg);
5443 %}
5444 
5445 // Note: Long.bitCount(long) returns an int.
5446 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5447   predicate(UsePopCountInstruction);
5448   match(Set dst (PopCountL src));
5449   effect(KILL cr, TEMP tmp, TEMP dst);
5450 
5451   format %{ "POPCNT $dst, $src.lo\n\t"
5452             "POPCNT $tmp, $src.hi\n\t"
5453             "ADD    $dst, $tmp" %}
5454   ins_encode %{
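         // The 64-bit popcount is the sum of the popcounts of the two 32-bit
         // halves; the sum never exceeds 64, so a plain 32-bit ADD suffices.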
5455     __ popcntl($dst$$Register, $src$$Register);
5456     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5457     __ addl($dst$$Register, $tmp$$Register);
5458   %}
5459   ins_pipe(ialu_reg);
5460 %}
5461 
5462 // Note: Long.bitCount(long) returns an int.
5463 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5464   predicate(UsePopCountInstruction);
5465   match(Set dst (PopCountL (LoadL mem)));
5466   effect(KILL cr, TEMP tmp, TEMP dst);
5467 
5468   format %{ "POPCNT $dst, $mem\n\t"
5469             "POPCNT $tmp, $mem+4\n\t"
5470             "ADD    $dst, $tmp" %}
5471   ins_encode %{
5472     //__ popcntl($dst$$Register, $mem$$Address$$first);
5473     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5474     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5475     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5476     __ addl($dst$$Register, $tmp$$Register);
5477   %}
5478   ins_pipe(ialu_reg);
5479 %}
5480 
5481 
5482 //----------Load/Store/Move Instructions---------------------------------------
5483 //----------Load Instructions--------------------------------------------------
5484 // Load Byte (8bit signed)
5485 instruct loadB(xRegI dst, memory mem) %{
5486   match(Set dst (LoadB mem));
5487 
5488   ins_cost(125);
5489   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5490 
5491   ins_encode %{
5492     __ movsbl($dst$$Register, $mem$$Address);
5493   %}
5494 
5495   ins_pipe(ialu_reg_mem);
5496 %}
5497 
5498 // Load Byte (8bit signed) into Long Register
5499 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5500   match(Set dst (ConvI2L (LoadB mem)));
5501   effect(KILL cr);
5502 
5503   ins_cost(375);
5504   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5505             "MOV    $dst.hi,$dst.lo\n\t"
5506             "SAR    $dst.hi,7" %}
5507 
5508   ins_encode %{
5509     __ movsbl($dst$$Register, $mem$$Address);
5510     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5511     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // The 24+1 MSB are already sign-extended, so SAR 7 (rather than 31) suffices.
5512   %}
5513 
5514   ins_pipe(ialu_reg_mem);
5515 %}
5516 
5517 // Load Unsigned Byte (8bit UNsigned)
5518 instruct loadUB(xRegI dst, memory mem) %{
5519   match(Set dst (LoadUB mem));
5520 
5521   ins_cost(125);
5522   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5523 
5524   ins_encode %{
5525     __ movzbl($dst$$Register, $mem$$Address);
5526   %}
5527 
5528   ins_pipe(ialu_reg_mem);
5529 %}
5530 
5531 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5532 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5533   match(Set dst (ConvI2L (LoadUB mem)));
5534   effect(KILL cr);
5535 
5536   ins_cost(250);
5537   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5538             "XOR    $dst.hi,$dst.hi" %}
5539 
5540   ins_encode %{
5541     Register Rdst = $dst$$Register;
5542     __ movzbl(Rdst, $mem$$Address);
5543     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5544   %}
5545 
5546   ins_pipe(ialu_reg_mem);
5547 %}
5548 
5549 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5550 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5551   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5552   effect(KILL cr);
5553 
5554   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5555             "XOR    $dst.hi,$dst.hi\n\t"
5556             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5557   ins_encode %{
5558     Register Rdst = $dst$$Register;
5559     __ movzbl(Rdst, $mem$$Address);
5560     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5561     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5562   %}
5563   ins_pipe(ialu_reg_mem);
5564 %}
5565 
5566 // Load Short (16bit signed)
5567 instruct loadS(rRegI dst, memory mem) %{
5568   match(Set dst (LoadS mem));
5569 
5570   ins_cost(125);
5571   format %{ "MOVSX  $dst,$mem\t# short" %}
5572 
5573   ins_encode %{
5574     __ movswl($dst$$Register, $mem$$Address);
5575   %}
5576 
5577   ins_pipe(ialu_reg_mem);
5578 %}
5579 
5580 // Load Short (16 bit signed) to Byte (8 bit signed)
5581 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5582   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5583 
5584   ins_cost(125);
5585   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5586   ins_encode %{
5587     __ movsbl($dst$$Register, $mem$$Address);
5588   %}
5589   ins_pipe(ialu_reg_mem);
5590 %}
5591 
5592 // Load Short (16bit signed) into Long Register
5593 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5594   match(Set dst (ConvI2L (LoadS mem)));
5595   effect(KILL cr);
5596 
5597   ins_cost(375);
5598   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5599             "MOV    $dst.hi,$dst.lo\n\t"
5600             "SAR    $dst.hi,15" %}
5601 
5602   ins_encode %{
5603     __ movswl($dst$$Register, $mem$$Address);
5604     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5605     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // The 16+1 MSB are already sign-extended, so SAR 15 (rather than 31) suffices.
5606   %}
5607 
5608   ins_pipe(ialu_reg_mem);
5609 %}
5610 
5611 // Load Unsigned Short/Char (16bit unsigned)
5612 instruct loadUS(rRegI dst, memory mem) %{
5613   match(Set dst (LoadUS mem));
5614 
5615   ins_cost(125);
5616   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5617 
5618   ins_encode %{
5619     __ movzwl($dst$$Register, $mem$$Address);
5620   %}
5621 
5622   ins_pipe(ialu_reg_mem);
5623 %}
5624 
5625 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5626 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5627   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5628 
5629   ins_cost(125);
5630   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5631   ins_encode %{
5632     __ movsbl($dst$$Register, $mem$$Address);
5633   %}
5634   ins_pipe(ialu_reg_mem);
5635 %}
5636 
5637 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5638 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5639   match(Set dst (ConvI2L (LoadUS mem)));
5640   effect(KILL cr);
5641 
5642   ins_cost(250);
5643   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5644             "XOR    $dst.hi,$dst.hi" %}
5645 
5646   ins_encode %{
5647     __ movzwl($dst$$Register, $mem$$Address);
5648     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5649   %}
5650 
5651   ins_pipe(ialu_reg_mem);
5652 %}
5653 
5654 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5655 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5656   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5657   effect(KILL cr);
5658 
5659   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5660             "XOR    $dst.hi,$dst.hi" %}
5661   ins_encode %{
5662     Register Rdst = $dst$$Register;
5663     __ movzbl(Rdst, $mem$$Address);
5664     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5665   %}
5666   ins_pipe(ialu_reg_mem);
5667 %}
5668 
5669 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5670 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5671   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5672   effect(KILL cr);
5673 
5674   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5675             "XOR    $dst.hi,$dst.hi\n\t"
5676             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5677   ins_encode %{
5678     Register Rdst = $dst$$Register;
5679     __ movzwl(Rdst, $mem$$Address);
5680     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5681     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5682   %}
5683   ins_pipe(ialu_reg_mem);
5684 %}
5685 
5686 // Load Integer
5687 instruct loadI(rRegI dst, memory mem) %{
5688   match(Set dst (LoadI mem));
5689 
5690   ins_cost(125);
5691   format %{ "MOV    $dst,$mem\t# int" %}
5692 
5693   ins_encode %{
5694     __ movl($dst$$Register, $mem$$Address);
5695   %}
5696 
5697   ins_pipe(ialu_reg_mem);
5698 %}
5699 
5700 // Load Integer (32 bit signed) to Byte (8 bit signed)
5701 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5702   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5703 
5704   ins_cost(125);
5705   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5706   ins_encode %{
5707     __ movsbl($dst$$Register, $mem$$Address);
5708   %}
5709   ins_pipe(ialu_reg_mem);
5710 %}
5711 
5712 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5713 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5714   match(Set dst (AndI (LoadI mem) mask));
5715 
5716   ins_cost(125);
5717   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5718   ins_encode %{
5719     __ movzbl($dst$$Register, $mem$$Address);
5720   %}
5721   ins_pipe(ialu_reg_mem);
5722 %}
5723 
5724 // Load Integer (32 bit signed) to Short (16 bit signed)
5725 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5726   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5727 
5728   ins_cost(125);
5729   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5730   ins_encode %{
5731     __ movswl($dst$$Register, $mem$$Address);
5732   %}
5733   ins_pipe(ialu_reg_mem);
5734 %}
5735 
5736 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5737 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5738   match(Set dst (AndI (LoadI mem) mask));
5739 
5740   ins_cost(125);
5741   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5742   ins_encode %{
5743     __ movzwl($dst$$Register, $mem$$Address);
5744   %}
5745   ins_pipe(ialu_reg_mem);
5746 %}
5747 
5748 // Load Integer into Long Register
5749 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5750   match(Set dst (ConvI2L (LoadI mem)));
5751   effect(KILL cr);
5752 
5753   ins_cost(375);
5754   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5755             "MOV    $dst.hi,$dst.lo\n\t"
5756             "SAR    $dst.hi,31" %}
5757 
5758   ins_encode %{
5759     __ movl($dst$$Register, $mem$$Address);
5760     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5761     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5762   %}
5763 
5764   ins_pipe(ialu_reg_mem);
5765 %}
5766 
5767 // Load Integer with mask 0xFF into Long Register
5768 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5769   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5770   effect(KILL cr);
5771 
5772   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5773             "XOR    $dst.hi,$dst.hi" %}
5774   ins_encode %{
5775     Register Rdst = $dst$$Register;
5776     __ movzbl(Rdst, $mem$$Address);
5777     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5778   %}
5779   ins_pipe(ialu_reg_mem);
5780 %}
5781 
5782 // Load Integer with mask 0xFFFF into Long Register
5783 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5784   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5785   effect(KILL cr);
5786 
5787   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5788             "XOR    $dst.hi,$dst.hi" %}
5789   ins_encode %{
5790     Register Rdst = $dst$$Register;
5791     __ movzwl(Rdst, $mem$$Address);
5792     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5793   %}
5794   ins_pipe(ialu_reg_mem);
5795 %}
5796 
5797 // Load Integer with 31-bit mask into Long Register
5798 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5799   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5800   effect(KILL cr);
5801 
5802   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5803             "XOR    $dst.hi,$dst.hi\n\t"
5804             "AND    $dst.lo,$mask" %}
5805   ins_encode %{
5806     Register Rdst = $dst$$Register;
5807     __ movl(Rdst, $mem$$Address);
5808     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5809     __ andl(Rdst, $mask$$constant);
5810   %}
5811   ins_pipe(ialu_reg_mem);
5812 %}
5813 
5814 // Load Unsigned Integer into Long Register
5815 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5816   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5817   effect(KILL cr);
5818 
5819   ins_cost(250);
5820   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5821             "XOR    $dst.hi,$dst.hi" %}
5822 
5823   ins_encode %{
5824     __ movl($dst$$Register, $mem$$Address);
5825     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5826   %}
5827 
5828   ins_pipe(ialu_reg_mem);
5829 %}
5830 
5831 // Load Long.  Cannot clobber address while loading, so restrict address
5832 // register to ESI
5833 instruct loadL(eRegL dst, load_long_memory mem) %{
5834   predicate(!((LoadLNode*)n)->require_atomic_access());
5835   match(Set dst (LoadL mem));
5836 
5837   ins_cost(250);
5838   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5839             "MOV    $dst.hi,$mem+4" %}
5840 
5841   ins_encode %{
5842     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5843     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5844     __ movl($dst$$Register, Amemlo);
5845     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5846   %}
5847 
5848   ins_pipe(ialu_reg_long_mem);
5849 %}
5850 
5851 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5852 // then store it down to the stack and reload on the int
5853 // side.
5854 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5855   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5856   match(Set dst (LoadL mem));
5857 
5858   ins_cost(200);
5859   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5860             "FISTp  $dst" %}
5861   ins_encode(enc_loadL_volatile(mem,dst));
5862   ins_pipe( fpu_reg_mem );
5863 %}
5864 
5865 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5866   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5867   match(Set dst (LoadL mem));
5868   effect(TEMP tmp);
5869   ins_cost(180);
5870   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5871             "MOVSD  $dst,$tmp" %}
5872   ins_encode %{
5873     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5874     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5875   %}
5876   ins_pipe( pipe_slow );
5877 %}
5878 
5879 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5880   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5881   match(Set dst (LoadL mem));
5882   effect(TEMP tmp);
5883   ins_cost(160);
5884   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5885             "MOVD   $dst.lo,$tmp\n\t"
5886             "PSRLQ  $tmp,32\n\t"
5887             "MOVD   $dst.hi,$tmp" %}
5888   ins_encode %{
5889     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5890     __ movdl($dst$$Register, $tmp$$XMMRegister);
5891     __ psrlq($tmp$$XMMRegister, 32);
5892     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5893   %}
5894   ins_pipe( pipe_slow );
5895 %}
5896 
5897 // Load Range
5898 instruct loadRange(rRegI dst, memory mem) %{
5899   match(Set dst (LoadRange mem));
5900 
5901   ins_cost(125);
5902   format %{ "MOV    $dst,$mem" %}
5903   opcode(0x8B);
5904   ins_encode( OpcP, RegMem(dst,mem));
5905   ins_pipe( ialu_reg_mem );
5906 %}
5907 
5908 
5909 // Load Pointer
5910 instruct loadP(eRegP dst, memory mem) %{
5911   match(Set dst (LoadP mem));
5912 
5913   ins_cost(125);
5914   format %{ "MOV    $dst,$mem" %}
5915   opcode(0x8B);
5916   ins_encode( OpcP, RegMem(dst,mem));
5917   ins_pipe( ialu_reg_mem );
5918 %}
5919 
5920 // Load Klass Pointer
5921 instruct loadKlass(eRegP dst, memory mem) %{
5922   match(Set dst (LoadKlass mem));
5923 
5924   ins_cost(125);
5925   format %{ "MOV    $dst,$mem" %}
5926   opcode(0x8B);
5927   ins_encode( OpcP, RegMem(dst,mem));
5928   ins_pipe( ialu_reg_mem );
5929 %}
5930 
5931 // Load Double
5932 instruct loadDPR(regDPR dst, memory mem) %{
5933   predicate(UseSSE<=1);
5934   match(Set dst (LoadD mem));
5935 
5936   ins_cost(150);
5937   format %{ "FLD_D  ST,$mem\n\t"
5938             "FSTP   $dst" %}
5939   opcode(0xDD);               /* DD /0 */
5940   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5941               Pop_Reg_DPR(dst) );
5942   ins_pipe( fpu_reg_mem );
5943 %}
5944 
5945 // Load Double to XMM
5946 instruct loadD(regD dst, memory mem) %{
5947   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5948   match(Set dst (LoadD mem));
5949   ins_cost(145);
5950   format %{ "MOVSD  $dst,$mem" %}
5951   ins_encode %{
5952     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5953   %}
5954   ins_pipe( pipe_slow );
5955 %}
5956 
5957 instruct loadD_partial(regD dst, memory mem) %{
5958   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5959   match(Set dst (LoadD mem));
5960   ins_cost(145);
5961   format %{ "MOVLPD $dst,$mem" %}
5962   ins_encode %{
5963     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5964   %}
5965   ins_pipe( pipe_slow );
5966 %}
5967 
5968 // Load to XMM register (single-precision floating point)
5969 // MOVSS instruction
5970 instruct loadF(regF dst, memory mem) %{
5971   predicate(UseSSE>=1);
5972   match(Set dst (LoadF mem));
5973   ins_cost(145);
5974   format %{ "MOVSS  $dst,$mem" %}
5975   ins_encode %{
5976     __ movflt ($dst$$XMMRegister, $mem$$Address);
5977   %}
5978   ins_pipe( pipe_slow );
5979 %}
5980 
5981 // Load Float
5982 instruct loadFPR(regFPR dst, memory mem) %{
5983   predicate(UseSSE==0);
5984   match(Set dst (LoadF mem));
5985 
5986   ins_cost(150);
5987   format %{ "FLD_S  ST,$mem\n\t"
5988             "FSTP   $dst" %}
5989   opcode(0xD9);               /* D9 /0 */
5990   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5991               Pop_Reg_FPR(dst) );
5992   ins_pipe( fpu_reg_mem );
5993 %}
5994 
5995 // Load Effective Address
5996 instruct leaP8(eRegP dst, indOffset8 mem) %{
5997   match(Set dst mem);
5998 
5999   ins_cost(110);
6000   format %{ "LEA    $dst,$mem" %}
6001   opcode(0x8D);
6002   ins_encode( OpcP, RegMem(dst,mem));
6003   ins_pipe( ialu_reg_reg_fat );
6004 %}
6005 
6006 instruct leaP32(eRegP dst, indOffset32 mem) %{
6007   match(Set dst mem);
6008 
6009   ins_cost(110);
6010   format %{ "LEA    $dst,$mem" %}
6011   opcode(0x8D);
6012   ins_encode( OpcP, RegMem(dst,mem));
6013   ins_pipe( ialu_reg_reg_fat );
6014 %}
6015 
6016 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
6017   match(Set dst mem);
6018 
6019   ins_cost(110);
6020   format %{ "LEA    $dst,$mem" %}
6021   opcode(0x8D);
6022   ins_encode( OpcP, RegMem(dst,mem));
6023   ins_pipe( ialu_reg_reg_fat );
6024 %}
6025 
6026 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
6027   match(Set dst mem);
6028 
6029   ins_cost(110);
6030   format %{ "LEA    $dst,$mem" %}
6031   opcode(0x8D);
6032   ins_encode( OpcP, RegMem(dst,mem));
6033   ins_pipe( ialu_reg_reg_fat );
6034 %}
6035 
6036 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
6037   match(Set dst mem);
6038 
6039   ins_cost(110);
6040   format %{ "LEA    $dst,$mem" %}
6041   opcode(0x8D);
6042   ins_encode( OpcP, RegMem(dst,mem));
6043   ins_pipe( ialu_reg_reg_fat );
6044 %}
6045 
6046 // Load Constant
6047 instruct loadConI(rRegI dst, immI src) %{
6048   match(Set dst src);
6049 
6050   format %{ "MOV    $dst,$src" %}
6051   ins_encode( LdImmI(dst, src) );
6052   ins_pipe( ialu_reg_fat );
6053 %}
6054 
6055 // Load Constant zero
6056 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
6057   match(Set dst src);
6058   effect(KILL cr);
6059 
6060   ins_cost(50);
6061   format %{ "XOR    $dst,$dst" %}
6062   opcode(0x33);  /* + rd */
6063   ins_encode( OpcP, RegReg( dst, dst ) );
6064   ins_pipe( ialu_reg );
6065 %}
6066 
6067 instruct loadConP(eRegP dst, immP src) %{
6068   match(Set dst src);
6069 
6070   format %{ "MOV    $dst,$src" %}
6071   opcode(0xB8);  /* + rd */
6072   ins_encode( LdImmP(dst, src) );
6073   ins_pipe( ialu_reg_fat );
6074 %}
6075 
6076 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
6077   match(Set dst src);
6078   effect(KILL cr);
6079   ins_cost(200);
6080   format %{ "MOV    $dst.lo,$src.lo\n\t"
6081             "MOV    $dst.hi,$src.hi" %}
6082   opcode(0xB8);
6083   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6084   ins_pipe( ialu_reg_long_fat );
6085 %}
6086 
6087 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6088   match(Set dst src);
6089   effect(KILL cr);
6090   ins_cost(150);
6091   format %{ "XOR    $dst.lo,$dst.lo\n\t"
6092             "XOR    $dst.hi,$dst.hi" %}
6093   opcode(0x33,0x33);
6094   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6095   ins_pipe( ialu_reg_long );
6096 %}
6097 
6098 // The instruction usage is guarded by predicate in operand immFPR().
6099 instruct loadConFPR(regFPR dst, immFPR con) %{
6100   match(Set dst con);
6101   ins_cost(125);
6102   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6103             "FSTP   $dst" %}
6104   ins_encode %{
6105     __ fld_s($constantaddress($con));
6106     __ fstp_d($dst$$reg);
6107   %}
6108   ins_pipe(fpu_reg_con);
6109 %}
6110 
6111 // The instruction usage is guarded by predicate in operand immFPR0().
6112 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6113   match(Set dst con);
6114   ins_cost(125);
6115   format %{ "FLDZ   ST\n\t"
6116             "FSTP   $dst" %}
6117   ins_encode %{
6118     __ fldz();
6119     __ fstp_d($dst$$reg);
6120   %}
6121   ins_pipe(fpu_reg_con);
6122 %}
6123 
6124 // The instruction usage is guarded by predicate in operand immFPR1().
6125 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6126   match(Set dst con);
6127   ins_cost(125);
6128   format %{ "FLD1   ST\n\t"
6129             "FSTP   $dst" %}
6130   ins_encode %{
6131     __ fld1();
6132     __ fstp_d($dst$$reg);
6133   %}
6134   ins_pipe(fpu_reg_con);
6135 %}
6136 
6137 // The instruction usage is guarded by predicate in operand immF().
6138 instruct loadConF(regF dst, immF con) %{
6139   match(Set dst con);
6140   ins_cost(125);
6141   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6142   ins_encode %{
6143     __ movflt($dst$$XMMRegister, $constantaddress($con));
6144   %}
6145   ins_pipe(pipe_slow);
6146 %}
6147 
6148 // The instruction usage is guarded by predicate in operand immF0().
6149 instruct loadConF0(regF dst, immF0 src) %{
6150   match(Set dst src);
6151   ins_cost(100);
6152   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6153   ins_encode %{
6154     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6155   %}
6156   ins_pipe(pipe_slow);
6157 %}
6158 
6159 // The instruction usage is guarded by predicate in operand immDPR().
6160 instruct loadConDPR(regDPR dst, immDPR con) %{
6161   match(Set dst con);
6162   ins_cost(125);
6163 
6164   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6165             "FSTP   $dst" %}
6166   ins_encode %{
6167     __ fld_d($constantaddress($con));
6168     __ fstp_d($dst$$reg);
6169   %}
6170   ins_pipe(fpu_reg_con);
6171 %}
6172 
6173 // The instruction usage is guarded by predicate in operand immDPR0().
6174 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6175   match(Set dst con);
6176   ins_cost(125);
6177 
6178   format %{ "FLDZ   ST\n\t"
6179             "FSTP   $dst" %}
6180   ins_encode %{
6181     __ fldz();
6182     __ fstp_d($dst$$reg);
6183   %}
6184   ins_pipe(fpu_reg_con);
6185 %}
6186 
6187 // The instruction usage is guarded by predicate in operand immDPR1().
6188 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6189   match(Set dst con);
6190   ins_cost(125);
6191 
6192   format %{ "FLD1   ST\n\t"
6193             "FSTP   $dst" %}
6194   ins_encode %{
6195     __ fld1();
6196     __ fstp_d($dst$$reg);
6197   %}
6198   ins_pipe(fpu_reg_con);
6199 %}
6200 
6201 // The instruction usage is guarded by predicate in operand immD().
6202 instruct loadConD(regD dst, immD con) %{
6203   match(Set dst con);
6204   ins_cost(125);
6205   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6206   ins_encode %{
6207     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6208   %}
6209   ins_pipe(pipe_slow);
6210 %}
6211 
6212 // The instruction usage is guarded by predicate in operand immD0().
6213 instruct loadConD0(regD dst, immD0 src) %{
6214   match(Set dst src);
6215   ins_cost(100);
6216   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6217   ins_encode %{
6218     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6219   %}
6220   ins_pipe( pipe_slow );
6221 %}
6222 
6223 // Load Stack Slot
6224 instruct loadSSI(rRegI dst, stackSlotI src) %{
6225   match(Set dst src);
6226   ins_cost(125);
6227 
6228   format %{ "MOV    $dst,$src" %}
6229   opcode(0x8B);
6230   ins_encode( OpcP, RegMem(dst,src));
6231   ins_pipe( ialu_reg_mem );
6232 %}
6233 
6234 instruct loadSSL(eRegL dst, stackSlotL src) %{
6235   match(Set dst src);
6236 
6237   ins_cost(200);
6238   format %{ "MOV    $dst,$src.lo\n\t"
6239             "MOV    $dst+4,$src.hi" %}
6240   opcode(0x8B, 0x8B);
6241   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6242   ins_pipe( ialu_mem_long_reg );
6243 %}
6244 
6245 // Load Stack Slot
6246 instruct loadSSP(eRegP dst, stackSlotP src) %{
6247   match(Set dst src);
6248   ins_cost(125);
6249 
6250   format %{ "MOV    $dst,$src" %}
6251   opcode(0x8B);
6252   ins_encode( OpcP, RegMem(dst,src));
6253   ins_pipe( ialu_reg_mem );
6254 %}
6255 
6256 // Load Stack Slot
6257 instruct loadSSF(regFPR dst, stackSlotF src) %{
6258   match(Set dst src);
6259   ins_cost(125);
6260 
6261   format %{ "FLD_S  $src\n\t"
6262             "FSTP   $dst" %}
6263   opcode(0xD9);               /* D9 /0, FLD m32real */
6264   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6265               Pop_Reg_FPR(dst) );
6266   ins_pipe( fpu_reg_mem );
6267 %}
6268 
6269 // Load Stack Slot
6270 instruct loadSSD(regDPR dst, stackSlotD src) %{
6271   match(Set dst src);
6272   ins_cost(125);
6273 
6274   format %{ "FLD_D  $src\n\t"
6275             "FSTP   $dst" %}
6276   opcode(0xDD);               /* DD /0, FLD m64real */
6277   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6278               Pop_Reg_DPR(dst) );
6279   ins_pipe( fpu_reg_mem );
6280 %}
6281 
6282 // Prefetch instructions for allocation.
6283 // Must be safe to execute with invalid address (cannot fault).
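// The flag AllocatePrefetchInstr selects the flavor (see the predicates
// below): 0 = PREFETCHNTA, 1 = PREFETCHT0, 2 = PREFETCHT2, 3 = PREFETCHW.
// With UseSSE==0 and AllocatePrefetchInstr!=3 no prefetch instruction is
// available, so the encoding is empty.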
6284 
6285 instruct prefetchAlloc0( memory mem ) %{
6286   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6287   match(PrefetchAllocation mem);
6288   ins_cost(0);
6289   size(0);
6290   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6291   ins_encode();
6292   ins_pipe(empty);
6293 %}
6294 
6295 instruct prefetchAlloc( memory mem ) %{
6296   predicate(AllocatePrefetchInstr==3);
6297   match( PrefetchAllocation mem );
6298   ins_cost(100);
6299 
6300   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6301   ins_encode %{
6302     __ prefetchw($mem$$Address);
6303   %}
6304   ins_pipe(ialu_mem);
6305 %}
6306 
6307 instruct prefetchAllocNTA( memory mem ) %{
6308   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6309   match(PrefetchAllocation mem);
6310   ins_cost(100);
6311 
6312   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6313   ins_encode %{
6314     __ prefetchnta($mem$$Address);
6315   %}
6316   ins_pipe(ialu_mem);
6317 %}
6318 
6319 instruct prefetchAllocT0( memory mem ) %{
6320   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6321   match(PrefetchAllocation mem);
6322   ins_cost(100);
6323 
6324   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6325   ins_encode %{
6326     __ prefetcht0($mem$$Address);
6327   %}
6328   ins_pipe(ialu_mem);
6329 %}
6330 
6331 instruct prefetchAllocT2( memory mem ) %{
6332   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6333   match(PrefetchAllocation mem);
6334   ins_cost(100);
6335 
6336   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6337   ins_encode %{
6338     __ prefetcht2($mem$$Address);
6339   %}
6340   ins_pipe(ialu_mem);
6341 %}
6342 
6343 //----------Store Instructions-------------------------------------------------
6344 
6345 // Store Byte
6346 instruct storeB(memory mem, xRegI src) %{
6347   match(Set mem (StoreB mem src));
6348 
6349   ins_cost(125);
6350   format %{ "MOV8   $mem,$src" %}
6351   opcode(0x88);
6352   ins_encode( OpcP, RegMem( src, mem ) );
6353   ins_pipe( ialu_mem_reg );
6354 %}
6355 
6356 // Store Char/Short
6357 instruct storeC(memory mem, rRegI src) %{
6358   match(Set mem (StoreC mem src));
6359 
6360   ins_cost(125);
6361   format %{ "MOV16  $mem,$src" %}
6362   opcode(0x89, 0x66);
6363   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6364   ins_pipe( ialu_mem_reg );
6365 %}
6366 
6367 // Store Integer
6368 instruct storeI(memory mem, rRegI src) %{
6369   match(Set mem (StoreI mem src));
6370 
6371   ins_cost(125);
6372   format %{ "MOV    $mem,$src" %}
6373   opcode(0x89);
6374   ins_encode( OpcP, RegMem( src, mem ) );
6375   ins_pipe( ialu_mem_reg );
6376 %}
6377 
6378 // Store Long
6379 instruct storeL(long_memory mem, eRegL src) %{
6380   predicate(!((StoreLNode*)n)->require_atomic_access());
6381   match(Set mem (StoreL mem src));
6382 
6383   ins_cost(200);
6384   format %{ "MOV    $mem,$src.lo\n\t"
6385             "MOV    $mem+4,$src.hi" %}
6386   opcode(0x89, 0x89);
6387   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6388   ins_pipe( ialu_mem_long_reg );
6389 %}
6390 
6391 // Store Long to Integer
6392 instruct storeL2I(memory mem, eRegL src) %{
6393   match(Set mem (StoreI mem (ConvL2I src)));
6394 
6395   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6396   ins_encode %{
6397     __ movl($mem$$Address, $src$$Register);
6398   %}
6399   ins_pipe(ialu_mem_reg);
6400 %}
6401 
6402 // Volatile Store Long.  Must be atomic, so move it into
6403 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6404 // target address before the store (for null-ptr checks)
6405 // so the memory operand is used twice in the encoding.
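// The FILD/FISTp pair moves the 64-bit value through the x87 stack; a
// naturally aligned 64-bit x87 memory access is atomic on P5 and later,
// which is what makes the store atomic without a lock prefix.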
6406 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6407   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6408   match(Set mem (StoreL mem src));
6409   effect( KILL cr );
6410   ins_cost(400);
6411   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6412             "FILD   $src\n\t"
6413             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6414   opcode(0x3B);
6415   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6416   ins_pipe( fpu_reg_mem );
6417 %}
6418 
6419 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6420   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6421   match(Set mem (StoreL mem src));
6422   effect( TEMP tmp, KILL cr );
6423   ins_cost(380);
6424   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6425             "MOVSD  $tmp,$src\n\t"
6426             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6427   ins_encode %{
6428     __ cmpl(rax, $mem$$Address);
6429     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6430     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6431   %}
6432   ins_pipe( pipe_slow );
6433 %}
6434 
6435 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6436   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6437   match(Set mem (StoreL mem src));
6438   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6439   ins_cost(360);
6440   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6441             "MOVD   $tmp,$src.lo\n\t"
6442             "MOVD   $tmp2,$src.hi\n\t"
6443             "PUNPCKLDQ $tmp,$tmp2\n\t"
6444             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6445   ins_encode %{
6446     __ cmpl(rax, $mem$$Address);
6447     __ movdl($tmp$$XMMRegister, $src$$Register);
6448     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6449     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6450     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6451   %}
6452   ins_pipe( pipe_slow );
6453 %}
6454 
6455 // Store Pointer; for storing unknown oops and raw pointers
6456 instruct storeP(memory mem, anyRegP src) %{
6457   match(Set mem (StoreP mem src));
6458 
6459   ins_cost(125);
6460   format %{ "MOV    $mem,$src" %}
6461   opcode(0x89);
6462   ins_encode( OpcP, RegMem( src, mem ) );
6463   ins_pipe( ialu_mem_reg );
6464 %}
6465 
6466 // Store Integer Immediate
6467 instruct storeImmI(memory mem, immI src) %{
6468   match(Set mem (StoreI mem src));
6469 
6470   ins_cost(150);
6471   format %{ "MOV    $mem,$src" %}
6472   opcode(0xC7);               /* C7 /0 */
6473   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6474   ins_pipe( ialu_mem_imm );
6475 %}
6476 
6477 // Store Short/Char Immediate
6478 instruct storeImmI16(memory mem, immI16 src) %{
6479   predicate(UseStoreImmI16);
6480   match(Set mem (StoreC mem src));
6481 
6482   ins_cost(150);
6483   format %{ "MOV16  $mem,$src" %}
6484   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6485   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6486   ins_pipe( ialu_mem_imm );
6487 %}
6488 
6489 // Store Pointer Immediate; null pointers or constant oops that do not
6490 // need card-mark barriers.
6491 instruct storeImmP(memory mem, immP src) %{
6492   match(Set mem (StoreP mem src));
6493 
6494   ins_cost(150);
6495   format %{ "MOV    $mem,$src" %}
6496   opcode(0xC7);               /* C7 /0 */
6497   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6498   ins_pipe( ialu_mem_imm );
6499 %}
6500 
6501 // Store Byte Immediate
6502 instruct storeImmB(memory mem, immI8 src) %{
6503   match(Set mem (StoreB mem src));
6504 
6505   ins_cost(150);
6506   format %{ "MOV8   $mem,$src" %}
6507   opcode(0xC6);               /* C6 /0 */
6508   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6509   ins_pipe( ialu_mem_imm );
6510 %}
6511 
6512 // Store CMS card-mark Immediate
6513 instruct storeImmCM(memory mem, immI8 src) %{
6514   match(Set mem (StoreCM mem src));
6515 
6516   ins_cost(150);
6517   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6518   opcode(0xC6);               /* C6 /0 */
6519   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6520   ins_pipe( ialu_mem_imm );
6521 %}
6522 
6523 // Store Double
6524 instruct storeDPR( memory mem, regDPR1 src) %{
6525   predicate(UseSSE<=1);
6526   match(Set mem (StoreD mem src));
6527 
6528   ins_cost(100);
6529   format %{ "FST_D  $mem,$src" %}
6530   opcode(0xDD);       /* DD /2 */
6531   ins_encode( enc_FPR_store(mem,src) );
6532   ins_pipe( fpu_mem_reg );
6533 %}
6534 
6535 // Store double does rounding on x86
6536 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6537   predicate(UseSSE<=1);
6538   match(Set mem (StoreD mem (RoundDouble src)));
6539 
6540   ins_cost(100);
6541   format %{ "FST_D  $mem,$src\t# round" %}
6542   opcode(0xDD);       /* DD /2 */
6543   ins_encode( enc_FPR_store(mem,src) );
6544   ins_pipe( fpu_mem_reg );
6545 %}
6546 
6547 // Store XMM register to memory (double-precision floating points)
6548 // MOVSD instruction
6549 instruct storeD(memory mem, regD src) %{
6550   predicate(UseSSE>=2);
6551   match(Set mem (StoreD mem src));
6552   ins_cost(95);
6553   format %{ "MOVSD  $mem,$src" %}
6554   ins_encode %{
6555     __ movdbl($mem$$Address, $src$$XMMRegister);
6556   %}
6557   ins_pipe( pipe_slow );
6558 %}
6559 
6560 // Load Double
6561 instruct MoveD2VL(vlRegD dst, regD src) %{
6562   match(Set dst src);
6563   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6564   ins_encode %{
6565     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6566   %}
6567   ins_pipe( fpu_reg_reg );
6568 %}
6569 
6570 // Load Double
6571 instruct MoveVL2D(regD dst, vlRegD src) %{
6572   match(Set dst src);
6573   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6574   ins_encode %{
6575     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6576   %}
6577   ins_pipe( fpu_reg_reg );
6578 %}
6579 
6580 // Store XMM register to memory (single-precision floating point)
6581 // MOVSS instruction
6582 instruct storeF(memory mem, regF src) %{
6583   predicate(UseSSE>=1);
6584   match(Set mem (StoreF mem src));
6585   ins_cost(95);
6586   format %{ "MOVSS  $mem,$src" %}
6587   ins_encode %{
6588     __ movflt($mem$$Address, $src$$XMMRegister);
6589   %}
6590   ins_pipe( pipe_slow );
6591 %}
6592 
6593 // Load Float
6594 instruct MoveF2VL(vlRegF dst, regF src) %{
6595   match(Set dst src);
6596   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6597   ins_encode %{
6598     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6599   %}
6600   ins_pipe( fpu_reg_reg );
6601 %}
6602 
6603 // Load Float
6604 instruct MoveVL2F(regF dst, vlRegF src) %{
6605   match(Set dst src);
6606   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6607   ins_encode %{
6608     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6609   %}
6610   ins_pipe( fpu_reg_reg );
6611 %}
6612 
6613 // Store Float
6614 instruct storeFPR( memory mem, regFPR1 src) %{
6615   predicate(UseSSE==0);
6616   match(Set mem (StoreF mem src));
6617 
6618   ins_cost(100);
6619   format %{ "FST_S  $mem,$src" %}
6620   opcode(0xD9);       /* D9 /2 */
6621   ins_encode( enc_FPR_store(mem,src) );
6622   ins_pipe( fpu_mem_reg );
6623 %}
6624 
6625 // Store Float does rounding on x86
6626 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6627   predicate(UseSSE==0);
6628   match(Set mem (StoreF mem (RoundFloat src)));
6629 
6630   ins_cost(100);
6631   format %{ "FST_S  $mem,$src\t# round" %}
6632   opcode(0xD9);       /* D9 /2 */
6633   ins_encode( enc_FPR_store(mem,src) );
6634   ins_pipe( fpu_mem_reg );
6635 %}
6636 
// Store Float converted from a double (ConvD2F) does rounding on x86
6638 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6639   predicate(UseSSE<=1);
6640   match(Set mem (StoreF mem (ConvD2F src)));
6641 
6642   ins_cost(100);
6643   format %{ "FST_S  $mem,$src\t# D-round" %}
6644   opcode(0xD9);       /* D9 /2 */
6645   ins_encode( enc_FPR_store(mem,src) );
6646   ins_pipe( fpu_mem_reg );
6647 %}
6648 
6649 // Store immediate Float value (it is faster than store from FPU register)
6650 // The instruction usage is guarded by predicate in operand immFPR().
6651 instruct storeFPR_imm( memory mem, immFPR src) %{
6652   match(Set mem (StoreF mem src));
6653 
6654   ins_cost(50);
6655   format %{ "MOV    $mem,$src\t# store float" %}
6656   opcode(0xC7);               /* C7 /0 */
6657   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6658   ins_pipe( ialu_mem_imm );
6659 %}
6660 
6661 // Store immediate Float value (it is faster than store from XMM register)
6662 // The instruction usage is guarded by predicate in operand immF().
6663 instruct storeF_imm( memory mem, immF src) %{
6664   match(Set mem (StoreF mem src));
6665 
6666   ins_cost(50);
6667   format %{ "MOV    $mem,$src\t# store float" %}
6668   opcode(0xC7);               /* C7 /0 */
6669   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6670   ins_pipe( ialu_mem_imm );
6671 %}
6672 
6673 // Store Integer to stack slot
6674 instruct storeSSI(stackSlotI dst, rRegI src) %{
6675   match(Set dst src);
6676 
6677   ins_cost(100);
6678   format %{ "MOV    $dst,$src" %}
6679   opcode(0x89);
6680   ins_encode( OpcPRegSS( dst, src ) );
6681   ins_pipe( ialu_mem_reg );
6682 %}
6683 
6684 // Store Integer to stack slot
6685 instruct storeSSP(stackSlotP dst, eRegP src) %{
6686   match(Set dst src);
6687 
6688   ins_cost(100);
6689   format %{ "MOV    $dst,$src" %}
6690   opcode(0x89);
6691   ins_encode( OpcPRegSS( dst, src ) );
6692   ins_pipe( ialu_mem_reg );
6693 %}
6694 
6695 // Store Long to stack slot
6696 instruct storeSSL(stackSlotL dst, eRegL src) %{
6697   match(Set dst src);
6698 
6699   ins_cost(200);
6700   format %{ "MOV    $dst,$src.lo\n\t"
6701             "MOV    $dst+4,$src.hi" %}
6702   opcode(0x89, 0x89);
6703   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6704   ins_pipe( ialu_mem_long_reg );
6705 %}
6706 
6707 //----------MemBar Instructions-----------------------------------------------
6708 // Memory barrier flavors
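// On IA-32 plain loads already have acquire semantics and plain stores
// already have release semantics (only the store->load order may be
// relaxed by the hardware), so the acquire/release barriers below emit
// nothing and only the StoreLoad barrier needs code.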
6709 
6710 instruct membar_acquire() %{
6711   match(MemBarAcquire);
6712   match(LoadFence);
6713   ins_cost(400);
6714 
6715   size(0);
6716   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6717   ins_encode();
6718   ins_pipe(empty);
6719 %}
6720 
6721 instruct membar_acquire_lock() %{
6722   match(MemBarAcquireLock);
6723   ins_cost(0);
6724 
6725   size(0);
6726   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6727   ins_encode( );
6728   ins_pipe(empty);
6729 %}
6730 
6731 instruct membar_release() %{
6732   match(MemBarRelease);
6733   match(StoreFence);
6734   ins_cost(400);
6735 
6736   size(0);
6737   format %{ "MEMBAR-release ! (empty encoding)" %}
6738   ins_encode( );
6739   ins_pipe(empty);
6740 %}
6741 
6742 instruct membar_release_lock() %{
6743   match(MemBarReleaseLock);
6744   ins_cost(0);
6745 
6746   size(0);
6747   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6748   ins_encode( );
6749   ins_pipe(empty);
6750 %}
6751 
6752 instruct membar_volatile(eFlagsReg cr) %{
6753   match(MemBarVolatile);
6754   effect(KILL cr);
6755   ins_cost(400);
6756 
6757   format %{
6758     $$template
6759     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6760   %}
6761   ins_encode %{
6762     __ membar(Assembler::StoreLoad);
6763   %}
6764   ins_pipe(pipe_slow);
6765 %}
6766 
6767 instruct unnecessary_membar_volatile() %{
6768   match(MemBarVolatile);
6769   predicate(Matcher::post_store_load_barrier(n));
6770   ins_cost(0);
6771 
6772   size(0);
6773   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6774   ins_encode( );
6775   ins_pipe(empty);
6776 %}
6777 
6778 instruct membar_storestore() %{
6779   match(MemBarStoreStore);
6780   ins_cost(0);
6781 
6782   size(0);
6783   format %{ "MEMBAR-storestore (empty encoding)" %}
6784   ins_encode( );
6785   ins_pipe(empty);
6786 %}
6787 
6788 //----------Move Instructions--------------------------------------------------
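// CastX2P constrains both operands to EAX, so the value is already in the
// right register and the cast emits no code.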
6789 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6790   match(Set dst (CastX2P src));
6791   format %{ "# X2P  $dst, $src" %}
6792   ins_encode( /*empty encoding*/ );
6793   ins_cost(0);
6794   ins_pipe(empty);
6795 %}
6796 
6797 instruct castP2X(rRegI dst, eRegP src ) %{
6798   match(Set dst (CastP2X src));
6799   ins_cost(50);
6800   format %{ "MOV    $dst, $src\t# CastP2X" %}
6801   ins_encode( enc_Copy( dst, src) );
6802   ins_pipe( ialu_reg_reg );
6803 %}
6804 
6805 //----------Conditional Move---------------------------------------------------
6806 // Conditional move
6807 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6808   predicate(!VM_Version::supports_cmov() );
6809   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6810   ins_cost(200);
6811   format %{ "J$cop,us skip\t# signed cmove\n\t"
6812             "MOV    $dst,$src\n"
6813       "skip:" %}
6814   ins_encode %{
6815     Label Lskip;
6816     // Invert sense of branch from sense of CMOV
6817     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6818     __ movl($dst$$Register, $src$$Register);
6819     __ bind(Lskip);
6820   %}
6821   ins_pipe( pipe_cmov_reg );
6822 %}
6823 
6824 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6825   predicate(!VM_Version::supports_cmov() );
6826   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6827   ins_cost(200);
6828   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6829             "MOV    $dst,$src\n"
6830       "skip:" %}
6831   ins_encode %{
6832     Label Lskip;
6833     // Invert sense of branch from sense of CMOV
6834     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6835     __ movl($dst$$Register, $src$$Register);
6836     __ bind(Lskip);
6837   %}
6838   ins_pipe( pipe_cmov_reg );
6839 %}
6840 
6841 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6842   predicate(VM_Version::supports_cmov() );
6843   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6844   ins_cost(200);
6845   format %{ "CMOV$cop $dst,$src" %}
6846   opcode(0x0F,0x40);
6847   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6848   ins_pipe( pipe_cmov_reg );
6849 %}
6850 
6851 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6852   predicate(VM_Version::supports_cmov() );
6853   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6854   ins_cost(200);
6855   format %{ "CMOV$cop $dst,$src" %}
6856   opcode(0x0F,0x40);
6857   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6858   ins_pipe( pipe_cmov_reg );
6859 %}
6860 
6861 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6862   predicate(VM_Version::supports_cmov() );
6863   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6864   ins_cost(200);
6865   expand %{
6866     cmovI_regU(cop, cr, dst, src);
6867   %}
6868 %}
6869 
6870 // Conditional move
6871 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6872   predicate(VM_Version::supports_cmov() );
6873   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6874   ins_cost(250);
6875   format %{ "CMOV$cop $dst,$src" %}
6876   opcode(0x0F,0x40);
6877   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6878   ins_pipe( pipe_cmov_mem );
6879 %}
6880 
6881 // Conditional move
6882 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6883   predicate(VM_Version::supports_cmov() );
6884   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6885   ins_cost(250);
6886   format %{ "CMOV$cop $dst,$src" %}
6887   opcode(0x0F,0x40);
6888   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6889   ins_pipe( pipe_cmov_mem );
6890 %}
6891 
6892 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6893   predicate(VM_Version::supports_cmov() );
6894   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6895   ins_cost(250);
6896   expand %{
6897     cmovI_memU(cop, cr, dst, src);
6898   %}
6899 %}
6900 
6901 // Conditional move
6902 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6903   predicate(VM_Version::supports_cmov() );
6904   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6905   ins_cost(200);
6906   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6907   opcode(0x0F,0x40);
6908   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6909   ins_pipe( pipe_cmov_reg );
6910 %}
6911 
6912 // Conditional move (non-P6 version)
// Note:  a CMoveP is generated for stubs and native wrappers
6914 //        regardless of whether we are on a P6, so we
6915 //        emulate a cmov here
6916 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6917   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6918   ins_cost(300);
6919   format %{ "Jn$cop   skip\n\t"
6920           "MOV    $dst,$src\t# pointer\n"
6921       "skip:" %}
6922   opcode(0x8b);
6923   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6924   ins_pipe( pipe_cmov_reg );
6925 %}
6926 
6927 // Conditional move
6928 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6929   predicate(VM_Version::supports_cmov() );
6930   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6931   ins_cost(200);
6932   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6933   opcode(0x0F,0x40);
6934   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6935   ins_pipe( pipe_cmov_reg );
6936 %}
6937 
6938 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6939   predicate(VM_Version::supports_cmov() );
6940   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6941   ins_cost(200);
6942   expand %{
6943     cmovP_regU(cop, cr, dst, src);
6944   %}
6945 %}
6946 
6947 // DISABLED: Requires the ADLC to emit a bottom_type call that
6948 // correctly meets the two pointer arguments; one is an incoming
6949 // register but the other is a memory operand.  ALSO appears to
6950 // be buggy with implicit null checks.
6951 //
6952 //// Conditional move
6953 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6954 //  predicate(VM_Version::supports_cmov() );
6955 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6956 //  ins_cost(250);
6957 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6958 //  opcode(0x0F,0x40);
6959 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6960 //  ins_pipe( pipe_cmov_mem );
6961 //%}
6962 //
6963 //// Conditional move
6964 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6965 //  predicate(VM_Version::supports_cmov() );
6966 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6967 //  ins_cost(250);
6968 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6969 //  opcode(0x0F,0x40);
6970 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6971 //  ins_pipe( pipe_cmov_mem );
6972 //%}
6973 
6974 // Conditional move
6975 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6976   predicate(UseSSE<=1);
6977   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6978   ins_cost(200);
6979   format %{ "FCMOV$cop $dst,$src\t# double" %}
6980   opcode(0xDA);
6981   ins_encode( enc_cmov_dpr(cop,src) );
6982   ins_pipe( pipe_cmovDPR_reg );
6983 %}
6984 
6985 // Conditional move
6986 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6987   predicate(UseSSE==0);
6988   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6989   ins_cost(200);
6990   format %{ "FCMOV$cop $dst,$src\t# float" %}
6991   opcode(0xDA);
6992   ins_encode( enc_cmov_dpr(cop,src) );
6993   ins_pipe( pipe_cmovDPR_reg );
6994 %}
6995 
6996 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
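// (FCMOVcc exists only for the B/E/BE/U conditions and their negations,
// i.e. the flag patterns produced by unsigned compares, so a signed
// condition has to be emulated with a branch around a plain move.)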
6997 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6998   predicate(UseSSE<=1);
6999   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7000   ins_cost(200);
7001   format %{ "Jn$cop   skip\n\t"
7002             "MOV    $dst,$src\t# double\n"
7003       "skip:" %}
7004   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7005   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
7006   ins_pipe( pipe_cmovDPR_reg );
7007 %}
7008 
7009 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7010 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
7011   predicate(UseSSE==0);
7012   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7013   ins_cost(200);
7014   format %{ "Jn$cop    skip\n\t"
7015             "MOV    $dst,$src\t# float\n"
7016       "skip:" %}
7017   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7018   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
7019   ins_pipe( pipe_cmovDPR_reg );
7020 %}
7021 
// There is no CMOV for XMM registers, so with SSE/SSE2 the conditional
// move is emulated with a short branch around a register move.
7023 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
7024   predicate (UseSSE>=1);
7025   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7026   ins_cost(200);
7027   format %{ "Jn$cop   skip\n\t"
7028             "MOVSS  $dst,$src\t# float\n"
7029       "skip:" %}
7030   ins_encode %{
7031     Label skip;
7032     // Invert sense of branch from sense of CMOV
7033     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7034     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7035     __ bind(skip);
7036   %}
7037   ins_pipe( pipe_slow );
7038 %}
7039 
// There is no CMOV for XMM registers, so with SSE/SSE2 the conditional
// move is emulated with a short branch around a register move.
7041 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
7042   predicate (UseSSE>=2);
7043   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7044   ins_cost(200);
7045   format %{ "Jn$cop   skip\n\t"
7046             "MOVSD  $dst,$src\t# float\n"
7047       "skip:" %}
7048   ins_encode %{
7049     Label skip;
7050     // Invert sense of branch from sense of CMOV
7051     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7052     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7053     __ bind(skip);
7054   %}
7055   ins_pipe( pipe_slow );
7056 %}
7057 
7058 // unsigned version
7059 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
7060   predicate (UseSSE>=1);
7061   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7062   ins_cost(200);
7063   format %{ "Jn$cop   skip\n\t"
7064             "MOVSS  $dst,$src\t# float\n"
7065       "skip:" %}
7066   ins_encode %{
7067     Label skip;
7068     // Invert sense of branch from sense of CMOV
7069     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7070     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7071     __ bind(skip);
7072   %}
7073   ins_pipe( pipe_slow );
7074 %}
7075 
7076 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
7077   predicate (UseSSE>=1);
7078   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7079   ins_cost(200);
7080   expand %{
7081     fcmovF_regU(cop, cr, dst, src);
7082   %}
7083 %}
7084 
7085 // unsigned version
7086 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7087   predicate (UseSSE>=2);
7088   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7089   ins_cost(200);
7090   format %{ "Jn$cop   skip\n\t"
7091             "MOVSD  $dst,$src\t# float\n"
7092       "skip:" %}
7093   ins_encode %{
7094     Label skip;
7095     // Invert sense of branch from sense of CMOV
7096     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7097     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7098     __ bind(skip);
7099   %}
7100   ins_pipe( pipe_slow );
7101 %}
7102 
7103 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7104   predicate (UseSSE>=2);
7105   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7106   ins_cost(200);
7107   expand %{
7108     fcmovD_regU(cop, cr, dst, src);
7109   %}
7110 %}
7111 
7112 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7113   predicate(VM_Version::supports_cmov() );
7114   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7115   ins_cost(200);
7116   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7117             "CMOV$cop $dst.hi,$src.hi" %}
7118   opcode(0x0F,0x40);
7119   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7120   ins_pipe( pipe_cmov_reg_long );
7121 %}
7122 
7123 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7124   predicate(VM_Version::supports_cmov() );
7125   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7126   ins_cost(200);
7127   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7128             "CMOV$cop $dst.hi,$src.hi" %}
7129   opcode(0x0F,0x40);
7130   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7131   ins_pipe( pipe_cmov_reg_long );
7132 %}
7133 
7134 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7135   predicate(VM_Version::supports_cmov() );
7136   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7137   ins_cost(200);
7138   expand %{
7139     cmovL_regU(cop, cr, dst, src);
7140   %}
7141 %}
7142 
7143 //----------Arithmetic Instructions--------------------------------------------
7144 //----------Addition Instructions----------------------------------------------
7145 
7146 // Integer Addition Instructions
7147 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7148   match(Set dst (AddI dst src));
7149   effect(KILL cr);
7150 
7151   size(2);
7152   format %{ "ADD    $dst,$src" %}
7153   opcode(0x03);
7154   ins_encode( OpcP, RegReg( dst, src) );
7155   ins_pipe( ialu_reg_reg );
7156 %}
7157 
7158 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7159   match(Set dst (AddI dst src));
7160   effect(KILL cr);
7161 
7162   format %{ "ADD    $dst,$src" %}
7163   opcode(0x81, 0x00); /* /0 id */
7164   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7165   ins_pipe( ialu_reg );
7166 %}
7167 
7168 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7169   predicate(UseIncDec);
7170   match(Set dst (AddI dst src));
7171   effect(KILL cr);
7172 
7173   size(1);
7174   format %{ "INC    $dst" %}
  opcode(0x40); /* 40+rd: INC r32 */
7176   ins_encode( Opc_plus( primary, dst ) );
7177   ins_pipe( ialu_reg );
7178 %}
7179 
7180 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7181   match(Set dst (AddI src0 src1));
7182   ins_cost(110);
7183 
7184   format %{ "LEA    $dst,[$src0 + $src1]" %}
7185   opcode(0x8D); /* 0x8D /r */
7186   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7187   ins_pipe( ialu_reg_reg );
7188 %}
7189 
7190 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7191   match(Set dst (AddP src0 src1));
7192   ins_cost(110);
7193 
7194   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7195   opcode(0x8D); /* 0x8D /r */
7196   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7197   ins_pipe( ialu_reg_reg );
7198 %}
7199 
7200 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7201   predicate(UseIncDec);
7202   match(Set dst (AddI dst src));
7203   effect(KILL cr);
7204 
7205   size(1);
7206   format %{ "DEC    $dst" %}
  opcode(0x48); /* 48+rd: DEC r32 */
7208   ins_encode( Opc_plus( primary, dst ) );
7209   ins_pipe( ialu_reg );
7210 %}
7211 
7212 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7213   match(Set dst (AddP dst src));
7214   effect(KILL cr);
7215 
7216   size(2);
7217   format %{ "ADD    $dst,$src" %}
7218   opcode(0x03);
7219   ins_encode( OpcP, RegReg( dst, src) );
7220   ins_pipe( ialu_reg_reg );
7221 %}
7222 
7223 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7224   match(Set dst (AddP dst src));
7225   effect(KILL cr);
7226 
7227   format %{ "ADD    $dst,$src" %}
7228   opcode(0x81,0x00); /* Opcode 81 /0 id */
7229   // ins_encode( RegImm( dst, src) );
7230   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7231   ins_pipe( ialu_reg );
7232 %}
7233 
7234 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7235   match(Set dst (AddI dst (LoadI src)));
7236   effect(KILL cr);
7237 
7238   ins_cost(125);
7239   format %{ "ADD    $dst,$src" %}
7240   opcode(0x03);
7241   ins_encode( OpcP, RegMem( dst, src) );
7242   ins_pipe( ialu_reg_mem );
7243 %}
7244 
7245 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7246   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7247   effect(KILL cr);
7248 
7249   ins_cost(150);
7250   format %{ "ADD    $dst,$src" %}
7251   opcode(0x01);  /* Opcode 01 /r */
7252   ins_encode( OpcP, RegMem( src, dst ) );
7253   ins_pipe( ialu_mem_reg );
7254 %}
7255 
7256 // Add Memory with Immediate
7257 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7258   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7259   effect(KILL cr);
7260 
7261   ins_cost(125);
7262   format %{ "ADD    $dst,$src" %}
7263   opcode(0x81);               /* Opcode 81 /0 id */
7264   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7265   ins_pipe( ialu_mem_imm );
7266 %}
7267 
7268 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7269   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7270   effect(KILL cr);
7271 
7272   ins_cost(125);
7273   format %{ "INC    $dst" %}
7274   opcode(0xFF);               /* Opcode FF /0 */
7275   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7276   ins_pipe( ialu_mem_imm );
7277 %}
7278 
7279 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7280   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7281   effect(KILL cr);
7282 
7283   ins_cost(125);
7284   format %{ "DEC    $dst" %}
7285   opcode(0xFF);               /* Opcode FF /1 */
7286   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7287   ins_pipe( ialu_mem_imm );
7288 %}
7289 
7290 
7291 instruct checkCastPP( eRegP dst ) %{
7292   match(Set dst (CheckCastPP dst));
7293 
7294   size(0);
7295   format %{ "#checkcastPP of $dst" %}
7296   ins_encode( /*empty encoding*/ );
7297   ins_pipe( empty );
7298 %}
7299 
7300 instruct castPP( eRegP dst ) %{
7301   match(Set dst (CastPP dst));
7302   format %{ "#castPP of $dst" %}
7303   ins_encode( /*empty encoding*/ );
7304   ins_pipe( empty );
7305 %}
7306 
7307 instruct castII( rRegI dst ) %{
7308   match(Set dst (CastII dst));
7309   format %{ "#castII of $dst" %}
7310   ins_encode( /*empty encoding*/ );
7311   ins_cost(0);
7312   ins_pipe( empty );
7313 %}
7314 
7315 
7316 // Load-locked - same as a regular pointer load when used with compare-swap
7317 instruct loadPLocked(eRegP dst, memory mem) %{
7318   match(Set dst (LoadPLocked mem));
7319 
7320   ins_cost(125);
7321   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7322   opcode(0x8B);
7323   ins_encode( OpcP, RegMem(dst,mem));
7324   ins_pipe( ialu_reg_mem );
7325 %}
7326 
7327 // Conditional-store of the updated heap-top.
7328 // Used during allocation of the shared heap.
7329 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
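// LOCK CMPXCHG [mem],reg compares EAX with [mem]; if equal it stores reg
// into [mem] and sets ZF, otherwise it loads [mem] into EAX and clears ZF.
// The consumer of the flags tests ZF to see whether the store happened.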
7330 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7331   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7332   // EAX is killed if there is contention, but then it's also unused.
7333   // In the common case of no contention, EAX holds the new oop address.
7334   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7335   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7336   ins_pipe( pipe_cmpxchg );
7337 %}
7338 
7339 // Conditional-store of an int value.
7340 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7341 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7342   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7343   effect(KILL oldval);
7344   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7345   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7346   ins_pipe( pipe_cmpxchg );
7347 %}
7348 
7349 // Conditional-store of a long value.
7350 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7351 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7352   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7353   effect(KILL oldval);
7354   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7355             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7356             "XCHG   EBX,ECX"
7357   %}
7358   ins_encode %{
    // Note: we need to swap rbx and rcx before and after the
7360     //       cmpxchg8 instruction because the instruction uses
7361     //       rcx as the high order word of the new value to store but
7362     //       our register encoding uses rbx.
7363     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7364     __ lock();
7365     __ cmpxchg8($mem$$Address);
7366     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7367   %}
7368   ins_pipe( pipe_cmpxchg );
7369 %}
7370 
7371 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
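// LOCK CMPXCHG8B [mem] compares EDX:EAX with the 64-bit value at [mem];
// if equal it stores ECX:EBX into [mem] and sets ZF, otherwise it loads
// [mem] into EDX:EAX and clears ZF.  enc_flags_ne_to_boolean then turns
// ZF into a 0/1 result in $res.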
7372 
7373 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7374   predicate(VM_Version::supports_cx8());
7375   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7376   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7377   effect(KILL cr, KILL oldval);
7378   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7379             "MOV    $res,0\n\t"
7380             "JNE,s  fail\n\t"
7381             "MOV    $res,1\n"
7382           "fail:" %}
7383   ins_encode( enc_cmpxchg8(mem_ptr),
7384               enc_flags_ne_to_boolean(res) );
7385   ins_pipe( pipe_cmpxchg );
7386 %}
7387 
7388 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7389   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7390   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7391   effect(KILL cr, KILL oldval);
7392   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7393             "MOV    $res,0\n\t"
7394             "JNE,s  fail\n\t"
7395             "MOV    $res,1\n"
7396           "fail:" %}
7397   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7398   ins_pipe( pipe_cmpxchg );
7399 %}
7400 
7401 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7402   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7403   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7404   effect(KILL cr, KILL oldval);
7405   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7406             "MOV    $res,0\n\t"
7407             "JNE,s  fail\n\t"
7408             "MOV    $res,1\n"
7409           "fail:" %}
7410   ins_encode( enc_cmpxchgb(mem_ptr),
7411               enc_flags_ne_to_boolean(res) );
7412   ins_pipe( pipe_cmpxchg );
7413 %}
7414 
7415 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7416   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7417   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7418   effect(KILL cr, KILL oldval);
7419   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7420             "MOV    $res,0\n\t"
7421             "JNE,s  fail\n\t"
7422             "MOV    $res,1\n"
7423           "fail:" %}
7424   ins_encode( enc_cmpxchgw(mem_ptr),
7425               enc_flags_ne_to_boolean(res) );
7426   ins_pipe( pipe_cmpxchg );
7427 %}
7428 
7429 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7430   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7431   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7432   effect(KILL cr, KILL oldval);
7433   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7434             "MOV    $res,0\n\t"
7435             "JNE,s  fail\n\t"
7436             "MOV    $res,1\n"
7437           "fail:" %}
7438   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7439   ins_pipe( pipe_cmpxchg );
7440 %}
7441 
7442 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7443   predicate(VM_Version::supports_cx8());
7444   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7445   effect(KILL cr);
7446   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7447   ins_encode( enc_cmpxchg8(mem_ptr) );
7448   ins_pipe( pipe_cmpxchg );
7449 %}
7450 
7451 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7452   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7453   effect(KILL cr);
7454   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7455   ins_encode( enc_cmpxchg(mem_ptr) );
7456   ins_pipe( pipe_cmpxchg );
7457 %}
7458 
7459 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7460   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7461   effect(KILL cr);
7462   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7463   ins_encode( enc_cmpxchgb(mem_ptr) );
7464   ins_pipe( pipe_cmpxchg );
7465 %}
7466 
7467 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7468   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7469   effect(KILL cr);
7470   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7471   ins_encode( enc_cmpxchgw(mem_ptr) );
7472   ins_pipe( pipe_cmpxchg );
7473 %}
7474 
7475 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7476   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7477   effect(KILL cr);
7478   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7479   ins_encode( enc_cmpxchg(mem_ptr) );
7480   ins_pipe( pipe_cmpxchg );
7481 %}
7482 
7483 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7484   predicate(n->as_LoadStore()->result_not_used());
7485   match(Set dummy (GetAndAddB mem add));
7486   effect(KILL cr);
7487   format %{ "ADDB  [$mem],$add" %}
7488   ins_encode %{
7489     __ lock();
7490     __ addb($mem$$Address, $add$$constant);
7491   %}
7492   ins_pipe( pipe_cmpxchg );
7493 %}
7494 
7495 // Important to match to xRegI: only 8-bit regs.
7496 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7497   match(Set newval (GetAndAddB mem newval));
7498   effect(KILL cr);
7499   format %{ "XADDB  [$mem],$newval" %}
7500   ins_encode %{
7501     __ lock();
7502     __ xaddb($mem$$Address, $newval$$Register);
7503   %}
7504   ins_pipe( pipe_cmpxchg );
7505 %}
7506 
7507 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7508   predicate(n->as_LoadStore()->result_not_used());
7509   match(Set dummy (GetAndAddS mem add));
7510   effect(KILL cr);
7511   format %{ "ADDS  [$mem],$add" %}
7512   ins_encode %{
7513     __ lock();
7514     __ addw($mem$$Address, $add$$constant);
7515   %}
7516   ins_pipe( pipe_cmpxchg );
7517 %}
7518 
7519 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7520   match(Set newval (GetAndAddS mem newval));
7521   effect(KILL cr);
7522   format %{ "XADDS  [$mem],$newval" %}
7523   ins_encode %{
7524     __ lock();
7525     __ xaddw($mem$$Address, $newval$$Register);
7526   %}
7527   ins_pipe( pipe_cmpxchg );
7528 %}
7529 
7530 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7531   predicate(n->as_LoadStore()->result_not_used());
7532   match(Set dummy (GetAndAddI mem add));
7533   effect(KILL cr);
7534   format %{ "ADDL  [$mem],$add" %}
7535   ins_encode %{
7536     __ lock();
7537     __ addl($mem$$Address, $add$$constant);
7538   %}
7539   ins_pipe( pipe_cmpxchg );
7540 %}
7541 
7542 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7543   match(Set newval (GetAndAddI mem newval));
7544   effect(KILL cr);
7545   format %{ "XADDL  [$mem],$newval" %}
7546   ins_encode %{
7547     __ lock();
7548     __ xaddl($mem$$Address, $newval$$Register);
7549   %}
7550   ins_pipe( pipe_cmpxchg );
7551 %}
7552 
7553 // Important to match to xRegI: only 8-bit regs.
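// GetAndSet: XCHG with a memory operand asserts LOCK implicitly, so no
// explicit lock prefix is needed for these exchanges.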
7554 instruct xchgB( memory mem, xRegI newval) %{
7555   match(Set newval (GetAndSetB mem newval));
7556   format %{ "XCHGB  $newval,[$mem]" %}
7557   ins_encode %{
7558     __ xchgb($newval$$Register, $mem$$Address);
7559   %}
7560   ins_pipe( pipe_cmpxchg );
7561 %}
7562 
7563 instruct xchgS( memory mem, rRegI newval) %{
7564   match(Set newval (GetAndSetS mem newval));
7565   format %{ "XCHGW  $newval,[$mem]" %}
7566   ins_encode %{
7567     __ xchgw($newval$$Register, $mem$$Address);
7568   %}
7569   ins_pipe( pipe_cmpxchg );
7570 %}
7571 
7572 instruct xchgI( memory mem, rRegI newval) %{
7573   match(Set newval (GetAndSetI mem newval));
7574   format %{ "XCHGL  $newval,[$mem]" %}
7575   ins_encode %{
7576     __ xchgl($newval$$Register, $mem$$Address);
7577   %}
7578   ins_pipe( pipe_cmpxchg );
7579 %}
7580 
7581 instruct xchgP( memory mem, pRegP newval) %{
7582   match(Set newval (GetAndSetP mem newval));
7583   format %{ "XCHGL  $newval,[$mem]" %}
7584   ins_encode %{
7585     __ xchgl($newval$$Register, $mem$$Address);
7586   %}
7587   ins_pipe( pipe_cmpxchg );
7588 %}
7589 
7590 //----------Subtraction Instructions-------------------------------------------
7591 
7592 // Integer Subtraction Instructions
7593 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7594   match(Set dst (SubI dst src));
7595   effect(KILL cr);
7596 
7597   size(2);
7598   format %{ "SUB    $dst,$src" %}
7599   opcode(0x2B);
7600   ins_encode( OpcP, RegReg( dst, src) );
7601   ins_pipe( ialu_reg_reg );
7602 %}
7603 
7604 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7605   match(Set dst (SubI dst src));
7606   effect(KILL cr);
7607 
7608   format %{ "SUB    $dst,$src" %}
7609   opcode(0x81,0x05);  /* Opcode 81 /5 */
7610   // ins_encode( RegImm( dst, src) );
7611   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7612   ins_pipe( ialu_reg );
7613 %}
7614 
7615 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7616   match(Set dst (SubI dst (LoadI src)));
7617   effect(KILL cr);
7618 
7619   ins_cost(125);
7620   format %{ "SUB    $dst,$src" %}
7621   opcode(0x2B);
7622   ins_encode( OpcP, RegMem( dst, src) );
7623   ins_pipe( ialu_reg_mem );
7624 %}
7625 
7626 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7627   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7628   effect(KILL cr);
7629 
7630   ins_cost(150);
7631   format %{ "SUB    $dst,$src" %}
7632   opcode(0x29);  /* Opcode 29 /r */
7633   ins_encode( OpcP, RegMem( src, dst ) );
7634   ins_pipe( ialu_mem_reg );
7635 %}
7636 
7637 // Subtract from a pointer
7638 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7639   match(Set dst (AddP dst (SubI zero src)));
7640   effect(KILL cr);
7641 
7642   size(2);
7643   format %{ "SUB    $dst,$src" %}
7644   opcode(0x2B);
7645   ins_encode( OpcP, RegReg( dst, src) );
7646   ins_pipe( ialu_reg_reg );
7647 %}
7648 
7649 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7650   match(Set dst (SubI zero dst));
7651   effect(KILL cr);
7652 
7653   size(2);
7654   format %{ "NEG    $dst" %}
7655   opcode(0xF7,0x03);  // Opcode F7 /3
7656   ins_encode( OpcP, RegOpc( dst ) );
7657   ins_pipe( ialu_reg );
7658 %}
7659 
7660 //----------Multiplication/Division Instructions-------------------------------
7661 // Integer Multiplication Instructions
7662 // Multiply Register
7663 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7664   match(Set dst (MulI dst src));
7665   effect(KILL cr);
7666 
7667   size(3);
7668   ins_cost(300);
7669   format %{ "IMUL   $dst,$src" %}
7670   opcode(0xAF, 0x0F);
7671   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7672   ins_pipe( ialu_reg_reg_alu0 );
7673 %}
7674 
7675 // Multiply 32-bit Immediate
7676 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7677   match(Set dst (MulI src imm));
7678   effect(KILL cr);
7679 
7680   ins_cost(300);
7681   format %{ "IMUL   $dst,$src,$imm" %}
7682   opcode(0x69);  /* 69 /r id */
7683   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7684   ins_pipe( ialu_reg_reg_alu0 );
7685 %}
7686 
7687 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7688   match(Set dst src);
7689   effect(KILL cr);
7690 
7691   // Note that this is artificially increased to make it more expensive than loadConL
7692   ins_cost(250);
7693   format %{ "MOV    EAX,$src\t// low word only" %}
7694   opcode(0xB8);
7695   ins_encode( LdImmL_Lo(dst, src) );
7696   ins_pipe( ialu_reg_fat );
7697 %}
7698 
7699 // Multiply by 32-bit Immediate, taking the shifted high order results
7700 //  (special case for shift by 32)
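// The predicate inspects the matched subtree to require that the MulL's
// constant operand fits in a signed 32-bit value, so only its low word
// (placed in EAX by loadConL_low_only above) contributes to the product;
// the widening IMUL of EAX by $src1 leaves the high half of the 64-bit
// product in EDX, which is what the shift by 32 selects.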
7701 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7702   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7703   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7704              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7705              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7706   effect(USE src1, KILL cr);
7707 
7708   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7709   ins_cost(0*100 + 1*400 - 150);
7710   format %{ "IMUL   EDX:EAX,$src1" %}
7711   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7712   ins_pipe( pipe_slow );
7713 %}
7714 
7715 // Multiply by 32-bit Immediate, taking the shifted high order results
7716 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7717   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7718   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7719              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7720              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7721   effect(USE src1, KILL cr);
7722 
7723   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7724   ins_cost(1*100 + 1*400 - 150);
7725   format %{ "IMUL   EDX:EAX,$src1\n\t"
7726             "SAR    EDX,$cnt-32" %}
7727   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7728   ins_pipe( pipe_slow );
7729 %}
7730 
7731 // Multiply Memory 32-bit Immediate
7732 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7733   match(Set dst (MulI (LoadI src) imm));
7734   effect(KILL cr);
7735 
7736   ins_cost(300);
7737   format %{ "IMUL   $dst,$src,$imm" %}
7738   opcode(0x69);  /* 69 /r id */
7739   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7740   ins_pipe( ialu_reg_mem_alu0 );
7741 %}
7742 
7743 // Multiply Memory
7744 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7745   match(Set dst (MulI dst (LoadI src)));
7746   effect(KILL cr);
7747 
7748   ins_cost(350);
7749   format %{ "IMUL   $dst,$src" %}
7750   opcode(0xAF, 0x0F);
7751   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7752   ins_pipe( ialu_reg_mem_alu0 );
7753 %}
7754 
7755 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
7756 %{
7757   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
7758   effect(KILL cr, KILL src2);
7759 
7760   expand %{ mulI_eReg(dst, src1, cr);
7761            mulI_eReg(src2, src3, cr);
7762            addI_eReg(dst, src2, cr); %}
7763 %}
7764 
7765 // Multiply Register Int to Long
7766 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7767   // Basic Idea: long = (long)int * (long)int
7768   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7769   effect(DEF dst, USE src, USE src1, KILL flags);
7770 
7771   ins_cost(300);
7772   format %{ "IMUL   $dst,$src1" %}
7773 
7774   ins_encode( long_int_multiply( dst, src1 ) );
7775   ins_pipe( ialu_reg_reg_alu0 );
7776 %}
7777 
7778 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7779   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7780   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7781   effect(KILL flags);
7782 
7783   ins_cost(300);
7784   format %{ "MUL    $dst,$src1" %}
7785 
7786   ins_encode( long_uint_multiply(dst, src1) );
7787   ins_pipe( ialu_reg_reg_alu0 );
7788 %}
7789 
7790 // Multiply Register Long
7791 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7792   match(Set dst (MulL dst src));
7793   effect(KILL cr, TEMP tmp);
7794   ins_cost(4*100+3*400);
7795 // Basic idea: lo(result) = lo(x_lo * y_lo)
7796 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7797   format %{ "MOV    $tmp,$src.lo\n\t"
7798             "IMUL   $tmp,EDX\n\t"
7799             "MOV    EDX,$src.hi\n\t"
7800             "IMUL   EDX,EAX\n\t"
7801             "ADD    $tmp,EDX\n\t"
7802             "MUL    EDX:EAX,$src.lo\n\t"
7803             "ADD    EDX,$tmp" %}
7804   ins_encode( long_multiply( dst, src, tmp ) );
7805   ins_pipe( pipe_slow );
7806 %}
7807 
7808 // Multiply Register Long where the left operand's high 32 bits are zero
7809 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7810   predicate(is_operand_hi32_zero(n->in(1)));
7811   match(Set dst (MulL dst src));
7812   effect(KILL cr, TEMP tmp);
7813   ins_cost(2*100+2*400);
7814 // Basic idea: lo(result) = lo(x_lo * y_lo)
7815 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7816   format %{ "MOV    $tmp,$src.hi\n\t"
7817             "IMUL   $tmp,EAX\n\t"
7818             "MUL    EDX:EAX,$src.lo\n\t"
7819             "ADD    EDX,$tmp" %}
7820   ins_encode %{
7821     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7822     __ imull($tmp$$Register, rax);
7823     __ mull($src$$Register);
7824     __ addl(rdx, $tmp$$Register);
7825   %}
7826   ins_pipe( pipe_slow );
7827 %}
7828 
7829 // Multiply Register Long where the right operand's high 32 bits are zero
7830 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7831   predicate(is_operand_hi32_zero(n->in(2)));
7832   match(Set dst (MulL dst src));
7833   effect(KILL cr, TEMP tmp);
7834   ins_cost(2*100+2*400);
7835 // Basic idea: lo(result) = lo(x_lo * y_lo)
7836 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7837   format %{ "MOV    $tmp,$src.lo\n\t"
7838             "IMUL   $tmp,EDX\n\t"
7839             "MUL    EDX:EAX,$src.lo\n\t"
7840             "ADD    EDX,$tmp" %}
7841   ins_encode %{
7842     __ movl($tmp$$Register, $src$$Register);
7843     __ imull($tmp$$Register, rdx);
7844     __ mull($src$$Register);
7845     __ addl(rdx, $tmp$$Register);
7846   %}
7847   ins_pipe( pipe_slow );
7848 %}
7849 
7850 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7851 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7852   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7853   match(Set dst (MulL dst src));
7854   effect(KILL cr);
7855   ins_cost(1*400);
7856 // Basic idea: lo(result) = lo(x_lo * y_lo)
7857 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7858   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7859   ins_encode %{
7860     __ mull($src$$Register);
7861   %}
7862   ins_pipe( pipe_slow );
7863 %}
7864 
7865 // Multiply Register Long by small constant
7866 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7867   match(Set dst (MulL dst src));
7868   effect(KILL cr, TEMP tmp);
7869   ins_cost(2*100+2*400);
7870   size(12);
7871 // Basic idea: lo(result) = lo(src * EAX)
7872 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7873   format %{ "IMUL   $tmp,EDX,$src\n\t"
7874             "MOV    EDX,$src\n\t"
7875             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7876             "ADD    EDX,$tmp" %}
7877   ins_encode( long_multiply_con( dst, src, tmp ) );
7878   ins_pipe( pipe_slow );
7879 %}
7880 
7881 // Integer DIV with Register
7882 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7883   match(Set rax (DivI rax div));
7884   effect(KILL rdx, KILL cr);
7885   size(26);
7886   ins_cost(30*100+10*100);
7887   format %{ "CMP    EAX,0x80000000\n\t"
7888             "JNE,s  normal\n\t"
7889             "XOR    EDX,EDX\n\t"
7890             "CMP    ECX,-1\n\t"
7891             "JE,s   done\n"
7892     "normal: CDQ\n\t"
7893             "IDIV   $div\n\t"
7894     "done:"        %}
7895   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7896   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7897   ins_pipe( ialu_reg_reg_alu0 );
7898 %}
7899 
7900 // Divide Register Long
7901 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7902   match(Set dst (DivL src1 src2));
7903   effect( KILL cr, KILL cx, KILL bx );
7904   ins_cost(10000);
7905   format %{ "PUSH   $src1.hi\n\t"
7906             "PUSH   $src1.lo\n\t"
7907             "PUSH   $src2.hi\n\t"
7908             "PUSH   $src2.lo\n\t"
7909             "CALL   SharedRuntime::ldiv\n\t"
7910             "ADD    ESP,16" %}
7911   ins_encode( long_div(src1,src2) );
7912   ins_pipe( pipe_slow );
7913 %}
7914 
7915 // Integer DIVMOD with Register, both quotient and mod results
7916 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7917   match(DivModI rax div);
7918   effect(KILL cr);
7919   size(26);
7920   ins_cost(30*100+10*100);
7921   format %{ "CMP    EAX,0x80000000\n\t"
7922             "JNE,s  normal\n\t"
7923             "XOR    EDX,EDX\n\t"
7924             "CMP    ECX,-1\n\t"
7925             "JE,s   done\n"
7926     "normal: CDQ\n\t"
7927             "IDIV   $div\n\t"
7928     "done:"        %}
7929   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7930   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7931   ins_pipe( pipe_slow );
7932 %}
7933 
7934 // Integer MOD with Register
7935 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7936   match(Set rdx (ModI rax div));
7937   effect(KILL rax, KILL cr);
7938 
7939   size(26);
7940   ins_cost(300);
7941   format %{ "CDQ\n\t"
7942             "IDIV   $div" %}
7943   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7944   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7945   ins_pipe( ialu_reg_reg_alu0 );
7946 %}
7947 
7948 // Remainder Register Long
7949 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7950   match(Set dst (ModL src1 src2));
7951   effect( KILL cr, KILL cx, KILL bx );
7952   ins_cost(10000);
7953   format %{ "PUSH   $src1.hi\n\t"
7954             "PUSH   $src1.lo\n\t"
7955             "PUSH   $src2.hi\n\t"
7956             "PUSH   $src2.lo\n\t"
7957             "CALL   SharedRuntime::lrem\n\t"
7958             "ADD    ESP,16" %}
7959   ins_encode( long_mod(src1,src2) );
7960   ins_pipe( pipe_slow );
7961 %}
7962 
// Divide Register Long by a 32-bit immediate (no special case needed since the divisor is never -1)
7964 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7965   match(Set dst (DivL dst imm));
7966   effect( TEMP tmp, TEMP tmp2, KILL cr );
7967   ins_cost(1000);
7968   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7969             "XOR    $tmp2,$tmp2\n\t"
7970             "CMP    $tmp,EDX\n\t"
7971             "JA,s   fast\n\t"
7972             "MOV    $tmp2,EAX\n\t"
7973             "MOV    EAX,EDX\n\t"
7974             "MOV    EDX,0\n\t"
7975             "JLE,s  pos\n\t"
7976             "LNEG   EAX : $tmp2\n\t"
7977             "DIV    $tmp # unsigned division\n\t"
7978             "XCHG   EAX,$tmp2\n\t"
7979             "DIV    $tmp\n\t"
7980             "LNEG   $tmp2 : EAX\n\t"
7981             "JMP,s  done\n"
7982     "pos:\n\t"
7983             "DIV    $tmp\n\t"
7984             "XCHG   EAX,$tmp2\n"
7985     "fast:\n\t"
7986             "DIV    $tmp\n"
7987     "done:\n\t"
7988             "MOV    EDX,$tmp2\n\t"
7989             "NEG    EDX:EAX # if $imm < 0" %}
7990   ins_encode %{
7991     int con = (int)$imm$$constant;
7992     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7993     int pcon = (con > 0) ? con : -con;
7994     Label Lfast, Lpos, Ldone;
7995 
7996     __ movl($tmp$$Register, pcon);
7997     __ xorl($tmp2$$Register,$tmp2$$Register);
7998     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7999     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
8000 
8001     __ movl($tmp2$$Register, $dst$$Register); // save
8002     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8003     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8004     __ jccb(Assembler::lessEqual, Lpos); // result is positive
8005 
8006     // Negative dividend.
8007     // convert value to positive to use unsigned division
8008     __ lneg($dst$$Register, $tmp2$$Register);
8009     __ divl($tmp$$Register);
8010     __ xchgl($dst$$Register, $tmp2$$Register);
8011     __ divl($tmp$$Register);
8012     // revert result back to negative
8013     __ lneg($tmp2$$Register, $dst$$Register);
8014     __ jmpb(Ldone);
8015 
8016     __ bind(Lpos);
8017     __ divl($tmp$$Register); // Use unsigned division
8018     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit hi result
8020 
8021     __ bind(Lfast);
8022     // fast path: src is positive
8023     __ divl($tmp$$Register); // Use unsigned division
8024 
8025     __ bind(Ldone);
8026     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
8027     if (con < 0) {
8028       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
8029     }
8030   %}
8031   ins_pipe( pipe_slow );
8032 %}
8033 
// Remainder Register Long (remainder fits into 32 bits)
8035 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
8036   match(Set dst (ModL dst imm));
8037   effect( TEMP tmp, TEMP tmp2, KILL cr );
8038   ins_cost(1000);
8039   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8040             "CMP    $tmp,EDX\n\t"
8041             "JA,s   fast\n\t"
8042             "MOV    $tmp2,EAX\n\t"
8043             "MOV    EAX,EDX\n\t"
8044             "MOV    EDX,0\n\t"
8045             "JLE,s  pos\n\t"
8046             "LNEG   EAX : $tmp2\n\t"
8047             "DIV    $tmp # unsigned division\n\t"
8048             "MOV    EAX,$tmp2\n\t"
8049             "DIV    $tmp\n\t"
8050             "NEG    EDX\n\t"
8051             "JMP,s  done\n"
8052     "pos:\n\t"
8053             "DIV    $tmp\n\t"
8054             "MOV    EAX,$tmp2\n"
8055     "fast:\n\t"
8056             "DIV    $tmp\n"
8057     "done:\n\t"
8058             "MOV    EAX,EDX\n\t"
8059             "SAR    EDX,31\n\t" %}
8060   ins_encode %{
8061     int con = (int)$imm$$constant;
8062     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8063     int pcon = (con > 0) ? con : -con;
8064     Label  Lfast, Lpos, Ldone;
8065 
8066     __ movl($tmp$$Register, pcon);
8067     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8068     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
8069 
8070     __ movl($tmp2$$Register, $dst$$Register); // save
8071     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8072     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8073     __ jccb(Assembler::lessEqual, Lpos); // result is positive
8074 
8075     // Negative dividend.
8076     // convert value to positive to use unsigned division
8077     __ lneg($dst$$Register, $tmp2$$Register);
8078     __ divl($tmp$$Register);
8079     __ movl($dst$$Register, $tmp2$$Register);
8080     __ divl($tmp$$Register);
8081     // revert remainder back to negative
8082     __ negl(HIGH_FROM_LOW($dst$$Register));
8083     __ jmpb(Ldone);
8084 
8085     __ bind(Lpos);
8086     __ divl($tmp$$Register);
8087     __ movl($dst$$Register, $tmp2$$Register);
8088 
8089     __ bind(Lfast);
8090     // fast path: src is positive
8091     __ divl($tmp$$Register);
8092 
8093     __ bind(Ldone);
8094     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8095     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8096 
8097   %}
8098   ins_pipe( pipe_slow );
8099 %}
8100 
8101 // Integer Shift Instructions
8102 // Shift Left by one
8103 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8104   match(Set dst (LShiftI dst shift));
8105   effect(KILL cr);
8106 
8107   size(2);
8108   format %{ "SHL    $dst,$shift" %}
8109   opcode(0xD1, 0x4);  /* D1 /4 */
8110   ins_encode( OpcP, RegOpc( dst ) );
8111   ins_pipe( ialu_reg );
8112 %}
8113 
8114 // Shift Left by 8-bit immediate
8115 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8116   match(Set dst (LShiftI dst shift));
8117   effect(KILL cr);
8118 
8119   size(3);
8120   format %{ "SHL    $dst,$shift" %}
8121   opcode(0xC1, 0x4);  /* C1 /4 ib */
8122   ins_encode( RegOpcImm( dst, shift) );
8123   ins_pipe( ialu_reg );
8124 %}
8125 
8126 // Shift Left by variable
8127 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8128   match(Set dst (LShiftI dst shift));
8129   effect(KILL cr);
8130 
8131   size(2);
8132   format %{ "SHL    $dst,$shift" %}
8133   opcode(0xD3, 0x4);  /* D3 /4 */
8134   ins_encode( OpcP, RegOpc( dst ) );
8135   ins_pipe( ialu_reg_reg );
8136 %}
8137 
8138 // Arithmetic shift right by one
8139 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8140   match(Set dst (RShiftI dst shift));
8141   effect(KILL cr);
8142 
8143   size(2);
8144   format %{ "SAR    $dst,$shift" %}
8145   opcode(0xD1, 0x7);  /* D1 /7 */
8146   ins_encode( OpcP, RegOpc( dst ) );
8147   ins_pipe( ialu_reg );
8148 %}
8149 
8150 // Arithmetic shift right by one
8151 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8152   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8153   effect(KILL cr);
8154   format %{ "SAR    $dst,$shift" %}
8155   opcode(0xD1, 0x7);  /* D1 /7 */
8156   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8157   ins_pipe( ialu_mem_imm );
8158 %}
8159 
8160 // Arithmetic Shift Right by 8-bit immediate
8161 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8162   match(Set dst (RShiftI dst shift));
8163   effect(KILL cr);
8164 
8165   size(3);
8166   format %{ "SAR    $dst,$shift" %}
8167   opcode(0xC1, 0x7);  /* C1 /7 ib */
8168   ins_encode( RegOpcImm( dst, shift ) );
8169   ins_pipe( ialu_mem_imm );
8170 %}
8171 
8172 // Arithmetic Shift Right by 8-bit immediate
8173 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8174   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8175   effect(KILL cr);
8176 
8177   format %{ "SAR    $dst,$shift" %}
8178   opcode(0xC1, 0x7);  /* C1 /7 ib */
8179   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8180   ins_pipe( ialu_mem_imm );
8181 %}
8182 
8183 // Arithmetic Shift Right by variable
8184 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8185   match(Set dst (RShiftI dst shift));
8186   effect(KILL cr);
8187 
8188   size(2);
8189   format %{ "SAR    $dst,$shift" %}
8190   opcode(0xD3, 0x7);  /* D3 /7 */
8191   ins_encode( OpcP, RegOpc( dst ) );
8192   ins_pipe( ialu_reg_reg );
8193 %}
8194 
8195 // Logical shift right by one
8196 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8197   match(Set dst (URShiftI dst shift));
8198   effect(KILL cr);
8199 
8200   size(2);
8201   format %{ "SHR    $dst,$shift" %}
8202   opcode(0xD1, 0x5);  /* D1 /5 */
8203   ins_encode( OpcP, RegOpc( dst ) );
8204   ins_pipe( ialu_reg );
8205 %}
8206 
8207 // Logical Shift Right by 8-bit immediate
8208 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8209   match(Set dst (URShiftI dst shift));
8210   effect(KILL cr);
8211 
8212   size(3);
8213   format %{ "SHR    $dst,$shift" %}
8214   opcode(0xC1, 0x5);  /* C1 /5 ib */
8215   ins_encode( RegOpcImm( dst, shift) );
8216   ins_pipe( ialu_reg );
8217 %}
8218 
8219 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
8221 // This idiom is used by the compiler for the i2b bytecode.
8222 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8223   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8224 
8225   size(3);
8226   format %{ "MOVSX  $dst,$src :8" %}
8227   ins_encode %{
8228     __ movsbl($dst$$Register, $src$$Register);
8229   %}
8230   ins_pipe(ialu_reg_reg);
8231 %}
8232 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
8235 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8236   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8237 
8238   size(3);
8239   format %{ "MOVSX  $dst,$src :16" %}
8240   ins_encode %{
8241     __ movswl($dst$$Register, $src$$Register);
8242   %}
8243   ins_pipe(ialu_reg_reg);
8244 %}
8245 
8246 
8247 // Logical Shift Right by variable
8248 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8249   match(Set dst (URShiftI dst shift));
8250   effect(KILL cr);
8251 
8252   size(2);
8253   format %{ "SHR    $dst,$shift" %}
8254   opcode(0xD3, 0x5);  /* D3 /5 */
8255   ins_encode( OpcP, RegOpc( dst ) );
8256   ins_pipe( ialu_reg_reg );
8257 %}
8258 
8259 
8260 //----------Logical Instructions-----------------------------------------------
8261 //----------Integer Logical Instructions---------------------------------------
8262 // And Instructions
8263 // And Register with Register
8264 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8265   match(Set dst (AndI dst src));
8266   effect(KILL cr);
8267 
8268   size(2);
8269   format %{ "AND    $dst,$src" %}
8270   opcode(0x23);
8271   ins_encode( OpcP, RegReg( dst, src) );
8272   ins_pipe( ialu_reg_reg );
8273 %}
8274 
8275 // And Register with Immediate
8276 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8277   match(Set dst (AndI dst src));
8278   effect(KILL cr);
8279 
8280   format %{ "AND    $dst,$src" %}
8281   opcode(0x81,0x04);  /* Opcode 81 /4 */
8282   // ins_encode( RegImm( dst, src) );
8283   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8284   ins_pipe( ialu_reg );
8285 %}
8286 
8287 // And Register with Memory
8288 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8289   match(Set dst (AndI dst (LoadI src)));
8290   effect(KILL cr);
8291 
8292   ins_cost(125);
8293   format %{ "AND    $dst,$src" %}
8294   opcode(0x23);
8295   ins_encode( OpcP, RegMem( dst, src) );
8296   ins_pipe( ialu_reg_mem );
8297 %}
8298 
8299 // And Memory with Register
8300 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8301   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8302   effect(KILL cr);
8303 
8304   ins_cost(150);
8305   format %{ "AND    $dst,$src" %}
8306   opcode(0x21);  /* Opcode 21 /r */
8307   ins_encode( OpcP, RegMem( src, dst ) );
8308   ins_pipe( ialu_mem_reg );
8309 %}
8310 
8311 // And Memory with Immediate
8312 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8313   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8314   effect(KILL cr);
8315 
8316   ins_cost(125);
8317   format %{ "AND    $dst,$src" %}
8318   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8319   // ins_encode( MemImm( dst, src) );
8320   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8321   ins_pipe( ialu_mem_imm );
8322 %}
8323 
8324 // BMI1 instructions
8325 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8326   match(Set dst (AndI (XorI src1 minus_1) src2));
8327   predicate(UseBMI1Instructions);
8328   effect(KILL cr);
8329 
8330   format %{ "ANDNL  $dst, $src1, $src2" %}
8331 
8332   ins_encode %{
8333     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8334   %}
8335   ins_pipe(ialu_reg);
8336 %}
8337 
8338 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8339   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8340   predicate(UseBMI1Instructions);
8341   effect(KILL cr);
8342 
8343   ins_cost(125);
8344   format %{ "ANDNL  $dst, $src1, $src2" %}
8345 
8346   ins_encode %{
8347     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8348   %}
8349   ins_pipe(ialu_reg_mem);
8350 %}
8351 
8352 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8353   match(Set dst (AndI (SubI imm_zero src) src));
8354   predicate(UseBMI1Instructions);
8355   effect(KILL cr);
8356 
8357   format %{ "BLSIL  $dst, $src" %}
8358 
8359   ins_encode %{
8360     __ blsil($dst$$Register, $src$$Register);
8361   %}
8362   ins_pipe(ialu_reg);
8363 %}
8364 
8365 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8366   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8367   predicate(UseBMI1Instructions);
8368   effect(KILL cr);
8369 
8370   ins_cost(125);
8371   format %{ "BLSIL  $dst, $src" %}
8372 
8373   ins_encode %{
8374     __ blsil($dst$$Register, $src$$Address);
8375   %}
8376   ins_pipe(ialu_reg_mem);
8377 %}
8378 
8379 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8380 %{
8381   match(Set dst (XorI (AddI src minus_1) src));
8382   predicate(UseBMI1Instructions);
8383   effect(KILL cr);
8384 
8385   format %{ "BLSMSKL $dst, $src" %}
8386 
8387   ins_encode %{
8388     __ blsmskl($dst$$Register, $src$$Register);
8389   %}
8390 
8391   ins_pipe(ialu_reg);
8392 %}
8393 
8394 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8395 %{
8396   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8397   predicate(UseBMI1Instructions);
8398   effect(KILL cr);
8399 
8400   ins_cost(125);
8401   format %{ "BLSMSKL $dst, $src" %}
8402 
8403   ins_encode %{
8404     __ blsmskl($dst$$Register, $src$$Address);
8405   %}
8406 
8407   ins_pipe(ialu_reg_mem);
8408 %}
8409 
8410 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8411 %{
8412   match(Set dst (AndI (AddI src minus_1) src) );
8413   predicate(UseBMI1Instructions);
8414   effect(KILL cr);
8415 
8416   format %{ "BLSRL  $dst, $src" %}
8417 
8418   ins_encode %{
8419     __ blsrl($dst$$Register, $src$$Register);
8420   %}
8421 
8422   ins_pipe(ialu_reg);
8423 %}
8424 
8425 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8426 %{
8427   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8428   predicate(UseBMI1Instructions);
8429   effect(KILL cr);
8430 
8431   ins_cost(125);
8432   format %{ "BLSRL  $dst, $src" %}
8433 
8434   ins_encode %{
8435     __ blsrl($dst$$Register, $src$$Address);
8436   %}
8437 
8438   ins_pipe(ialu_reg_mem);
8439 %}
8440 
8441 // Or Instructions
8442 // Or Register with Register
8443 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8444   match(Set dst (OrI dst src));
8445   effect(KILL cr);
8446 
8447   size(2);
8448   format %{ "OR     $dst,$src" %}
8449   opcode(0x0B);
8450   ins_encode( OpcP, RegReg( dst, src) );
8451   ins_pipe( ialu_reg_reg );
8452 %}
8453 
8454 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8455   match(Set dst (OrI dst (CastP2X src)));
8456   effect(KILL cr);
8457 
8458   size(2);
8459   format %{ "OR     $dst,$src" %}
8460   opcode(0x0B);
8461   ins_encode( OpcP, RegReg( dst, src) );
8462   ins_pipe( ialu_reg_reg );
8463 %}
8464 
8465 
8466 // Or Register with Immediate
8467 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8468   match(Set dst (OrI dst src));
8469   effect(KILL cr);
8470 
8471   format %{ "OR     $dst,$src" %}
8472   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8473   // ins_encode( RegImm( dst, src) );
8474   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8475   ins_pipe( ialu_reg );
8476 %}
8477 
8478 // Or Register with Memory
8479 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8480   match(Set dst (OrI dst (LoadI src)));
8481   effect(KILL cr);
8482 
8483   ins_cost(125);
8484   format %{ "OR     $dst,$src" %}
8485   opcode(0x0B);
8486   ins_encode( OpcP, RegMem( dst, src) );
8487   ins_pipe( ialu_reg_mem );
8488 %}
8489 
8490 // Or Memory with Register
8491 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8492   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8493   effect(KILL cr);
8494 
8495   ins_cost(150);
8496   format %{ "OR     $dst,$src" %}
8497   opcode(0x09);  /* Opcode 09 /r */
8498   ins_encode( OpcP, RegMem( src, dst ) );
8499   ins_pipe( ialu_mem_reg );
8500 %}
8501 
8502 // Or Memory with Immediate
8503 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8504   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8505   effect(KILL cr);
8506 
8507   ins_cost(125);
8508   format %{ "OR     $dst,$src" %}
8509   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8510   // ins_encode( MemImm( dst, src) );
8511   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8512   ins_pipe( ialu_mem_imm );
8513 %}
8514 
8515 // ROL/ROR
8516 // ROL expand
8517 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8518   effect(USE_DEF dst, USE shift, KILL cr);
8519 
8520   format %{ "ROL    $dst, $shift" %}
8521   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8522   ins_encode( OpcP, RegOpc( dst ));
8523   ins_pipe( ialu_reg );
8524 %}
8525 
8526 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8527   effect(USE_DEF dst, USE shift, KILL cr);
8528 
8529   format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8531   ins_encode( RegOpcImm(dst, shift) );
8532   ins_pipe(ialu_reg);
8533 %}
8534 
8535 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8536   effect(USE_DEF dst, USE shift, KILL cr);
8537 
8538   format %{ "ROL    $dst, $shift" %}
8539   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8540   ins_encode(OpcP, RegOpc(dst));
8541   ins_pipe( ialu_reg_reg );
8542 %}
8543 // end of ROL expand
8544 
8545 // ROL 32bit by one once
8546 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8547   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8548 
8549   expand %{
8550     rolI_eReg_imm1(dst, lshift, cr);
8551   %}
8552 %}
8553 
8554 // ROL 32bit var by imm8 once
8555 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8556   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8557   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8558 
8559   expand %{
8560     rolI_eReg_imm8(dst, lshift, cr);
8561   %}
8562 %}
8563 
8564 // ROL 32bit var by var once
8565 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8566   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8567 
8568   expand %{
8569     rolI_eReg_CL(dst, shift, cr);
8570   %}
8571 %}
8572 
8573 // ROL 32bit var by var once
8574 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8575   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8576 
8577   expand %{
8578     rolI_eReg_CL(dst, shift, cr);
8579   %}
8580 %}
8581 
8582 // ROR expand
8583 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8584   effect(USE_DEF dst, USE shift, KILL cr);
8585 
8586   format %{ "ROR    $dst, $shift" %}
8587   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8588   ins_encode( OpcP, RegOpc( dst ) );
8589   ins_pipe( ialu_reg );
8590 %}
8591 
8592 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8593   effect (USE_DEF dst, USE shift, KILL cr);
8594 
8595   format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8597   ins_encode( RegOpcImm(dst, shift) );
8598   ins_pipe( ialu_reg );
8599 %}
8600 
8601 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8602   effect(USE_DEF dst, USE shift, KILL cr);
8603 
8604   format %{ "ROR    $dst, $shift" %}
8605   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8606   ins_encode(OpcP, RegOpc(dst));
8607   ins_pipe( ialu_reg_reg );
8608 %}
8609 // end of ROR expand
8610 
8611 // ROR right once
8612 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8613   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8614 
8615   expand %{
8616     rorI_eReg_imm1(dst, rshift, cr);
8617   %}
8618 %}
8619 
8620 // ROR 32bit by immI8 once
8621 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8622   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8623   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8624 
8625   expand %{
8626     rorI_eReg_imm8(dst, rshift, cr);
8627   %}
8628 %}
8629 
8630 // ROR 32bit var by var once
8631 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8632   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8633 
8634   expand %{
8635     rorI_eReg_CL(dst, shift, cr);
8636   %}
8637 %}
8638 
8639 // ROR 32bit var by var once
8640 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8641   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8642 
8643   expand %{
8644     rorI_eReg_CL(dst, shift, cr);
8645   %}
8646 %}
8647 
8648 // Xor Instructions
8649 // Xor Register with Register
8650 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8651   match(Set dst (XorI dst src));
8652   effect(KILL cr);
8653 
8654   size(2);
8655   format %{ "XOR    $dst,$src" %}
8656   opcode(0x33);
8657   ins_encode( OpcP, RegReg( dst, src) );
8658   ins_pipe( ialu_reg_reg );
8659 %}
8660 
8661 // Xor Register with Immediate -1
8662 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8663   match(Set dst (XorI dst imm));
8664 
8665   size(2);
8666   format %{ "NOT    $dst" %}
8667   ins_encode %{
8668      __ notl($dst$$Register);
8669   %}
8670   ins_pipe( ialu_reg );
8671 %}
8672 
8673 // Xor Register with Immediate
8674 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8675   match(Set dst (XorI dst src));
8676   effect(KILL cr);
8677 
8678   format %{ "XOR    $dst,$src" %}
8679   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8680   // ins_encode( RegImm( dst, src) );
8681   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8682   ins_pipe( ialu_reg );
8683 %}
8684 
8685 // Xor Register with Memory
8686 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8687   match(Set dst (XorI dst (LoadI src)));
8688   effect(KILL cr);
8689 
8690   ins_cost(125);
8691   format %{ "XOR    $dst,$src" %}
8692   opcode(0x33);
8693   ins_encode( OpcP, RegMem(dst, src) );
8694   ins_pipe( ialu_reg_mem );
8695 %}
8696 
8697 // Xor Memory with Register
8698 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8699   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8700   effect(KILL cr);
8701 
8702   ins_cost(150);
8703   format %{ "XOR    $dst,$src" %}
8704   opcode(0x31);  /* Opcode 31 /r */
8705   ins_encode( OpcP, RegMem( src, dst ) );
8706   ins_pipe( ialu_mem_reg );
8707 %}
8708 
8709 // Xor Memory with Immediate
8710 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8711   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8712   effect(KILL cr);
8713 
8714   ins_cost(125);
8715   format %{ "XOR    $dst,$src" %}
8716   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8717   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8718   ins_pipe( ialu_mem_imm );
8719 %}
8720 
8721 //----------Convert Int to Boolean---------------------------------------------
8722 
8723 instruct movI_nocopy(rRegI dst, rRegI src) %{
8724   effect( DEF dst, USE src );
8725   format %{ "MOV    $dst,$src" %}
8726   ins_encode( enc_Copy( dst, src) );
8727   ins_pipe( ialu_reg_reg );
8728 %}
8729 
8730 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8731   effect( USE_DEF dst, USE src, KILL cr );
8732 
8733   size(4);
8734   format %{ "NEG    $dst\n\t"
8735             "ADC    $dst,$src" %}
8736   ins_encode( neg_reg(dst),
8737               OpcRegReg(0x13,dst,src) );
8738   ins_pipe( ialu_reg_reg_long );
8739 %}
8740 
8741 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8742   match(Set dst (Conv2B src));
8743 
8744   expand %{
8745     movI_nocopy(dst,src);
8746     ci2b(dst,src,cr);
8747   %}
8748 %}
8749 
8750 instruct movP_nocopy(rRegI dst, eRegP src) %{
8751   effect( DEF dst, USE src );
8752   format %{ "MOV    $dst,$src" %}
8753   ins_encode( enc_Copy( dst, src) );
8754   ins_pipe( ialu_reg_reg );
8755 %}
8756 
8757 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8758   effect( USE_DEF dst, USE src, KILL cr );
8759   format %{ "NEG    $dst\n\t"
8760             "ADC    $dst,$src" %}
8761   ins_encode( neg_reg(dst),
8762               OpcRegReg(0x13,dst,src) );
8763   ins_pipe( ialu_reg_reg_long );
8764 %}
8765 
8766 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8767   match(Set dst (Conv2B src));
8768 
8769   expand %{
8770     movP_nocopy(dst,src);
8771     cp2b(dst,src,cr);
8772   %}
8773 %}
8774 
8775 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8776   match(Set dst (CmpLTMask p q));
8777   effect(KILL cr);
8778   ins_cost(400);
8779 
  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as its destination
8781   format %{ "XOR    $dst,$dst\n\t"
8782             "CMP    $p,$q\n\t"
8783             "SETlt  $dst\n\t"
8784             "NEG    $dst" %}
8785   ins_encode %{
8786     Register Rp = $p$$Register;
8787     Register Rq = $q$$Register;
8788     Register Rd = $dst$$Register;
8790     __ xorl(Rd, Rd);
8791     __ cmpl(Rp, Rq);
8792     __ setb(Assembler::less, Rd);
8793     __ negl(Rd);
8794   %}
8795 
8796   ins_pipe(pipe_slow);
8797 %}
8798 
8799 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8800   match(Set dst (CmpLTMask dst zero));
8801   effect(DEF dst, KILL cr);
8802   ins_cost(100);
8803 
8804   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8805   ins_encode %{
8806   __ sarl($dst$$Register, 31);
8807   %}
8808   ins_pipe(ialu_reg);
8809 %}
8810 
8811 /* better to save a register than avoid a branch */
8812 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8813   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8814   effect(KILL cr);
8815   ins_cost(400);
8816   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8817             "JGE    done\n\t"
8818             "ADD    $p,$y\n"
8819             "done:  " %}
8820   ins_encode %{
8821     Register Rp = $p$$Register;
8822     Register Rq = $q$$Register;
8823     Register Ry = $y$$Register;
8824     Label done;
8825     __ subl(Rp, Rq);
8826     __ jccb(Assembler::greaterEqual, done);
8827     __ addl(Rp, Ry);
8828     __ bind(done);
8829   %}
8830 
8831   ins_pipe(pipe_cmplt);
8832 %}
8833 
8834 /* better to save a register than avoid a branch */
8835 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8836   match(Set y (AndI (CmpLTMask p q) y));
8837   effect(KILL cr);
8838 
8839   ins_cost(300);
8840 
8841   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8842             "JLT      done\n\t"
8843             "XORL     $y, $y\n"
8844             "done:  " %}
8845   ins_encode %{
8846     Register Rp = $p$$Register;
8847     Register Rq = $q$$Register;
8848     Register Ry = $y$$Register;
8849     Label done;
8850     __ cmpl(Rp, Rq);
8851     __ jccb(Assembler::less, done);
8852     __ xorl(Ry, Ry);
8853     __ bind(done);
8854   %}
8855 
8856   ins_pipe(pipe_cmplt);
8857 %}
8858 
8859 /* If I enable this, I encourage spilling in the inner loop of compress.
8860 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8861   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8862 */
8863 //----------Overflow Math Instructions-----------------------------------------
8864 
8865 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8866 %{
8867   match(Set cr (OverflowAddI op1 op2));
8868   effect(DEF cr, USE_KILL op1, USE op2);
8869 
8870   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8871 
8872   ins_encode %{
8873     __ addl($op1$$Register, $op2$$Register);
8874   %}
8875   ins_pipe(ialu_reg_reg);
8876 %}
8877 
8878 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8879 %{
8880   match(Set cr (OverflowAddI op1 op2));
8881   effect(DEF cr, USE_KILL op1, USE op2);
8882 
8883   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8884 
8885   ins_encode %{
8886     __ addl($op1$$Register, $op2$$constant);
8887   %}
8888   ins_pipe(ialu_reg_reg);
8889 %}
8890 
8891 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8892 %{
8893   match(Set cr (OverflowSubI op1 op2));
8894 
8895   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8896   ins_encode %{
8897     __ cmpl($op1$$Register, $op2$$Register);
8898   %}
8899   ins_pipe(ialu_reg_reg);
8900 %}
8901 
8902 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8903 %{
8904   match(Set cr (OverflowSubI op1 op2));
8905 
8906   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8907   ins_encode %{
8908     __ cmpl($op1$$Register, $op2$$constant);
8909   %}
8910   ins_pipe(ialu_reg_reg);
8911 %}
8912 
8913 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8914 %{
8915   match(Set cr (OverflowSubI zero op2));
8916   effect(DEF cr, USE_KILL op2);
8917 
8918   format %{ "NEG    $op2\t# overflow check int" %}
8919   ins_encode %{
8920     __ negl($op2$$Register);
8921   %}
8922   ins_pipe(ialu_reg_reg);
8923 %}
8924 
8925 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8926 %{
8927   match(Set cr (OverflowMulI op1 op2));
8928   effect(DEF cr, USE_KILL op1, USE op2);
8929 
8930   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8931   ins_encode %{
8932     __ imull($op1$$Register, $op2$$Register);
8933   %}
8934   ins_pipe(ialu_reg_reg_alu0);
8935 %}
8936 
8937 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8938 %{
8939   match(Set cr (OverflowMulI op1 op2));
8940   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8941 
8942   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8943   ins_encode %{
8944     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8945   %}
8946   ins_pipe(ialu_reg_reg_alu0);
8947 %}
8948 
8949 // Integer Absolute Instructions
8950 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
8951 %{
8952   match(Set dst (AbsI src));
8953   effect(TEMP dst, TEMP tmp, KILL cr);
8954   format %{ "movl $tmp, $src\n\t"
8955             "sarl $tmp, 31\n\t"
8956             "movl $dst, $src\n\t"
8957             "xorl $dst, $tmp\n\t"
8958             "subl $dst, $tmp\n"
8959           %}
8960   ins_encode %{
8961     __ movl($tmp$$Register, $src$$Register);
8962     __ sarl($tmp$$Register, 31);
8963     __ movl($dst$$Register, $src$$Register);
8964     __ xorl($dst$$Register, $tmp$$Register);
8965     __ subl($dst$$Register, $tmp$$Register);
8966   %}
8967 
8968   ins_pipe(ialu_reg_reg);
8969 %} 
8970 
8971 //----------Long Instructions------------------------------------------------
8972 // Add Long Register with Register
8973 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8974   match(Set dst (AddL dst src));
8975   effect(KILL cr);
8976   ins_cost(200);
8977   format %{ "ADD    $dst.lo,$src.lo\n\t"
8978             "ADC    $dst.hi,$src.hi" %}
8979   opcode(0x03, 0x13);
8980   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8981   ins_pipe( ialu_reg_reg_long );
8982 %}
8983 
8984 // Add Long Register with Immediate
8985 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8986   match(Set dst (AddL dst src));
8987   effect(KILL cr);
8988   format %{ "ADD    $dst.lo,$src.lo\n\t"
8989             "ADC    $dst.hi,$src.hi" %}
8990   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8991   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8992   ins_pipe( ialu_reg_long );
8993 %}
8994 
8995 // Add Long Register with Memory
8996 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8997   match(Set dst (AddL dst (LoadL mem)));
8998   effect(KILL cr);
8999   ins_cost(125);
9000   format %{ "ADD    $dst.lo,$mem\n\t"
9001             "ADC    $dst.hi,$mem+4" %}
9002   opcode(0x03, 0x13);
9003   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9004   ins_pipe( ialu_reg_long_mem );
9005 %}
9006 
9007 // Subtract Long Register with Register.
9008 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9009   match(Set dst (SubL dst src));
9010   effect(KILL cr);
9011   ins_cost(200);
9012   format %{ "SUB    $dst.lo,$src.lo\n\t"
9013             "SBB    $dst.hi,$src.hi" %}
9014   opcode(0x2B, 0x1B);
9015   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9016   ins_pipe( ialu_reg_reg_long );
9017 %}
9018 
9019 // Subtract Long Register with Immediate
9020 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9021   match(Set dst (SubL dst src));
9022   effect(KILL cr);
9023   format %{ "SUB    $dst.lo,$src.lo\n\t"
9024             "SBB    $dst.hi,$src.hi" %}
9025   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
9026   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9027   ins_pipe( ialu_reg_long );
9028 %}
9029 
9030 // Subtract Long Register with Memory
9031 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9032   match(Set dst (SubL dst (LoadL mem)));
9033   effect(KILL cr);
9034   ins_cost(125);
9035   format %{ "SUB    $dst.lo,$mem\n\t"
9036             "SBB    $dst.hi,$mem+4" %}
9037   opcode(0x2B, 0x1B);
9038   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9039   ins_pipe( ialu_reg_long_mem );
9040 %}
9041 
9042 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9043   match(Set dst (SubL zero dst));
9044   effect(KILL cr);
9045   ins_cost(300);
9046   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
9047   ins_encode( neg_long(dst) );
9048   ins_pipe( ialu_reg_reg_long );
9049 %}
9050 
9051 // And Long Register with Register
9052 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9053   match(Set dst (AndL dst src));
9054   effect(KILL cr);
9055   format %{ "AND    $dst.lo,$src.lo\n\t"
9056             "AND    $dst.hi,$src.hi" %}
9057   opcode(0x23,0x23);
9058   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9059   ins_pipe( ialu_reg_reg_long );
9060 %}
9061 
9062 // And Long Register with Immediate
9063 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9064   match(Set dst (AndL dst src));
9065   effect(KILL cr);
9066   format %{ "AND    $dst.lo,$src.lo\n\t"
9067             "AND    $dst.hi,$src.hi" %}
9068   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
9069   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9070   ins_pipe( ialu_reg_long );
9071 %}
9072 
9073 // And Long Register with Memory
9074 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9075   match(Set dst (AndL dst (LoadL mem)));
9076   effect(KILL cr);
9077   ins_cost(125);
9078   format %{ "AND    $dst.lo,$mem\n\t"
9079             "AND    $dst.hi,$mem+4" %}
9080   opcode(0x23, 0x23);
9081   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9082   ins_pipe( ialu_reg_long_mem );
9083 %}
9084 
9085 // BMI1 instructions
9086 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
9087   match(Set dst (AndL (XorL src1 minus_1) src2));
9088   predicate(UseBMI1Instructions);
9089   effect(KILL cr, TEMP dst);
9090 
9091   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
9092             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
9093          %}
9094 
9095   ins_encode %{
9096     Register Rdst = $dst$$Register;
9097     Register Rsrc1 = $src1$$Register;
9098     Register Rsrc2 = $src2$$Register;
9099     __ andnl(Rdst, Rsrc1, Rsrc2);
9100     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
9101   %}
9102   ins_pipe(ialu_reg_reg_long);
9103 %}
9104 
9105 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
9106   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
9107   predicate(UseBMI1Instructions);
9108   effect(KILL cr, TEMP dst);
9109 
9110   ins_cost(125);
9111   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
9112             "ANDNL  $dst.hi, $src1.hi, $src2+4"
9113          %}
9114 
9115   ins_encode %{
9116     Register Rdst = $dst$$Register;
9117     Register Rsrc1 = $src1$$Register;
9118     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
9119 
9120     __ andnl(Rdst, Rsrc1, $src2$$Address);
9121     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
9122   %}
9123   ins_pipe(ialu_reg_mem);
9124 %}
9125 
9126 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
9127   match(Set dst (AndL (SubL imm_zero src) src));
9128   predicate(UseBMI1Instructions);
9129   effect(KILL cr, TEMP dst);
9130 
9131   format %{ "MOVL   $dst.hi, 0\n\t"
9132             "BLSIL  $dst.lo, $src.lo\n\t"
9133             "JNZ    done\n\t"
9134             "BLSIL  $dst.hi, $src.hi\n"
9135             "done:"
9136          %}
9137 
9138   ins_encode %{
9139     Label done;
9140     Register Rdst = $dst$$Register;
9141     Register Rsrc = $src$$Register;
9142     __ movl(HIGH_FROM_LOW(Rdst), 0);
9143     __ blsil(Rdst, Rsrc);
9144     __ jccb(Assembler::notZero, done);
9145     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9146     __ bind(done);
9147   %}
9148   ins_pipe(ialu_reg);
9149 %}
9150 
9151 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9152   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9153   predicate(UseBMI1Instructions);
9154   effect(KILL cr, TEMP dst);
9155 
9156   ins_cost(125);
9157   format %{ "MOVL   $dst.hi, 0\n\t"
9158             "BLSIL  $dst.lo, $src\n\t"
9159             "JNZ    done\n\t"
9160             "BLSIL  $dst.hi, $src+4\n"
9161             "done:"
9162          %}
9163 
9164   ins_encode %{
9165     Label done;
9166     Register Rdst = $dst$$Register;
9167     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9168 
9169     __ movl(HIGH_FROM_LOW(Rdst), 0);
9170     __ blsil(Rdst, $src$$Address);
9171     __ jccb(Assembler::notZero, done);
9172     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9173     __ bind(done);
9174   %}
9175   ins_pipe(ialu_reg_mem);
9176 %}
9177 
9178 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9179 %{
9180   match(Set dst (XorL (AddL src minus_1) src));
9181   predicate(UseBMI1Instructions);
9182   effect(KILL cr, TEMP dst);
9183 
9184   format %{ "MOVL    $dst.hi, 0\n\t"
9185             "BLSMSKL $dst.lo, $src.lo\n\t"
9186             "JNC     done\n\t"
9187             "BLSMSKL $dst.hi, $src.hi\n"
9188             "done:"
9189          %}
9190 
9191   ins_encode %{
9192     Label done;
9193     Register Rdst = $dst$$Register;
9194     Register Rsrc = $src$$Register;
9195     __ movl(HIGH_FROM_LOW(Rdst), 0);
9196     __ blsmskl(Rdst, Rsrc);
9197     __ jccb(Assembler::carryClear, done);
9198     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9199     __ bind(done);
9200   %}
9201 
9202   ins_pipe(ialu_reg);
9203 %}
9204 
9205 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9206 %{
9207   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9208   predicate(UseBMI1Instructions);
9209   effect(KILL cr, TEMP dst);
9210 
9211   ins_cost(125);
9212   format %{ "MOVL    $dst.hi, 0\n\t"
9213             "BLSMSKL $dst.lo, $src\n\t"
9214             "JNC     done\n\t"
9215             "BLSMSKL $dst.hi, $src+4\n"
9216             "done:"
9217          %}
9218 
9219   ins_encode %{
9220     Label done;
9221     Register Rdst = $dst$$Register;
9222     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9223 
9224     __ movl(HIGH_FROM_LOW(Rdst), 0);
9225     __ blsmskl(Rdst, $src$$Address);
9226     __ jccb(Assembler::carryClear, done);
9227     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9228     __ bind(done);
9229   %}
9230 
9231   ins_pipe(ialu_reg_mem);
9232 %}
9233 
9234 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9235 %{
9236   match(Set dst (AndL (AddL src minus_1) src) );
9237   predicate(UseBMI1Instructions);
9238   effect(KILL cr, TEMP dst);
9239 
9240   format %{ "MOVL   $dst.hi, $src.hi\n\t"
9241             "BLSRL  $dst.lo, $src.lo\n\t"
9242             "JNC    done\n\t"
9243             "BLSRL  $dst.hi, $src.hi\n"
9244             "done:"
9245   %}
9246 
9247   ins_encode %{
9248     Label done;
9249     Register Rdst = $dst$$Register;
9250     Register Rsrc = $src$$Register;
9251     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9252     __ blsrl(Rdst, Rsrc);
9253     __ jccb(Assembler::carryClear, done);
9254     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9255     __ bind(done);
9256   %}
9257 
9258   ins_pipe(ialu_reg);
9259 %}
9260 
9261 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9262 %{
9263   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9264   predicate(UseBMI1Instructions);
9265   effect(KILL cr, TEMP dst);
9266 
9267   ins_cost(125);
9268   format %{ "MOVL   $dst.hi, $src+4\n\t"
9269             "BLSRL  $dst.lo, $src\n\t"
9270             "JNC    done\n\t"
9271             "BLSRL  $dst.hi, $src+4\n"
9272             "done:"
9273   %}
9274 
9275   ins_encode %{
9276     Label done;
9277     Register Rdst = $dst$$Register;
9278     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9279     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9280     __ blsrl(Rdst, $src$$Address);
9281     __ jccb(Assembler::carryClear, done);
9282     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9283     __ bind(done);
9284   %}
9285 
9286   ins_pipe(ialu_reg_mem);
9287 %}
9288 
9289 // Or Long Register with Register
9290 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9291   match(Set dst (OrL dst src));
9292   effect(KILL cr);
9293   format %{ "OR     $dst.lo,$src.lo\n\t"
9294             "OR     $dst.hi,$src.hi" %}
9295   opcode(0x0B,0x0B);
9296   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9297   ins_pipe( ialu_reg_reg_long );
9298 %}
9299 
9300 // Or Long Register with Immediate
9301 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9302   match(Set dst (OrL dst src));
9303   effect(KILL cr);
9304   format %{ "OR     $dst.lo,$src.lo\n\t"
9305             "OR     $dst.hi,$src.hi" %}
9306   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9307   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9308   ins_pipe( ialu_reg_long );
9309 %}
9310 
9311 // Or Long Register with Memory
9312 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9313   match(Set dst (OrL dst (LoadL mem)));
9314   effect(KILL cr);
9315   ins_cost(125);
9316   format %{ "OR     $dst.lo,$mem\n\t"
9317             "OR     $dst.hi,$mem+4" %}
9318   opcode(0x0B,0x0B);
9319   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9320   ins_pipe( ialu_reg_long_mem );
9321 %}
9322 
9323 // Xor Long Register with Register
9324 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9325   match(Set dst (XorL dst src));
9326   effect(KILL cr);
9327   format %{ "XOR    $dst.lo,$src.lo\n\t"
9328             "XOR    $dst.hi,$src.hi" %}
9329   opcode(0x33,0x33);
9330   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9331   ins_pipe( ialu_reg_reg_long );
9332 %}
9333 
9334 // Xor Long Register with Immediate -1
9335 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9336   match(Set dst (XorL dst imm));
9337   format %{ "NOT    $dst.lo\n\t"
9338             "NOT    $dst.hi" %}
9339   ins_encode %{
9340      __ notl($dst$$Register);
9341      __ notl(HIGH_FROM_LOW($dst$$Register));
9342   %}
9343   ins_pipe( ialu_reg_long );
9344 %}
9345 
9346 // Xor Long Register with Immediate
9347 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9348   match(Set dst (XorL dst src));
9349   effect(KILL cr);
9350   format %{ "XOR    $dst.lo,$src.lo\n\t"
9351             "XOR    $dst.hi,$src.hi" %}
9352   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9353   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9354   ins_pipe( ialu_reg_long );
9355 %}
9356 
9357 // Xor Long Register with Memory
9358 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9359   match(Set dst (XorL dst (LoadL mem)));
9360   effect(KILL cr);
9361   ins_cost(125);
9362   format %{ "XOR    $dst.lo,$mem\n\t"
9363             "XOR    $dst.hi,$mem+4" %}
9364   opcode(0x33,0x33);
9365   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9366   ins_pipe( ialu_reg_long_mem );
9367 %}
9368 
9369 // Shift Left Long by 1
9370 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9371   predicate(UseNewLongLShift);
9372   match(Set dst (LShiftL dst cnt));
9373   effect(KILL cr);
9374   ins_cost(100);
9375   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9376             "ADC    $dst.hi,$dst.hi" %}
9377   ins_encode %{
9378     __ addl($dst$$Register,$dst$$Register);
9379     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9380   %}
9381   ins_pipe( ialu_reg_long );
9382 %}
9383 
9384 // Shift Left Long by 2
9385 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9386   predicate(UseNewLongLShift);
9387   match(Set dst (LShiftL dst cnt));
9388   effect(KILL cr);
9389   ins_cost(100);
9390   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9391             "ADC    $dst.hi,$dst.hi\n\t"
9392             "ADD    $dst.lo,$dst.lo\n\t"
9393             "ADC    $dst.hi,$dst.hi" %}
9394   ins_encode %{
9395     __ addl($dst$$Register,$dst$$Register);
9396     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9397     __ addl($dst$$Register,$dst$$Register);
9398     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9399   %}
9400   ins_pipe( ialu_reg_long );
9401 %}
9402 
9403 // Shift Left Long by 3
9404 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9405   predicate(UseNewLongLShift);
9406   match(Set dst (LShiftL dst cnt));
9407   effect(KILL cr);
9408   ins_cost(100);
9409   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9410             "ADC    $dst.hi,$dst.hi\n\t"
9411             "ADD    $dst.lo,$dst.lo\n\t"
9412             "ADC    $dst.hi,$dst.hi\n\t"
9413             "ADD    $dst.lo,$dst.lo\n\t"
9414             "ADC    $dst.hi,$dst.hi" %}
9415   ins_encode %{
9416     __ addl($dst$$Register,$dst$$Register);
9417     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9418     __ addl($dst$$Register,$dst$$Register);
9419     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9420     __ addl($dst$$Register,$dst$$Register);
9421     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9422   %}
9423   ins_pipe( ialu_reg_long );
9424 %}
9425 
9426 // Shift Left Long by 1-31
9427 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9428   match(Set dst (LShiftL dst cnt));
9429   effect(KILL cr);
9430   ins_cost(200);
9431   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9432             "SHL    $dst.lo,$cnt" %}
9433   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9434   ins_encode( move_long_small_shift(dst,cnt) );
9435   ins_pipe( ialu_reg_long );
9436 %}
9437 
9438 // Shift Left Long by 32-63
9439 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9440   match(Set dst (LShiftL dst cnt));
9441   effect(KILL cr);
9442   ins_cost(300);
9443   format %{ "MOV    $dst.hi,$dst.lo\n"
9444           "\tSHL    $dst.hi,$cnt-32\n"
9445           "\tXOR    $dst.lo,$dst.lo" %}
9446   opcode(0xC1, 0x4);  /* C1 /4 ib */
9447   ins_encode( move_long_big_shift_clr(dst,cnt) );
9448   ins_pipe( ialu_reg_long );
9449 %}
9450 
9451 // Shift Left Long by variable
9452 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9453   match(Set dst (LShiftL dst shift));
9454   effect(KILL cr);
9455   ins_cost(500+200);
9456   size(17);
9457   format %{ "TEST   $shift,32\n\t"
9458             "JEQ,s  small\n\t"
9459             "MOV    $dst.hi,$dst.lo\n\t"
9460             "XOR    $dst.lo,$dst.lo\n"
9461     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9462             "SHL    $dst.lo,$shift" %}
9463   ins_encode( shift_left_long( dst, shift ) );
9464   ins_pipe( pipe_slow );
9465 %}
9466 
9467 // Shift Right Long by 1-31
9468 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9469   match(Set dst (URShiftL dst cnt));
9470   effect(KILL cr);
9471   ins_cost(200);
9472   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9473             "SHR    $dst.hi,$cnt" %}
9474   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9475   ins_encode( move_long_small_shift(dst,cnt) );
9476   ins_pipe( ialu_reg_long );
9477 %}
9478 
9479 // Shift Right Long by 32-63
9480 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9481   match(Set dst (URShiftL dst cnt));
9482   effect(KILL cr);
9483   ins_cost(300);
9484   format %{ "MOV    $dst.lo,$dst.hi\n"
9485           "\tSHR    $dst.lo,$cnt-32\n"
9486           "\tXOR    $dst.hi,$dst.hi" %}
9487   opcode(0xC1, 0x5);  /* C1 /5 ib */
9488   ins_encode( move_long_big_shift_clr(dst,cnt) );
9489   ins_pipe( ialu_reg_long );
9490 %}
9491 
9492 // Shift Right Long by variable
9493 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9494   match(Set dst (URShiftL dst shift));
9495   effect(KILL cr);
9496   ins_cost(600);
9497   size(17);
9498   format %{ "TEST   $shift,32\n\t"
9499             "JEQ,s  small\n\t"
9500             "MOV    $dst.lo,$dst.hi\n\t"
9501             "XOR    $dst.hi,$dst.hi\n"
9502     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9503             "SHR    $dst.hi,$shift" %}
9504   ins_encode( shift_right_long( dst, shift ) );
9505   ins_pipe( pipe_slow );
9506 %}
9507 
9508 // Shift Right Long by 1-31
9509 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9510   match(Set dst (RShiftL dst cnt));
9511   effect(KILL cr);
9512   ins_cost(200);
9513   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9514             "SAR    $dst.hi,$cnt" %}
9515   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9516   ins_encode( move_long_small_shift(dst,cnt) );
9517   ins_pipe( ialu_reg_long );
9518 %}
9519 
9520 // Shift Right Long by 32-63
9521 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9522   match(Set dst (RShiftL dst cnt));
9523   effect(KILL cr);
9524   ins_cost(300);
9525   format %{ "MOV    $dst.lo,$dst.hi\n"
9526           "\tSAR    $dst.lo,$cnt-32\n"
9527           "\tSAR    $dst.hi,31" %}
9528   opcode(0xC1, 0x7);  /* C1 /7 ib */
9529   ins_encode( move_long_big_shift_sign(dst,cnt) );
9530   ins_pipe( ialu_reg_long );
9531 %}
9532 
9533 // Shift Right arithmetic Long by variable
9534 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9535   match(Set dst (RShiftL dst shift));
9536   effect(KILL cr);
9537   ins_cost(600);
9538   size(18);
9539   format %{ "TEST   $shift,32\n\t"
9540             "JEQ,s  small\n\t"
9541             "MOV    $dst.lo,$dst.hi\n\t"
9542             "SAR    $dst.hi,31\n"
9543     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9544             "SAR    $dst.hi,$shift" %}
9545   ins_encode( shift_right_arith_long( dst, shift ) );
9546   ins_pipe( pipe_slow );
9547 %}
9548 
9549 
9550 //----------Double Instructions------------------------------------------------
9551 // Double Math
9552 
9553 // Compare & branch
9554 
9555 // P6 version of float compare, sets condition codes in EFLAGS
9556 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9557   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9558   match(Set cr (CmpD src1 src2));
9559   effect(KILL rax);
9560   ins_cost(150);
9561   format %{ "FLD    $src1\n\t"
9562             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9563             "JNP    exit\n\t"
9564             "MOV    ah,1       // saw a NaN, set CF\n\t"
9565             "SAHF\n"
9566      "exit:\tNOP               // avoid branch to branch" %}
9567   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9568   ins_encode( Push_Reg_DPR(src1),
9569               OpcP, RegOpc(src2),
9570               cmpF_P6_fixup );
9571   ins_pipe( pipe_slow );
9572 %}
9573 
9574 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9575   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9576   match(Set cr (CmpD src1 src2));
9577   ins_cost(150);
9578   format %{ "FLD    $src1\n\t"
9579             "FUCOMIP ST,$src2  // P6 instruction" %}
9580   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9581   ins_encode( Push_Reg_DPR(src1),
9582               OpcP, RegOpc(src2));
9583   ins_pipe( pipe_slow );
9584 %}
9585 
9586 // Compare & branch
9587 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9588   predicate(UseSSE<=1);
9589   match(Set cr (CmpD src1 src2));
9590   effect(KILL rax);
9591   ins_cost(200);
9592   format %{ "FLD    $src1\n\t"
9593             "FCOMp  $src2\n\t"
9594             "FNSTSW AX\n\t"
9595             "TEST   AX,0x400\n\t"
9596             "JZ,s   flags\n\t"
9597             "MOV    AH,1\t# unordered treat as LT\n"
9598     "flags:\tSAHF" %}
9599   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9600   ins_encode( Push_Reg_DPR(src1),
9601               OpcP, RegOpc(src2),
9602               fpu_flags);
9603   ins_pipe( pipe_slow );
9604 %}
9605 
9606 // Compare vs zero into -1,0,1
9607 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9608   predicate(UseSSE<=1);
9609   match(Set dst (CmpD3 src1 zero));
9610   effect(KILL cr, KILL rax);
9611   ins_cost(280);
9612   format %{ "FTSTD  $dst,$src1" %}
9613   opcode(0xE4, 0xD9);
9614   ins_encode( Push_Reg_DPR(src1),
9615               OpcS, OpcP, PopFPU,
9616               CmpF_Result(dst));
9617   ins_pipe( pipe_slow );
9618 %}
9619 
9620 // Compare into -1,0,1
9621 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9622   predicate(UseSSE<=1);
9623   match(Set dst (CmpD3 src1 src2));
9624   effect(KILL cr, KILL rax);
9625   ins_cost(300);
9626   format %{ "FCMPD  $dst,$src1,$src2" %}
9627   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9628   ins_encode( Push_Reg_DPR(src1),
9629               OpcP, RegOpc(src2),
9630               CmpF_Result(dst));
9631   ins_pipe( pipe_slow );
9632 %}
9633 
9634 // float compare and set condition codes in EFLAGS by XMM regs
9635 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9636   predicate(UseSSE>=2);
9637   match(Set cr (CmpD src1 src2));
9638   ins_cost(145);
9639   format %{ "UCOMISD $src1,$src2\n\t"
9640             "JNP,s   exit\n\t"
9641             "PUSHF\t# saw NaN, set CF\n\t"
9642             "AND     [rsp], #0xffffff2b\n\t"
9643             "POPF\n"
9644     "exit:" %}
9645   ins_encode %{
9646     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9647     emit_cmpfp_fixup(_masm);
9648   %}
9649   ins_pipe( pipe_slow );
9650 %}
9651 
9652 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9653   predicate(UseSSE>=2);
9654   match(Set cr (CmpD src1 src2));
9655   ins_cost(100);
9656   format %{ "UCOMISD $src1,$src2" %}
9657   ins_encode %{
9658     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9659   %}
9660   ins_pipe( pipe_slow );
9661 %}
9662 
9663 // float compare and set condition codes in EFLAGS by XMM regs
9664 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9665   predicate(UseSSE>=2);
9666   match(Set cr (CmpD src1 (LoadD src2)));
9667   ins_cost(145);
9668   format %{ "UCOMISD $src1,$src2\n\t"
9669             "JNP,s   exit\n\t"
9670             "PUSHF\t# saw NaN, set CF\n\t"
9671             "AND     [rsp], #0xffffff2b\n\t"
9672             "POPF\n"
9673     "exit:" %}
9674   ins_encode %{
9675     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9676     emit_cmpfp_fixup(_masm);
9677   %}
9678   ins_pipe( pipe_slow );
9679 %}
9680 
9681 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9682   predicate(UseSSE>=2);
9683   match(Set cr (CmpD src1 (LoadD src2)));
9684   ins_cost(100);
9685   format %{ "UCOMISD $src1,$src2" %}
9686   ins_encode %{
9687     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9688   %}
9689   ins_pipe( pipe_slow );
9690 %}
9691 
9692 // Compare into -1,0,1 in XMM
9693 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9694   predicate(UseSSE>=2);
9695   match(Set dst (CmpD3 src1 src2));
9696   effect(KILL cr);
9697   ins_cost(255);
9698   format %{ "UCOMISD $src1, $src2\n\t"
9699             "MOV     $dst, #-1\n\t"
9700             "JP,s    done\n\t"
9701             "JB,s    done\n\t"
9702             "SETNE   $dst\n\t"
9703             "MOVZB   $dst, $dst\n"
9704     "done:" %}
9705   ins_encode %{
9706     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9707     emit_cmpfp3(_masm, $dst$$Register);
9708   %}
9709   ins_pipe( pipe_slow );
9710 %}
9711 
9712 // Compare into -1,0,1 in XMM and memory
9713 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9714   predicate(UseSSE>=2);
9715   match(Set dst (CmpD3 src1 (LoadD src2)));
9716   effect(KILL cr);
9717   ins_cost(275);
9718   format %{ "UCOMISD $src1, $src2\n\t"
9719             "MOV     $dst, #-1\n\t"
9720             "JP,s    done\n\t"
9721             "JB,s    done\n\t"
9722             "SETNE   $dst\n\t"
9723             "MOVZB   $dst, $dst\n"
9724     "done:" %}
9725   ins_encode %{
9726     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9727     emit_cmpfp3(_masm, $dst$$Register);
9728   %}
9729   ins_pipe( pipe_slow );
9730 %}
9731 
9732 
9733 instruct subDPR_reg(regDPR dst, regDPR src) %{
9734   predicate (UseSSE <=1);
9735   match(Set dst (SubD dst src));
9736 
9737   format %{ "FLD    $src\n\t"
9738             "DSUBp  $dst,ST" %}
9739   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9740   ins_cost(150);
9741   ins_encode( Push_Reg_DPR(src),
9742               OpcP, RegOpc(dst) );
9743   ins_pipe( fpu_reg_reg );
9744 %}
9745 
9746 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9747   predicate (UseSSE <=1);
9748   match(Set dst (RoundDouble (SubD src1 src2)));
9749   ins_cost(250);
9750 
9751   format %{ "FLD    $src2\n\t"
9752             "DSUB   ST,$src1\n\t"
9753             "FSTP_D $dst\t# D-round" %}
9754   opcode(0xD8, 0x5);
9755   ins_encode( Push_Reg_DPR(src2),
9756               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9757   ins_pipe( fpu_mem_reg_reg );
9758 %}
9759 
9760 
9761 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9762   predicate (UseSSE <=1);
9763   match(Set dst (SubD dst (LoadD src)));
9764   ins_cost(150);
9765 
9766   format %{ "FLD    $src\n\t"
9767             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
9769   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9770               OpcP, RegOpc(dst) );
9771   ins_pipe( fpu_reg_mem );
9772 %}
9773 
9774 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9775   predicate (UseSSE<=1);
9776   match(Set dst (AbsD src));
9777   ins_cost(100);
9778   format %{ "FABS" %}
9779   opcode(0xE1, 0xD9);
9780   ins_encode( OpcS, OpcP );
9781   ins_pipe( fpu_reg_reg );
9782 %}
9783 
9784 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9785   predicate(UseSSE<=1);
9786   match(Set dst (NegD src));
9787   ins_cost(100);
9788   format %{ "FCHS" %}
9789   opcode(0xE0, 0xD9);
9790   ins_encode( OpcS, OpcP );
9791   ins_pipe( fpu_reg_reg );
9792 %}
9793 
9794 instruct addDPR_reg(regDPR dst, regDPR src) %{
9795   predicate(UseSSE<=1);
9796   match(Set dst (AddD dst src));
9797   format %{ "FLD    $src\n\t"
9798             "DADD   $dst,ST" %}
9799   size(4);
9800   ins_cost(150);
9801   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9802   ins_encode( Push_Reg_DPR(src),
9803               OpcP, RegOpc(dst) );
9804   ins_pipe( fpu_reg_reg );
9805 %}
9806 
9807 
9808 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9809   predicate(UseSSE<=1);
9810   match(Set dst (RoundDouble (AddD src1 src2)));
9811   ins_cost(250);
9812 
9813   format %{ "FLD    $src2\n\t"
9814             "DADD   ST,$src1\n\t"
9815             "FSTP_D $dst\t# D-round" %}
9816   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9817   ins_encode( Push_Reg_DPR(src2),
9818               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9819   ins_pipe( fpu_mem_reg_reg );
9820 %}
9821 
9822 
9823 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9824   predicate(UseSSE<=1);
9825   match(Set dst (AddD dst (LoadD src)));
9826   ins_cost(150);
9827 
9828   format %{ "FLD    $src\n\t"
9829             "DADDp  $dst,ST" %}
9830   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9831   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9832               OpcP, RegOpc(dst) );
9833   ins_pipe( fpu_reg_mem );
9834 %}
9835 
9836 // add-to-memory
9837 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9838   predicate(UseSSE<=1);
9839   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9840   ins_cost(150);
9841 
9842   format %{ "FLD_D  $dst\n\t"
9843             "DADD   ST,$src\n\t"
9844             "FST_D  $dst" %}
9845   opcode(0xDD, 0x0);
9846   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9847               Opcode(0xD8), RegOpc(src),
9848               set_instruction_start,
9849               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9850   ins_pipe( fpu_reg_mem );
9851 %}
9852 
9853 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9854   predicate(UseSSE<=1);
9855   match(Set dst (AddD dst con));
9856   ins_cost(125);
9857   format %{ "FLD1\n\t"
9858             "DADDp  $dst,ST" %}
9859   ins_encode %{
9860     __ fld1();
9861     __ faddp($dst$$reg);
9862   %}
9863   ins_pipe(fpu_reg);
9864 %}
9865 
9866 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9867   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9868   match(Set dst (AddD dst con));
9869   ins_cost(200);
9870   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9871             "DADDp  $dst,ST" %}
9872   ins_encode %{
9873     __ fld_d($constantaddress($con));
9874     __ faddp($dst$$reg);
9875   %}
9876   ins_pipe(fpu_reg_mem);
9877 %}
9878 
9879 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9880   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9881   match(Set dst (RoundDouble (AddD src con)));
9882   ins_cost(200);
9883   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9884             "DADD   ST,$src\n\t"
9885             "FSTP_D $dst\t# D-round" %}
9886   ins_encode %{
9887     __ fld_d($constantaddress($con));
9888     __ fadd($src$$reg);
9889     __ fstp_d(Address(rsp, $dst$$disp));
9890   %}
9891   ins_pipe(fpu_mem_reg_con);
9892 %}
9893 
9894 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9895   predicate(UseSSE<=1);
9896   match(Set dst (MulD dst src));
9897   format %{ "FLD    $src\n\t"
9898             "DMULp  $dst,ST" %}
9899   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9900   ins_cost(150);
9901   ins_encode( Push_Reg_DPR(src),
9902               OpcP, RegOpc(dst) );
9903   ins_pipe( fpu_reg_reg );
9904 %}
9905 
9906 // Strict FP instruction biases argument before multiply then
9907 // biases result to avoid double rounding of subnormals.
9908 //
9909 // scale arg1 by multiplying arg1 by 2^(-15360)
9910 // load arg2
9911 // multiply scaled arg1 by arg2
9912 // rescale product by 2^(15360)
9913 //
9914 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9915   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9916   match(Set dst (MulD dst src));
9917   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9918 
9919   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9920             "DMULp  $dst,ST\n\t"
9921             "FLD    $src\n\t"
9922             "DMULp  $dst,ST\n\t"
9923             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9924             "DMULp  $dst,ST\n\t" %}
9925   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9926   ins_encode( strictfp_bias1(dst),
9927               Push_Reg_DPR(src),
9928               OpcP, RegOpc(dst),
9929               strictfp_bias2(dst) );
9930   ins_pipe( fpu_reg_reg );
9931 %}
9932 
9933 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9934   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9935   match(Set dst (MulD dst con));
9936   ins_cost(200);
9937   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9938             "DMULp  $dst,ST" %}
9939   ins_encode %{
9940     __ fld_d($constantaddress($con));
9941     __ fmulp($dst$$reg);
9942   %}
9943   ins_pipe(fpu_reg_mem);
9944 %}
9945 
9946 
9947 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9948   predicate( UseSSE<=1 );
9949   match(Set dst (MulD dst (LoadD src)));
9950   ins_cost(200);
9951   format %{ "FLD_D  $src\n\t"
9952             "DMULp  $dst,ST" %}
9953   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9954   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9955               OpcP, RegOpc(dst) );
9956   ins_pipe( fpu_reg_mem );
9957 %}
9958 
9959 //
9960 // Cisc-alternate to reg-reg multiply
9961 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9962   predicate( UseSSE<=1 );
9963   match(Set dst (MulD src (LoadD mem)));
9964   ins_cost(250);
9965   format %{ "FLD_D  $mem\n\t"
9966             "DMUL   ST,$src\n\t"
9967             "FSTP_D $dst" %}
9968   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9969   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9970               OpcReg_FPR(src),
9971               Pop_Reg_DPR(dst) );
9972   ins_pipe( fpu_reg_reg_mem );
9973 %}
9974 
9975 
9976 // MACRO3 -- addDPR a mulDPR
9977 // This instruction is a '2-address' instruction in that the result goes
9978 // back to src2.  This eliminates a move from the macro; possibly the
9979 // register allocator will have to add it back (and maybe not).
9980 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9981   predicate( UseSSE<=1 );
9982   match(Set src2 (AddD (MulD src0 src1) src2));
9983   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9984             "DMUL   ST,$src1\n\t"
9985             "DADDp  $src2,ST" %}
9986   ins_cost(250);
9987   opcode(0xDD); /* LoadD DD /0 */
9988   ins_encode( Push_Reg_FPR(src0),
9989               FMul_ST_reg(src1),
9990               FAddP_reg_ST(src2) );
9991   ins_pipe( fpu_reg_reg_reg );
9992 %}
9993 
9994 
9995 // MACRO3 -- subDPR a mulDPR
9996 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9997   predicate( UseSSE<=1 );
9998   match(Set src2 (SubD (MulD src0 src1) src2));
9999   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
10000             "DMUL   ST,$src1\n\t"
10001             "DSUBRp $src2,ST" %}
10002   ins_cost(250);
10003   ins_encode( Push_Reg_FPR(src0),
10004               FMul_ST_reg(src1),
10005               Opcode(0xDE), Opc_plus(0xE0,src2));
10006   ins_pipe( fpu_reg_reg_reg );
10007 %}
10008 
10009 
10010 instruct divDPR_reg(regDPR dst, regDPR src) %{
10011   predicate( UseSSE<=1 );
10012   match(Set dst (DivD dst src));
10013 
10014   format %{ "FLD    $src\n\t"
10015             "FDIVp  $dst,ST" %}
10016   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10017   ins_cost(150);
10018   ins_encode( Push_Reg_DPR(src),
10019               OpcP, RegOpc(dst) );
10020   ins_pipe( fpu_reg_reg );
10021 %}
10022 
10023 // Strict FP instruction biases argument before division then
10024 // biases result, to avoid double rounding of subnormals.
10025 //
10026 // scale dividend by multiplying dividend by 2^(-15360)
10027 // load divisor
10028 // divide scaled dividend by divisor
10029 // rescale quotient by 2^(15360)
10030 //
10031 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
10036 
10037   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
10038             "DMULp  $dst,ST\n\t"
10039             "FLD    $src\n\t"
10040             "FDIVp  $dst,ST\n\t"
10041             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
10042             "DMULp  $dst,ST\n\t" %}
10043   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10044   ins_encode( strictfp_bias1(dst),
10045               Push_Reg_DPR(src),
10046               OpcP, RegOpc(dst),
10047               strictfp_bias2(dst) );
10048   ins_pipe( fpu_reg_reg );
10049 %}
10050 
10051 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
10052   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10053   match(Set dst (RoundDouble (DivD src1 src2)));
10054 
10055   format %{ "FLD    $src1\n\t"
10056             "FDIV   ST,$src2\n\t"
10057             "FSTP_D $dst\t# D-round" %}
10058   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10059   ins_encode( Push_Reg_DPR(src1),
10060               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
10061   ins_pipe( fpu_mem_reg_reg );
10062 %}
10063 
10064 
10065 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
10066   predicate(UseSSE<=1);
10067   match(Set dst (ModD dst src));
10068   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10069 
10070   format %{ "DMOD   $dst,$src" %}
10071   ins_cost(250);
10072   ins_encode(Push_Reg_Mod_DPR(dst, src),
10073               emitModDPR(),
10074               Push_Result_Mod_DPR(src),
10075               Pop_Reg_DPR(dst));
10076   ins_pipe( pipe_slow );
10077 %}
10078 
10079 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
10080   predicate(UseSSE>=2);
10081   match(Set dst (ModD src0 src1));
10082   effect(KILL rax, KILL cr);
10083 
10084   format %{ "SUB    ESP,8\t # DMOD\n"
10085           "\tMOVSD  [ESP+0],$src1\n"
10086           "\tFLD_D  [ESP+0]\n"
10087           "\tMOVSD  [ESP+0],$src0\n"
10088           "\tFLD_D  [ESP+0]\n"
10089      "loop:\tFPREM\n"
10090           "\tFWAIT\n"
10091           "\tFNSTSW AX\n"
10092           "\tSAHF\n"
10093           "\tJP     loop\n"
10094           "\tFSTP_D [ESP+0]\n"
10095           "\tMOVSD  $dst,[ESP+0]\n"
10096           "\tADD    ESP,8\n"
10097           "\tFSTP   ST0\t # Restore FPU Stack"
10098     %}
10099   ins_cost(250);
10100   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10101   ins_pipe( pipe_slow );
10102 %}
10103 
10104 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10105   predicate (UseSSE<=1);
10106   match(Set dst(AtanD dst src));
10107   format %{ "DATA   $dst,$src" %}
10108   opcode(0xD9, 0xF3);
10109   ins_encode( Push_Reg_DPR(src),
10110               OpcP, OpcS, RegOpc(dst) );
10111   ins_pipe( pipe_slow );
10112 %}
10113 
10114 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10115   predicate (UseSSE>=2);
10116   match(Set dst(AtanD dst src));
10117   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10118   format %{ "DATA   $dst,$src" %}
10119   opcode(0xD9, 0xF3);
10120   ins_encode( Push_SrcD(src),
10121               OpcP, OpcS, Push_ResultD(dst) );
10122   ins_pipe( pipe_slow );
10123 %}
10124 
10125 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10126   predicate (UseSSE<=1);
10127   match(Set dst (SqrtD src));
10128   format %{ "DSQRT  $dst,$src" %}
10129   opcode(0xFA, 0xD9);
10130   ins_encode( Push_Reg_DPR(src),
10131               OpcS, OpcP, Pop_Reg_DPR(dst) );
10132   ins_pipe( pipe_slow );
10133 %}
10134 
10135 //-------------Float Instructions-------------------------------
10136 // Float Math
10137 
10138 // Code for float compare:
10139 //     fcompp();
10140 //     fwait(); fnstsw_ax();
10141 //     sahf();
10142 //     movl(dst, unordered_result);
10143 //     jcc(Assembler::parity, exit);
10144 //     movl(dst, less_result);
10145 //     jcc(Assembler::below, exit);
10146 //     movl(dst, equal_result);
10147 //     jcc(Assembler::equal, exit);
10148 //     movl(dst, greater_result);
10149 //   exit:
10150 
10151 // P6 version of float compare, sets condition codes in EFLAGS
10152 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10153   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10154   match(Set cr (CmpF src1 src2));
10155   effect(KILL rax);
10156   ins_cost(150);
10157   format %{ "FLD    $src1\n\t"
10158             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10159             "JNP    exit\n\t"
10160             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10161             "SAHF\n"
10162      "exit:\tNOP               // avoid branch to branch" %}
10163   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10164   ins_encode( Push_Reg_DPR(src1),
10165               OpcP, RegOpc(src2),
10166               cmpF_P6_fixup );
10167   ins_pipe( pipe_slow );
10168 %}
10169 
10170 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10171   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10172   match(Set cr (CmpF src1 src2));
10173   ins_cost(100);
10174   format %{ "FLD    $src1\n\t"
10175             "FUCOMIP ST,$src2  // P6 instruction" %}
10176   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10177   ins_encode( Push_Reg_DPR(src1),
10178               OpcP, RegOpc(src2));
10179   ins_pipe( pipe_slow );
10180 %}
10181 
10182 
10183 // Compare & branch
10184 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10185   predicate(UseSSE == 0);
10186   match(Set cr (CmpF src1 src2));
10187   effect(KILL rax);
10188   ins_cost(200);
10189   format %{ "FLD    $src1\n\t"
10190             "FCOMp  $src2\n\t"
10191             "FNSTSW AX\n\t"
10192             "TEST   AX,0x400\n\t"
10193             "JZ,s   flags\n\t"
10194             "MOV    AH,1\t# unordered treat as LT\n"
10195     "flags:\tSAHF" %}
10196   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10197   ins_encode( Push_Reg_DPR(src1),
10198               OpcP, RegOpc(src2),
10199               fpu_flags);
10200   ins_pipe( pipe_slow );
10201 %}
10202 
10203 // Compare vs zero into -1,0,1
10204 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10205   predicate(UseSSE == 0);
10206   match(Set dst (CmpF3 src1 zero));
10207   effect(KILL cr, KILL rax);
10208   ins_cost(280);
10209   format %{ "FTSTF  $dst,$src1" %}
10210   opcode(0xE4, 0xD9);
10211   ins_encode( Push_Reg_DPR(src1),
10212               OpcS, OpcP, PopFPU,
10213               CmpF_Result(dst));
10214   ins_pipe( pipe_slow );
10215 %}
10216 
10217 // Compare into -1,0,1
10218 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10219   predicate(UseSSE == 0);
10220   match(Set dst (CmpF3 src1 src2));
10221   effect(KILL cr, KILL rax);
10222   ins_cost(300);
10223   format %{ "FCMPF  $dst,$src1,$src2" %}
10224   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10225   ins_encode( Push_Reg_DPR(src1),
10226               OpcP, RegOpc(src2),
10227               CmpF_Result(dst));
10228   ins_pipe( pipe_slow );
10229 %}
10230 
10231 // float compare and set condition codes in EFLAGS by XMM regs
10232 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10233   predicate(UseSSE>=1);
10234   match(Set cr (CmpF src1 src2));
10235   ins_cost(145);
10236   format %{ "UCOMISS $src1,$src2\n\t"
10237             "JNP,s   exit\n\t"
10238             "PUSHF\t# saw NaN, set CF\n\t"
10239             "AND     [rsp], #0xffffff2b\n\t"
10240             "POPF\n"
10241     "exit:" %}
10242   ins_encode %{
10243     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10244     emit_cmpfp_fixup(_masm);
10245   %}
10246   ins_pipe( pipe_slow );
10247 %}
10248 
10249 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10250   predicate(UseSSE>=1);
10251   match(Set cr (CmpF src1 src2));
10252   ins_cost(100);
10253   format %{ "UCOMISS $src1,$src2" %}
10254   ins_encode %{
10255     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10256   %}
10257   ins_pipe( pipe_slow );
10258 %}
10259 
10260 // float compare and set condition codes in EFLAGS by XMM regs
10261 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10262   predicate(UseSSE>=1);
10263   match(Set cr (CmpF src1 (LoadF src2)));
10264   ins_cost(165);
10265   format %{ "UCOMISS $src1,$src2\n\t"
10266             "JNP,s   exit\n\t"
10267             "PUSHF\t# saw NaN, set CF\n\t"
10268             "AND     [rsp], #0xffffff2b\n\t"
10269             "POPF\n"
10270     "exit:" %}
10271   ins_encode %{
10272     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10273     emit_cmpfp_fixup(_masm);
10274   %}
10275   ins_pipe( pipe_slow );
10276 %}
10277 
10278 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10279   predicate(UseSSE>=1);
10280   match(Set cr (CmpF src1 (LoadF src2)));
10281   ins_cost(100);
10282   format %{ "UCOMISS $src1,$src2" %}
10283   ins_encode %{
10284     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10285   %}
10286   ins_pipe( pipe_slow );
10287 %}
10288 
10289 // Compare into -1,0,1 in XMM
10290 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10291   predicate(UseSSE>=1);
10292   match(Set dst (CmpF3 src1 src2));
10293   effect(KILL cr);
10294   ins_cost(255);
10295   format %{ "UCOMISS $src1, $src2\n\t"
10296             "MOV     $dst, #-1\n\t"
10297             "JP,s    done\n\t"
10298             "JB,s    done\n\t"
10299             "SETNE   $dst\n\t"
10300             "MOVZB   $dst, $dst\n"
10301     "done:" %}
10302   ins_encode %{
10303     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10304     emit_cmpfp3(_masm, $dst$$Register);
10305   %}
10306   ins_pipe( pipe_slow );
10307 %}
10308 
10309 // Compare into -1,0,1 in XMM and memory
10310 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10311   predicate(UseSSE>=1);
10312   match(Set dst (CmpF3 src1 (LoadF src2)));
10313   effect(KILL cr);
10314   ins_cost(275);
10315   format %{ "UCOMISS $src1, $src2\n\t"
10316             "MOV     $dst, #-1\n\t"
10317             "JP,s    done\n\t"
10318             "JB,s    done\n\t"
10319             "SETNE   $dst\n\t"
10320             "MOVZB   $dst, $dst\n"
10321     "done:" %}
10322   ins_encode %{
10323     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10324     emit_cmpfp3(_masm, $dst$$Register);
10325   %}
10326   ins_pipe( pipe_slow );
10327 %}
10328 
10329 // Spill to obtain 24-bit precision
10330 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10331   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10332   match(Set dst (SubF src1 src2));
10333 
10334   format %{ "FSUB   $dst,$src1 - $src2" %}
10335   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10336   ins_encode( Push_Reg_FPR(src1),
10337               OpcReg_FPR(src2),
10338               Pop_Mem_FPR(dst) );
10339   ins_pipe( fpu_mem_reg_reg );
10340 %}
10341 //
10342 // This instruction does not round to 24-bits
10343 instruct subFPR_reg(regFPR dst, regFPR src) %{
10344   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10345   match(Set dst (SubF dst src));
10346 
10347   format %{ "FSUB   $dst,$src" %}
10348   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10349   ins_encode( Push_Reg_FPR(src),
10350               OpcP, RegOpc(dst) );
10351   ins_pipe( fpu_reg_reg );
10352 %}
10353 
10354 // Spill to obtain 24-bit precision
10355 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10356   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10357   match(Set dst (AddF src1 src2));
10358 
10359   format %{ "FADD   $dst,$src1,$src2" %}
10360   opcode(0xD8, 0x0); /* D8 C0+i */
10361   ins_encode( Push_Reg_FPR(src2),
10362               OpcReg_FPR(src1),
10363               Pop_Mem_FPR(dst) );
10364   ins_pipe( fpu_mem_reg_reg );
10365 %}
10366 //
10367 // This instruction does not round to 24-bits
10368 instruct addFPR_reg(regFPR dst, regFPR src) %{
10369   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10370   match(Set dst (AddF dst src));
10371 
10372   format %{ "FLD    $src\n\t"
10373             "FADDp  $dst,ST" %}
10374   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10375   ins_encode( Push_Reg_FPR(src),
10376               OpcP, RegOpc(dst) );
10377   ins_pipe( fpu_reg_reg );
10378 %}
10379 
10380 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10381   predicate(UseSSE==0);
10382   match(Set dst (AbsF src));
10383   ins_cost(100);
10384   format %{ "FABS" %}
10385   opcode(0xE1, 0xD9);
10386   ins_encode( OpcS, OpcP );
10387   ins_pipe( fpu_reg_reg );
10388 %}
10389 
10390 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10391   predicate(UseSSE==0);
10392   match(Set dst (NegF src));
10393   ins_cost(100);
10394   format %{ "FCHS" %}
10395   opcode(0xE0, 0xD9);
10396   ins_encode( OpcS, OpcP );
10397   ins_pipe( fpu_reg_reg );
10398 %}
10399 
10400 // Cisc-alternate to addFPR_reg
10401 // Spill to obtain 24-bit precision
10402 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10403   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10404   match(Set dst (AddF src1 (LoadF src2)));
10405 
10406   format %{ "FLD    $src2\n\t"
10407             "FADD   ST,$src1\n\t"
10408             "FSTP_S $dst" %}
10409   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10410   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10411               OpcReg_FPR(src1),
10412               Pop_Mem_FPR(dst) );
10413   ins_pipe( fpu_mem_reg_mem );
10414 %}
10415 //
10416 // Cisc-alternate to addFPR_reg
10417 // This instruction does not round to 24-bits
10418 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10419   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10420   match(Set dst (AddF dst (LoadF src)));
10421 
10422   format %{ "FADD   $dst,$src" %}
10423   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10424   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10425               OpcP, RegOpc(dst) );
10426   ins_pipe( fpu_reg_mem );
10427 %}
10428 
// Following two instructions for _222_mpegaudio
10430 // Spill to obtain 24-bit precision
10431 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10432   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10433   match(Set dst (AddF src1 src2));
10434 
10435   format %{ "FADD   $dst,$src1,$src2" %}
10436   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10437   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10438               OpcReg_FPR(src2),
10439               Pop_Mem_FPR(dst) );
10440   ins_pipe( fpu_mem_reg_mem );
10441 %}
10442 
10443 // Cisc-spill variant
10444 // Spill to obtain 24-bit precision
10445 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10446   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10447   match(Set dst (AddF src1 (LoadF src2)));
10448 
10449   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10450   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10451   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10452               set_instruction_start,
10453               OpcP, RMopc_Mem(secondary,src1),
10454               Pop_Mem_FPR(dst) );
10455   ins_pipe( fpu_mem_mem_mem );
10456 %}
10457 
10458 // Spill to obtain 24-bit precision
10459 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10460   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10461   match(Set dst (AddF src1 src2));
10462 
10463   format %{ "FADD   $dst,$src1,$src2" %}
10464   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10465   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10466               set_instruction_start,
10467               OpcP, RMopc_Mem(secondary,src1),
10468               Pop_Mem_FPR(dst) );
10469   ins_pipe( fpu_mem_mem_mem );
10470 %}
10471 
10472 
10473 // Spill to obtain 24-bit precision
10474 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10475   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10476   match(Set dst (AddF src con));
10477   format %{ "FLD    $src\n\t"
10478             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10479             "FSTP_S $dst"  %}
10480   ins_encode %{
10481     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10482     __ fadd_s($constantaddress($con));
10483     __ fstp_s(Address(rsp, $dst$$disp));
10484   %}
10485   ins_pipe(fpu_mem_reg_con);
10486 %}
10487 //
10488 // This instruction does not round to 24-bits
10489 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10490   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10491   match(Set dst (AddF src con));
10492   format %{ "FLD    $src\n\t"
10493             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10494             "FSTP   $dst"  %}
10495   ins_encode %{
10496     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10497     __ fadd_s($constantaddress($con));
10498     __ fstp_d($dst$$reg);
10499   %}
10500   ins_pipe(fpu_reg_reg_con);
10501 %}
10502 
10503 // Spill to obtain 24-bit precision
10504 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10505   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10506   match(Set dst (MulF src1 src2));
10507 
10508   format %{ "FLD    $src1\n\t"
10509             "FMUL   $src2\n\t"
10510             "FSTP_S $dst"  %}
10511   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10512   ins_encode( Push_Reg_FPR(src1),
10513               OpcReg_FPR(src2),
10514               Pop_Mem_FPR(dst) );
10515   ins_pipe( fpu_mem_reg_reg );
10516 %}
10517 //
10518 // This instruction does not round to 24-bits
10519 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10520   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10521   match(Set dst (MulF src1 src2));
10522 
10523   format %{ "FLD    $src1\n\t"
10524             "FMUL   $src2\n\t"
10525             "FSTP_S $dst"  %}
10526   opcode(0xD8, 0x1); /* D8 C8+i */
10527   ins_encode( Push_Reg_FPR(src2),
10528               OpcReg_FPR(src1),
10529               Pop_Reg_FPR(dst) );
10530   ins_pipe( fpu_reg_reg_reg );
10531 %}
10532 
10533 
10534 // Spill to obtain 24-bit precision
10535 // Cisc-alternate to reg-reg multiply
10536 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10537   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10538   match(Set dst (MulF src1 (LoadF src2)));
10539 
10540   format %{ "FLD_S  $src2\n\t"
10541             "FMUL   $src1\n\t"
10542             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10544   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10545               OpcReg_FPR(src1),
10546               Pop_Mem_FPR(dst) );
10547   ins_pipe( fpu_mem_reg_mem );
10548 %}
10549 //
10550 // This instruction does not round to 24-bits
10551 // Cisc-alternate to reg-reg multiply
10552 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10553   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10554   match(Set dst (MulF src1 (LoadF src2)));
10555 
10556   format %{ "FMUL   $dst,$src1,$src2" %}
10557   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10558   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10559               OpcReg_FPR(src1),
10560               Pop_Reg_FPR(dst) );
10561   ins_pipe( fpu_reg_reg_mem );
10562 %}
10563 
10564 // Spill to obtain 24-bit precision
10565 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10566   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10567   match(Set dst (MulF src1 src2));
10568 
10569   format %{ "FMUL   $dst,$src1,$src2" %}
10570   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10571   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10572               set_instruction_start,
10573               OpcP, RMopc_Mem(secondary,src1),
10574               Pop_Mem_FPR(dst) );
10575   ins_pipe( fpu_mem_mem_mem );
10576 %}
10577 
10578 // Spill to obtain 24-bit precision
10579 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10580   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10581   match(Set dst (MulF src con));
10582 
10583   format %{ "FLD    $src\n\t"
10584             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10585             "FSTP_S $dst"  %}
10586   ins_encode %{
10587     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10588     __ fmul_s($constantaddress($con));
10589     __ fstp_s(Address(rsp, $dst$$disp));
10590   %}
10591   ins_pipe(fpu_mem_reg_con);
10592 %}
10593 //
10594 // This instruction does not round to 24-bits
10595 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10596   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10597   match(Set dst (MulF src con));
10598 
10599   format %{ "FLD    $src\n\t"
10600             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10601             "FSTP   $dst"  %}
10602   ins_encode %{
10603     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10604     __ fmul_s($constantaddress($con));
10605     __ fstp_d($dst$$reg);
10606   %}
10607   ins_pipe(fpu_reg_reg_con);
10608 %}
10609 
10610 
10611 //
10612 // MACRO1 -- subsume unshared load into mulFPR
10613 // This instruction does not round to 24-bits
10614 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10615   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10616   match(Set dst (MulF (LoadF mem1) src));
10617 
10618   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10619             "FMUL   ST,$src\n\t"
10620             "FSTP   $dst" %}
10621   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10622   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10623               OpcReg_FPR(src),
10624               Pop_Reg_FPR(dst) );
10625   ins_pipe( fpu_reg_reg_mem );
10626 %}
10627 //
10628 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10629 // This instruction does not round to 24-bits
10630 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10631   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10632   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10633   ins_cost(95);
10634 
10635   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10636             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10637             "FADD   ST,$src2\n\t"
10638             "FSTP   $dst" %}
10639   opcode(0xD9); /* LoadF D9 /0 */
10640   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10641               FMul_ST_reg(src1),
10642               FAdd_ST_reg(src2),
10643               Pop_Reg_FPR(dst) );
10644   ins_pipe( fpu_reg_mem_reg_reg );
10645 %}
10646 
10647 // MACRO3 -- addFPR a mulFPR
10648 // This instruction does not round to 24-bits.  It is a '2-address'
10649 // instruction in that the result goes back to src2.  This eliminates
10650 // a move from the macro; possibly the register allocator will have
10651 // to add it back (and maybe not).
10652 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10653   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10654   match(Set src2 (AddF (MulF src0 src1) src2));
10655 
10656   format %{ "FLD    $src0     ===MACRO3===\n\t"
10657             "FMUL   ST,$src1\n\t"
10658             "FADDP  $src2,ST" %}
10659   opcode(0xD9); /* LoadF D9 /0 */
10660   ins_encode( Push_Reg_FPR(src0),
10661               FMul_ST_reg(src1),
10662               FAddP_reg_ST(src2) );
10663   ins_pipe( fpu_reg_reg_reg );
10664 %}
10665 
10666 // MACRO4 -- divFPR subFPR
10667 // This instruction does not round to 24-bits
10668 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10669   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10670   match(Set dst (DivF (SubF src2 src1) src3));
10671 
10672   format %{ "FLD    $src2   ===MACRO4===\n\t"
10673             "FSUB   ST,$src1\n\t"
10674             "FDIV   ST,$src3\n\t"
10675             "FSTP  $dst" %}
10676   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10677   ins_encode( Push_Reg_FPR(src2),
10678               subFPR_divFPR_encode(src1,src3),
10679               Pop_Reg_FPR(dst) );
10680   ins_pipe( fpu_reg_reg_reg_reg );
10681 %}
10682 
10683 // Spill to obtain 24-bit precision
10684 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10685   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10686   match(Set dst (DivF src1 src2));
10687 
10688   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10690   ins_encode( Push_Reg_FPR(src1),
10691               OpcReg_FPR(src2),
10692               Pop_Mem_FPR(dst) );
10693   ins_pipe( fpu_mem_reg_reg );
10694 %}
10695 //
10696 // This instruction does not round to 24-bits
10697 instruct divFPR_reg(regFPR dst, regFPR src) %{
10698   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10699   match(Set dst (DivF dst src));
10700 
10701   format %{ "FDIV   $dst,$src" %}
10702   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10703   ins_encode( Push_Reg_FPR(src),
10704               OpcP, RegOpc(dst) );
10705   ins_pipe( fpu_reg_reg );
10706 %}
10707 
10708 
10709 // Spill to obtain 24-bit precision
10710 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10711   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10712   match(Set dst (ModF src1 src2));
10713   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10714 
10715   format %{ "FMOD   $dst,$src1,$src2" %}
10716   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10717               emitModDPR(),
10718               Push_Result_Mod_DPR(src2),
10719               Pop_Mem_FPR(dst));
10720   ins_pipe( pipe_slow );
10721 %}
10722 //
10723 // This instruction does not round to 24-bits
10724 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10725   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10726   match(Set dst (ModF dst src));
10727   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10728 
10729   format %{ "FMOD   $dst,$src" %}
10730   ins_encode(Push_Reg_Mod_DPR(dst, src),
10731               emitModDPR(),
10732               Push_Result_Mod_DPR(src),
10733               Pop_Reg_FPR(dst));
10734   ins_pipe( pipe_slow );
10735 %}
10736 
10737 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10738   predicate(UseSSE>=1);
10739   match(Set dst (ModF src0 src1));
10740   effect(KILL rax, KILL cr);
10741   format %{ "SUB    ESP,4\t # FMOD\n"
10742           "\tMOVSS  [ESP+0],$src1\n"
10743           "\tFLD_S  [ESP+0]\n"
10744           "\tMOVSS  [ESP+0],$src0\n"
10745           "\tFLD_S  [ESP+0]\n"
10746      "loop:\tFPREM\n"
10747           "\tFWAIT\n"
10748           "\tFNSTSW AX\n"
10749           "\tSAHF\n"
10750           "\tJP     loop\n"
10751           "\tFSTP_S [ESP+0]\n"
10752           "\tMOVSS  $dst,[ESP+0]\n"
10753           "\tADD    ESP,4\n"
10754           "\tFSTP   ST0\t # Restore FPU Stack"
10755     %}
10756   ins_cost(250);
10757   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10758   ins_pipe( pipe_slow );
10759 %}
10760 
10761 
10762 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
10764 
10765 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10766   predicate(UseSSE==0);
10767   match(Set dst (RoundFloat src));
10768   ins_cost(125);
10769   format %{ "FST_S  $dst,$src\t# F-round" %}
10770   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10771   ins_pipe( fpu_mem_reg );
10772 %}
10773 
10774 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10775   predicate(UseSSE<=1);
10776   match(Set dst (RoundDouble src));
10777   ins_cost(125);
10778   format %{ "FST_D  $dst,$src\t# D-round" %}
10779   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10780   ins_pipe( fpu_mem_reg );
10781 %}
10782 
// Force rounding to 24-bit precision and 8-bit exponent
10784 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10785   predicate(UseSSE==0);
10786   match(Set dst (ConvD2F src));
10787   format %{ "FST_S  $dst,$src\t# F-round" %}
10788   expand %{
10789     roundFloat_mem_reg(dst,src);
10790   %}
10791 %}
10792 
// Force rounding to 24-bit precision and 8-bit exponent
10794 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10795   predicate(UseSSE==1);
10796   match(Set dst (ConvD2F src));
10797   effect( KILL cr );
10798   format %{ "SUB    ESP,4\n\t"
10799             "FST_S  [ESP],$src\t# F-round\n\t"
10800             "MOVSS  $dst,[ESP]\n\t"
10801             "ADD ESP,4" %}
10802   ins_encode %{
10803     __ subptr(rsp, 4);
10804     if ($src$$reg != FPR1L_enc) {
10805       __ fld_s($src$$reg-1);
10806       __ fstp_s(Address(rsp, 0));
10807     } else {
10808       __ fst_s(Address(rsp, 0));
10809     }
10810     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10811     __ addptr(rsp, 4);
10812   %}
10813   ins_pipe( pipe_slow );
10814 %}
10815 
10816 // Force rounding double precision to single precision
10817 instruct convD2F_reg(regF dst, regD src) %{
10818   predicate(UseSSE>=2);
10819   match(Set dst (ConvD2F src));
10820   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10821   ins_encode %{
10822     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10823   %}
10824   ins_pipe( pipe_slow );
10825 %}
10826 
10827 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10828   predicate(UseSSE==0);
10829   match(Set dst (ConvF2D src));
10830   format %{ "FST_S  $dst,$src\t# D-round" %}
10831   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10832   ins_pipe( fpu_reg_reg );
10833 %}
10834 
10835 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10836   predicate(UseSSE==1);
10837   match(Set dst (ConvF2D src));
10838   format %{ "FST_D  $dst,$src\t# D-round" %}
10839   expand %{
10840     roundDouble_mem_reg(dst,src);
10841   %}
10842 %}
10843 
10844 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10845   predicate(UseSSE==1);
10846   match(Set dst (ConvF2D src));
10847   effect( KILL cr );
10848   format %{ "SUB    ESP,4\n\t"
10849             "MOVSS  [ESP] $src\n\t"
10850             "FLD_S  [ESP]\n\t"
10851             "ADD    ESP,4\n\t"
10852             "FSTP   $dst\t# D-round" %}
10853   ins_encode %{
10854     __ subptr(rsp, 4);
10855     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10856     __ fld_s(Address(rsp, 0));
10857     __ addptr(rsp, 4);
10858     __ fstp_d($dst$$reg);
10859   %}
10860   ins_pipe( pipe_slow );
10861 %}
10862 
10863 instruct convF2D_reg(regD dst, regF src) %{
10864   predicate(UseSSE>=2);
10865   match(Set dst (ConvF2D src));
10866   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10867   ins_encode %{
10868     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10869   %}
10870   ins_pipe( pipe_slow );
10871 %}
10872 
10873 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10874 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10875   predicate(UseSSE<=1);
10876   match(Set dst (ConvD2I src));
10877   effect( KILL tmp, KILL cr );
10878   format %{ "FLD    $src\t# Convert double to int \n\t"
10879             "FLDCW  trunc mode\n\t"
10880             "SUB    ESP,4\n\t"
10881             "FISTp  [ESP + #0]\n\t"
10882             "FLDCW  std/24-bit mode\n\t"
10883             "POP    EAX\n\t"
10884             "CMP    EAX,0x80000000\n\t"
10885             "JNE,s  fast\n\t"
10886             "FLD_D  $src\n\t"
10887             "CALL   d2i_wrapper\n"
10888       "fast:" %}
10889   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10890   ins_pipe( pipe_slow );
10891 %}
10892 
10893 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10894 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10895   predicate(UseSSE>=2);
10896   match(Set dst (ConvD2I src));
10897   effect( KILL tmp, KILL cr );
10898   format %{ "CVTTSD2SI $dst, $src\n\t"
10899             "CMP    $dst,0x80000000\n\t"
10900             "JNE,s  fast\n\t"
10901             "SUB    ESP, 8\n\t"
10902             "MOVSD  [ESP], $src\n\t"
10903             "FLD_D  [ESP]\n\t"
10904             "ADD    ESP, 8\n\t"
10905             "CALL   d2i_wrapper\n"
10906       "fast:" %}
10907   ins_encode %{
10908     Label fast;
10909     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10910     __ cmpl($dst$$Register, 0x80000000);
10911     __ jccb(Assembler::notEqual, fast);
10912     __ subptr(rsp, 8);
10913     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10914     __ fld_d(Address(rsp, 0));
10915     __ addptr(rsp, 8);
10916     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10917     __ bind(fast);
10918   %}
10919   ins_pipe( pipe_slow );
10920 %}
10921 
10922 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10923   predicate(UseSSE<=1);
10924   match(Set dst (ConvD2L src));
10925   effect( KILL cr );
10926   format %{ "FLD    $src\t# Convert double to long\n\t"
10927             "FLDCW  trunc mode\n\t"
10928             "SUB    ESP,8\n\t"
10929             "FISTp  [ESP + #0]\n\t"
10930             "FLDCW  std/24-bit mode\n\t"
10931             "POP    EAX\n\t"
10932             "POP    EDX\n\t"
10933             "CMP    EDX,0x80000000\n\t"
10934             "JNE,s  fast\n\t"
10935             "TEST   EAX,EAX\n\t"
10936             "JNE,s  fast\n\t"
10937             "FLD    $src\n\t"
10938             "CALL   d2l_wrapper\n"
10939       "fast:" %}
10940   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10941   ins_pipe( pipe_slow );
10942 %}
10943 
10944 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10945 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10946   predicate (UseSSE>=2);
10947   match(Set dst (ConvD2L src));
10948   effect( KILL cr );
10949   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10950             "MOVSD  [ESP],$src\n\t"
10951             "FLD_D  [ESP]\n\t"
10952             "FLDCW  trunc mode\n\t"
10953             "FISTp  [ESP + #0]\n\t"
10954             "FLDCW  std/24-bit mode\n\t"
10955             "POP    EAX\n\t"
10956             "POP    EDX\n\t"
10957             "CMP    EDX,0x80000000\n\t"
10958             "JNE,s  fast\n\t"
10959             "TEST   EAX,EAX\n\t"
10960             "JNE,s  fast\n\t"
10961             "SUB    ESP,8\n\t"
10962             "MOVSD  [ESP],$src\n\t"
10963             "FLD_D  [ESP]\n\t"
10964             "ADD    ESP,8\n\t"
10965             "CALL   d2l_wrapper\n"
10966       "fast:" %}
10967   ins_encode %{
10968     Label fast;
10969     __ subptr(rsp, 8);
10970     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10971     __ fld_d(Address(rsp, 0));
10972     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10973     __ fistp_d(Address(rsp, 0));
10974     // Restore the rounding mode, mask the exception
10975     if (Compile::current()->in_24_bit_fp_mode()) {
10976       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10977     } else {
10978       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10979     }
10980     // Load the converted long, adjust CPU stack
10981     __ pop(rax);
10982     __ pop(rdx);
10983     __ cmpl(rdx, 0x80000000);
10984     __ jccb(Assembler::notEqual, fast);
10985     __ testl(rax, rax);
10986     __ jccb(Assembler::notEqual, fast);
10987     __ subptr(rsp, 8);
10988     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10989     __ fld_d(Address(rsp, 0));
10990     __ addptr(rsp, 8);
10991     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10992     __ bind(fast);
10993   %}
10994   ins_pipe( pipe_slow );
10995 %}
10996 
10997 // Convert a double to an int.  Java semantics require we do complex
10998 // manglations in the corner cases.  So we set the rounding mode to
10999 // 'zero', store the darned double down as an int, and reset the
11000 // rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
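// Sketched as MacroAssembler-style calls (an assumption about what the
// DPR2I_encoding used below expands to; compare the format strings):
//     fld(src);                         // push the operand
//     fldcw(trunc_mode);                // round-toward-zero
//     subptr(rsp, 4);
//     fistp_s(Address(rsp, 0));         // store as int32, pops the FPU stack
//     fldcw(std_or_24_bit_mode);        // restore the previous mode
//     pop(rax);                         // the converted value
//     cmpl(rax, 0x80000000);            // FIST's "integer indefinite" marker
//     jcc(Assembler::notEqual, fast);
//     fld(src);                         // reload and let the wrapper sort out
//     call(d2i_wrapper);                //   NaN/overflow corner cases
//   fast: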
11003 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
11004   predicate(UseSSE==0);
11005   match(Set dst (ConvF2I src));
11006   effect( KILL tmp, KILL cr );
11007   format %{ "FLD    $src\t# Convert float to int \n\t"
11008             "FLDCW  trunc mode\n\t"
11009             "SUB    ESP,4\n\t"
11010             "FISTp  [ESP + #0]\n\t"
11011             "FLDCW  std/24-bit mode\n\t"
11012             "POP    EAX\n\t"
11013             "CMP    EAX,0x80000000\n\t"
11014             "JNE,s  fast\n\t"
11015             "FLD    $src\n\t"
11016             "CALL   d2i_wrapper\n"
11017       "fast:" %}
11018   // DPR2I_encoding works for FPR2I
11019   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
11020   ins_pipe( pipe_slow );
11021 %}
11022 
11023 // Convert a float in xmm to an int reg.
11024 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11025   predicate(UseSSE>=1);
11026   match(Set dst (ConvF2I src));
11027   effect( KILL tmp, KILL cr );
11028   format %{ "CVTTSS2SI $dst, $src\n\t"
11029             "CMP    $dst,0x80000000\n\t"
11030             "JNE,s  fast\n\t"
11031             "SUB    ESP, 4\n\t"
11032             "MOVSS  [ESP], $src\n\t"
11033             "FLD    [ESP]\n\t"
11034             "ADD    ESP, 4\n\t"
11035             "CALL   d2i_wrapper\n"
11036       "fast:" %}
11037   ins_encode %{
11038     Label fast;
11039     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
11040     __ cmpl($dst$$Register, 0x80000000);
11041     __ jccb(Assembler::notEqual, fast);
11042     __ subptr(rsp, 4);
11043     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11044     __ fld_s(Address(rsp, 0));
11045     __ addptr(rsp, 4);
11046     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11047     __ bind(fast);
11048   %}
11049   ins_pipe( pipe_slow );
11050 %}
11051 
11052 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
11053   predicate(UseSSE==0);
11054   match(Set dst (ConvF2L src));
11055   effect( KILL cr );
11056   format %{ "FLD    $src\t# Convert float to long\n\t"
11057             "FLDCW  trunc mode\n\t"
11058             "SUB    ESP,8\n\t"
11059             "FISTp  [ESP + #0]\n\t"
11060             "FLDCW  std/24-bit mode\n\t"
11061             "POP    EAX\n\t"
11062             "POP    EDX\n\t"
11063             "CMP    EDX,0x80000000\n\t"
11064             "JNE,s  fast\n\t"
11065             "TEST   EAX,EAX\n\t"
11066             "JNE,s  fast\n\t"
11067             "FLD    $src\n\t"
11068             "CALL   d2l_wrapper\n"
11069       "fast:" %}
11070   // DPR2L_encoding works for FPR2L
11071   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
11072   ins_pipe( pipe_slow );
11073 %}
11074 
11075 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11076 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11077   predicate (UseSSE>=1);
11078   match(Set dst (ConvF2L src));
11079   effect( KILL cr );
11080   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
11081             "MOVSS  [ESP],$src\n\t"
11082             "FLD_S  [ESP]\n\t"
11083             "FLDCW  trunc mode\n\t"
11084             "FISTp  [ESP + #0]\n\t"
11085             "FLDCW  std/24-bit mode\n\t"
11086             "POP    EAX\n\t"
11087             "POP    EDX\n\t"
11088             "CMP    EDX,0x80000000\n\t"
11089             "JNE,s  fast\n\t"
11090             "TEST   EAX,EAX\n\t"
11091             "JNE,s  fast\n\t"
11092             "SUB    ESP,4\t# Convert float to long\n\t"
11093             "MOVSS  [ESP],$src\n\t"
11094             "FLD_S  [ESP]\n\t"
11095             "ADD    ESP,4\n\t"
11096             "CALL   d2l_wrapper\n"
11097       "fast:" %}
11098   ins_encode %{
11099     Label fast;
11100     __ subptr(rsp, 8);
11101     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11102     __ fld_s(Address(rsp, 0));
11103     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11104     __ fistp_d(Address(rsp, 0));
11105     // Restore the rounding mode, mask the exception
11106     if (Compile::current()->in_24_bit_fp_mode()) {
11107       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11108     } else {
11109       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11110     }
11111     // Load the converted long, adjust CPU stack
11112     __ pop(rax);
11113     __ pop(rdx);
11114     __ cmpl(rdx, 0x80000000);
11115     __ jccb(Assembler::notEqual, fast);
11116     __ testl(rax, rax);
11117     __ jccb(Assembler::notEqual, fast);
11118     __ subptr(rsp, 4);
11119     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11120     __ fld_s(Address(rsp, 0));
11121     __ addptr(rsp, 4);
11122     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11123     __ bind(fast);
11124   %}
11125   ins_pipe( pipe_slow );
11126 %}
11127 
11128 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11129   predicate( UseSSE<=1 );
11130   match(Set dst (ConvI2D src));
11131   format %{ "FILD   $src\n\t"
11132             "FSTP   $dst" %}
11133   opcode(0xDB, 0x0);  /* DB /0 */
11134   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11135   ins_pipe( fpu_reg_mem );
11136 %}
11137 
11138 instruct convI2D_reg(regD dst, rRegI src) %{
11139   predicate( UseSSE>=2 && !UseXmmI2D );
11140   match(Set dst (ConvI2D src));
11141   format %{ "CVTSI2SD $dst,$src" %}
11142   ins_encode %{
11143     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11144   %}
11145   ins_pipe( pipe_slow );
11146 %}
11147 
11148 instruct convI2D_mem(regD dst, memory mem) %{
11149   predicate( UseSSE>=2 );
11150   match(Set dst (ConvI2D (LoadI mem)));
11151   format %{ "CVTSI2SD $dst,$mem" %}
11152   ins_encode %{
11153     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11154   %}
11155   ins_pipe( pipe_slow );
11156 %}
11157 
11158 instruct convXI2D_reg(regD dst, rRegI src)
11159 %{
11160   predicate( UseSSE>=2 && UseXmmI2D );
11161   match(Set dst (ConvI2D src));
11162 
11163   format %{ "MOVD  $dst,$src\n\t"
11164             "CVTDQ2PD $dst,$dst\t# i2d" %}
11165   ins_encode %{
11166     __ movdl($dst$$XMMRegister, $src$$Register);
11167     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11168   %}
11169   ins_pipe(pipe_slow); // XXX
11170 %}
11171 
11172 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11173   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11174   match(Set dst (ConvI2D (LoadI mem)));
11175   format %{ "FILD   $mem\n\t"
11176             "FSTP   $dst" %}
11177   opcode(0xDB);      /* DB /0 */
11178   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11179               Pop_Reg_DPR(dst));
11180   ins_pipe( fpu_reg_mem );
11181 %}
11182 
11183 // Convert a byte to a float; no rounding step needed.
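// (Any value masked down to 0..255 is exactly representable as a single-
// precision float, so the FILD result is already exact and no store/reload
// rounding pass is required even when 24-bit mode is selected.)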
11184 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11185   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11186   match(Set dst (ConvI2F src));
11187   format %{ "FILD   $src\n\t"
11188             "FSTP   $dst" %}
11189 
11190   opcode(0xDB, 0x0);  /* DB /0 */
11191   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11192   ins_pipe( fpu_reg_mem );
11193 %}
11194 
11195 // In 24-bit mode, force exponent rounding by storing back out
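// (FILD produces the exact integer value in the x87 register; it is the
// FSTP_S into a 32-bit stack slot that narrows it to a true IEEE single, so
// consumers of $dst observe a value already rounded to float precision.)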
11196 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11197   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11198   match(Set dst (ConvI2F src));
11199   ins_cost(200);
11200   format %{ "FILD   $src\n\t"
11201             "FSTP_S $dst" %}
11202   opcode(0xDB, 0x0);  /* DB /0 */
11203   ins_encode( Push_Mem_I(src),
11204               Pop_Mem_FPR(dst));
11205   ins_pipe( fpu_mem_mem );
11206 %}
11207 
11208 // In 24-bit mode, force exponent rounding by storing back out
11209 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11210   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11211   match(Set dst (ConvI2F (LoadI mem)));
11212   ins_cost(200);
11213   format %{ "FILD   $mem\n\t"
11214             "FSTP_S $dst" %}
11215   opcode(0xDB);  /* DB /0 */
11216   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11217               Pop_Mem_FPR(dst));
11218   ins_pipe( fpu_mem_mem );
11219 %}
11220 
11221 // This instruction does not round to 24-bits
11222 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11223   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11224   match(Set dst (ConvI2F src));
11225   format %{ "FILD   $src\n\t"
11226             "FSTP   $dst" %}
11227   opcode(0xDB, 0x0);  /* DB /0 */
11228   ins_encode( Push_Mem_I(src),
11229               Pop_Reg_FPR(dst));
11230   ins_pipe( fpu_reg_mem );
11231 %}
11232 
11233 // This instruction does not round to 24-bits
11234 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11235   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11236   match(Set dst (ConvI2F (LoadI mem)));
11237   format %{ "FILD   $mem\n\t"
11238             "FSTP   $dst" %}
11239   opcode(0xDB);      /* DB /0 */
11240   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11241               Pop_Reg_FPR(dst));
11242   ins_pipe( fpu_reg_mem );
11243 %}
11244 
11245 // Convert an int to a float in xmm; no rounding step needed.
11246 instruct convI2F_reg(regF dst, rRegI src) %{
11247   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11248   match(Set dst (ConvI2F src));
11249   format %{ "CVTSI2SS $dst, $src" %}
11250   ins_encode %{
11251     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11252   %}
11253   ins_pipe( pipe_slow );
11254 %}
11255 
11256 instruct convXI2F_reg(regF dst, rRegI src)
11257 %{
11258   predicate( UseSSE>=2 && UseXmmI2F );
11259   match(Set dst (ConvI2F src));
11260 
11261   format %{ "MOVD  $dst,$src\n\t"
11262             "CVTDQ2PS $dst,$dst\t# i2f" %}
11263   ins_encode %{
11264     __ movdl($dst$$XMMRegister, $src$$Register);
11265     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11266   %}
11267   ins_pipe(pipe_slow); // XXX
11268 %}
11269 
11270 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11271   match(Set dst (ConvI2L src));
11272   effect(KILL cr);
11273   ins_cost(375);
11274   format %{ "MOV    $dst.lo,$src\n\t"
11275             "MOV    $dst.hi,$src\n\t"
11276             "SAR    $dst.hi,31" %}
11277   ins_encode(convert_int_long(dst,src));
11278   ins_pipe( ialu_reg_reg_long );
11279 %}
11280 
11281 // Zero-extend convert int to long
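// (This is the pattern the matcher sees for Java's  ((long) i) & 0xFFFFFFFFL:
// rather than sign-extending and then masking, the rule simply copies the low
// word and XORs the high word to zero.)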
11282 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11283   match(Set dst (AndL (ConvI2L src) mask) );
11284   effect( KILL flags );
11285   ins_cost(250);
11286   format %{ "MOV    $dst.lo,$src\n\t"
11287             "XOR    $dst.hi,$dst.hi" %}
11288   opcode(0x33); // XOR
11289   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11290   ins_pipe( ialu_reg_reg_long );
11291 %}
11292 
11293 // Zero-extend long
11294 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11295   match(Set dst (AndL src mask) );
11296   effect( KILL flags );
11297   ins_cost(250);
11298   format %{ "MOV    $dst.lo,$src.lo\n\t"
11299             "XOR    $dst.hi,$dst.hi\n\t" %}
11300   opcode(0x33); // XOR
11301   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11302   ins_pipe( ialu_reg_reg_long );
11303 %}
11304 
11305 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11306   predicate (UseSSE<=1);
11307   match(Set dst (ConvL2D src));
11308   effect( KILL cr );
11309   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11310             "PUSH   $src.lo\n\t"
11311             "FILD   ST,[ESP + #0]\n\t"
11312             "ADD    ESP,8\n\t"
11313             "FSTP_D $dst\t# D-round" %}
11314   opcode(0xDF, 0x5);  /* DF /5 */
11315   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11316   ins_pipe( pipe_slow );
11317 %}
11318 
11319 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11320   predicate (UseSSE>=2);
11321   match(Set dst (ConvL2D src));
11322   effect( KILL cr );
11323   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11324             "PUSH   $src.lo\n\t"
11325             "FILD_D [ESP]\n\t"
11326             "FSTP_D [ESP]\n\t"
11327             "MOVSD  $dst,[ESP]\n\t"
11328             "ADD    ESP,8" %}
11329   opcode(0xDF, 0x5);  /* DF /5 */
11330   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11331   ins_pipe( pipe_slow );
11332 %}
11333 
11334 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11335   predicate (UseSSE>=1);
11336   match(Set dst (ConvL2F src));
11337   effect( KILL cr );
11338   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11339             "PUSH   $src.lo\n\t"
11340             "FILD_D [ESP]\n\t"
11341             "FSTP_S [ESP]\n\t"
11342             "MOVSS  $dst,[ESP]\n\t"
11343             "ADD    ESP,8" %}
11344   opcode(0xDF, 0x5);  /* DF /5 */
11345   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11346   ins_pipe( pipe_slow );
11347 %}
11348 
11349 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11350   match(Set dst (ConvL2F src));
11351   effect( KILL cr );
11352   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11353             "PUSH   $src.lo\n\t"
11354             "FILD   ST,[ESP + #0]\n\t"
11355             "ADD    ESP,8\n\t"
11356             "FSTP_S $dst\t# F-round" %}
11357   opcode(0xDF, 0x5);  /* DF /5 */
11358   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11359   ins_pipe( pipe_slow );
11360 %}
11361 
11362 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11363   match(Set dst (ConvL2I src));
11364   effect( DEF dst, USE src );
11365   format %{ "MOV    $dst,$src.lo" %}
11366   ins_encode(enc_CopyL_Lo(dst,src));
11367   ins_pipe( ialu_reg_reg );
11368 %}
11369 
11370 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11371   match(Set dst (MoveF2I src));
11372   effect( DEF dst, USE src );
11373   ins_cost(100);
11374   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11375   ins_encode %{
11376     __ movl($dst$$Register, Address(rsp, $src$$disp));
11377   %}
11378   ins_pipe( ialu_reg_mem );
11379 %}
11380 
11381 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11382   predicate(UseSSE==0);
11383   match(Set dst (MoveF2I src));
11384   effect( DEF dst, USE src );
11385 
11386   ins_cost(125);
11387   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11388   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11389   ins_pipe( fpu_mem_reg );
11390 %}
11391 
11392 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11393   predicate(UseSSE>=1);
11394   match(Set dst (MoveF2I src));
11395   effect( DEF dst, USE src );
11396 
11397   ins_cost(95);
11398   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11399   ins_encode %{
11400     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11401   %}
11402   ins_pipe( pipe_slow );
11403 %}
11404 
11405 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11406   predicate(UseSSE>=2);
11407   match(Set dst (MoveF2I src));
11408   effect( DEF dst, USE src );
11409   ins_cost(85);
11410   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11411   ins_encode %{
11412     __ movdl($dst$$Register, $src$$XMMRegister);
11413   %}
11414   ins_pipe( pipe_slow );
11415 %}
11416 
11417 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11418   match(Set dst (MoveI2F src));
11419   effect( DEF dst, USE src );
11420 
11421   ins_cost(100);
11422   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11423   ins_encode %{
11424     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11425   %}
11426   ins_pipe( ialu_mem_reg );
11427 %}
11428 
11429 
11430 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11431   predicate(UseSSE==0);
11432   match(Set dst (MoveI2F src));
11433   effect(DEF dst, USE src);
11434 
11435   ins_cost(125);
11436   format %{ "FLD_S  $src\n\t"
11437             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11438   opcode(0xD9);               /* D9 /0, FLD m32real */
11439   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11440               Pop_Reg_FPR(dst) );
11441   ins_pipe( fpu_reg_mem );
11442 %}
11443 
11444 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11445   predicate(UseSSE>=1);
11446   match(Set dst (MoveI2F src));
11447   effect( DEF dst, USE src );
11448 
11449   ins_cost(95);
11450   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11451   ins_encode %{
11452     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11453   %}
11454   ins_pipe( pipe_slow );
11455 %}
11456 
11457 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11458   predicate(UseSSE>=2);
11459   match(Set dst (MoveI2F src));
11460   effect( DEF dst, USE src );
11461 
11462   ins_cost(85);
11463   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11464   ins_encode %{
11465     __ movdl($dst$$XMMRegister, $src$$Register);
11466   %}
11467   ins_pipe( pipe_slow );
11468 %}
11469 
11470 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11471   match(Set dst (MoveD2L src));
11472   effect(DEF dst, USE src);
11473 
11474   ins_cost(250);
11475   format %{ "MOV    $dst.lo,$src\n\t"
11476             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11477   opcode(0x8B, 0x8B);
11478   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11479   ins_pipe( ialu_mem_long_reg );
11480 %}
11481 
11482 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11483   predicate(UseSSE<=1);
11484   match(Set dst (MoveD2L src));
11485   effect(DEF dst, USE src);
11486 
11487   ins_cost(125);
11488   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11489   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11490   ins_pipe( fpu_mem_reg );
11491 %}
11492 
11493 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11494   predicate(UseSSE>=2);
11495   match(Set dst (MoveD2L src));
11496   effect(DEF dst, USE src);
11497   ins_cost(95);
11498   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11499   ins_encode %{
11500     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11501   %}
11502   ins_pipe( pipe_slow );
11503 %}
11504 
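// The rule below moves a double out of an XMM register into a register pair
// without a memory round trip: MOVD extracts the low 32 bits, and PSHUFLW
// with immediate 0x4E (word order 2,3,0,1) swaps the two 32-bit halves of the
// low quadword into $tmp so a second MOVD can extract the high 32 bits.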
11505 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11506   predicate(UseSSE>=2);
11507   match(Set dst (MoveD2L src));
11508   effect(DEF dst, USE src, TEMP tmp);
11509   ins_cost(85);
11510   format %{ "MOVD   $dst.lo,$src\n\t"
11511             "PSHUFLW $tmp,$src,0x4E\n\t"
11512             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11513   ins_encode %{
11514     __ movdl($dst$$Register, $src$$XMMRegister);
11515     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11516     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11517   %}
11518   ins_pipe( pipe_slow );
11519 %}
11520 
11521 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11522   match(Set dst (MoveL2D src));
11523   effect(DEF dst, USE src);
11524 
11525   ins_cost(200);
11526   format %{ "MOV    $dst,$src.lo\n\t"
11527             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11528   opcode(0x89, 0x89);
11529   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11530   ins_pipe( ialu_mem_long_reg );
11531 %}
11532 
11533 
11534 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11535   predicate(UseSSE<=1);
11536   match(Set dst (MoveL2D src));
11537   effect(DEF dst, USE src);
11538   ins_cost(125);
11539 
11540   format %{ "FLD_D  $src\n\t"
11541             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11542   opcode(0xDD);               /* DD /0, FLD m64real */
11543   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11544               Pop_Reg_DPR(dst) );
11545   ins_pipe( fpu_reg_mem );
11546 %}
11547 
11548 
11549 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11550   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11551   match(Set dst (MoveL2D src));
11552   effect(DEF dst, USE src);
11553 
11554   ins_cost(95);
11555   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11556   ins_encode %{
11557     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11558   %}
11559   ins_pipe( pipe_slow );
11560 %}
11561 
11562 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11563   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11564   match(Set dst (MoveL2D src));
11565   effect(DEF dst, USE src);
11566 
11567   ins_cost(95);
11568   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11569   ins_encode %{
11570     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11571   %}
11572   ins_pipe( pipe_slow );
11573 %}
11574 
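// The reverse direction below assembles a double in an XMM register from a
// register pair: MOVD loads the low and high halves into $dst and $tmp, and
// PUNPCKLDQ interleaves their low doublewords so $dst holds hi:lo as one
// 64-bit value.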
11575 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11576   predicate(UseSSE>=2);
11577   match(Set dst (MoveL2D src));
11578   effect(TEMP dst, USE src, TEMP tmp);
11579   ins_cost(85);
11580   format %{ "MOVD   $dst,$src.lo\n\t"
11581             "MOVD   $tmp,$src.hi\n\t"
11582             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11583   ins_encode %{
11584     __ movdl($dst$$XMMRegister, $src$$Register);
11585     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11586     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11587   %}
11588   ins_pipe( pipe_slow );
11589 %}
11590 
11591 
11592 // =======================================================================
11593 // fast clearing of an array
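// This rule covers the !is_large() case: it tests the element count against
// InitArrayShortSize and clears short arrays with an inline doubleword store
// loop, falling into the REP STOSB / XMM / REP STOS code only for the LARGE
// case.  The is_large() variant further below skips the short-array test
// entirely (note the false vs. true trailing argument passed to clear_mem in
// the two ins_encode blocks).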
11594 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11595   predicate(!((ClearArrayNode*)n)->is_large());
11596   match(Set dummy (ClearArray cnt base));
11597   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11598 
11599   format %{ $$template
11600     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11601     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11602     $$emit$$"JG     LARGE\n\t"
11603     $$emit$$"SHL    ECX, 1\n\t"
11604     $$emit$$"DEC    ECX\n\t"
11605     $$emit$$"JS     DONE\t# Zero length\n\t"
11606     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11607     $$emit$$"DEC    ECX\n\t"
11608     $$emit$$"JGE    LOOP\n\t"
11609     $$emit$$"JMP    DONE\n\t"
11610     $$emit$$"# LARGE:\n\t"
11611     if (UseFastStosb) {
11612        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11613        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11614     } else if (UseXMMForObjInit) {
11615        $$emit$$"MOV     RDI,RAX\n\t"
11616        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11617        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11618        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11619        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11620        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11621        $$emit$$"ADD     0x40,RAX\n\t"
11622        $$emit$$"# L_zero_64_bytes:\n\t"
11623        $$emit$$"SUB     0x8,RCX\n\t"
11624        $$emit$$"JGE     L_loop\n\t"
11625        $$emit$$"ADD     0x4,RCX\n\t"
11626        $$emit$$"JL      L_tail\n\t"
11627        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11628        $$emit$$"ADD     0x20,RAX\n\t"
11629        $$emit$$"SUB     0x4,RCX\n\t"
11630        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11631        $$emit$$"ADD     0x4,RCX\n\t"
11632        $$emit$$"JLE     L_end\n\t"
11633        $$emit$$"DEC     RCX\n\t"
11634        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11635        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11636        $$emit$$"ADD     0x8,RAX\n\t"
11637        $$emit$$"DEC     RCX\n\t"
11638        $$emit$$"JGE     L_sloop\n\t"
11639        $$emit$$"# L_end:\n\t"
11640     } else {
11641        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11642        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11643     }
11644     $$emit$$"# DONE"
11645   %}
11646   ins_encode %{
11647     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11648                  $tmp$$XMMRegister, false);
11649   %}
11650   ins_pipe( pipe_slow );
11651 %}
11652 
11653 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11654   predicate(((ClearArrayNode*)n)->is_large());
11655   match(Set dummy (ClearArray cnt base));
11656   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11657   format %{ $$template
11658     if (UseFastStosb) {
11659        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11660        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11661        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11662     } else if (UseXMMForObjInit) {
11663        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11664        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11665        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11666        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11667        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11668        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11669        $$emit$$"ADD     0x40,RAX\n\t"
11670        $$emit$$"# L_zero_64_bytes:\n\t"
11671        $$emit$$"SUB     0x8,RCX\n\t"
11672        $$emit$$"JGE     L_loop\n\t"
11673        $$emit$$"ADD     0x4,RCX\n\t"
11674        $$emit$$"JL      L_tail\n\t"
11675        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11676        $$emit$$"ADD     0x20,RAX\n\t"
11677        $$emit$$"SUB     0x4,RCX\n\t"
11678        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11679        $$emit$$"ADD     0x4,RCX\n\t"
11680        $$emit$$"JLE     L_end\n\t"
11681        $$emit$$"DEC     RCX\n\t"
11682        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11683        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11684        $$emit$$"ADD     0x8,RAX\n\t"
11685        $$emit$$"DEC     RCX\n\t"
11686        $$emit$$"JGE     L_sloop\n\t"
11687        $$emit$$"# L_end:\n\t"
11688     } else {
11689        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11690        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11691        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11692     }
11693     $$emit$$"# DONE"
11694   %}
11695   ins_encode %{
11696     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11697                  $tmp$$XMMRegister, true);
11698   %}
11699   ins_pipe( pipe_slow );
11700 %}
11701 
11702 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11703                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11704   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11705   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11706   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11707 
11708   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11709   ins_encode %{
11710     __ string_compare($str1$$Register, $str2$$Register,
11711                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11712                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11713   %}
11714   ins_pipe( pipe_slow );
11715 %}
11716 
11717 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11718                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11719   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11720   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11721   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11722 
11723   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11724   ins_encode %{
11725     __ string_compare($str1$$Register, $str2$$Register,
11726                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11727                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11728   %}
11729   ins_pipe( pipe_slow );
11730 %}
11731 
11732 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11733                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11734   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11735   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11736   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11737 
11738   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11739   ins_encode %{
11740     __ string_compare($str1$$Register, $str2$$Register,
11741                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11742                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11743   %}
11744   ins_pipe( pipe_slow );
11745 %}
11746 
11747 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11748                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11749   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11750   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11751   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11752 
11753   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11754   ins_encode %{
11755     __ string_compare($str2$$Register, $str1$$Register,
11756                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11757                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11758   %}
11759   ins_pipe( pipe_slow );
11760 %}
11761 
11762 // fast string equals
11763 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11764                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11765   match(Set result (StrEquals (Binary str1 str2) cnt));
11766   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11767 
11768   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11769   ins_encode %{
11770     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11771                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11772                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11773   %}
11774 
11775   ins_pipe( pipe_slow );
11776 %}
11777 
11778 // fast search of substring with known size.
11779 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11780                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11781   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11782   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11783   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11784 
11785   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11786   ins_encode %{
11787     int icnt2 = (int)$int_cnt2$$constant;
11788     if (icnt2 >= 16) {
11789       // IndexOf for constant substrings with size >= 16 elements
11790       // which don't need to be loaded through stack.
11791       __ string_indexofC8($str1$$Register, $str2$$Register,
11792                           $cnt1$$Register, $cnt2$$Register,
11793                           icnt2, $result$$Register,
11794                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11795     } else {
11796       // Small strings are loaded through stack if they cross page boundary.
11797       __ string_indexof($str1$$Register, $str2$$Register,
11798                         $cnt1$$Register, $cnt2$$Register,
11799                         icnt2, $result$$Register,
11800                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11801     }
11802   %}
11803   ins_pipe( pipe_slow );
11804 %}
11805 
11806 // fast search of substring with known size.
11807 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11808                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11809   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11810   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11811   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11812 
11813   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11814   ins_encode %{
11815     int icnt2 = (int)$int_cnt2$$constant;
11816     if (icnt2 >= 8) {
11817       // IndexOf for constant substrings with size >= 8 elements
11818       // which don't need to be loaded through stack.
11819       __ string_indexofC8($str1$$Register, $str2$$Register,
11820                           $cnt1$$Register, $cnt2$$Register,
11821                           icnt2, $result$$Register,
11822                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11823     } else {
11824       // Small strings are loaded through stack if they cross page boundary.
11825       __ string_indexof($str1$$Register, $str2$$Register,
11826                         $cnt1$$Register, $cnt2$$Register,
11827                         icnt2, $result$$Register,
11828                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11829     }
11830   %}
11831   ins_pipe( pipe_slow );
11832 %}
11833 
11834 // fast search of substring with known size.
11835 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11836                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11837   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11838   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11839   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11840 
11841   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11842   ins_encode %{
11843     int icnt2 = (int)$int_cnt2$$constant;
11844     if (icnt2 >= 8) {
11845       // IndexOf for constant substrings with size >= 8 elements
11846       // which don't need to be loaded through stack.
11847       __ string_indexofC8($str1$$Register, $str2$$Register,
11848                           $cnt1$$Register, $cnt2$$Register,
11849                           icnt2, $result$$Register,
11850                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11851     } else {
11852       // Small strings are loaded through stack if they cross page boundary.
11853       __ string_indexof($str1$$Register, $str2$$Register,
11854                         $cnt1$$Register, $cnt2$$Register,
11855                         icnt2, $result$$Register,
11856                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11857     }
11858   %}
11859   ins_pipe( pipe_slow );
11860 %}
11861 
11862 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11863                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11864   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11865   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11866   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11867 
11868   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11869   ins_encode %{
11870     __ string_indexof($str1$$Register, $str2$$Register,
11871                       $cnt1$$Register, $cnt2$$Register,
11872                       (-1), $result$$Register,
11873                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11874   %}
11875   ins_pipe( pipe_slow );
11876 %}
11877 
11878 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11879                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11880   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11881   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11882   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11883 
11884   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11885   ins_encode %{
11886     __ string_indexof($str1$$Register, $str2$$Register,
11887                       $cnt1$$Register, $cnt2$$Register,
11888                       (-1), $result$$Register,
11889                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11890   %}
11891   ins_pipe( pipe_slow );
11892 %}
11893 
11894 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11895                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11896   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11897   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11898   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11899 
11900   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11901   ins_encode %{
11902     __ string_indexof($str1$$Register, $str2$$Register,
11903                       $cnt1$$Register, $cnt2$$Register,
11904                       (-1), $result$$Register,
11905                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11906   %}
11907   ins_pipe( pipe_slow );
11908 %}
11909 
11910 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11911                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11912   predicate(UseSSE42Intrinsics);
11913   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11914   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11915   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11916   ins_encode %{
11917     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11918                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11919   %}
11920   ins_pipe( pipe_slow );
11921 %}
11922 
11923 // fast array equals
11924 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11925                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11926 %{
11927   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11928   match(Set result (AryEq ary1 ary2));
11929   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11930   //ins_cost(300);
11931 
11932   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11933   ins_encode %{
11934     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11935                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11936                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11937   %}
11938   ins_pipe( pipe_slow );
11939 %}
11940 
11941 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11942                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11943 %{
11944   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11945   match(Set result (AryEq ary1 ary2));
11946   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11947   //ins_cost(300);
11948 
11949   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11950   ins_encode %{
11951     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11952                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11953                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11954   %}
11955   ins_pipe( pipe_slow );
11956 %}
11957 
11958 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11959                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11960 %{
11961   match(Set result (HasNegatives ary1 len));
11962   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11963 
11964   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11965   ins_encode %{
11966     __ has_negatives($ary1$$Register, $len$$Register,
11967                      $result$$Register, $tmp3$$Register,
11968                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11969   %}
11970   ins_pipe( pipe_slow );
11971 %}
11972 
11973 // fast char[] to byte[] compression
11974 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11975                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11976   match(Set result (StrCompressedCopy src (Binary dst len)));
11977   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11978 
11979   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11980   ins_encode %{
11981     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11982                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11983                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11984   %}
11985   ins_pipe( pipe_slow );
11986 %}
11987 
11988 // fast byte[] to char[] inflation
11989 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11990                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11991   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11992   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11993 
11994   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11995   ins_encode %{
11996     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11997                           $tmp1$$XMMRegister, $tmp2$$Register);
11998   %}
11999   ins_pipe( pipe_slow );
12000 %}
12001 
12002 // encode char[] to byte[] in ISO_8859_1
12003 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12004                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12005                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12006   match(Set result (EncodeISOArray src (Binary dst len)));
12007   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12008 
12009   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12010   ins_encode %{
12011     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12012                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12013                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
12014   %}
12015   ins_pipe( pipe_slow );
12016 %}
12017 
12018 
12019 //----------Control Flow Instructions------------------------------------------
12020 // Signed compare Instructions
12021 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12022   match(Set cr (CmpI op1 op2));
12023   effect( DEF cr, USE op1, USE op2 );
12024   format %{ "CMP    $op1,$op2" %}
12025   opcode(0x3B);  /* Opcode 3B /r */
12026   ins_encode( OpcP, RegReg( op1, op2) );
12027   ins_pipe( ialu_cr_reg_reg );
12028 %}
12029 
12030 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12031   match(Set cr (CmpI op1 op2));
12032   effect( DEF cr, USE op1 );
12033   format %{ "CMP    $op1,$op2" %}
12034   opcode(0x81,0x07);  /* Opcode 81 /7 */
12035   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12036   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12037   ins_pipe( ialu_cr_reg_imm );
12038 %}
12039 
12040 // Cisc-spilled version of cmpI_eReg
12041 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12042   match(Set cr (CmpI op1 (LoadI op2)));
12043 
12044   format %{ "CMP    $op1,$op2" %}
12045   ins_cost(500);
12046   opcode(0x3B);  /* Opcode 3B /r */
12047   ins_encode( OpcP, RegMem( op1, op2) );
12048   ins_pipe( ialu_cr_reg_mem );
12049 %}
12050 
12051 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
12052   match(Set cr (CmpI src zero));
12053   effect( DEF cr, USE src );
12054 
12055   format %{ "TEST   $src,$src" %}
12056   opcode(0x85);
12057   ins_encode( OpcP, RegReg( src, src ) );
12058   ins_pipe( ialu_cr_reg_imm );
12059 %}
12060 
12061 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
12062   match(Set cr (CmpI (AndI src con) zero));
12063 
12064   format %{ "TEST   $src,$con" %}
12065   opcode(0xF7,0x00);
12066   ins_encode( OpcP, RegOpc(src), Con32(con) );
12067   ins_pipe( ialu_cr_reg_imm );
12068 %}
12069 
12070 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
12071   match(Set cr (CmpI (AndI src mem) zero));
12072 
12073   format %{ "TEST   $src,$mem" %}
12074   opcode(0x85);
12075   ins_encode( OpcP, RegMem( src, mem ) );
12076   ins_pipe( ialu_cr_reg_mem );
12077 %}
12078 
12079 // Unsigned compare Instructions; really, same as signed except they
12080 // produce an eFlagsRegU instead of eFlagsReg.
12081 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12082   match(Set cr (CmpU op1 op2));
12083 
12084   format %{ "CMPu   $op1,$op2" %}
12085   opcode(0x3B);  /* Opcode 3B /r */
12086   ins_encode( OpcP, RegReg( op1, op2) );
12087   ins_pipe( ialu_cr_reg_reg );
12088 %}
12089 
12090 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12091   match(Set cr (CmpU op1 op2));
12092 
12093   format %{ "CMPu   $op1,$op2" %}
12094   opcode(0x81,0x07);  /* Opcode 81 /7 */
12095   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12096   ins_pipe( ialu_cr_reg_imm );
12097 %}
12098 
12099 // Cisc-spilled version of cmpU_eReg
12100 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12101   match(Set cr (CmpU op1 (LoadI op2)));
12102 
12103   format %{ "CMPu   $op1,$op2" %}
12104   ins_cost(500);
12105   opcode(0x3B);  /* Opcode 3B /r */
12106   ins_encode( OpcP, RegMem( op1, op2) );
12107   ins_pipe( ialu_cr_reg_mem );
12108 %}
12109 
12110 // // Cisc-spilled version of cmpU_eReg
12111 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12112 //  match(Set cr (CmpU (LoadI op1) op2));
12113 //
12114 //  format %{ "CMPu   $op1,$op2" %}
12115 //  ins_cost(500);
12116 //  opcode(0x39);  /* Opcode 39 /r */
12117 //  ins_encode( OpcP, RegMem( op1, op2) );
12118 //%}
12119 
12120 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
12121   match(Set cr (CmpU src zero));
12122 
12123   format %{ "TESTu  $src,$src" %}
12124   opcode(0x85);
12125   ins_encode( OpcP, RegReg( src, src ) );
12126   ins_pipe( ialu_cr_reg_imm );
12127 %}
12128 
12129 // Unsigned pointer compare Instructions
12130 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12131   match(Set cr (CmpP op1 op2));
12132 
12133   format %{ "CMPu   $op1,$op2" %}
12134   opcode(0x3B);  /* Opcode 3B /r */
12135   ins_encode( OpcP, RegReg( op1, op2) );
12136   ins_pipe( ialu_cr_reg_reg );
12137 %}
12138 
12139 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12140   match(Set cr (CmpP op1 op2));
12141 
12142   format %{ "CMPu   $op1,$op2" %}
12143   opcode(0x81,0x07);  /* Opcode 81 /7 */
12144   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12145   ins_pipe( ialu_cr_reg_imm );
12146 %}
12147 
12148 // Cisc-spilled version of cmpP_eReg
12149 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12150   match(Set cr (CmpP op1 (LoadP op2)));
12151 
12152   format %{ "CMPu   $op1,$op2" %}
12153   ins_cost(500);
12154   opcode(0x3B);  /* Opcode 3B /r */
12155   ins_encode( OpcP, RegMem( op1, op2) );
12156   ins_pipe( ialu_cr_reg_mem );
12157 %}
12158 
12159 // // Cisc-spilled version of cmpP_eReg
12160 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12161 //  match(Set cr (CmpP (LoadP op1) op2));
12162 //
12163 //  format %{ "CMPu   $op1,$op2" %}
12164 //  ins_cost(500);
12165 //  opcode(0x39);  /* Opcode 39 /r */
12166 //  ins_encode( OpcP, RegMem( op1, op2) );
12167 //%}
12168 
12169 // Compare raw pointer (used in out-of-heap check).
12170 // Only works because non-oop pointers must be raw pointers
12171 // and raw pointers have no anti-dependencies.
12172 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12173   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12174   match(Set cr (CmpP op1 (LoadP op2)));
12175 
12176   format %{ "CMPu   $op1,$op2" %}
12177   opcode(0x3B);  /* Opcode 3B /r */
12178   ins_encode( OpcP, RegMem( op1, op2) );
12179   ins_pipe( ialu_cr_reg_mem );
12180 %}
12181 
12182 //
12183 // This will generate a signed flags result. That is fine, since any
12184 // compare against zero is only ever tested for eq/neq.
12185 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12186   match(Set cr (CmpP src zero));
12187 
12188   format %{ "TEST   $src,$src" %}
12189   opcode(0x85);
12190   ins_encode( OpcP, RegReg( src, src ) );
12191   ins_pipe( ialu_cr_reg_imm );
12192 %}
12193 
12194 // Cisc-spilled version of testP_reg
12195 // This will generate a signed flags result. That is fine, since any
12196 // compare against zero is only ever tested for eq/neq.
12197 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12198   match(Set cr (CmpP (LoadP op) zero));
12199 
12200   format %{ "TEST   $op,0xFFFFFFFF" %}
12201   ins_cost(500);
12202   opcode(0xF7);               /* Opcode F7 /0 */
12203   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12204   ins_pipe( ialu_cr_reg_imm );
12205 %}
12206 
12207 // Yanked all unsigned pointer compare operations.
12208 // Pointer compares are done with CmpP which is already unsigned.
12209 
12210 //----------Max and Min--------------------------------------------------------
12211 // Min Instructions
12212 ////
12213 //   *** Min and Max using the conditional move are slower than the
12214 //   *** branch version on a Pentium III.
12215 // // Conditional move for min
12216 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12217 //  effect( USE_DEF op2, USE op1, USE cr );
12218 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12219 //  opcode(0x4C,0x0F);
12220 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12221 //  ins_pipe( pipe_cmov_reg );
12222 //%}
12223 //
12224 //// Min Register with Register (P6 version)
12225 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12226 //  predicate(VM_Version::supports_cmov() );
12227 //  match(Set op2 (MinI op1 op2));
12228 //  ins_cost(200);
12229 //  expand %{
12230 //    eFlagsReg cr;
12231 //    compI_eReg(cr,op1,op2);
12232 //    cmovI_reg_lt(op2,op1,cr);
12233 //  %}
12234 //%}
12235 
12236 // Min Register with Register (generic version)
12237 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12238   match(Set dst (MinI dst src));
12239   effect(KILL flags);
12240   ins_cost(300);
12241 
12242   format %{ "MIN    $dst,$src" %}
12243   opcode(0xCC);
12244   ins_encode( min_enc(dst,src) );
12245   ins_pipe( pipe_slow );
12246 %}
12247 
12248 // Max Register with Register
12249 //   *** Min and Max using the conditional move are slower than the
12250 //   *** branch version on a Pentium III.
12251 // // Conditional move for max
12252 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12253 //  effect( USE_DEF op2, USE op1, USE cr );
12254 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12255 //  opcode(0x4F,0x0F);
12256 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12257 //  ins_pipe( pipe_cmov_reg );
12258 //%}
12259 //
12260 // // Max Register with Register (P6 version)
12261 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12262 //  predicate(VM_Version::supports_cmov() );
12263 //  match(Set op2 (MaxI op1 op2));
12264 //  ins_cost(200);
12265 //  expand %{
12266 //    eFlagsReg cr;
12267 //    compI_eReg(cr,op1,op2);
12268 //    cmovI_reg_gt(op2,op1,cr);
12269 //  %}
12270 //%}
12271 
12272 // Max Register with Register (generic version)
12273 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12274   match(Set dst (MaxI dst src));
12275   effect(KILL flags);
12276   ins_cost(300);
12277 
12278   format %{ "MAX    $dst,$src" %}
12279   opcode(0xCC);
12280   ins_encode( max_enc(dst,src) );
12281   ins_pipe( pipe_slow );
12282 %}
12283 
12284 // ============================================================================
12285 // Counted Loop limit node which represents exact final iterator value.
12286 // Note: the resulting value should fit into integer range since
12287 // counted loops have limit check on overflow.
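// A worked example of the formula in the format string below (illustrative
// only): with init = 0, limit = 10, stride = 3 the exact exit value is
//   0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * (12 / 3) = 12,
// i.e. the iterator visits 0, 3, 6, 9 and leaves the loop holding 12.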
12288 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12289   match(Set limit (LoopLimit (Binary init limit) stride));
12290   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12291   ins_cost(300);
12292 
12293   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12294   ins_encode %{
12295     int strd = (int)$stride$$constant;
12296     assert(strd != 1 && strd != -1, "sanity");
12297     int m1 = (strd > 0) ? 1 : -1;
12298     // Convert limit to long (EAX:EDX)
12299     __ cdql();
12300     // Convert init to long (init:tmp)
12301     __ movl($tmp$$Register, $init$$Register);
12302     __ sarl($tmp$$Register, 31);
12303     // $limit - $init
12304     __ subl($limit$$Register, $init$$Register);
12305     __ sbbl($limit_hi$$Register, $tmp$$Register);
12306     // + ($stride - 1)
12307     if (strd > 0) {
12308       __ addl($limit$$Register, (strd - 1));
12309       __ adcl($limit_hi$$Register, 0);
12310       __ movl($tmp$$Register, strd);
12311     } else {
12312       __ addl($limit$$Register, (strd + 1));
12313       __ adcl($limit_hi$$Register, -1);
12314       __ lneg($limit_hi$$Register, $limit$$Register);
12315       __ movl($tmp$$Register, -strd);
12316     }
12317     // signed division: (EAX:EDX) / pos_stride
12318     __ idivl($tmp$$Register);
12319     if (strd < 0) {
12320       // restore sign
12321       __ negl($tmp$$Register);
12322     }
12323     // (EAX) * stride
12324     __ mull($tmp$$Register);
12325     // + init (ignore upper bits)
12326     __ addl($limit$$Register, $init$$Register);
12327   %}
12328   ins_pipe( pipe_slow );
12329 %}
12330 
12331 // ============================================================================
12332 // Branch Instructions
12333 // Jump Table
12334 instruct jumpXtnd(rRegI switch_val) %{
12335   match(Jump switch_val);
12336   ins_cost(350);
12337   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12338   ins_encode %{
12339     // Jump to Address(table_base + switch_reg)
12340     Address index(noreg, $switch_val$$Register, Address::times_1);
12341     __ jump(ArrayAddress($constantaddress, index));
12342   %}
12343   ins_pipe(pipe_jmp);
12344 %}
12345 
12346 // Jump Direct - Label defines a relative address from JMP+1
12347 instruct jmpDir(label labl) %{
12348   match(Goto);
12349   effect(USE labl);
12350 
12351   ins_cost(300);
12352   format %{ "JMP    $labl" %}
12353   size(5);
12354   ins_encode %{
12355     Label* L = $labl$$label;
12356     __ jmp(*L, false); // Always long jump
12357   %}
12358   ins_pipe( pipe_jmp );
12359 %}
12360 
12361 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12362 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12363   match(If cop cr);
12364   effect(USE labl);
12365 
12366   ins_cost(300);
12367   format %{ "J$cop    $labl" %}
12368   size(6);
12369   ins_encode %{
12370     Label* L = $labl$$label;
12371     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12372   %}
12373   ins_pipe( pipe_jcc );
12374 %}
12375 
12376 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12377 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12378   predicate(!n->has_vector_mask_set());
12379   match(CountedLoopEnd cop cr);
12380   effect(USE labl);
12381 
12382   ins_cost(300);
12383   format %{ "J$cop    $labl\t# Loop end" %}
12384   size(6);
12385   ins_encode %{
12386     Label* L = $labl$$label;
12387     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12388   %}
12389   ins_pipe( pipe_jcc );
12390 %}
12391 
12392 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12393 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12394   predicate(!n->has_vector_mask_set());
12395   match(CountedLoopEnd cop cmp);
12396   effect(USE labl);
12397 
12398   ins_cost(300);
12399   format %{ "J$cop,u  $labl\t# Loop end" %}
12400   size(6);
12401   ins_encode %{
12402     Label* L = $labl$$label;
12403     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12404   %}
12405   ins_pipe( pipe_jcc );
12406 %}
12407 
12408 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12409   predicate(!n->has_vector_mask_set());
12410   match(CountedLoopEnd cop cmp);
12411   effect(USE labl);
12412 
12413   ins_cost(200);
12414   format %{ "J$cop,u  $labl\t# Loop end" %}
12415   size(6);
12416   ins_encode %{
12417     Label* L = $labl$$label;
12418     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12419   %}
12420   ins_pipe( pipe_jcc );
12421 %}
12422 
12423 // mask version
12424 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12425 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12426   predicate(n->has_vector_mask_set());
12427   match(CountedLoopEnd cop cr);
12428   effect(USE labl);
12429 
12430   ins_cost(400);
12431   format %{ "J$cop    $labl\t# Loop end\n\t"
12432             "restorevectmask \t# vector mask restore for loops" %}
12433   size(10);
12434   ins_encode %{
12435     Label* L = $labl$$label;
12436     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12437     __ restorevectmask();
12438   %}
12439   ins_pipe( pipe_jcc );
12440 %}
12441 
12442 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12443 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12444   predicate(n->has_vector_mask_set());
12445   match(CountedLoopEnd cop cmp);
12446   effect(USE labl);
12447 
12448   ins_cost(400);
12449   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12450             "restorevectmask \t# vector mask restore for loops" %}
12451   size(10);
12452   ins_encode %{
12453     Label* L = $labl$$label;
12454     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12455     __ restorevectmask();
12456   %}
12457   ins_pipe( pipe_jcc );
12458 %}
12459 
12460 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12461   predicate(n->has_vector_mask_set());
12462   match(CountedLoopEnd cop cmp);
12463   effect(USE labl);
12464 
12465   ins_cost(300);
12466   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12467             "restorevectmask \t# vector mask restore for loops" %}
12468   size(10);
12469   ins_encode %{
12470     Label* L = $labl$$label;
12471     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12472     __ restorevectmask();
12473   %}
12474   ins_pipe( pipe_jcc );
12475 %}
12476 
12477 // Jump Direct Conditional - using unsigned comparison
12478 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12479   match(If cop cmp);
12480   effect(USE labl);
12481 
12482   ins_cost(300);
12483   format %{ "J$cop,u  $labl" %}
12484   size(6);
12485   ins_encode %{
12486     Label* L = $labl$$label;
12487     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12488   %}
12489   ins_pipe(pipe_jcc);
12490 %}
12491 
12492 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12493   match(If cop cmp);
12494   effect(USE labl);
12495 
12496   ins_cost(200);
12497   format %{ "J$cop,u  $labl" %}
12498   size(6);
12499   ins_encode %{
12500     Label* L = $labl$$label;
12501     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12502   %}
12503   ins_pipe(pipe_jcc);
12504 %}
12505 
12506 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12507   match(If cop cmp);
12508   effect(USE labl);
12509 
12510   ins_cost(200);
12511   format %{ $$template
12512     if ($cop$$cmpcode == Assembler::notEqual) {
12513       $$emit$$"JP,u   $labl\n\t"
12514       $$emit$$"J$cop,u   $labl"
12515     } else {
12516       $$emit$$"JP,u   done\n\t"
12517       $$emit$$"J$cop,u   $labl\n\t"
12518       $$emit$$"done:"
12519     }
12520   %}
12521   ins_encode %{
12522     Label* l = $labl$$label;
12523     if ($cop$$cmpcode == Assembler::notEqual) {
12524       __ jcc(Assembler::parity, *l, false);
12525       __ jcc(Assembler::notEqual, *l, false);
12526     } else if ($cop$$cmpcode == Assembler::equal) {
12527       Label done;
12528       __ jccb(Assembler::parity, done);
12529       __ jcc(Assembler::equal, *l, false);
12530       __ bind(done);
12531     } else {
12532        ShouldNotReachHere();
12533     }
12534   %}
12535   ins_pipe(pipe_jcc);
12536 %}
12537 
12538 // ============================================================================
12539 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12540 // array for an instance of the superklass.  Set a hidden internal cache on a
12541 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12542 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
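//
// Illustrative sketch (comment only, not generated code; the accessor names
// below are simplified stand-ins for the real Klass fields): what the scan
// performed by this instruction computes.
//
//   Klass** p = sub->secondary_supers_data();      // EDI after the ADD
//   int     n = sub->secondary_supers_length();    // ECX
//   while (n-- > 0) {
//     if (*p++ == super) {                         // EAX holds super
//       sub->set_secondary_super_cache(super);     // remember the hit
//       return 0;                                  // flags Z  : hit
//     }
//   }
//   return 1;                                      // flags NZ : miss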
12543 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12544   match(Set result (PartialSubtypeCheck sub super));
12545   effect( KILL rcx, KILL cr );
12546 
12547   ins_cost(1100);  // slightly larger than the next version
12548   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12549             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12550             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12551             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12552             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12553             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12554             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12555      "miss:\t" %}
12556 
12557   opcode(0x1); // Force a XOR of EDI
12558   ins_encode( enc_PartialSubtypeCheck() );
12559   ins_pipe( pipe_slow );
12560 %}
12561 
12562 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12563   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12564   effect( KILL rcx, KILL result );
12565 
12566   ins_cost(1000);
12567   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12568             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12569             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12570             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12571             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12572             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12573      "miss:\t" %}
12574 
12575   opcode(0x0);  // No need to XOR EDI
12576   ins_encode( enc_PartialSubtypeCheck() );
12577   ins_pipe( pipe_slow );
12578 %}
12579 
12580 // ============================================================================
12581 // Branch Instructions -- short offset versions
12582 //
12583 // These instructions are used to replace jumps of a long offset (the default
12584 // match) with jumps of a shorter offset.  These instructions are all tagged
12585 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12586 // match rules in general matching.  Instead, the ADLC generates a conversion
12587 // method in the MachNode which can be used to do in-place replacement of the
12588 // long variant with the shorter variant.  The compiler determines whether the
12589 // shorter offset suffices via the is_short_branch_offset() predicate in the machine
12590 // specific code section of the file.
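//
// Illustrative note (comment only): the short forms below use the two-byte
// rel8 branch encodings (JMP rel8 = EB cb, Jcc rel8 = 7x cb) instead of the
// rel32 encodings used by the default long forms (JMP rel32 = E9 cd,
// Jcc rel32 = 0F 8x cd), which is why the short variants declare size(2)
// where the long variants declare size(5) or size(6).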
12591 
12592 // Jump Direct - Label defines a relative address from JMP+1
12593 instruct jmpDir_short(label labl) %{
12594   match(Goto);
12595   effect(USE labl);
12596 
12597   ins_cost(300);
12598   format %{ "JMP,s  $labl" %}
12599   size(2);
12600   ins_encode %{
12601     Label* L = $labl$$label;
12602     __ jmpb(*L);
12603   %}
12604   ins_pipe( pipe_jmp );
12605   ins_short_branch(1);
12606 %}
12607 
12608 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12609 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12610   match(If cop cr);
12611   effect(USE labl);
12612 
12613   ins_cost(300);
12614   format %{ "J$cop,s  $labl" %}
12615   size(2);
12616   ins_encode %{
12617     Label* L = $labl$$label;
12618     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12619   %}
12620   ins_pipe( pipe_jcc );
12621   ins_short_branch(1);
12622 %}
12623 
12624 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12625 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12626   match(CountedLoopEnd cop cr);
12627   effect(USE labl);
12628 
12629   ins_cost(300);
12630   format %{ "J$cop,s  $labl\t# Loop end" %}
12631   size(2);
12632   ins_encode %{
12633     Label* L = $labl$$label;
12634     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12635   %}
12636   ins_pipe( pipe_jcc );
12637   ins_short_branch(1);
12638 %}
12639 
12640 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12641 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12642   match(CountedLoopEnd cop cmp);
12643   effect(USE labl);
12644 
12645   ins_cost(300);
12646   format %{ "J$cop,us $labl\t# Loop end" %}
12647   size(2);
12648   ins_encode %{
12649     Label* L = $labl$$label;
12650     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12651   %}
12652   ins_pipe( pipe_jcc );
12653   ins_short_branch(1);
12654 %}
12655 
12656 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12657   match(CountedLoopEnd cop cmp);
12658   effect(USE labl);
12659 
12660   ins_cost(300);
12661   format %{ "J$cop,us $labl\t# Loop end" %}
12662   size(2);
12663   ins_encode %{
12664     Label* L = $labl$$label;
12665     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12666   %}
12667   ins_pipe( pipe_jcc );
12668   ins_short_branch(1);
12669 %}
12670 
12671 // Jump Direct Conditional - using unsigned comparison
12672 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12673   match(If cop cmp);
12674   effect(USE labl);
12675 
12676   ins_cost(300);
12677   format %{ "J$cop,us $labl" %}
12678   size(2);
12679   ins_encode %{
12680     Label* L = $labl$$label;
12681     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12682   %}
12683   ins_pipe( pipe_jcc );
12684   ins_short_branch(1);
12685 %}
12686 
12687 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12688   match(If cop cmp);
12689   effect(USE labl);
12690 
12691   ins_cost(300);
12692   format %{ "J$cop,us $labl" %}
12693   size(2);
12694   ins_encode %{
12695     Label* L = $labl$$label;
12696     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12697   %}
12698   ins_pipe( pipe_jcc );
12699   ins_short_branch(1);
12700 %}
12701 
12702 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12703   match(If cop cmp);
12704   effect(USE labl);
12705 
12706   ins_cost(300);
12707   format %{ $$template
12708     if ($cop$$cmpcode == Assembler::notEqual) {
12709       $$emit$$"JP,u,s   $labl\n\t"
12710       $$emit$$"J$cop,u,s   $labl"
12711     } else {
12712       $$emit$$"JP,u,s   done\n\t"
12713       $$emit$$"J$cop,u,s  $labl\n\t"
12714       $$emit$$"done:"
12715     }
12716   %}
12717   size(4);
12718   ins_encode %{
12719     Label* l = $labl$$label;
12720     if ($cop$$cmpcode == Assembler::notEqual) {
12721       __ jccb(Assembler::parity, *l);
12722       __ jccb(Assembler::notEqual, *l);
12723     } else if ($cop$$cmpcode == Assembler::equal) {
12724       Label done;
12725       __ jccb(Assembler::parity, done);
12726       __ jccb(Assembler::equal, *l);
12727       __ bind(done);
12728     } else {
12729        ShouldNotReachHere();
12730     }
12731   %}
12732   ins_pipe(pipe_jcc);
12733   ins_short_branch(1);
12734 %}
12735 
12736 // ============================================================================
12737 // Long Compare
12738 //
12739 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12740 // is tricky.  The flavor of compare used depends on whether we are testing
12741 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12742 // The GE test is the negated LT test.  The LE test can be had by commuting
12743 // the operands (yielding a GE test) and then negating; negate again for the
12744 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12745 // NE test is negated from that.
12746 
12747 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12748 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12749 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12750 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12751 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12752 // foo match ends up with the wrong leaf.  One fix is to not match both
12753 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12754 // both forms beat the trinary form of long-compare and both are very useful
12755 // on Intel which has so few registers.
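//
// Illustrative sketch (comment only, not generated code): how a long compare
// decomposes into 32-bit halves when the value lives in a hi/lo register pair.
//
//   bool lt(jint a_hi, juint a_lo, jint b_hi, juint b_lo) {
//     if (a_hi != b_hi) return a_hi < b_hi;        // signed compare, high words
//     return a_lo < b_lo;                          // unsigned compare, low words
//   }
//   bool eq(jint a_hi, juint a_lo, jint b_hi, juint b_lo) {
//     return ((a_hi ^ b_hi) | (a_lo ^ b_lo)) == 0; // OR of halves is zero
//   }
//   // ge = !lt;  le(a,b) = ge(b,a);  gt(a,b) = lt(b,a);  ne = !eq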
12756 
12757 // Manifest a CmpL result in an integer register.  Very painful.
12758 // This is the test to avoid.
12759 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12760   match(Set dst (CmpL3 src1 src2));
12761   effect( KILL flags );
12762   ins_cost(1000);
12763   format %{ "XOR    $dst,$dst\n\t"
12764             "CMP    $src1.hi,$src2.hi\n\t"
12765             "JLT,s  m_one\n\t"
12766             "JGT,s  p_one\n\t"
12767             "CMP    $src1.lo,$src2.lo\n\t"
12768             "JB,s   m_one\n\t"
12769             "JEQ,s  done\n"
12770     "p_one:\tINC    $dst\n\t"
12771             "JMP,s  done\n"
12772     "m_one:\tDEC    $dst\n"
12773      "done:" %}
12774   ins_encode %{
12775     Label p_one, m_one, done;
12776     __ xorptr($dst$$Register, $dst$$Register);
12777     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12778     __ jccb(Assembler::less,    m_one);
12779     __ jccb(Assembler::greater, p_one);
12780     __ cmpl($src1$$Register, $src2$$Register);
12781     __ jccb(Assembler::below,   m_one);
12782     __ jccb(Assembler::equal,   done);
12783     __ bind(p_one);
12784     __ incrementl($dst$$Register);
12785     __ jmpb(done);
12786     __ bind(m_one);
12787     __ decrementl($dst$$Register);
12788     __ bind(done);
12789   %}
12790   ins_pipe( pipe_slow );
12791 %}
12792 
12793 //======
12794 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12795 // compares.  Can be used for LE or GT compares by reversing arguments.
12796 // NOT GOOD FOR EQ/NE tests.
12797 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12798   match( Set flags (CmpL src zero ));
12799   ins_cost(100);
12800   format %{ "TEST   $src.hi,$src.hi" %}
12801   opcode(0x85);
12802   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12803   ins_pipe( ialu_cr_reg_reg );
12804 %}
12805 
12806 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12807 // compares.  Can be used for LE or GT compares by reversing arguments.
12808 // NOT GOOD FOR EQ/NE tests.
12809 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12810   match( Set flags (CmpL src1 src2 ));
12811   effect( TEMP tmp );
12812   ins_cost(300);
12813   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12814             "MOV    $tmp,$src1.hi\n\t"
12815             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12816   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12817   ins_pipe( ialu_cr_reg_reg );
12818 %}
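
// Illustrative note (comment only): the CMP/SBB pair above chains the borrow
// of a full 64-bit subtraction, so the signed-less condition (SF != OF)
// afterwards is exactly (src1 < src2) as a long:
//
//   src1.lo - src2.lo          -> sets CF (borrow out of the low words)
//   src1.hi - src2.hi - CF     -> SF/OF now describe the 64-bit difference
//
// ZF does not describe the 64-bit result here, which is why EQ/NE compares
// use the separate EQNE flavors further below.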
12819 
12820 // Long compares reg < zero/reg OR reg >= zero/reg.
12821 // Just a wrapper for a normal branch, plus the predicate test.
12822 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12823   match(If cmp flags);
12824   effect(USE labl);
12825   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12826   expand %{
12827     jmpCon(cmp,flags,labl);    // JLT or JGE...
12828   %}
12829 %}
12830 
12831 //======
12832 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12833 // compares.  Can be used for LE or GT compares by reversing arguments.
12834 // NOT GOOD FOR EQ/NE tests.
12835 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12836   match(Set flags (CmpUL src zero));
12837   ins_cost(100);
12838   format %{ "TEST   $src.hi,$src.hi" %}
12839   opcode(0x85);
12840   ins_encode(OpcP, RegReg_Hi2(src, src));
12841   ins_pipe(ialu_cr_reg_reg);
12842 %}
12843 
12844 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12845 // compares.  Can be used for LE or GT compares by reversing arguments.
12846 // NOT GOOD FOR EQ/NE tests.
12847 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12848   match(Set flags (CmpUL src1 src2));
12849   effect(TEMP tmp);
12850   ins_cost(300);
12851   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12852             "MOV    $tmp,$src1.hi\n\t"
12853             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12854   ins_encode(long_cmp_flags2(src1, src2, tmp));
12855   ins_pipe(ialu_cr_reg_reg);
12856 %}
12857 
12858 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12859 // Just a wrapper for a normal branch, plus the predicate test.
12860 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12861   match(If cmp flags);
12862   effect(USE labl);
12863   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12864   expand %{
12865     jmpCon(cmp, flags, labl);    // JLT or JGE...
12866   %}
12867 %}
12868 
12869 // Compare 2 longs and CMOVE longs.
12870 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12871   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12872   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12873   ins_cost(400);
12874   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12875             "CMOV$cmp $dst.hi,$src.hi" %}
12876   opcode(0x0F,0x40);
12877   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12878   ins_pipe( pipe_cmov_reg_long );
12879 %}
12880 
12881 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12882   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12883   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12884   ins_cost(500);
12885   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12886             "CMOV$cmp $dst.hi,$src.hi" %}
12887   opcode(0x0F,0x40);
12888   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12889   ins_pipe( pipe_cmov_reg_long );
12890 %}
12891 
12892 // Compare 2 longs and CMOVE ints.
12893 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12894   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12895   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12896   ins_cost(200);
12897   format %{ "CMOV$cmp $dst,$src" %}
12898   opcode(0x0F,0x40);
12899   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12900   ins_pipe( pipe_cmov_reg );
12901 %}
12902 
12903 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12904   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12905   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12906   ins_cost(250);
12907   format %{ "CMOV$cmp $dst,$src" %}
12908   opcode(0x0F,0x40);
12909   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12910   ins_pipe( pipe_cmov_mem );
12911 %}
12912 
12913 // Compare 2 longs and CMOVE ptrs.
12914 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12915   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12916   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12917   ins_cost(200);
12918   format %{ "CMOV$cmp $dst,$src" %}
12919   opcode(0x0F,0x40);
12920   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12921   ins_pipe( pipe_cmov_reg );
12922 %}
12923 
12924 // Compare 2 longs and CMOVE doubles
12925 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12926   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12927   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12928   ins_cost(200);
12929   expand %{
12930     fcmovDPR_regS(cmp,flags,dst,src);
12931   %}
12932 %}
12933 
12934 // Compare 2 longs and CMOVE doubles
12935 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12936   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12937   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12938   ins_cost(200);
12939   expand %{
12940     fcmovD_regS(cmp,flags,dst,src);
12941   %}
12942 %}
12943 
12944 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12945   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12946   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12947   ins_cost(200);
12948   expand %{
12949     fcmovFPR_regS(cmp,flags,dst,src);
12950   %}
12951 %}
12952 
12953 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12954   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12955   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12956   ins_cost(200);
12957   expand %{
12958     fcmovF_regS(cmp,flags,dst,src);
12959   %}
12960 %}
12961 
12962 //======
12963 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12964 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12965   match( Set flags (CmpL src zero ));
12966   effect(TEMP tmp);
12967   ins_cost(200);
12968   format %{ "MOV    $tmp,$src.lo\n\t"
12969             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12970   ins_encode( long_cmp_flags0( src, tmp ) );
12971   ins_pipe( ialu_reg_reg_long );
12972 %}
12973 
12974 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12975 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12976   match( Set flags (CmpL src1 src2 ));
12977   ins_cost(200+300);
12978   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12979             "JNE,s  skip\n\t"
12980             "CMP    $src1.hi,$src2.hi\n\t"
12981      "skip:\t" %}
12982   ins_encode( long_cmp_flags1( src1, src2 ) );
12983   ins_pipe( ialu_cr_reg_reg );
12984 %}
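
// Illustrative note (comment only): for EQ/NE the flags only need to say
// whether the two longs are bit-for-bit identical, so against zero the two
// halves are simply ORed together (ZF set iff (src.lo | src.hi) == 0), and
// for reg-reg the high words are only compared once the low words already
// match, leaving ZF set iff both halves are equal.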
12985 
12986 // Long compare reg == zero/reg OR reg != zero/reg
12987 // Just a wrapper for a normal branch, plus the predicate test.
12988 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12989   match(If cmp flags);
12990   effect(USE labl);
12991   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12992   expand %{
12993     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12994   %}
12995 %}
12996 
12997 //======
12998 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12999 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13000   match(Set flags (CmpUL src zero));
13001   effect(TEMP tmp);
13002   ins_cost(200);
13003   format %{ "MOV    $tmp,$src.lo\n\t"
13004             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13005   ins_encode(long_cmp_flags0(src, tmp));
13006   ins_pipe(ialu_reg_reg_long);
13007 %}
13008 
13009 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13010 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13011   match(Set flags (CmpUL src1 src2));
13012   ins_cost(200+300);
13013   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13014             "JNE,s  skip\n\t"
13015             "CMP    $src1.hi,$src2.hi\n\t"
13016      "skip:\t" %}
13017   ins_encode(long_cmp_flags1(src1, src2));
13018   ins_pipe(ialu_cr_reg_reg);
13019 %}
13020 
13021 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13022 // Just a wrapper for a normal branch, plus the predicate test.
13023 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13024   match(If cmp flags);
13025   effect(USE labl);
13026   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13027   expand %{
13028     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13029   %}
13030 %}
13031 
13032 // Compare 2 longs and CMOVE longs.
13033 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13034   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13035   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13036   ins_cost(400);
13037   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13038             "CMOV$cmp $dst.hi,$src.hi" %}
13039   opcode(0x0F,0x40);
13040   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13041   ins_pipe( pipe_cmov_reg_long );
13042 %}
13043 
13044 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13045   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13046   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13047   ins_cost(500);
13048   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13049             "CMOV$cmp $dst.hi,$src.hi" %}
13050   opcode(0x0F,0x40);
13051   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13052   ins_pipe( pipe_cmov_reg_long );
13053 %}
13054 
13055 // Compare 2 longs and CMOVE ints.
13056 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13057   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13058   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13059   ins_cost(200);
13060   format %{ "CMOV$cmp $dst,$src" %}
13061   opcode(0x0F,0x40);
13062   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13063   ins_pipe( pipe_cmov_reg );
13064 %}
13065 
13066 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13067   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13068   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13069   ins_cost(250);
13070   format %{ "CMOV$cmp $dst,$src" %}
13071   opcode(0x0F,0x40);
13072   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13073   ins_pipe( pipe_cmov_mem );
13074 %}
13075 
13076 // Compare 2 longs and CMOVE ptrs.
13077 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13078   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13079   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13080   ins_cost(200);
13081   format %{ "CMOV$cmp $dst,$src" %}
13082   opcode(0x0F,0x40);
13083   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13084   ins_pipe( pipe_cmov_reg );
13085 %}
13086 
13087 // Compare 2 longs and CMOVE doubles
13088 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13089   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13090   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13091   ins_cost(200);
13092   expand %{
13093     fcmovDPR_regS(cmp,flags,dst,src);
13094   %}
13095 %}
13096 
13097 // Compare 2 longs and CMOVE doubles
13098 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13099   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13100   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13101   ins_cost(200);
13102   expand %{
13103     fcmovD_regS(cmp,flags,dst,src);
13104   %}
13105 %}
13106 
13107 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13108   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13109   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13110   ins_cost(200);
13111   expand %{
13112     fcmovFPR_regS(cmp,flags,dst,src);
13113   %}
13114 %}
13115 
13116 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13117   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13118   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13119   ins_cost(200);
13120   expand %{
13121     fcmovF_regS(cmp,flags,dst,src);
13122   %}
13123 %}
13124 
13125 //======
13126 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13127 // Same as cmpL_reg_flags_LEGT except must negate src
13128 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13129   match( Set flags (CmpL src zero ));
13130   effect( TEMP tmp );
13131   ins_cost(300);
13132   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13133             "CMP    $tmp,$src.lo\n\t"
13134             "SBB    $tmp,$src.hi\n\t" %}
13135   ins_encode( long_cmp_flags3(src, tmp) );
13136   ins_pipe( ialu_reg_reg_long );
13137 %}
13138 
13139 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13140 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13141 // requires a commuted test to get the same result.
13142 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13143   match( Set flags (CmpL src1 src2 ));
13144   effect( TEMP tmp );
13145   ins_cost(300);
13146   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13147             "MOV    $tmp,$src2.hi\n\t"
13148             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13149   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13150   ins_pipe( ialu_cr_reg_reg );
13151 %}
13152 
13153 // Long compares reg <= zero/reg OR reg > zero/reg.
13154 // Just a wrapper for a normal branch, plus the predicate test
13155 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13156   match(If cmp flags);
13157   effect(USE labl);
13158   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13159   ins_cost(300);
13160   expand %{
13161     jmpCon(cmp,flags,labl);    // JGT or JLE...
13162   %}
13163 %}
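
// Illustrative note (comment only): the LE/GT flavor reuses the LT/GE
// machinery by swapping the operands, since for longs
//
//   (a <= b)  ==  (b >= a)      and      (a > b)  ==  (b < a)
//
// The cmpOp_commute operand emits the correspondingly commuted condition
// code, so the swapped CMP/SBB sequence above still branches on the test
// the program asked for.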
13164 
13165 //======
13166 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13167 // Same as cmpUL_reg_flags_LEGT except must negate src
13168 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13169   match(Set flags (CmpUL src zero));
13170   effect(TEMP tmp);
13171   ins_cost(300);
13172   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13173             "CMP    $tmp,$src.lo\n\t"
13174             "SBB    $tmp,$src.hi\n\t" %}
13175   ins_encode(long_cmp_flags3(src, tmp));
13176   ins_pipe(ialu_reg_reg_long);
13177 %}
13178 
13179 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13180 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13181 // requires a commuted test to get the same result.
13182 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13183   match(Set flags (CmpUL src1 src2));
13184   effect(TEMP tmp);
13185   ins_cost(300);
13186   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13187             "MOV    $tmp,$src2.hi\n\t"
13188             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13189   ins_encode(long_cmp_flags2( src2, src1, tmp));
13190   ins_pipe(ialu_cr_reg_reg);
13191 %}
13192 
13193 // Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13194 // Just a wrapper for a normal branch, plus the predicate test
13195 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13196   match(If cmp flags);
13197   effect(USE labl);
13198   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13199   ins_cost(300);
13200   expand %{
13201     jmpCon(cmp, flags, labl);    // JGT or JLE...
13202   %}
13203 %}
13204 
13205 // Compare 2 longs and CMOVE longs.
13206 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13207   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13208   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13209   ins_cost(400);
13210   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13211             "CMOV$cmp $dst.hi,$src.hi" %}
13212   opcode(0x0F,0x40);
13213   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13214   ins_pipe( pipe_cmov_reg_long );
13215 %}
13216 
13217 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13218   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13219   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13220   ins_cost(500);
13221   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13222             "CMOV$cmp $dst.hi,$src.hi+4" %}
13223   opcode(0x0F,0x40);
13224   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13225   ins_pipe( pipe_cmov_reg_long );
13226 %}
13227 
13228 // Compare 2 longs and CMOVE ints.
13229 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13230   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13231   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13232   ins_cost(200);
13233   format %{ "CMOV$cmp $dst,$src" %}
13234   opcode(0x0F,0x40);
13235   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13236   ins_pipe( pipe_cmov_reg );
13237 %}
13238 
13239 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13240   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13241   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13242   ins_cost(250);
13243   format %{ "CMOV$cmp $dst,$src" %}
13244   opcode(0x0F,0x40);
13245   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13246   ins_pipe( pipe_cmov_mem );
13247 %}
13248 
13249 // Compare 2 longs and CMOVE ptrs.
13250 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13251   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13252   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13253   ins_cost(200);
13254   format %{ "CMOV$cmp $dst,$src" %}
13255   opcode(0x0F,0x40);
13256   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13257   ins_pipe( pipe_cmov_reg );
13258 %}
13259 
13260 // Compare 2 longs and CMOVE doubles
13261 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13262   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13263   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13264   ins_cost(200);
13265   expand %{
13266     fcmovDPR_regS(cmp,flags,dst,src);
13267   %}
13268 %}
13269 
13270 // Compare 2 longs and CMOVE doubles
13271 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13272   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13273   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13274   ins_cost(200);
13275   expand %{
13276     fcmovD_regS(cmp,flags,dst,src);
13277   %}
13278 %}
13279 
13280 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13281   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13282   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13283   ins_cost(200);
13284   expand %{
13285     fcmovFPR_regS(cmp,flags,dst,src);
13286   %}
13287 %}
13288 
13289 
13290 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13291   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13292   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13293   ins_cost(200);
13294   expand %{
13295     fcmovF_regS(cmp,flags,dst,src);
13296   %}
13297 %}
13298 
13299 
13300 // ============================================================================
13301 // Procedure Call/Return Instructions
13302 // Call Java Static Instruction
13303 // Note: If this code changes, the corresponding ret_addr_offset() and
13304 //       compute_padding() functions will have to be adjusted.
13305 instruct CallStaticJavaDirect(method meth) %{
13306   match(CallStaticJava);
13307   effect(USE meth);
13308 
13309   ins_cost(300);
13310   format %{ "CALL,static " %}
13311   opcode(0xE8); /* E8 cd */
13312   ins_encode( pre_call_resets,
13313               Java_Static_Call( meth ),
13314               call_epilog,
13315               post_call_FPU );
13316   ins_pipe( pipe_slow );
13317   ins_alignment(4);
13318 %}
13319 
13320 // Call Java Dynamic Instruction
13321 // Note: If this code changes, the corresponding ret_addr_offset() and
13322 //       compute_padding() functions will have to be adjusted.
13323 instruct CallDynamicJavaDirect(method meth) %{
13324   match(CallDynamicJava);
13325   effect(USE meth);
13326 
13327   ins_cost(300);
13328   format %{ "MOV    EAX,(oop)-1\n\t"
13329             "CALL,dynamic" %}
13330   opcode(0xE8); /* E8 cd */
13331   ins_encode( pre_call_resets,
13332               Java_Dynamic_Call( meth ),
13333               call_epilog,
13334               post_call_FPU );
13335   ins_pipe( pipe_slow );
13336   ins_alignment(4);
13337 %}
13338 
13339 // Call Runtime Instruction
13340 instruct CallRuntimeDirect(method meth) %{
13341   match(CallRuntime );
13342   effect(USE meth);
13343 
13344   ins_cost(300);
13345   format %{ "CALL,runtime " %}
13346   opcode(0xE8); /* E8 cd */
13347   // Use FFREEs to clear entries in float stack
13348   ins_encode( pre_call_resets,
13349               FFree_Float_Stack_All,
13350               Java_To_Runtime( meth ),
13351               post_call_FPU );
13352   ins_pipe( pipe_slow );
13353 %}
13354 
13355 // Call runtime without safepoint
13356 instruct CallLeafDirect(method meth) %{
13357   match(CallLeaf);
13358   effect(USE meth);
13359 
13360   ins_cost(300);
13361   format %{ "CALL_LEAF,runtime " %}
13362   opcode(0xE8); /* E8 cd */
13363   ins_encode( pre_call_resets,
13364               FFree_Float_Stack_All,
13365               Java_To_Runtime( meth ),
13366               Verify_FPU_For_Leaf, post_call_FPU );
13367   ins_pipe( pipe_slow );
13368 %}
13369 
13370 instruct CallLeafNoFPDirect(method meth) %{
13371   match(CallLeafNoFP);
13372   effect(USE meth);
13373 
13374   ins_cost(300);
13375   format %{ "CALL_LEAF_NOFP,runtime " %}
13376   opcode(0xE8); /* E8 cd */
13377   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13378   ins_pipe( pipe_slow );
13379 %}
13380 
13381 
13382 // Return Instruction
13383 // Remove the return address & jump to it.
13384 instruct Ret() %{
13385   match(Return);
13386   format %{ "RET" %}
13387   opcode(0xC3);
13388   ins_encode(OpcP);
13389   ins_pipe( pipe_jmp );
13390 %}
13391 
13392 // Tail Call; Jump from runtime stub to Java code.
13393 // Also known as an 'interprocedural jump'.
13394 // Target of jump will eventually return to caller.
13395 // TailJump below removes the return address.
13396 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13397   match(TailCall jump_target method_oop );
13398   ins_cost(300);
13399   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13400   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13401   ins_encode( OpcP, RegOpc(jump_target) );
13402   ins_pipe( pipe_jmp );
13403 %}
13404 
13405 
13406 // Tail Jump; remove the return address; jump to target.
13407 // TailCall above leaves the return address around.
13408 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13409   match( TailJump jump_target ex_oop );
13410   ins_cost(300);
13411   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13412             "JMP    $jump_target " %}
13413   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13414   ins_encode( enc_pop_rdx,
13415               OpcP, RegOpc(jump_target) );
13416   ins_pipe( pipe_jmp );
13417 %}
13418 
13419 // Create exception oop: created by stack-crawling runtime code.
13420 // Created exception is now available to this handler, and is setup
13421 // just prior to jumping to this handler.  No code emitted.
13422 instruct CreateException( eAXRegP ex_oop )
13423 %{
13424   match(Set ex_oop (CreateEx));
13425 
13426   size(0);
13427   // use the following format syntax
13428   format %{ "# exception oop is in EAX; no code emitted" %}
13429   ins_encode();
13430   ins_pipe( empty );
13431 %}
13432 
13433 
13434 // Rethrow exception:
13435 // The exception oop will come in the first argument position.
13436 // Then JUMP (not call) to the rethrow stub code.
13437 instruct RethrowException()
13438 %{
13439   match(Rethrow);
13440 
13441   // use the following format syntax
13442   format %{ "JMP    rethrow_stub" %}
13443   ins_encode(enc_rethrow);
13444   ins_pipe( pipe_jmp );
13445 %}
13446 
13447 // inlined locking and unlocking
13448 
13449 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13450   predicate(Compile::current()->use_rtm());
13451   match(Set cr (FastLock object box));
13452   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13453   ins_cost(300);
13454   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13455   ins_encode %{
13456     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13457                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13458                  _counters, _rtm_counters, _stack_rtm_counters,
13459                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13460                  true, ra_->C->profile_rtm());
13461   %}
13462   ins_pipe(pipe_slow);
13463 %}
13464 
13465 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13466   predicate(!Compile::current()->use_rtm());
13467   match(Set cr (FastLock object box));
13468   effect(TEMP tmp, TEMP scr, USE_KILL box);
13469   ins_cost(300);
13470   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13471   ins_encode %{
13472     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13473                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13474   %}
13475   ins_pipe(pipe_slow);
13476 %}
13477 
13478 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13479   match(Set cr (FastUnlock object box));
13480   effect(TEMP tmp, USE_KILL box);
13481   ins_cost(300);
13482   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13483   ins_encode %{
13484     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13485   %}
13486   ins_pipe(pipe_slow);
13487 %}
13488 
13489 
13490 
13491 // ============================================================================
13492 // Safepoint Instruction
13493 instruct safePoint_poll(eFlagsReg cr) %{
13494   predicate(SafepointMechanism::uses_global_page_poll());
13495   match(SafePoint);
13496   effect(KILL cr);
13497 
13498   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13499   // On SPARC that might be acceptable as we can generate the address with
13500   // just a sethi, saving an or.  By polling at offset 0 we can end up
13501   // putting additional pressure on the index-0 in the D$.  Because of
13502   // alignment (just like the situation at hand) the lower indices tend
13503   // to see more traffic.  It'd be better to change the polling address
13504   // to offset 0 of the last $line in the polling page.
13505 
13506   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13507   ins_cost(125);
13508   size(6);
13509   ins_encode( Safepoint_Poll() );
13510   ins_pipe( ialu_reg_mem );
13511 %}
13512 
13513 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13514   predicate(SafepointMechanism::uses_thread_local_poll());
13515   match(SafePoint poll);
13516   effect(KILL cr, USE poll);
13517 
13518   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13519   ins_cost(125);
13520   // EBP would need size(3)
13521   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13522   ins_encode %{
13523     __ relocate(relocInfo::poll_type);
13524     address pre_pc = __ pc();
13525     __ testl(rax, Address($poll$$Register, 0));
13526     address post_pc = __ pc();
13527     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13528   %}
13529   ins_pipe(ialu_reg_mem);
13530 %}
13531 
13532 
13533 // ============================================================================
13534 // This name is KNOWN by the ADLC and cannot be changed.
13535 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13536 // for this guy.
13537 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13538   match(Set dst (ThreadLocal));
13539   effect(DEF dst, KILL cr);
13540 
13541   format %{ "MOV    $dst, Thread::current()" %}
13542   ins_encode %{
13543     Register dstReg = as_Register($dst$$reg);
13544     __ get_thread(dstReg);
13545   %}
13546   ins_pipe( ialu_reg_fat );
13547 %}
13548 
13549 
13550 
13551 //----------PEEPHOLE RULES-----------------------------------------------------
13552 // These must follow all instruction definitions as they use the names
13553 // defined in the instructions definitions.
13554 //
13555 // peepmatch ( root_instr_name [preceding_instruction]* );
13556 //
13557 // peepconstraint %{
13558 // (instruction_number.operand_name relational_op instruction_number.operand_name
13559 //  [, ...] );
13560 // // instruction numbers are zero-based using left to right order in peepmatch
13561 //
13562 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13563 // // provide an instruction_number.operand_name for each operand that appears
13564 // // in the replacement instruction's match rule
13565 //
13566 // ---------VM FLAGS---------------------------------------------------------
13567 //
13568 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13569 //
13570 // Each peephole rule is given an identifying number starting with zero and
13571 // increasing by one in the order seen by the parser.  An individual peephole
13572 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13573 // on the command-line.
13574 //
13575 // ---------CURRENT LIMITATIONS----------------------------------------------
13576 //
13577 // Only match adjacent instructions in same basic block
13578 // Only equality constraints
13579 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13580 // Only one replacement instruction
13581 //
13582 // ---------EXAMPLE----------------------------------------------------------
13583 //
13584 // // pertinent parts of existing instructions in architecture description
13585 // instruct movI(rRegI dst, rRegI src) %{
13586 //   match(Set dst (CopyI src));
13587 // %}
13588 //
13589 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13590 //   match(Set dst (AddI dst src));
13591 //   effect(KILL cr);
13592 // %}
13593 //
13594 // // Change (inc mov) to lea
13595 // peephole %{
13596 //   // increment preceded by register-register move
13597 //   peepmatch ( incI_eReg movI );
13598 //   // require that the destination register of the increment
13599 //   // match the destination register of the move
13600 //   peepconstraint ( 0.dst == 1.dst );
13601 //   // construct a replacement instruction that sets
13602 //   // the destination to ( move's source register + one )
13603 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13604 // %}
13605 //
13606 // Implementation no longer uses movX instructions since
13607 // machine-independent system no longer uses CopyX nodes.
13608 //
13609 // peephole %{
13610 //   peepmatch ( incI_eReg movI );
13611 //   peepconstraint ( 0.dst == 1.dst );
13612 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13613 // %}
13614 //
13615 // peephole %{
13616 //   peepmatch ( decI_eReg movI );
13617 //   peepconstraint ( 0.dst == 1.dst );
13618 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13619 // %}
13620 //
13621 // peephole %{
13622 //   peepmatch ( addI_eReg_imm movI );
13623 //   peepconstraint ( 0.dst == 1.dst );
13624 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13625 // %}
13626 //
13627 // peephole %{
13628 //   peepmatch ( addP_eReg_imm movP );
13629 //   peepconstraint ( 0.dst == 1.dst );
13630 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13631 // %}
13632 
13633 // // Change load of spilled value to only a spill
13634 // instruct storeI(memory mem, rRegI src) %{
13635 //   match(Set mem (StoreI mem src));
13636 // %}
13637 //
13638 // instruct loadI(rRegI dst, memory mem) %{
13639 //   match(Set dst (LoadI mem));
13640 // %}
13641 //
13642 peephole %{
13643   peepmatch ( loadI storeI );
13644   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13645   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13646 %}
13647 
13648 //----------SMARTSPILL RULES---------------------------------------------------
13649 // These must follow all instruction definitions as they use the names
13650 // defined in the instructions definitions.