1 //
   2 // Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
   30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
   62 // General Registers
   63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
   64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
   65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
      // NOTE(review): ESI and EDI below are declared with SOC as their first
      // (register) save type and SOE only as their C-convention save type, so
      // the sentence above does not match the table -- confirm which is intended.
      //
      // Each reg_def also carries a fifth argument (a VMReg expression) that the
      // four-field legend earlier in this block does not describe.
   66 
   67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
   68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
   69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
   70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
   72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
   73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
   74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
   75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
   77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
   78 // allocator, and only shows up in the encodings.
   79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   81 // Ok so here's the trick FPR1 is really st(0) except in the midst
   82 // of emission of assembly for a machnode. During the emission the fpu stack
   83 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
   84 // the stack will not have this element so FPR1 == st(0) from the
   85 // oopMap viewpoint. This same weirdness with numbering causes
   86 // instruction encoding to have to play games with the register
   87 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
   88 // where it does flt->flt moves to see an example
   89 //
      // Each FPRn is declared as an L/H pair that shares encoding n.  For n >= 1
      // the L half is bound to as_FloatRegister(n-1)->as_VMReg() and the H half
      // to that VMReg's ->next(); the FPR0 pair above is bound to VMRegImpl::Bad().
   90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
   91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
   92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
   93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
   94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
   95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
   96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
   97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
   98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
   99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
  100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
  101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
  102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
  103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 
  105 // Specify priority of register selection within phases of register
  106 // allocation.  Highest priority is first.  A useful heuristic is to
  107 // give registers a low priority when they are required by machine
  108 // instructions, like EAX and EDX.  Registers which are used as
  109 // pairs must fall on an even boundary (witness the FPR#L's in this list).
  110 // For the Intel integer registers, the equivalent Long pairs are
  111 // EDX:EAX, EBX:ECX, and EDI:EBP.
      // In the list below those pairs appear as the adjacent entries (ECX,EBX),
      // (EBP,EDI) and (EAX,EDX), each starting at an even position; ESI and ESP
      // follow unpaired, and every FPRnL is immediately followed by its FPRnH.
  112 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
  113                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
  114                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
  115                     FPR6L, FPR6H, FPR7L, FPR7H );
 116 
 117 
 118 //----------Architecture Description Register Classes--------------------------
 119 // Several register classes are automatically defined based upon information in
 120 // this architecture description.
 121 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 122 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
  123 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
  124 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 125 //
  126 // Class for no registers (empty set).
  127 reg_class no_reg();
  128 
  129 // Class for all general (integer) registers, including ESP
  130 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
  131 // Class for all general (integer) registers, excluding EBP
  132 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
  133 // Dynamic register class that selects at runtime between register classes
  134 // any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag
      // PreserveFramePointer).
  135 // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
  136 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 137 
  138 // Class for general registers (everything in any_reg_with_ebp except ESP)
  139 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
  140 // Class for general registers (excluding EBP).
  141 // This register class can be used for implicit null checks on win95.
  142 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
  143 // Used also if the PreserveFramePointer flag is true.
  144 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
  145 // Dynamic register class that selects between int_reg_no_ebp and
      // int_reg_with_ebp, keyed on PreserveFramePointer.
  146 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 147 
  148 // Class of "X" registers
      // NOTE(review): presumably the four registers that have 8-bit sub-registers
      // (AL/BL/CL/DL) -- confirm against the operands that use this class.
  149 reg_class int_x_reg(EBX, ECX, EDX, EAX);
  150 
  151 // Class of registers that can appear in an address with no offset.
  152 // EBP and ESP require an extra instruction byte for zero offset.
  153 // Used in fast-unlock
      // Note that the list below also leaves out EAX and ECX, not only EBP and ESP.
  154 reg_class p_reg(EDX, EDI, ESI, EBX);
 155 
  156 // Class for general registers excluding ECX
  157 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
  158 // Class for general registers excluding ECX (and EBP)
  159 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
  160 // Dynamic register class that selects between ncx_reg_no_ebp and
      // ncx_reg_with_ebp, keyed on PreserveFramePointer.
  161 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
  162 
  163 // Class for general registers excluding EAX
      // (the list below also omits EBP, and there is no dynamic variant of it)
  164 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
  165 
  166 // Class for general registers excluding EAX and EBX.
  167 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
  168 // Class for general registers excluding EAX and EBX (and EBP)
  169 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
  170 // Dynamic register class that selects between nabx_reg_no_ebp and
      // nabx_reg_with_ebp, keyed on PreserveFramePointer.
  171 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 172 
  173 // Class of EAX (for multiply and divide operations)
      // Every class from here down to sp_reg contains exactly one register.
  174 reg_class eax_reg(EAX);
  175 
  176 // Class of EBX (for atomic add)
  177 reg_class ebx_reg(EBX);
  178 
  179 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
  180 reg_class ecx_reg(ECX);
  181 
  182 // Class of EDX (for multiply and divide operations)
  183 reg_class edx_reg(EDX);
  184 
  185 // Class of EDI (for synchronization)
  186 reg_class edi_reg(EDI);
  187 
  188 // Class of ESI (for synchronization)
  189 reg_class esi_reg(ESI);
  190 
  191 // Singleton class for stack pointer
  192 reg_class sp_reg(ESP);
  193 
  194 // Singleton class for instruction pointer
      // (left disabled: no EIP register is defined in this register block)
  195 // reg_class ip_reg(EIP);
 196 
  197 // Class of integer register pairs
      // The members are written two at a time: EAX,EDX / ECX,EBX / EBP,EDI.
  198 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
  199 // Class of integer register pairs (excluding EBP and EDI);
  200 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
  201 // Dynamic register class that selects between long_reg_no_ebp and
      // long_reg_with_ebp, keyed on PreserveFramePointer.
  202 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
  203 
  204 // Class of integer register pairs that aligns with calling convention
  205 reg_class eadx_reg( EAX,EDX );
  206 reg_class ebcx_reg( ECX,EBX );
  207 
  208 // Not AX or DX, used in divides
  209 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  210 // Not AX or DX (and neither EBP), used in divides
  211 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
  212 // Dynamic register class that selects between nadx_reg_no_ebp and
      // nadx_reg_with_ebp, keyed on PreserveFramePointer.
  213 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 214 
  215 // Floating point registers.  Notice FPR0 is not a choice.
  216 // FPR0 is never allocated; we use clever encodings to fake
  217 // a 2-address instruction out of Intel's FP stack.
      // Single-precision class: only the L halves of FPR1..FPR7.
  218 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
  219 
      // Double-precision class: the L/H pairs of FPR1..FPR7.
  220 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
  221                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
  222                       FPR7L,FPR7H );
  223 
      // Fixed-register classes: "reg0" is FPR1, "reg1" is FPR2, and
      // fp_dbl_notreg0 is fp_dbl_reg without the FPR1 pair.
  224 reg_class fp_flt_reg0( FPR1L );
  225 reg_class fp_dbl_reg0( FPR1L,FPR1H );
  226 reg_class fp_dbl_reg1( FPR2L,FPR2H );
  227 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
  228                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 229 
 230 %}
 231 
 232 source_hpp %{
 233 #if INCLUDE_ALL_GCS
 234 #include "shenandoahBarrierSetAssembler_x86.hpp"
 235 #endif
 236 %}
 237 
 238 //----------SOURCE BLOCK-------------------------------------------------------
 239 // This is a block of C++ code which provides values, functions, and
 240 // definitions necessary in the rest of the architecture description
 241 source_hpp %{
 242 // Must be visible to the DFA in dfa_x86_32.cpp
 243 extern bool is_operand_hi32_zero(Node* n);
 244 %}
 245 
 246 source %{
  247 #define   RELOC_IMM32    Assembler::imm_operand
  248 #define   RELOC_DISP32   Assembler::disp32_operand
      // RELOC_IMM32 / RELOC_DISP32 are short aliases for the two Assembler
      // constants named on the right-hand side.
  249 
      // "__ foo(...)" expands to "_masm.foo(...)", so any function that uses it
      // must have a MacroAssembler named _masm in scope.
  250 #define __ _masm.
  251 
  252 // How to find the high register of a Long pair, given the low register
      // (adds 2 to the low register's number; with the encodings in the register
      // block this takes EAX(0)->EDX(2), ECX(1)->EBX(3) and EBP(5)->EDI(7))
  253 #define   HIGH_FROM_LOW(x) ((x)+2)
 254 
 255 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 256 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 257 // fast versions of NegF/NegD and AbsF/AbsD.
 258 
 259 // Note: 'double' and 'long long' have 32-bits alignment on x86.
 260 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 261   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
 262   // of 128-bits operands for SSE instructions.
 263   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 264   // Store the value to a 128-bits operand.
 265   operand[0] = lo;
 266   operand[1] = hi;
 267   return operand;
 268 }
 269 
  270 // Buffer for 128-bits masks used by SSE instructions.
  271 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
      // Each initializer below hands &fp_signmask_pool[k*2] (k = 1..4) to
      // double_quadword(), which rounds that address down to a 16-byte boundary
      // before storing.  The extra 128 bits at the front of the buffer leave
      // room for that downward rounding.
  272 
  273 // Static initialization during VM startup.
      // *_signmask values have every bit set except the sign bit of each 32-bit
      // (float) or 64-bit (double) lane; *_signflip values have only that sign
      // bit set.
  274 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
  275 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
  276 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
  277 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
 278 
 279 // Offset hacking within calls.
 280 static int pre_call_resets_size() {
 281   int size = 0;
 282   Compile* C = Compile::current();
 283   if (C->in_24_bit_fp_mode()) {
 284     size += 6; // fldcw
 285   }
 286   if (C->max_vector_size() > 16) {
 287     size += 3; // vzeroupper
 288   }
 289   return size;
 290 }
 291 
 292 // !!!!! Special hack to get all type of calls to specify the byte offset
 293 //       from the start of the call to the point where the return address
 294 //       will point.
 295 int MachCallStaticJavaNode::ret_addr_offset() {
 296   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points  
 297 }
 298 
 299 int MachCallDynamicJavaNode::ret_addr_offset() {
 300   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 301 }
 302 
 303 static int sizeof_FFree_Float_Stack_All = -1;
 304 
 305 int MachCallRuntimeNode::ret_addr_offset() {
 306   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 307   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 308 }
 309 
 310 // Indicate if the safepoint node needs the polling page as an input.
 311 // Since x86 does have absolute addressing, it doesn't.
 312 bool SafePointNode::needs_polling_address_input() {
 313   return false;
 314 }
 315 
 316 //
 317 // Compute padding required for nodes which need alignment
 318 //
 319 
 320 // The address of the call instruction needs to be 4-byte aligned to
 321 // ensure that it does not span a cache line so that it can be patched.
 322 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 323   current_offset += pre_call_resets_size();  // skip fldcw, if any
 324   current_offset += 1;      // skip call opcode byte
 325   return round_to(current_offset, alignment_required()) - current_offset;
 326 }
 327 
 328 // The address of the call instruction needs to be 4-byte aligned to
 329 // ensure that it does not span a cache line so that it can be patched.
 330 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 331   current_offset += pre_call_resets_size();  // skip fldcw, if any
 332   current_offset += 5;      // skip MOV instruction
 333   current_offset += 1;      // skip call opcode byte
 334   return round_to(current_offset, alignment_required()) - current_offset;
 335 }
 336 
 337 // EMIT_RM()
 338 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 339   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 340   cbuf.insts()->emit_int8(c);
 341 }
 342 
 343 // EMIT_CC()
 344 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 345   unsigned char c = (unsigned char)( f1 | f2 );
 346   cbuf.insts()->emit_int8(c);
 347 }
 348 
 349 // EMIT_OPCODE()
 350 void emit_opcode(CodeBuffer &cbuf, int code) {
 351   cbuf.insts()->emit_int8((unsigned char) code);
 352 }
 353 
 354 // EMIT_OPCODE() w/ relocation information
 355 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 356   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 357   emit_opcode(cbuf, code);
 358 }
 359 
 360 // EMIT_D8()
 361 void emit_d8(CodeBuffer &cbuf, int d8) {
 362   cbuf.insts()->emit_int8((unsigned char) d8);
 363 }
 364 
 365 // EMIT_D16()
 366 void emit_d16(CodeBuffer &cbuf, int d16) {
 367   cbuf.insts()->emit_int16(d16);
 368 }
 369 
 370 // EMIT_D32()
 371 void emit_d32(CodeBuffer &cbuf, int d32) {
 372   cbuf.insts()->emit_int32(d32);
 373 }
 374 
 375 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 376 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 377         int format) {
 378   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from RelocationHolder
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 384         int format) {
 385 #ifdef ASSERT
 386   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 387     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 388   }
 389 #endif
 390   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 391   cbuf.insts()->emit_int32(d32);
 392 }
 393 
 394 // Access stack slot for load or store
 395 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 396   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 397   if( -128 <= disp && disp <= 127 ) {
 398     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 399     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 400     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 401   } else {
 402     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 403     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 404     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 405   }
 406 }
 407 
  408    // rRegI ereg, memory mem) %{    // emit_reg_mem
      // Emits the operand bytes of a register/memory instruction form: a ModRM
      // byte, an optional SIB byte, and an optional 8- or 32-bit displacement.
      // The opcode itself is NOT emitted here.
      //   reg_encoding - value placed in the middle (reg) field of the ModRM byte
      //   base         - base register encoding; -1 is the flag for an absolute
      //                  address
      //   index, scale - SIB index and scale; index == 0x4 with scale == 0 is
      //                  treated as "no index"
      //   displace     - displacement value
      //   disp_reloc   - relocation type for the displacement; anything other
      //                  than relocInfo::none forces a 32-bit displacement with
      //                  a relocation entry
  409 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  410   // There is no index & no scale, use form without SIB byte
  411   if ((index == 0x4) &&
  412       (scale == 0) && (base != ESP_enc)) {
  413     // If no displacement, mode is 0x0; unless base is [EBP]
  414     if ( (displace == 0) && (base != EBP_enc) ) {
  415       emit_rm(cbuf, 0x0, reg_encoding, base);
  416     }
  417     else {                    // If 8-bit displacement, mode 0x1
  418       if ((displace >= -128) && (displace <= 127)
  419           && (disp_reloc == relocInfo::none) ) {
  420         emit_rm(cbuf, 0x1, reg_encoding, base);
  421         emit_d8(cbuf, displace);
  422       }
  423       else {                  // If 32-bit displacement
      // NOTE(review): the absolute-address flag (base == -1) is only tested
      // here, i.e. after the zero-displacement and disp8 cases above.  An
      // absolute address whose displacement is 0, or fits in 8 bits without a
      // relocation, would take one of those earlier branches with base == -1.
      // Confirm that callers never pass such a combination.
  424         if (base == -1) { // Special flag for absolute address
  425           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
  426           // (manual lies; no SIB needed here)
  427           if ( disp_reloc != relocInfo::none ) {
  428             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  429           } else {
  430             emit_d32      (cbuf, displace);
  431           }
  432         }
  433         else {                // Normal base + offset
  434           emit_rm(cbuf, 0x2, reg_encoding, base);
  435           if ( disp_reloc != relocInfo::none ) {
  436             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  437           } else {
  438             emit_d32      (cbuf, displace);
  439           }
  440         }
  441       }
  442     }
  443   }
  444   else {                      // Else, encode with the SIB byte
  445     // If no displacement, mode is 0x0; unless base is [EBP]
  446     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
  447       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
  448       emit_rm(cbuf, scale, index, base);
  449     }
  450     else {                    // If 8-bit displacement, mode 0x1
  451       if ((displace >= -128) && (displace <= 127)
  452           && (disp_reloc == relocInfo::none) ) {
  453         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
  454         emit_rm(cbuf, scale, index, base);
  455         emit_d8(cbuf, displace);
  456       }
  457       else {                  // If 32-bit displacement
      // NOTE(review): both arms of the test below emit the same two bytes
      // (when base == 0x04 the literal 0x04 equals base), so the branch is
      // redundant as written.
  458         if (base == 0x04 ) {
  459           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  460           emit_rm(cbuf, scale, index, 0x04);
  461         } else {
  462           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
  463           emit_rm(cbuf, scale, index, base);
  464         }
  465         if ( disp_reloc != relocInfo::none ) {
  466           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
  467         } else {
  468           emit_d32      (cbuf, displace);
  469         }
  470       }
  471     }
  472   }
  473 }
 474 
 475 
 476 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 477   if( dst_encoding == src_encoding ) {
 478     // reg-reg copy, use an empty encoding
 479   } else {
 480     emit_opcode( cbuf, 0x8B );
 481     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 482   }
 483 }
 484 
 485 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 486   Label exit;
 487   __ jccb(Assembler::noParity, exit);
 488   __ pushf();
 489   //
 490   // comiss/ucomiss instructions set ZF,PF,CF flags and
 491   // zero OF,AF,SF for NaN values.
 492   // Fixup flags by zeroing ZF,PF so that compare of NaN
 493   // values returns 'less than' result (CF is set).
 494   // Leave the rest of flags unchanged.
 495   //
 496   //    7 6 5 4 3 2 1 0
 497   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 498   //    0 0 1 0 1 0 1 1   (0x2B)
 499   //
 500   __ andl(Address(rsp, 0), 0xffffff2b);
 501   __ popf();
 502   __ bind(exit);
 503 }
 504 
 505 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 506   Label done;
 507   __ movl(dst, -1);
 508   __ jcc(Assembler::parity, done);
 509   __ jcc(Assembler::below, done);
 510   __ setb(Assembler::notEqual, dst);
 511   __ movzbl(dst, dst);
 512   __ bind(done);
 513 }
 514 
 515 
  516 //=============================================================================
      // The constant-base node's output register mask is bound to RegMask::Empty.
  517 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 518 
 519 int Compile::ConstantTable::calculate_table_base_offset() const {
 520   return 0;  // absolute addressing, no offset
 521 }
 522 
 523 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 524 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 525   ShouldNotReachHere();
 526 }
 527 
 528 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 529   // Empty encoding
 530 }
 531 
 532 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 533   return 0;
 534 }
 535 
 536 #ifndef PRODUCT
 537 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 538   st->print("# MachConstantBaseNode (empty encoding)");
 539 }
 540 #endif
 541 
 542 
 543 //=============================================================================
 544 #ifndef PRODUCT
 545 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 546   Compile* C = ra_->C;
 547 
 548   int framesize = C->frame_size_in_bytes();
 549   int bangsize = C->bang_size_in_bytes();
 550   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 551   // Remove wordSize for return addr which is already pushed.
 552   framesize -= wordSize;
 553 
 554   if (C->need_stack_bang(bangsize)) {
 555     framesize -= wordSize;
 556     st->print("# stack bang (%d bytes)", bangsize);
 557     st->print("\n\t");
 558     st->print("PUSH   EBP\t# Save EBP");
 559     if (PreserveFramePointer) {
 560       st->print("\n\t");
 561       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 562     }
 563     if (framesize) {
 564       st->print("\n\t");
 565       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 566     }
 567   } else {
 568     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 569     st->print("\n\t");
 570     framesize -= wordSize;
 571     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 572     if (PreserveFramePointer) {
 573       st->print("\n\t");
 574       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 575       if (framesize > 0) {
 576         st->print("\n\t");
 577         st->print("ADD    EBP, #%d", framesize);
 578       }
 579     }
 580   }
 581 
 582   if (VerifyStackAtCalls) {
 583     st->print("\n\t");
 584     framesize -= wordSize;
 585     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 586   }
 587 
 588   if( C->in_24_bit_fp_mode() ) {
 589     st->print("\n\t");
 590     st->print("FLDCW  \t# load 24 bit fpu control word");
 591   }
 592   if (UseSSE >= 2 && VerifyFPU) {
 593     st->print("\n\t");
 594     st->print("# verify FPU stack (must be clean on entry)");
 595   }
 596 
 597 #ifdef ASSERT
 598   if (VerifyStackAtCalls) {
 599     st->print("\n\t");
 600     st->print("# stack alignment check");
 601   }
 602 #endif
 603   st->cr();
 604 }
 605 #endif
 606 
 607 
 608 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 609   Compile* C = ra_->C;
 610   MacroAssembler _masm(&cbuf);
 611 
 612   int framesize = C->frame_size_in_bytes();
 613   int bangsize = C->bang_size_in_bytes();
 614 
 615   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 616 
 617   C->set_frame_complete(cbuf.insts_size());
 618 
 619   if (C->has_mach_constant_base_node()) {
 620     // NOTE: We set the table base offset here because users might be
 621     // emitted before MachConstantBaseNode.
 622     Compile::ConstantTable& constant_table = C->constant_table();
 623     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 624   }
 625 }
 626 
 627 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 628   return MachNode::size(ra_); // too many variables; just compute it the hard way
 629 }
 630 
 631 int MachPrologNode::reloc() const {
 632   return 0; // a large enough number
 633 }
 634 
 635 //=============================================================================
 636 #ifndef PRODUCT
 637 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 638   Compile *C = ra_->C;
 639   int framesize = C->frame_size_in_bytes();
 640   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 641   // Remove two words for return addr and rbp,
 642   framesize -= 2*wordSize;
 643 
 644   if (C->max_vector_size() > 16) {
 645     st->print("VZEROUPPER");
 646     st->cr(); st->print("\t");
 647   }
 648   if (C->in_24_bit_fp_mode()) {
 649     st->print("FLDCW  standard control word");
 650     st->cr(); st->print("\t");
 651   }
 652   if (framesize) {
 653     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 654     st->cr(); st->print("\t");
 655   }
 656   st->print_cr("POPL   EBP"); st->print("\t");
 657   if (do_polling() && C->is_method_compilation()) {
 658     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 659     st->cr(); st->print("\t");
 660   }
 661 }
 662 #endif
 663 
 664 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 665   Compile *C = ra_->C;
 666 
 667   if (C->max_vector_size() > 16) {
 668     // Clear upper bits of YMM registers when current compiled code uses
 669     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 670     MacroAssembler masm(&cbuf);
 671     masm.vzeroupper();
 672   }
 673   // If method set FPU control word, restore to standard control word
 674   if (C->in_24_bit_fp_mode()) {
 675     MacroAssembler masm(&cbuf);
 676     masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 677   }
 678 
 679   int framesize = C->frame_size_in_bytes();
 680   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 681   // Remove two words for return addr and rbp,
 682   framesize -= 2*wordSize;
 683 
 684   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 685 
 686   if (framesize >= 128) {
 687     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 688     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 689     emit_d32(cbuf, framesize);
 690   } else if (framesize) {
 691     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 692     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 693     emit_d8(cbuf, framesize);
 694   }
 695 
 696   emit_opcode(cbuf, 0x58 | EBP_enc);
 697 
 698   if (do_polling() && C->is_method_compilation()) {
 699     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 700     emit_opcode(cbuf,0x85);
 701     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 702     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 703   }
 704 }
 705 
 706 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 707   Compile *C = ra_->C;
 708   // If method set FPU control word, restore to standard control word
 709   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 710   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 711   if (do_polling() && C->is_method_compilation()) size += 6;
 712 
 713   int framesize = C->frame_size_in_bytes();
 714   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 715   // Remove two words for return addr and rbp,
 716   framesize -= 2*wordSize;
 717 
 718   size++; // popl rbp,
 719 
 720   if (framesize >= 128) {
 721     size += 6;
 722   } else {
 723     size += framesize ? 3 : 0;
 724   }
 725   return size;
 726 }
 727 
 728 int MachEpilogNode::reloc() const {
 729   return 0; // a large enough number
 730 }
 731 
 732 const Pipeline * MachEpilogNode::pipeline() const {
 733   return MachNode::pipeline_class();
 734 }
 735 
 736 int MachEpilogNode::safepoint_offset() const { return 0; }
 737 
 738 //=============================================================================
 739 
 740 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 741 static enum RC rc_class( OptoReg::Name reg ) {
 742 
 743   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 744   if (OptoReg::is_stack(reg)) return rc_stack;
 745 
 746   VMReg r = OptoReg::as_VMReg(reg);
 747   if (r->is_Register()) return rc_int;
 748   if (r->is_FloatRegister()) {
 749     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 750     return rc_float;
 751   }
 752   assert(r->is_XMMRegister(), "must be");
 753   return rc_xmm;
 754 }
 755 
 756 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 757                         int opcode, const char *op_str, int size, outputStream* st ) {
 758   if( cbuf ) {
 759     emit_opcode  (*cbuf, opcode );
 760     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 761 #ifndef PRODUCT
 762   } else if( !do_size ) {
 763     if( size != 0 ) st->print("\n\t");
 764     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 765       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 766       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 767     } else { // FLD, FST, PUSH, POP
 768       st->print("%s [ESP + #%d]",op_str,offset);
 769     }
 770 #endif
 771   }
 772   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 773   return size+3+offset_size;
 774 }
 775 
 776 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 777 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 778                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 779   if (cbuf) {
 780     MacroAssembler _masm(cbuf);
 781     if (reg_lo+1 == reg_hi) { // double move?
 782       if (is_load) {
 783         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 784       } else {
 785         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 786       }
 787     } else {
 788       if (is_load) {
 789         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 790       } else {
 791         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 792       }
 793     }
 794 #ifndef PRODUCT
 795   } else if (!do_size) {
 796     if (size != 0) st->print("\n\t");
 797     if (reg_lo+1 == reg_hi) { // double move?
 798       if (is_load) st->print("%s %s,[ESP + #%d]",
 799                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 800                               Matcher::regName[reg_lo], offset);
 801       else         st->print("MOVSD  [ESP + #%d],%s",
 802                               offset, Matcher::regName[reg_lo]);
 803     } else {
 804       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 805                               Matcher::regName[reg_lo], offset);
 806       else         st->print("MOVSS  [ESP + #%d],%s",
 807                               offset, Matcher::regName[reg_lo]);
 808     }
 809 #endif
 810   }
 811   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 812   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 813   return size+5+offset_size;
 814 }
 815 
 816 
 817 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 818                             int src_hi, int dst_hi, int size, outputStream* st ) {
 819   if (cbuf) {
 820     MacroAssembler _masm(cbuf);
 821     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 822       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 823                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 824     } else {
 825       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 826                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 827     }
 828 #ifndef PRODUCT
 829   } else if (!do_size) {
 830     if (size != 0) st->print("\n\t");
 831     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 832       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 833         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 834       } else {
 835         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 836       }
 837     } else {
 838       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 839         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 840       } else {
 841         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 842       }
 843     }
 844 #endif
 845   }
 846   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 847   // Only MOVAPS SSE prefix uses 1 byte.
 848   int sz = 4;
 849   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 850       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 851   return size + sz;
 852 }
 853 
 854 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 855                             int src_hi, int dst_hi, int size, outputStream* st ) {
 856   // 32-bit
 857   if (cbuf) {
 858     MacroAssembler _masm(cbuf);
 859     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 860              as_Register(Matcher::_regEncode[src_lo]));
 861 #ifndef PRODUCT
 862   } else if (!do_size) {
 863     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 864 #endif
 865   }
 866   return 4;
 867 }
 868 
 869 
 870 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 871                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 872   // 32-bit
 873   if (cbuf) {
 874     MacroAssembler _masm(cbuf);
 875     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 876              as_XMMRegister(Matcher::_regEncode[src_lo]));
 877 #ifndef PRODUCT
 878   } else if (!do_size) {
 879     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 880 #endif
 881   }
 882   return 4;
 883 }
 884 
 885 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 886   if( cbuf ) {
 887     emit_opcode(*cbuf, 0x8B );
 888     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 889 #ifndef PRODUCT
 890   } else if( !do_size ) {
 891     if( size != 0 ) st->print("\n\t");
 892     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 893 #endif
 894   }
 895   return size+2;
 896 }
 897 
 898 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 899                                  int offset, int size, outputStream* st ) {
 900   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 901     if( cbuf ) {
 902       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 903       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 904 #ifndef PRODUCT
 905     } else if( !do_size ) {
 906       if( size != 0 ) st->print("\n\t");
 907       st->print("FLD    %s",Matcher::regName[src_lo]);
 908 #endif
 909     }
 910     size += 2;
 911   }
 912 
 913   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 914   const char *op_str;
 915   int op;
 916   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 917     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 918     op = 0xDD;
 919   } else {                   // 32-bit store
 920     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 921     op = 0xD9;
 922     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 923   }
 924 
 925   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 926 }
 927 
// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
// Both follow the same calling pattern as the other spill helpers in this
// file: a CodeBuffer to emit into (or NULL), a do_size flag, and an output
// stream used for printing.

// Vector register-to-register copy; MachSpillCopyNode::implementation() uses
// it for XMM -> XMM vector moves. 'ireg' is the ideal vector register type.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

// Vector spill between a register and a stack slot;
// MachSpillCopyNode::implementation() passes is_load == true for
// stack -> XMM and false for XMM -> stack.
static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);
 934 
 935 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 936                                      int dst_offset, uint ireg, outputStream* st) {
 937   int calc_size = 0;
 938   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 939   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 940   switch (ireg) {
 941   case Op_VecS:
 942     calc_size = 3+src_offset_size + 3+dst_offset_size;
 943     break;
 944   case Op_VecD:
 945     calc_size = 3+src_offset_size + 3+dst_offset_size;
 946     src_offset += 4;
 947     dst_offset += 4;
 948     src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 949     dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 950     calc_size += 3+src_offset_size + 3+dst_offset_size;
 951     break;
 952   case Op_VecX:
 953     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 954     break;
 955   case Op_VecY:
 956     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 957     break;
 958   default:
 959     ShouldNotReachHere();
 960   }
 961   if (cbuf) {
 962     MacroAssembler _masm(cbuf);
 963     int offset = __ offset();
 964     switch (ireg) {
 965     case Op_VecS:
 966       __ pushl(Address(rsp, src_offset));
 967       __ popl (Address(rsp, dst_offset));
 968       break;
 969     case Op_VecD:
 970       __ pushl(Address(rsp, src_offset));
 971       __ popl (Address(rsp, dst_offset));
 972       __ pushl(Address(rsp, src_offset+4));
 973       __ popl (Address(rsp, dst_offset+4));
 974       break;
 975     case Op_VecX:
 976       __ movdqu(Address(rsp, -16), xmm0);
 977       __ movdqu(xmm0, Address(rsp, src_offset));
 978       __ movdqu(Address(rsp, dst_offset), xmm0);
 979       __ movdqu(xmm0, Address(rsp, -16));
 980       break;
 981     case Op_VecY:
 982       __ vmovdqu(Address(rsp, -32), xmm0);
 983       __ vmovdqu(xmm0, Address(rsp, src_offset));
 984       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 985       __ vmovdqu(xmm0, Address(rsp, -32));
 986       break;
 987     default:
 988       ShouldNotReachHere();
 989     }
 990     int size = __ offset() - offset;
 991     assert(size == calc_size, "incorrect size calculattion");
 992     return size;
 993 #ifndef PRODUCT
 994   } else if (!do_size) {
 995     switch (ireg) {
 996     case Op_VecS:
 997       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
 998                 "popl    [rsp + #%d]",
 999                 src_offset, dst_offset);
1000       break;
1001     case Op_VecD:
1002       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1003                 "popq    [rsp + #%d]\n\t"
1004                 "pushl   [rsp + #%d]\n\t"
1005                 "popq    [rsp + #%d]",
1006                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1007       break;
1008      case Op_VecX:
1009       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1010                 "movdqu  xmm0, [rsp + #%d]\n\t"
1011                 "movdqu  [rsp + #%d], xmm0\n\t"
1012                 "movdqu  xmm0, [rsp - #16]",
1013                 src_offset, dst_offset);
1014       break;
1015     case Op_VecY:
1016       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1017                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1018                 "vmovdqu [rsp + #%d], xmm0\n\t"
1019                 "vmovdqu xmm0, [rsp - #32]",
1020                 src_offset, dst_offset);
1021       break;
1022     default:
1023       ShouldNotReachHere();
1024     }
1025 #endif
1026   }
1027   return calc_size;
1028 }
1029 
// Shared worker behind MachSpillCopyNode::emit(), format() and size().
//   cbuf     - when non-NULL the copy is emitted into this buffer
//   ra_      - register allocator, queried for source/destination registers
//              and stack offsets
//   do_size  - when cbuf is NULL: true = only compute the size,
//              false = print the instructions to 'st' (non-PRODUCT only)
//   st       - output stream used when printing
// Returns the size in bytes of the generated copy sequence.
//
// The copy is described by a first and (optionally) a second register half
// for both source and destination.  The code dispatches on the register
// class (see rc_class()) of each half.  Integer first-half moves fall
// through to the "second bits" section at the bottom; all other cases
// return directly.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // --------------------------------------
  // Vector values: only XMM registers and stack slots are expected here;
  // each combination is delegated to a dedicated vector helper.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  // ESI_num and EAX_num are not real operands here: impl_helper() places
  // their encodings in the reg field next to the 0xFF (PUSH) and 0x8F (POP)
  // opcodes.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the second half first so that writing dst_first below does not
      // clobber src_second before it has been read.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // The three integer cases below do not return: they only move the first
  // half and rely on the "second bits" section at the end of this function.

  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
     } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
     }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
#endif
    }
    // Two 2-byte instructions (FLD + FSTP) or a single 2-byte FST.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      // Load from the stack slot onto the FP stack, then pop it into the
      // destination FP register.
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // 3 bytes plus displacement for the load, 2 bytes for the FSTP.
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  // Goes through memory: reserve 8 bytes below ESP, store the x87 value
  // there, load it into the XMM register, then release the 8 bytes.
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // Only the integer first-half cases above reach this point, and each of
  // them added to 'size'.
  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}
1260 
1261 #ifndef PRODUCT
1262 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1263   implementation( NULL, ra_, false, st );
1264 }
1265 #endif
1266 
1267 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1268   implementation( &cbuf, ra_, false, NULL );
1269 }
1270 
1271 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1272   return implementation( NULL, ra_, true, NULL );
1273 }
1274 
1275 
1276 //=============================================================================
1277 #ifndef PRODUCT
1278 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1279   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1280   int reg = ra_->get_reg_first(this);
1281   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1282 }
1283 #endif
1284 
1285 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1286   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1287   int reg = ra_->get_encode(this);
1288   if( offset >= 128 ) {
1289     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1290     emit_rm(cbuf, 0x2, reg, 0x04);
1291     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1292     emit_d32(cbuf, offset);
1293   }
1294   else {
1295     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1296     emit_rm(cbuf, 0x1, reg, 0x04);
1297     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1298     emit_d8(cbuf, offset);
1299   }
1300 }
1301 
1302 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1303   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1304   if( offset >= 128 ) {
1305     return 7;
1306   }
1307   else {
1308     return 4;
1309   }
1310 }
1311 
1312 //=============================================================================
1313 #ifndef PRODUCT
1314 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1315   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1316   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1317   st->print_cr("\tNOP");
1318   st->print_cr("\tNOP");
1319   if( !OptoBreakpoint )
1320     st->print_cr("\tNOP");
1321 }
1322 #endif
1323 
1324 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1325   MacroAssembler masm(&cbuf);
1326 #ifdef ASSERT
1327   uint insts_size = cbuf.insts_size();
1328 #endif
1329   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1330   masm.jump_cc(Assembler::notEqual,
1331                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1332   /* WARNING these NOPs are critical so that verified entry point is properly
1333      aligned for patching by NativeJump::patch_verified_entry() */
1334   int nops_cnt = 2;
1335   if( !OptoBreakpoint ) // Leave space for int3
1336      nops_cnt += 1;
1337   masm.nop(nops_cnt);
1338 
1339   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1340 }
1341 
1342 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1343   return OptoBreakpoint ? 11 : 12;
1344 }
1345 
1346 
1347 //=============================================================================
1348 
1349 int Matcher::regnum_to_fpu_offset(int regnum) {
1350   return regnum - 32; // The FP registers are in the second chunk
1351 }
1352 
1353 // This is UltraSparc specific, true just means we have fast l2f conversion
1354 const bool Matcher::convL2FSupported(void) {
1355   return true;
1356 }
1357 
1358 // Is this branch offset short enough that a short branch can be used?
1359 //
1360 // NOTE: If the platform does not provide any short branch variants, then
1361 //       this method should return false for offset 0.
1362 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1363   // The passed offset is relative to address of the branch.
1364   // On 86 a branch displacement is calculated relative to address
1365   // of a next instruction.
1366   offset -= br_size;
1367 
1368   // the short version of jmpConUCF2 contains multiple branches,
1369   // making the reach slightly less
1370   if (rule == jmpConUCF2_rule)
1371     return (-126 <= offset && offset <= 125);
1372   return (-128 <= offset && offset <= 127);
1373 }
1374 
1375 const bool Matcher::isSimpleConstant64(jlong value) {
1376   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1377   return false;
1378 }
1379 
// The ECX count passed to REP STOS for the ClearArray node is expressed in
// dwords rather than bytes, hence false.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for ClearArray.
// NOTE(review): presumably the size up to which an array counts as "short"
// for clearing purposes -- confirm against the users of this constant.
const int Matcher::init_array_short_size = 8 * BytesPerLong;
1385 
1386 // Needs 2 CMOV's for longs.
1387 const int Matcher::long_cmove_cost() { return 1; }
1388 
1389 // No CMOVF/CMOVD with SSE/SSE2
1390 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1391 
// Does the CPU require late expand (see block.cpp for a description of late
// expand)?  Not on this platform.
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions, or compute them into
// registers?  True for Intel but false for most RISCs.
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions, or does
// the CPU only look at the lower 5/6 bits anyway?  No masking is needed here.
const bool Matcher::need_masked_shift_count = false;
1403 
1404 bool Matcher::narrow_oop_use_complex_address() {
1405   ShouldNotCallThis();
1406   return true;
1407 }
1408 
1409 bool Matcher::narrow_klass_use_complex_address() {
1410   ShouldNotCallThis();
1411   return true;
1412 }
1413 
1414 
// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If the CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Otherwise the double is split into 2 integer pieces and moved
// piece-by-piece.  This only happens when passing doubles into C code, as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
1426 
1427 
1428 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1429   // Get the memory operand from the node
1430   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1431   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1432   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1433   uint opcnt     = 1;                 // First operand
1434   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1435   while( idx >= skipped+num_edges ) {
1436     skipped += num_edges;
1437     opcnt++;                          // Bump operand count
1438     assert( opcnt < numopnds, "Accessing non-existent operand" );
1439     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1440   }
1441 
1442   MachOper *memory = node->_opnds[opcnt];
1443   MachOper *new_memory = NULL;
1444   switch (memory->opcode()) {
1445   case DIRECT:
1446   case INDOFFSET32X:
1447     // No transformation necessary.
1448     return;
1449   case INDIRECT:
1450     new_memory = new (C) indirect_win95_safeOper( );
1451     break;
1452   case INDOFFSET8:
1453     new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1454     break;
1455   case INDOFFSET32:
1456     new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1457     break;
1458   case INDINDEXOFFSET:
1459     new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1460     break;
1461   case INDINDEXSCALE:
1462     new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
1463     break;
1464   case INDINDEXSCALEOFFSET:
1465     new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1466     break;
1467   case LOAD_LONG_INDIRECT:
1468   case LOAD_LONG_INDOFFSET32:
1469     // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1470     return;
1471   default:
1472     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1473     return;
1474   }
1475   node->_opnds[opcnt] = new_memory;
1476 }
1477 
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.  This platform does.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
1481 
1482 // Are floats conerted to double when stored to stack during deoptimization?
1483 // On x32 it is stored with convertion only when FPU is used for floats.
1484 bool Matcher::float_in_double() { return (UseSSE == 0); }
1485 
// Do ints take an entire long register or just half?  Just half here.
const bool Matcher::int_in_long = false;
1488 
1489 // Return whether or not this register is ever used as an argument.  This
1490 // function is used on startup to build the trampoline stubs in generateOptoStub.
1491 // Registers not mentioned will be killed by the VM call in the trampoline, and
1492 // arguments in those registers not be available to the callee.
1493 bool Matcher::can_be_java_arg( int reg ) {
1494   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1495   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1496   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1497   return false;
1498 }
1499 
1500 bool Matcher::is_spillable_arg( int reg ) {
1501   return can_be_java_arg(reg);
1502 }
1503 
1504 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1505   // Use hardware integer DIV instruction when
1506   // it is faster than a code which use multiply.
1507   // Only when constant divisor fits into 32 bit
1508   // (min_jint is excluded to get only correct
1509   // positive 32 bit values from negative).
1510   return VM_Version::has_fast_idiv() &&
1511          (divisor == (int)divisor && divisor != min_jint);
1512 }
1513 
1514 // Register for DIVI projection of divmodI
1515 RegMask Matcher::divI_proj_mask() {
1516   return EAX_REG_mask();
1517 }
1518 
1519 // Register for MODI projection of divmodI
1520 RegMask Matcher::modI_proj_mask() {
1521   return EDX_REG_mask();
1522 }
1523 
1524 // Register for DIVL projection of divmodL
1525 RegMask Matcher::divL_proj_mask() {
1526   ShouldNotReachHere();
1527   return RegMask();
1528 }
1529 
1530 // Register for MODL projection of divmodL
1531 RegMask Matcher::modL_proj_mask() {
1532   ShouldNotReachHere();
1533   return RegMask();
1534 }
1535 
1536 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1537   return NO_REG_mask();
1538 }
1539 
1540 // Returns true if the high 32 bits of the value is known to be zero.
1541 bool is_operand_hi32_zero(Node* n) {
1542   int opc = n->Opcode();
1543   if (opc == Op_AndL) {
1544     Node* o2 = n->in(2);
1545     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1546       return true;
1547     }
1548   }
1549   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1550     return true;
1551   }
1552   return false;
1553 }
1554 
1555 %}
1556 
1557 //----------ENCODING BLOCK-----------------------------------------------------
1558 // This block specifies the encoding classes used by the compiler to output
1559 // byte streams.  Encoding classes generate functions which are called by
1560 // Machine Instruction Nodes in order to generate the bit encoding of the
1561 // instruction.  Operands specify their base encoding interface with the
1562 // interface keyword.  There are currently supported four interfaces,
1563 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1564 // operand to generate a function which returns its register number when
1565 // queried.   CONST_INTER causes an operand to generate a function which
1566 // returns the value of the constant when queried.  MEMORY_INTER causes an
1567 // operand to generate four functions which return the Base Register, the
1568 // Index Register, the Scale Value, and the Offset Value of the operand when
1569 // queried.  COND_INTER causes an operand to generate six functions which
1570 // return the encoding code (ie - encoding bits for the instruction)
1571 // associated with each basic boolean condition for a conditional instruction.
1572 // Instructions specify two basic values for encoding.  They use the
1573 // ins_encode keyword to specify their encoding class (which must be one of
1574 // the class names specified in the encoding block), and they use the
1575 // opcode keyword to specify, in order, their primary, secondary, and
1576 // tertiary opcode.  Only the opcode sections which a particular instruction
1577 // needs for encoding need to be specified.
1578 encode %{
1579   // Build emit functions for each basic byte or larger field in the intel
1580   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1581   // code in the enc_class source block.  Emit functions will live in the
1582   // main source block for now.  In future, we can generalize this by
1583   // adding a syntax that specifies the sizes of fields in an order,
1584   // so that the adlc can build the emit functions automagically
1585 
1586   // Emit primary opcode
1587   enc_class OpcP %{
1588     emit_opcode(cbuf, $primary);
1589   %}
1590 
1591   // Emit secondary opcode
1592   enc_class OpcS %{
1593     emit_opcode(cbuf, $secondary);
1594   %}
1595 
1596   // Emit opcode directly
1597   enc_class Opcode(immI d8) %{
1598     emit_opcode(cbuf, $d8$$constant);
1599   %}
1600 
1601   enc_class SizePrefix %{
1602     emit_opcode(cbuf,0x66);
1603   %}
1604 
1605   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1606     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1607   %}
1608 
1609   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1610     emit_opcode(cbuf,$opcode$$constant);
1611     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1612   %}
1613 
1614   enc_class mov_r32_imm0( rRegI dst ) %{
1615     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1616     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1617   %}
1618 
1619   enc_class cdq_enc %{
1620     // Full implementation of Java idiv and irem; checks for
1621     // special case as described in JVM spec., p.243 & p.271.
1622     //
1623     //         normal case                           special case
1624     //
1625     // input : rax,: dividend                         min_int
1626     //         reg: divisor                          -1
1627     //
1628     // output: rax,: quotient  (= rax, idiv reg)       min_int
1629     //         rdx: remainder (= rax, irem reg)       0
1630     //
1631     //  Code sequnce:
1632     //
1633     //  81 F8 00 00 00 80    cmp         rax,80000000h
1634     //  0F 85 0B 00 00 00    jne         normal_case
1635     //  33 D2                xor         rdx,edx
1636     //  83 F9 FF             cmp         rcx,0FFh
1637     //  0F 84 03 00 00 00    je          done
1638     //                  normal_case:
1639     //  99                   cdq
1640     //  F7 F9                idiv        rax,ecx
1641     //                  done:
1642     //
1643     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1644     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1645     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1646     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1647     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1648     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1649     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1650     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1651     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1652     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1653     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1654     // normal_case:
1655     emit_opcode(cbuf,0x99);                                         // cdq
1656     // idiv (note: must be emitted by the user of this rule)
1657     // normal:
1658   %}
1659 
1660   // Dense encoding for older common ops
1661   enc_class Opc_plus(immI opcode, rRegI reg) %{
1662     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1663   %}
1664 
1665 
1666   // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
1667   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1668     // Check for 8-bit immediate, and set sign extend bit in opcode
1669     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1670       emit_opcode(cbuf, $primary | 0x02);
1671     }
1672     else {                          // If 32-bit immediate
1673       emit_opcode(cbuf, $primary);
1674     }
1675   %}
1676 
1677   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1678     // Emit primary opcode and set sign-extend bit
1679     // Check for 8-bit immediate, and set sign extend bit in opcode
1680     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1681       emit_opcode(cbuf, $primary | 0x02);    }
1682     else {                          // If 32-bit immediate
1683       emit_opcode(cbuf, $primary);
1684     }
1685     // Emit r/m byte with secondary opcode, after primary opcode.
1686     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1687   %}
1688 
1689   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1690     // Check for 8-bit immediate, and set sign extend bit in opcode
1691     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1692       $$$emit8$imm$$constant;
1693     }
1694     else {                          // If 32-bit immediate
1695       // Output immediate
1696       $$$emit32$imm$$constant;
1697     }
1698   %}
1699 
1700   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1701     // Emit primary opcode and set sign-extend bit
1702     // Check for 8-bit immediate, and set sign extend bit in opcode
1703     int con = (int)$imm$$constant; // Throw away top bits
1704     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1705     // Emit r/m byte with secondary opcode, after primary opcode.
1706     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1707     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1708     else                               emit_d32(cbuf,con);
1709   %}
1710 
1711   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1712     // Emit primary opcode and set sign-extend bit
1713     // Check for 8-bit immediate, and set sign extend bit in opcode
1714     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1715     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1716     // Emit r/m byte with tertiary opcode, after primary opcode.
1717     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1718     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1719     else                               emit_d32(cbuf,con);
1720   %}
1721 
1722   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1723     emit_cc(cbuf, $secondary, $dst$$reg );
1724   %}
1725 
1726   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1727     int destlo = $dst$$reg;
1728     int desthi = HIGH_FROM_LOW(destlo);
1729     // bswap lo
1730     emit_opcode(cbuf, 0x0F);
1731     emit_cc(cbuf, 0xC8, destlo);
1732     // bswap hi
1733     emit_opcode(cbuf, 0x0F);
1734     emit_cc(cbuf, 0xC8, desthi);
1735     // xchg lo and hi
1736     emit_opcode(cbuf, 0x87);
1737     emit_rm(cbuf, 0x3, destlo, desthi);
1738   %}
1739 
1740   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1741     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1742   %}
1743 
1744   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1745     $$$emit8$primary;
1746     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1747   %}
1748 
1749   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1750     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1751     emit_d8(cbuf, op >> 8 );
1752     emit_d8(cbuf, op & 255);
1753   %}
1754 
1755   // emulate a CMOV with a conditional branch around a MOV
1756   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1757     // Invert sense of branch from sense of CMOV
1758     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1759     emit_d8( cbuf, $brOffs$$constant );
1760   %}
1761 
1762   enc_class enc_PartialSubtypeCheck( ) %{
1763     Register Redi = as_Register(EDI_enc); // result register
1764     Register Reax = as_Register(EAX_enc); // super class
1765     Register Recx = as_Register(ECX_enc); // killed
1766     Register Resi = as_Register(ESI_enc); // sub class
1767     Label miss;
1768 
1769     MacroAssembler _masm(&cbuf);
1770     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1771                                      NULL, &miss,
1772                                      /*set_cond_codes:*/ true);
1773     if ($primary) {
1774       __ xorptr(Redi, Redi);
1775     }
1776     __ bind(miss);
1777   %}
1778 
1779   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1780     MacroAssembler masm(&cbuf);
1781     int start = masm.offset();
1782     if (UseSSE >= 2) {
1783       if (VerifyFPU) {
1784         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1785       }
1786     } else {
1787       // External c_calling_convention expects the FPU stack to be 'clean'.
1788       // Compiled code leaves it dirty.  Do cleanup now.
1789       masm.empty_FPU_stack();
1790     }
1791     if (sizeof_FFree_Float_Stack_All == -1) {
1792       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1793     } else {
1794       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1795     }
1796   %}
1797 
1798   enc_class Verify_FPU_For_Leaf %{
1799     if( VerifyFPU ) {
1800       MacroAssembler masm(&cbuf);
1801       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1802     }
1803   %}
1804 
1805   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1806     // This is the instruction starting address for relocation info.
1807     cbuf.set_insts_mark();
1808     $$$emit8$primary;
1809     // CALL directly to the runtime
1810     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1811                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1812 
1813     if (UseSSE >= 2) {
1814       MacroAssembler _masm(&cbuf);
1815       BasicType rt = tf()->return_type();
1816 
1817       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1818         // A C runtime call where the return value is unused.  In SSE2+
1819         // mode the result needs to be removed from the FPU stack.  It's
1820         // likely that this function call could be removed by the
1821         // optimizer if the C function is a pure function.
1822         __ ffree(0);
1823       } else if (rt == T_FLOAT) {
1824         __ lea(rsp, Address(rsp, -4));
1825         __ fstp_s(Address(rsp, 0));
1826         __ movflt(xmm0, Address(rsp, 0));
1827         __ lea(rsp, Address(rsp,  4));
1828       } else if (rt == T_DOUBLE) {
1829         __ lea(rsp, Address(rsp, -8));
1830         __ fstp_d(Address(rsp, 0));
1831         __ movdbl(xmm0, Address(rsp, 0));
1832         __ lea(rsp, Address(rsp,  8));
1833       }
1834     }
1835   %}
1836 
1837 
1838   enc_class pre_call_resets %{
1839     // If method sets FPU control word restore it here
1840     debug_only(int off0 = cbuf.insts_size());
1841     if (ra_->C->in_24_bit_fp_mode()) {
1842       MacroAssembler _masm(&cbuf);
1843       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1844     }
1845     if (ra_->C->max_vector_size() > 16) {
1846       // Clear upper bits of YMM registers when current compiled code uses
1847       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1848       MacroAssembler _masm(&cbuf);
1849       __ vzeroupper();
1850     }
1851     debug_only(int off1 = cbuf.insts_size());
1852     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1853   %}
1854 
1855   enc_class post_call_FPU %{
1856     // If method sets FPU control word do it here also
1857     if (Compile::current()->in_24_bit_fp_mode()) {
1858       MacroAssembler masm(&cbuf);
1859       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1860     }
1861   %}
1862 
1863   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1864     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1865     // who we intended to call.
1866     cbuf.set_insts_mark();
1867     $$$emit8$primary;
1868     if (!_method) {
1869       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1870                      runtime_call_Relocation::spec(), RELOC_IMM32 );
1871     } else if (_optimized_virtual) {
1872       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1873                      opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
1874     } else {
1875       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1876                      static_call_Relocation::spec(), RELOC_IMM32 );
1877     }
1878     if (_method) {  // Emit stub for static call.
1879       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1880       if (stub == NULL) {
1881         ciEnv::current()->record_failure("CodeCache is full");
1882         return;
1883       } 
1884     }
1885   %}
1886 
1887   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1888     MacroAssembler _masm(&cbuf);
1889     __ ic_call((address)$meth$$method);
1890   %}
1891 
1892   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1893     int disp = in_bytes(Method::from_compiled_offset());
1894     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1895 
1896     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1897     cbuf.set_insts_mark();
1898     $$$emit8$primary;
1899     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1900     emit_d8(cbuf, disp);             // Displacement
1901 
1902   %}
1903 
1904 //   Following encoding is no longer used, but may be restored if calling
1905 //   convention changes significantly.
1906 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1907 //
1908 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1909 //     // int ic_reg     = Matcher::inline_cache_reg();
1910 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1911 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1912 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1913 //
1914 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1915 //     // // so we load it immediately before the call
1916 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1917 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1918 //
1919 //     // xor rbp,ebp
1920 //     emit_opcode(cbuf, 0x33);
1921 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1922 //
1923 //     // CALL to interpreter.
1924 //     cbuf.set_insts_mark();
1925 //     $$$emit8$primary;
1926 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1927 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1928 //   %}
1929 
1930   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1931     $$$emit8$primary;
1932     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1933     $$$emit8$shift$$constant;
1934   %}
1935 
1936   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1937     // Load immediate does not have a zero or sign extended version
1938     // for 8-bit immediates
1939     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1940     $$$emit32$src$$constant;
1941   %}
1942 
1943   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1944     // Load immediate does not have a zero or sign extended version
1945     // for 8-bit immediates
1946     emit_opcode(cbuf, $primary + $dst$$reg);
1947     $$$emit32$src$$constant;
1948   %}
1949 
1950   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1951     // Load immediate does not have a zero or sign extended version
1952     // for 8-bit immediates
1953     int dst_enc = $dst$$reg;
1954     int src_con = $src$$constant & 0x0FFFFFFFFL;
1955     if (src_con == 0) {
1956       // xor dst, dst
1957       emit_opcode(cbuf, 0x33);
1958       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1959     } else {
1960       emit_opcode(cbuf, $primary + dst_enc);
1961       emit_d32(cbuf, src_con);
1962     }
1963   %}
1964 
1965   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
1966     // Load immediate does not have a zero or sign extended version
1967     // for 8-bit immediates
1968     int dst_enc = $dst$$reg + 2;
1969     int src_con = ((julong)($src$$constant)) >> 32;
1970     if (src_con == 0) {
1971       // xor dst, dst
1972       emit_opcode(cbuf, 0x33);
1973       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1974     } else {
1975       emit_opcode(cbuf, $primary + dst_enc);
1976       emit_d32(cbuf, src_con);
1977     }
1978   %}
1979 
1980 
1981   // Encode a reg-reg copy.  If it is useless, then empty encoding.
1982   enc_class enc_Copy( rRegI dst, rRegI src ) %{
1983     encode_Copy( cbuf, $dst$$reg, $src$$reg );
1984   %}
1985 
1986   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
1987     encode_Copy( cbuf, $dst$$reg, $src$$reg );
1988   %}
1989 
1990   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1991     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1992   %}
1993 
1994   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
1995     $$$emit8$primary;
1996     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1997   %}
1998 
1999   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2000     $$$emit8$secondary;
2001     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2002   %}
2003 
2004   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2005     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2006   %}
2007 
2008   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2009     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2010   %}
2011 
2012   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2013     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2014   %}
2015 
  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output the constant operand src as a 32-bit immediate.
    $$$emit32$src$$constant;
  %}
2020 
2021   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2022     // Output Float immediate bits
2023     jfloat jf = $src$$constant;
2024     int    jf_as_bits = jint_cast( jf );
2025     emit_d32(cbuf, jf_as_bits);
2026   %}
2027 
2028   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2029     // Output Float immediate bits
2030     jfloat jf = $src$$constant;
2031     int    jf_as_bits = jint_cast( jf );
2032     emit_d32(cbuf, jf_as_bits);
2033   %}
2034 
  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output the constant operand src as a 16-bit immediate.
    $$$emit16$src$$constant;
  %}
2039 
2040   enc_class Con_d32(immI src) %{
2041     emit_d32(cbuf,$src$$constant);
2042   %}
2043 
2044   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2045     // Output immediate memory reference
2046     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2047     emit_d32(cbuf, 0x00);
2048   %}
2049 
2050   enc_class lock_prefix( ) %{
2051     if( os::is_MP() )
2052       emit_opcode(cbuf,0xF0);         // [Lock]
2053   %}
2054 
2055   // Cmp-xchg long value.
2056   // Note: we need to swap rbx, and rcx before and after the
2057   //       cmpxchg8 instruction because the instruction uses
2058   //       rcx as the high order word of the new value to store but
2059   //       our register encoding uses rbx,.
2060   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2061 
2062     // XCHG  rbx,ecx
2063     emit_opcode(cbuf,0x87);
2064     emit_opcode(cbuf,0xD9);
2065     // [Lock]
2066     if( os::is_MP() )
2067       emit_opcode(cbuf,0xF0);
2068     // CMPXCHG8 [Eptr]
2069     emit_opcode(cbuf,0x0F);
2070     emit_opcode(cbuf,0xC7);
2071     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2072     // XCHG  rbx,ecx
2073     emit_opcode(cbuf,0x87);
2074     emit_opcode(cbuf,0xD9);
2075   %}
2076 
2077   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2078     // [Lock]
2079     if( os::is_MP() )
2080       emit_opcode(cbuf,0xF0);
2081 
2082     // CMPXCHG [Eptr]
2083     emit_opcode(cbuf,0x0F);
2084     emit_opcode(cbuf,0xB1);
2085     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2086   %}
2087 
2088   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2089     int res_encoding = $res$$reg;
2090 
2091     // MOV  res,0
2092     emit_opcode( cbuf, 0xB8 + res_encoding);
2093     emit_d32( cbuf, 0 );
2094     // JNE,s  fail
2095     emit_opcode(cbuf,0x75);
2096     emit_d8(cbuf, 5 );
2097     // MOV  res,1
2098     emit_opcode( cbuf, 0xB8 + res_encoding);
2099     emit_d32( cbuf, 1 );
2100     // fail:
2101   %}
2102 
  // Records the current end of the code buffer as the instruction mark.
  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}
2106 
2107   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2108     int reg_encoding = $ereg$$reg;
2109     int base  = $mem$$base;
2110     int index = $mem$$index;
2111     int scale = $mem$$scale;
2112     int displace = $mem$$disp;
2113     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2114     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2115   %}
2116 
2117   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2118     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2119     int base  = $mem$$base;
2120     int index = $mem$$index;
2121     int scale = $mem$$scale;
2122     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2123     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2124     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2125   %}
2126 
2127   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2128     int r1, r2;
2129     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2130     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2131     emit_opcode(cbuf,0x0F);
2132     emit_opcode(cbuf,$tertiary);
2133     emit_rm(cbuf, 0x3, r1, r2);
2134     emit_d8(cbuf,$cnt$$constant);
2135     emit_d8(cbuf,$primary);
2136     emit_rm(cbuf, 0x3, $secondary, r1);
2137     emit_d8(cbuf,$cnt$$constant);
2138   %}
2139 
2140   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2141     emit_opcode( cbuf, 0x8B ); // Move
2142     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2143     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2144       emit_d8(cbuf,$primary);
2145       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2146       emit_d8(cbuf,$cnt$$constant-32);
2147     }
2148     emit_d8(cbuf,$primary);
2149     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2150     emit_d8(cbuf,31);
2151   %}
2152 
2153   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2154     int r1, r2;
2155     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2156     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2157 
2158     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2159     emit_rm(cbuf, 0x3, r1, r2);
2160     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2161       emit_opcode(cbuf,$primary);
2162       emit_rm(cbuf, 0x3, $secondary, r1);
2163       emit_d8(cbuf,$cnt$$constant-32);
2164     }
2165     emit_opcode(cbuf,0x33);  // XOR r2,r2
2166     emit_rm(cbuf, 0x3, r2, r2);
2167   %}
2168 
2169   // Clone of RegMem but accepts an extra parameter to access each
2170   // half of a double in memory; it never needs relocation info.
2171   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2172     emit_opcode(cbuf,$opcode$$constant);
2173     int reg_encoding = $rm_reg$$reg;
2174     int base     = $mem$$base;
2175     int index    = $mem$$index;
2176     int scale    = $mem$$scale;
2177     int displace = $mem$$disp + $disp_for_half$$constant;
2178     relocInfo::relocType disp_reloc = relocInfo::none;
2179     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2180   %}
2181 
2182   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2183   //
2184   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2185   // and it never needs relocation information.
2186   // Frequently used to move data between FPU's Stack Top and memory.
2187   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2188     int rm_byte_opcode = $rm_opcode$$constant;
2189     int base     = $mem$$base;
2190     int index    = $mem$$index;
2191     int scale    = $mem$$scale;
2192     int displace = $mem$$disp;
2193     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2194     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2195   %}
2196 
2197   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2198     int rm_byte_opcode = $rm_opcode$$constant;
2199     int base     = $mem$$base;
2200     int index    = $mem$$index;
2201     int scale    = $mem$$scale;
2202     int displace = $mem$$disp;
2203     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2204     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2205   %}
2206 
2207   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2208     int reg_encoding = $dst$$reg;
2209     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2210     int index        = 0x04;            // 0x04 indicates no index
2211     int scale        = 0x00;            // 0x00 indicates no scale
2212     int displace     = $src1$$constant; // 0x00 indicates no displacement
2213     relocInfo::relocType disp_reloc = relocInfo::none;
2214     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2215   %}
2216 
2217   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2218     // Compare dst,src
2219     emit_opcode(cbuf,0x3B);
2220     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2221     // jmp dst < src around move
2222     emit_opcode(cbuf,0x7C);
2223     emit_d8(cbuf,2);
2224     // move dst,src
2225     emit_opcode(cbuf,0x8B);
2226     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2227   %}
2228 
2229   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2230     // Compare dst,src
2231     emit_opcode(cbuf,0x3B);
2232     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2233     // jmp dst > src around move
2234     emit_opcode(cbuf,0x7F);
2235     emit_d8(cbuf,2);
2236     // move dst,src
2237     emit_opcode(cbuf,0x8B);
2238     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2239   %}
2240 
2241   enc_class enc_FPR_store(memory mem, regDPR src) %{
2242     // If src is FPR1, we can just FST to store it.
2243     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2244     int reg_encoding = 0x2; // Just store
2245     int base  = $mem$$base;
2246     int index = $mem$$index;
2247     int scale = $mem$$scale;
2248     int displace = $mem$$disp;
2249     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2250     if( $src$$reg != FPR1L_enc ) {
2251       reg_encoding = 0x3;  // Store & pop
2252       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2253       emit_d8( cbuf, 0xC0-1+$src$$reg );
2254     }
2255     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2256     emit_opcode(cbuf,$primary);
2257     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2258   %}
2259 
2260   enc_class neg_reg(rRegI dst) %{
2261     // NEG $dst
2262     emit_opcode(cbuf,0xF7);
2263     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2264   %}
2265 
2266   enc_class setLT_reg(eCXRegI dst) %{
2267     // SETLT $dst
2268     emit_opcode(cbuf,0x0F);
2269     emit_opcode(cbuf,0x9C);
2270     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2271   %}
2272 
2273   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2274     // Branch-free sequence: p = (p - q) + (borrow ? y : 0).  The SBB of tmp with
2275     // itself turns the borrow of the SUB into an all-ones or all-zeros mask.
2276     // SUB $p,$q
2277     emit_opcode(cbuf,0x2B);
2278     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2279     // SBB $tmp,$tmp      (tmp = borrow ? -1 : 0)
2280     emit_opcode(cbuf,0x1B);
2281     emit_rm(cbuf, 0x3, $tmp$$reg, $tmp$$reg);
2282     // AND $tmp,$y        (tmp = borrow ? y : 0)
2283     emit_opcode(cbuf,0x23);
2284     emit_rm(cbuf, 0x3, $tmp$$reg, $y$$reg);
2285     // ADD $p,$tmp
2286     emit_opcode(cbuf,0x03);
2287     emit_rm(cbuf, 0x3, $p$$reg, $tmp$$reg);
2288   %}
2289 
2290   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
         // 64-bit left shift of the register pair dst.hi:dst.lo by the count in ECX.
         // When bit 5 of the count is set, the low word is first moved into the
         // high word and the low word cleared; SHLD/SHL then apply the count.
2291     // TEST shift,32
2292     emit_opcode(cbuf,0xF7);
2293     emit_rm(cbuf, 0x3, 0, ECX_enc);   // the count register is hard-wired to ECX here
2294     emit_d32(cbuf,0x20);
2295     // JEQ,s small
2296     emit_opcode(cbuf, 0x74);
2297     emit_d8(cbuf, 0x04);              // skips the MOV and CLR below (2 bytes each)
2298     // MOV    $dst.hi,$dst.lo
2299     emit_opcode( cbuf, 0x8B );
2300     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2301     // CLR    $dst.lo
2302     emit_opcode(cbuf, 0x33);
2303     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2304 // small:
2305     // SHLD   $dst.hi,$dst.lo,$shift
2306     emit_opcode(cbuf,0x0F);
2307     emit_opcode(cbuf,0xA5);
2308     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2309     // SHL    $dst.lo,$shift
2310     emit_opcode(cbuf,0xD3);
2311     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2312   %}
2313 
2314   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
         // 64-bit logical right shift of the register pair dst.hi:dst.lo by the
         // count in ECX.  When bit 5 of the count is set, the high word is first
         // moved into the low word and the high word cleared; SHRD/SHR then apply
         // the count.
2315     // TEST shift,32
2316     emit_opcode(cbuf,0xF7);
2317     emit_rm(cbuf, 0x3, 0, ECX_enc);   // the count register is hard-wired to ECX here
2318     emit_d32(cbuf,0x20);
2319     // JEQ,s small
2320     emit_opcode(cbuf, 0x74);
2321     emit_d8(cbuf, 0x04);              // skips the MOV and CLR below (2 bytes each)
2322     // MOV    $dst.lo,$dst.hi
2323     emit_opcode( cbuf, 0x8B );
2324     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2325     // CLR    $dst.hi
2326     emit_opcode(cbuf, 0x33);
2327     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2328 // small:
2329     // SHRD   $dst.lo,$dst.hi,$shift
2330     emit_opcode(cbuf,0x0F);
2331     emit_opcode(cbuf,0xAD);
2332     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2333     // SHR    $dst.hi,$shift
2334     emit_opcode(cbuf,0xD3);
2335     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2336   %}
2337 
2338   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
         // 64-bit arithmetic right shift of the register pair dst.hi:dst.lo by the
         // count in ECX.  When bit 5 of the count is set, the high word is first
         // moved into the low word and the high word filled with its sign
         // (SAR by 31); SHRD/SAR then apply the count.
2339     // TEST shift,32
2340     emit_opcode(cbuf,0xF7);
2341     emit_rm(cbuf, 0x3, 0, ECX_enc);   // the count register is hard-wired to ECX here
2342     emit_d32(cbuf,0x20);
2343     // JEQ,s small
2344     emit_opcode(cbuf, 0x74);
2345     emit_d8(cbuf, 0x05);              // skips the MOV (2 bytes) and SAR imm8 (3 bytes) below
2346     // MOV    $dst.lo,$dst.hi
2347     emit_opcode( cbuf, 0x8B );
2348     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2349     // SAR    $dst.hi,31
2350     emit_opcode(cbuf, 0xC1);
2351     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2352     emit_d8(cbuf, 0x1F );
2353 // small:
2354     // SHRD   $dst.lo,$dst.hi,$shift
2355     emit_opcode(cbuf,0x0F);
2356     emit_opcode(cbuf,0xAD);
2357     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2358     // SAR    $dst.hi,$shift
2359     emit_opcode(cbuf,0xD3);
2360     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2361   %}
2362 
2363 
2364   // ----------------- Encodings for floating point unit -----------------
2365   // May leave result in FPU-TOS or FPU reg depending on opcodes
2366   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
         // Emits the instruction's $primary opcode byte, then a register-direct
         // ModRM byte with $secondary in the reg field and src in the r/m field.
2367     $$$emit8$primary;
2368     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2369   %}
2370 
2371   // Pop argument in FPR0 with FSTP ST(0)
2372   enc_class PopFPU() %{
2373     emit_opcode( cbuf, 0xDD );           // FSTP   ST(0) is the byte pair DD D8
2374     emit_d8( cbuf, 0xD8 );
2375   %}
2376 
2377   // !!!!! equivalent to Pop_Reg_F
       // NOTE(review): the encoding in this section that emits the same two bytes
       // is named Pop_Reg_FPR; "Pop_Reg_F" looks like a stale name - confirm.
2378   enc_class Pop_Reg_DPR( regDPR dst ) %{
2379     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2380     emit_d8( cbuf, 0xD8+$dst$$reg );
2381   %}
2382 
2383   enc_class Push_Reg_DPR( regDPR dst ) %{
         // Pushes a copy of the FPU register onto the FPU stack.  The operand is
         // named 'dst' although it is the register being loaded from.
2384     emit_opcode( cbuf, 0xD9 );
2385     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2386   %}
2387 
2388   enc_class strictfp_bias1( regDPR dst ) %{
         // Loads the 80-bit constant at StubRoutines::addr_fpu_subnormal_bias1()
         // and multiplies it into dst, popping the loaded constant.
2389     emit_opcode( cbuf, 0xDB );           // FLD m80real
2390     emit_opcode( cbuf, 0x2D );           // ModRM byte: absolute 32-bit address follows
2391     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2392     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2393     emit_opcode( cbuf, 0xC8+$dst$$reg );
2394   %}
2395 
2396   enc_class strictfp_bias2( regDPR dst ) %{
         // Loads the 80-bit constant at StubRoutines::addr_fpu_subnormal_bias2()
         // and multiplies it into dst, popping the loaded constant.
2397     emit_opcode( cbuf, 0xDB );           // FLD m80real
2398     emit_opcode( cbuf, 0x2D );           // ModRM byte: absolute 32-bit address follows
2399     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2400     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2401     emit_opcode( cbuf, 0xC8+$dst$$reg );
2402   %}
2403 
2404   // Special case for moving an integer register to a stack slot.
       // Delegates to store_to_stackslot with the instruction's $primary opcode,
       // the source register encoding and the slot displacement.
2405   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2406     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2407   %}
2408 
2409   // Special case for moving a register to a stack slot.
       // Emits only the operand bytes ([ESP+disp32] addressing); the opcode byte
       // must have been emitted by an earlier encoding.
2410   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2411     // Opcode already emitted
2412     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2413     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2414     emit_d32(cbuf, $dst$$disp);   // Displacement
2415   %}
2416 
2417   // Push the integer in stackSlot 'src' onto FP-stack
       // The opcode and its /digit extension come from the instruction's
       // $primary and $secondary; only the displacement of 'src' is used.
2418   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2419     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2420   %}
2421 
2422   // Store FPU's TOS float to a stack-slot, and pop FPU-stack
2423   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2424     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );   // D9 /3
2425   %}
2426 
2427   // Same as Pop_Mem_F except for opcode
       // NOTE(review): the float variant just above is named Pop_Mem_FPR;
       // "Pop_Mem_F" looks like a stale name - confirm.
2428   // Store FPU's TOS double to a stack-slot, and pop FPU-stack
2429   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2430     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );   // DD /3
2431   %}
2432 
2433   enc_class Pop_Reg_FPR( regFPR dst ) %{
         // Stores the FPU top-of-stack into FPU register dst and pops the stack.
2434     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2435     emit_d8( cbuf, 0xD8+$dst$$reg );
2436   %}
2437 
2438   enc_class Push_Reg_FPR( regFPR dst ) %{
         // Pushes a copy of the FPU register onto the FPU stack.  The operand is
         // named 'dst' although it is the register being loaded from.
2439     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2440     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2441   %}
2442 
2443   // Store float register 'src' to stack slot 'dst'; src is pushed first (and the store pops) unless it is already FPR1
2444   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2445     const bool needs_push = ($src$$reg != FPR1L_enc);
2446     if (needs_push) {
2447       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2448       emit_d8( cbuf, 0xC0-1+$src$$reg );
2449     }
2450     // /2 selects FST (no pop), /3 selects FSTP (pops the copy pushed above)
2451     store_to_stackslot( cbuf, 0xD9, needs_push ? 0x03 : 0x02, $dst$$disp ); // FST<P>_S  [ESP+dst]
2452   %}
2453 
2454   // Store double register 'src' to stack slot 'dst'; src is pushed first (and the store pops) unless it is already FPR1
2455   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2456     const bool needs_push = ($src$$reg != FPR1L_enc);
2457     if (needs_push) {
2458       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2459       emit_d8( cbuf, 0xC0-1+$src$$reg );
2460     }
2461     // /2 selects FST (no pop), /3 selects FSTP (pops the copy pushed above)
2462     store_to_stackslot( cbuf, 0xDD, needs_push ? 0x03 : 0x02, $dst$$disp ); // FST<P>_D  [ESP+dst]
2463   %}
2464 
2465   // Copy FPU register 'src' into FPU register 'dst', pushing and popping only when src is not FPR1
2466   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2467     const bool push_src = ($src$$reg != FPR1L_enc);
2468     if (push_src) {
2469       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2470       emit_d8( cbuf, 0xC0-1+$src$$reg );
2471     }
2472     // DD D0+i is FST ST(i), DD D8+i is FSTP ST(i); with no FLD the index is one lower
2473     emit_opcode( cbuf, 0xDD );
2474     emit_d8( cbuf, (push_src ? 0xD8 : 0xD0 - 1) + $dst$$reg );      // FST<P> ST(i)
2475   %}
2476 
2477 
2478   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
         // Pushes a copy of dst onto the FPU stack and, unless src is already
         // FPR1, exchanges src with FPR1 underneath the pushed value.
2479     // load dst in FPR0
2480     emit_opcode( cbuf, 0xD9 );
2481     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2482     if ($src$$reg != FPR1L_enc) {
2483       // fincstp   (step the stack-top pointer past the value just pushed)
2484       emit_opcode (cbuf, 0xD9);
2485       emit_opcode (cbuf, 0xF7);
2486       // swap src with FPR1:
2487       // FXCH FPR1 with src
2488       emit_opcode(cbuf, 0xD9);
2489       emit_d8(cbuf, 0xC8-1+$src$$reg );
2490       // fdecstp   (step the stack-top pointer back)
2491       emit_opcode (cbuf, 0xD9);
2492       emit_opcode (cbuf, 0xF6);
2493     }
2494   %}
2495 
2496   enc_class Push_ModD_encoding(regD src0, regD src1) %{
         // Moves two XMM doubles onto the x87 stack through an 8-byte scratch
         // slot: src1 is pushed first, then src0, so src0 ends up on top.
         // The 8 bytes taken from rsp are NOT released by this encoding.
2497     MacroAssembler _masm(&cbuf);
2498     __ subptr(rsp, 8);
2499     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2500     __ fld_d(Address(rsp, 0));
2501     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2502     __ fld_d(Address(rsp, 0));
2503   %}
2504 
2505   enc_class Push_ModF_encoding(regF src0, regF src1) %{
         // Moves two XMM floats onto the x87 stack through a 4-byte scratch
         // slot: src1 is pushed first, then src0, so src0 ends up on top.
         // The 4 bytes taken from rsp are NOT released by this encoding.
2506     MacroAssembler _masm(&cbuf);
2507     __ subptr(rsp, 4);
2508     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2509     __ fld_s(Address(rsp, 0));
2510     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2511     __ fld_s(Address(rsp, 0));
2512   %}
2513 
2514   enc_class Push_ResultD(regD dst) %{
         // Pops the x87 top-of-stack as a double into [rsp], loads it into the
         // XMM register dst, then releases 8 bytes of stack.
2515     MacroAssembler _masm(&cbuf);
2516     __ fstp_d(Address(rsp, 0));
2517     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2518     __ addptr(rsp, 8);
2519   %}
2520 
2521   enc_class Push_ResultF(regF dst, immI d8) %{
         // Pops the x87 top-of-stack as a float into [rsp], loads it into the
         // XMM register dst, then releases $d8 bytes of stack (amount chosen by
         // the instruction using this encoding).
2522     MacroAssembler _masm(&cbuf);
2523     __ fstp_s(Address(rsp, 0));
2524     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2525     __ addptr(rsp, $d8$$constant);
2526   %}
2527 
2528   enc_class Push_SrcD(regD src) %{
         // Reserves 8 bytes of stack, spills the XMM double there and loads it
         // onto the x87 stack.  The 8 bytes are NOT released by this encoding.
2529     MacroAssembler _masm(&cbuf);
2530     __ subptr(rsp, 8);
2531     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2532     __ fld_d(Address(rsp, 0));
2533   %}
2534 
2535   enc_class push_stack_temp_qword() %{
         // Reserves an 8-byte scratch slot by lowering rsp.
2536     MacroAssembler _masm(&cbuf);
2537     __ subptr(rsp, 8);
2538   %}
2539 
2540   enc_class pop_stack_temp_qword() %{
         // Releases an 8-byte scratch slot by raising rsp.
2541     MacroAssembler _masm(&cbuf);
2542     __ addptr(rsp, 8);
2543   %}
2544 
2545   enc_class push_xmm_to_fpr1(regD src) %{
         // Spills the XMM double to [rsp] and loads it onto the x87 stack.
         // rsp itself is not adjusted here, so the 8 bytes at [rsp] are
         // overwritten; presumably a scratch slot was reserved beforehand
         // (e.g. by push_stack_temp_qword) - confirm at the use sites.
2546     MacroAssembler _masm(&cbuf);
2547     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2548     __ fld_d(Address(rsp, 0));
2549   %}
2550 
2551   enc_class Push_Result_Mod_DPR( regDPR src) %{
         // Unless src is already FPR1, exchanges src with FPR1 underneath the
         // current FPU top-of-stack.  Emits nothing when src is FPR1.
2552     if ($src$$reg != FPR1L_enc) {
2553       // fincstp   (step the stack-top pointer past the top entry)
2554       emit_opcode (cbuf, 0xD9);
2555       emit_opcode (cbuf, 0xF7);
2556       // FXCH FPR1 with src
2557       emit_opcode(cbuf, 0xD9);
2558       emit_d8(cbuf, 0xC8-1+$src$$reg );
2559       // fdecstp   (step the stack-top pointer back)
2560       emit_opcode (cbuf, 0xD9);
2561       emit_opcode (cbuf, 0xF6);
2562     }
2563     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2564     // // FSTP   FPR$dst$$reg
2565     // emit_opcode( cbuf, 0xDD );
2566     // emit_d8( cbuf, 0xD8+$dst$$reg );
2567   %}
2568 
2569   enc_class fnstsw_sahf_skip_parity() %{
         // Copies the FPU status word into the CPU flags (via AX) and emits a
         // short JNP whose target lies 5 bytes past the end of this encoding,
         // i.e. it skips whatever 5 bytes the following encoding emits.
2570     // fnstsw ax
2571     emit_opcode( cbuf, 0xDF );
2572     emit_opcode( cbuf, 0xE0 );
2573     // sahf
2574     emit_opcode( cbuf, 0x9E );
2575     // jnp  ::skip
2576     emit_opcode( cbuf, 0x7B );
2577     emit_opcode( cbuf, 0x05 );   // 8-bit displacement
2578   %}
2579 
2580   enc_class emitModDPR() %{
         // Emits a loop that repeats FPREM until the parity flag copied from
         // the FPU status word is clear.
2581     // fprem must be iterative
2582     // :: loop
2583     // fprem
2584     emit_opcode( cbuf, 0xD9 );
2585     emit_opcode( cbuf, 0xF8 );
2586     // wait
2587     emit_opcode( cbuf, 0x9b );
2588     // fnstsw ax
2589     emit_opcode( cbuf, 0xDF );
2590     emit_opcode( cbuf, 0xE0 );
2591     // sahf
2592     emit_opcode( cbuf, 0x9E );
2593     // jp  ::loop
         // Near JP with a 32-bit little-endian displacement of 0xFFFFFFF4 (-12):
         // the 12 bytes of this encoding, counted back from the end of the jump.
2594     emit_opcode( cbuf, 0x0F );
2595     emit_opcode( cbuf, 0x8A );
2596     emit_opcode( cbuf, 0xF4 );
2597     emit_opcode( cbuf, 0xFF );
2598     emit_opcode( cbuf, 0xFF );
2599     emit_opcode( cbuf, 0xFF );
2600   %}
2601 
2602   enc_class fpu_flags() %{
         // Transfers the FPU status word into the CPU flags.  If status bit
         // 0x0400 is set (the "unordered" case per the comments below), AH is
         // first replaced by 1 so that SAHF sets only the carry flag.
2603     // fnstsw_ax
2604     emit_opcode( cbuf, 0xDF);
2605     emit_opcode( cbuf, 0xE0);
2606     // test ax,0x0400
2607     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2608     emit_opcode( cbuf, 0xA9 );
2609     emit_d16   ( cbuf, 0x0400 );
2610     // // // This sequence works, but stalls for 12-16 cycles on PPro
2611     // // test rax,0x0400
2612     // emit_opcode( cbuf, 0xA9 );
2613     // emit_d32   ( cbuf, 0x00000400 );
2614     //
2615     // jz exit (no unordered comparison)
2616     emit_opcode( cbuf, 0x74 );
2617     emit_d8    ( cbuf, 0x02 );   // skips only the 2-byte MOV AH,1; SAHF always executes
2618     // mov ah,1 - treat as LT case (set carry flag)
2619     emit_opcode( cbuf, 0xB4 );
2620     emit_d8    ( cbuf, 0x01 );
2621     // sahf
2622     emit_opcode( cbuf, 0x9E);
2623   %}
2624 
2625   enc_class cmpF_P6_fixup() %{
2626     // Fixup the integer flags in case comparison involved a NaN
2627     //
2628     // JNP exit (no unordered comparison, P-flag is set by NaN)
2629     emit_opcode( cbuf, 0x7B );
2630     emit_d8    ( cbuf, 0x03 );   // skips MOV AH,1 (2 bytes) and SAHF (1 byte), landing on the NOP
2631     // MOV AH,1 - treat as LT case (set carry flag)
2632     emit_opcode( cbuf, 0xB4 );
2633     emit_d8    ( cbuf, 0x01 );
2634     // SAHF
2635     emit_opcode( cbuf, 0x9E);
2636     // NOP     // target for branch to avoid branch to branch
2637     emit_opcode( cbuf, 0x90);
2638   %}
2639 
2640 //     fnstsw_ax();
2641 //     sahf();
2642 //     movl(dst, nan_result);
2643 //     jcc(Assembler::parity, exit);
2644 //     movl(dst, less_result);
2645 //     jcc(Assembler::below, exit);
2646 //     movl(dst, equal_result);
2647 //     jcc(Assembler::equal, exit);
2648 //     movl(dst, greater_result);
2649 
2650 // less_result     = -1;
2651 // greater_result  =  1;
2652 // equal_result    =  0;
2653 // nan_result      = -1;
2654 
2655   enc_class CmpF_Result(rRegI dst) %{
         // Materializes a three-way FPU compare result in dst: -1 for unordered
         // (NaN) or less, 0 for equal, 1 for greater.  Each MOV r32,imm32 is
         // 5 bytes and each short Jcc is 2 bytes; the displacements below are the
         // byte counts from the end of the jump to the end of this encoding.
2656     // fnstsw_ax();
2657     emit_opcode( cbuf, 0xDF);
2658     emit_opcode( cbuf, 0xE0);
2659     // sahf
2660     emit_opcode( cbuf, 0x9E);
2661     // movl(dst, nan_result);
2662     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2663     emit_d32( cbuf, -1 );
2664     // jcc(Assembler::parity, exit);
2665     emit_opcode( cbuf, 0x7A );
2666     emit_d8    ( cbuf, 0x13 );   // 19 = 5+2+5+2+5
2667     // movl(dst, less_result);
2668     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2669     emit_d32( cbuf, -1 );
2670     // jcc(Assembler::below, exit);
2671     emit_opcode( cbuf, 0x72 );
2672     emit_d8    ( cbuf, 0x0C );   // 12 = 5+2+5
2673     // movl(dst, equal_result);
2674     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2675     emit_d32( cbuf, 0 );
2676     // jcc(Assembler::equal, exit);
2677     emit_opcode( cbuf, 0x74 );
2678     emit_d8    ( cbuf, 0x05 );   // 5 = the final MOV
2679     // movl(dst, greater_result);
2680     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2681     emit_d32( cbuf, 1 );
2682   %}
2683 
2684 
2685   // Compare the longs and set flags
2686   // BROKEN!  Do Not use as-is
       // Compares the high words; only when they are equal does it fall through
       // to compare the low words.
2687   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2688     // CMP    $src1.hi,$src2.hi
2689     emit_opcode( cbuf, 0x3B );
2690     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2691     // JNE,s  done
2692     emit_opcode(cbuf,0x75);
2693     emit_d8(cbuf, 2 );   // skips the 2-byte CMP of the low words
2694     // CMP    $src1.lo,$src2.lo
2695     emit_opcode( cbuf, 0x3B );
2696     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2697 // done:
2698   %}
2699 
2700   enc_class convert_int_long( regL dst, rRegI src ) %{
2701     // Sign-extend int $src into the register pair $dst.hi:$dst.lo
2702     const int lo = $dst$$reg;
2703     const int hi = HIGH_FROM_LOW(lo);
2704     encode_Copy( cbuf, lo, $src$$reg );   // mov $dst.lo,$src
2705     encode_Copy( cbuf, hi, $src$$reg );   // mov $dst.hi,$src
2706     // sar $dst.hi,31
2707     // (an arithmetic shift by 31 leaves only copies of the sign bit)
2708     emit_opcode( cbuf, 0xC1 );
2709     emit_rm(cbuf, 0x3, 7, hi );
2710     emit_d8(cbuf, 0x1F );
2711   %}
2712 
2713   enc_class convert_long_double( eRegL src ) %{
         // Pushes the long (high word first, so the low word sits at the lower
         // address), loads it onto the FPU stack as a 64-bit integer, then
         // releases the 8 bytes of CPU stack again.
2714     // push $src.hi
2715     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2716     // push $src.lo
2717     emit_opcode(cbuf, 0x50+$src$$reg  );
2718     // fild 64-bits at [SP]
2719     emit_opcode(cbuf,0xdf);
2720     emit_d8(cbuf, 0x6C);   // ModRM: /5, SIB follows, 8-bit displacement
2721     emit_d8(cbuf, 0x24);   // SIB: base ESP, no index
2722     emit_d8(cbuf, 0x00);   // displacement 0
2723     // pop stack
2724     emit_opcode(cbuf, 0x83); // add  SP, #8
2725     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2726     emit_d8(cbuf, 0x8);
2727   %}
2728 
2729   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2730     // High half of a widening multiply, shifted right by ($cnt - 32)
2731     const int residual_shift = ((int)$cnt$$constant) - 32;
2732     // IMUL   EDX:EAX,$src1
2733     emit_opcode( cbuf, 0xF7 );
2734     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2735     if (0 < residual_shift) {
2736       // SAR    EDX,$cnt-32   (omitted entirely when $cnt is exactly 32)
2737       emit_opcode(cbuf, 0xC1); emit_rm(cbuf, 0x3, 7, $dst$$reg );
2738       emit_d8(cbuf, residual_shift);
2739     }
2740   %}
2741 
2742   // this version doesn't have add sp, 8
       // i.e. the 8 bytes pushed here stay on the CPU stack after the FILD.
2743   enc_class convert_long_double2( eRegL src ) %{
2744     // push $src.hi
2745     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2746     // push $src.lo
2747     emit_opcode(cbuf, 0x50+$src$$reg  );
2748     // fild 64-bits at [SP]
2749     emit_opcode(cbuf,0xdf);
2750     emit_d8(cbuf, 0x6C);   // ModRM: /5, SIB follows, 8-bit displacement
2751     emit_d8(cbuf, 0x24);   // SIB: base ESP, no index
2752     emit_d8(cbuf, 0x00);   // displacement 0
2753   %}
2754 
2755   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2756     // Basic idea: long = (long)int * (long)int
         // dst is not referenced in the emitted bytes: the one-operand form
         // (F7 /5) implicitly uses EAX and writes EDX:EAX.
2757     // IMUL EDX:EAX, src
2758     emit_opcode( cbuf, 0xF7 );
2759     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2760   %}
2761 
2762   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2763     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
         // dst is not referenced in the emitted bytes: the one-operand unsigned
         // form (F7 /4) implicitly uses EAX and writes EDX:EAX.
2764     // MUL EDX:EAX, src
2765     emit_opcode( cbuf, 0xF7 );
2766     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2767   %}
2768 
2769   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2770     // Basic idea: lo(result) = lo(x_lo * y_lo)
2771     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
         // The two cross products are accumulated in $tmp, then added to the
         // high half of the widening low*low product.  The comments below write
         // EDX/EAX for the high/low halves of dst.
2772     // MOV    $tmp,$src.lo
2773     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2774     // IMUL   $tmp,EDX
2775     emit_opcode( cbuf, 0x0F );
2776     emit_opcode( cbuf, 0xAF );
2777     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2778     // MOV    EDX,$src.hi
2779     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2780     // IMUL   EDX,EAX
2781     emit_opcode( cbuf, 0x0F );
2782     emit_opcode( cbuf, 0xAF );
2783     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2784     // ADD    $tmp,EDX
2785     emit_opcode( cbuf, 0x03 );
2786     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2787     // MUL   EDX:EAX,$src.lo
2788     emit_opcode( cbuf, 0xF7 );
2789     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2790     // ADD    EDX,$tmp
2791     emit_opcode( cbuf, 0x03 );
2792     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2793   %}
2794 
2795   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2796     // Basic idea: lo(result) = lo(src * y_lo)
2797     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
         // The constant is emitted once as an 8-bit immediate and once as a
         // 32-bit immediate; presumably immL_127 restricts it to a range where
         // both forms agree - confirm against the operand definition.
2798     // IMUL   $tmp,EDX,$src
2799     emit_opcode( cbuf, 0x6B );
2800     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2801     emit_d8( cbuf, (int)$src$$constant );
2802     // MOV    EDX,$src
2803     emit_opcode(cbuf, 0xB8 + EDX_enc);
2804     emit_d32( cbuf, (int)$src$$constant );
2805     // MUL   EDX:EAX,EDX
2806     emit_opcode( cbuf, 0xF7 );
2807     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2808     // ADD    EDX,$tmp
2809     emit_opcode( cbuf, 0x03 );
2810     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2811   %}
2812 
2813   enc_class long_div( eRegL src1, eRegL src2 ) %{
         // 64-bit division through the runtime: pushes both longs (high word
         // before low word, src1 before src2), calls SharedRuntime::ldiv and
         // then drops the four pushed words.
         // HIGH_FROM_LOW is applied to the register encoding, not to the whole
         // PUSH opcode byte, matching convert_long_double.
2814     // PUSH src1.hi
2815     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src1$$reg) );
2816     // PUSH src1.lo
2817     emit_opcode(cbuf, 0x50+              $src1$$reg  );
2818     // PUSH src2.hi
2819     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src2$$reg) );
2820     // PUSH src2.lo
2821     emit_opcode(cbuf, 0x50+              $src2$$reg  );
2822     // CALL directly to the runtime
2823     cbuf.set_insts_mark();
2824     emit_opcode(cbuf,0xE8);       // Call into runtime
         // rel32 is measured from the end of the 4-byte displacement, hence the -4
2825     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2826     // Restore stack
2827     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2828     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2829     emit_d8(cbuf, 4*4);      // four 4-byte words were pushed above
2830   %}
2831 
2832   enc_class long_mod( eRegL src1, eRegL src2 ) %{
         // 64-bit remainder through the runtime: pushes both longs (high word
         // before low word, src1 before src2), calls SharedRuntime::lrem and
         // then drops the four pushed words.
         // HIGH_FROM_LOW is applied to the register encoding, not to the whole
         // PUSH opcode byte, matching convert_long_double.
2833     // PUSH src1.hi
2834     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src1$$reg) );
2835     // PUSH src1.lo
2836     emit_opcode(cbuf, 0x50+              $src1$$reg  );
2837     // PUSH src2.hi
2838     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src2$$reg) );
2839     // PUSH src2.lo
2840     emit_opcode(cbuf, 0x50+              $src2$$reg  );
2841     // CALL directly to the runtime
2842     cbuf.set_insts_mark();
2843     emit_opcode(cbuf,0xE8);       // Call into runtime
         // rel32 is measured from the end of the 4-byte displacement, hence the -4
2844     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2845     // Restore stack
2846     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2847     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2848     emit_d8(cbuf, 4*4);      // four 4-byte words were pushed above
2849   %}
2850 
2851   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
         // ORs both halves of the long into tmp, so the zero flag reflects
         // whether the whole 64-bit value is zero.
2852     // MOV   $tmp,$src.lo
2853     emit_opcode(cbuf, 0x8B);
2854     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2855     // OR    $tmp,$src.hi
2856     emit_opcode(cbuf, 0x0B);
2857     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2858   %}
2859 
2860   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
         // Compares the low words and, only when they are equal, the high
         // words; the flags come from whichever compare ran last.
2861     // CMP    $src1.lo,$src2.lo
2862     emit_opcode( cbuf, 0x3B );
2863     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2864     // JNE,s  skip
2865     emit_cc(cbuf, 0x70, 0x5);
2866     emit_d8(cbuf,2);   // skips the 2-byte CMP of the high words
2867     // CMP    $src1.hi,$src2.hi
2868     emit_opcode( cbuf, 0x3B );
2869     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2870   %}
2871 
2872   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
         // Sets flags as for the 64-bit subtraction src1 - src2: the borrow of
         // the low-word compare feeds an SBB of the high words done in tmp, so
         // neither source register is modified.
2873     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2874     emit_opcode( cbuf, 0x3B );
2875     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2876     // MOV    $tmp,$src1.hi
2877     emit_opcode( cbuf, 0x8B );
2878     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2879     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2880     emit_opcode( cbuf, 0x1B );
2881     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2882   %}
2883 
2884   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
         // Sets flags as for the 64-bit subtraction 0 - src, using tmp
         // (zeroed first) as the left-hand operand.
2885     // XOR    $tmp,$tmp
2886     emit_opcode(cbuf,0x33);  // XOR
2887     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2888     // CMP    $tmp,$src.lo
2889     emit_opcode( cbuf, 0x3B );
2890     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2891     // SBB    $tmp,$src.hi
2892     emit_opcode( cbuf, 0x1B );
2893     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2894   %}
2895 
2896  // Sniff, sniff... smells like Gnu Superoptimizer
      // Negates the 64-bit register pair in place: negate both halves, then
      // subtract the borrow produced by negating the low half from the high half.
2897   enc_class neg_long( eRegL dst ) %{
2898     emit_opcode(cbuf,0xF7);    // NEG hi
2899     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2900     emit_opcode(cbuf,0xF7);    // NEG lo
2901     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2902     emit_opcode(cbuf,0x83);    // SBB hi,0
2903     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2904     emit_d8    (cbuf,0 );
2905   %}
2906 
2907   enc_class enc_pop_rdx() %{
2908     emit_opcode(cbuf,0x5A);    // POP EDX (single-byte opcode 58+rd)
2909   %}
2910 
2911   enc_class enc_rethrow() %{
         // Emits a relocatable near JMP to OptoRuntime::rethrow_stub().
2912     cbuf.set_insts_mark();
2913     emit_opcode(cbuf, 0xE9);        // jmp    entry
         // rel32 is measured from the end of the 4-byte displacement, hence the -4
2914     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2915                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2916   %}
2917 
2918 
2919   // Convert a double to an int.  Java semantics require we do complex
2920   // manglelations in the corner cases.  So we set the rounding mode to
2921   // 'zero', store the darned double down as an int, and reset the
2922   // rounding mode to 'nearest'.  The hardware throws an exception which
2923   // patches up the correct value directly to the stack.
       // As emitted below, the corner cases are detected by comparing the
       // stored result with 0x80000000 and calling StubRoutines::d2i_wrapper().
2924   enc_class DPR2I_encoding( regDPR src ) %{
2925     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2926     // exceptions here, so that a NAN or other corner-case value will
2927     // thrown an exception (but normal values get converted at full speed).
2928     // However, I2C adapters and other float-stack manglers leave pending
2929     // invalid-op exceptions hanging.  We would have to clear them before
2930     // enabling them and that is more expensive than just testing for the
2931     // invalid value Intel stores down in the corner cases.
2932     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2933     emit_opcode(cbuf,0x2D);
2934     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2935     // Allocate a word
2936     emit_opcode(cbuf,0x83);            // SUB ESP,4
2937     emit_opcode(cbuf,0xEC);
2938     emit_d8(cbuf,0x04);
2939     // Encoding assumes a double has been pushed into FPR0.
2940     // Store down the double as an int, popping the FPU stack
2941     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2942     emit_opcode(cbuf,0x1C);
2943     emit_d8(cbuf,0x24);
2944     // Restore the rounding mode; mask the exception
2945     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2946     emit_opcode(cbuf,0x2D);
2947     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2948         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2949         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2950 
2951     // Load the converted int; adjust CPU stack
2952     emit_opcode(cbuf,0x58);       // POP EAX
2953     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2954     emit_d32   (cbuf,0x80000000); //         0x80000000
2955     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2956     emit_d8    (cbuf,0x07);       // Size of slow_call: FLD (2 bytes) + CALL rel32 (5 bytes)
2957     // Push src onto stack slow-path
2958     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
2959     emit_d8    (cbuf,0xC0-1+$src$$reg );
2960     // CALL directly to the runtime
2961     cbuf.set_insts_mark();
2962     emit_opcode(cbuf,0xE8);       // Call into runtime
2963     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2964     // Carry on here...
2965   %}
2966 
2967   enc_class DPR2L_encoding( regDPR src ) %{
         // Convert a double to a long: switch to the truncating control word,
         // FISTP a 64-bit integer to the stack, restore the control word and
         // pop the result into EDX:EAX.  If the result is exactly
         // 0x8000000000000000 (EDX == 0x80000000 and EAX == 0) the slow path
         // reloads src and calls StubRoutines::d2l_wrapper().
2968     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2969     emit_opcode(cbuf,0x2D);
2970     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2971     // Allocate two words (8 bytes)
2972     emit_opcode(cbuf,0x83);            // SUB ESP,8
2973     emit_opcode(cbuf,0xEC);
2974     emit_d8(cbuf,0x08);
2975     // Encoding assumes a double has been pushed into FPR0.
2976     // Store down the double as a long, popping the FPU stack
2977     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
2978     emit_opcode(cbuf,0x3C);
2979     emit_d8(cbuf,0x24);
2980     // Restore the rounding mode; mask the exception
2981     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2982     emit_opcode(cbuf,0x2D);
2983     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2984         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2985         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2986 
2987     // Load the converted long; adjust CPU stack
2988     emit_opcode(cbuf,0x58);       // POP EAX
2989     emit_opcode(cbuf,0x5A);       // POP EDX
2990     emit_opcode(cbuf,0x81);       // CMP EDX,imm
2991     emit_d8    (cbuf,0xFA);       // rdx
2992     emit_d32   (cbuf,0x80000000); //         0x80000000
2993     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2994     emit_d8    (cbuf,0x07+4);     // Size of slow_call plus the TEST (2 bytes) and JNE (2 bytes) below
2995     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
2996     emit_opcode(cbuf,0xC0);       // ModRM: register-direct, EAX,EAX
2997     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2998     emit_d8    (cbuf,0x07);       // Size of slow_call: FLD (2 bytes) + CALL rel32 (5 bytes)
2999     // Push src onto stack slow-path
3000     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3001     emit_d8    (cbuf,0xC0-1+$src$$reg );
3002     // CALL directly to the runtime
3003     cbuf.set_insts_mark();
3004     emit_opcode(cbuf,0xE8);       // Call into runtime
3005     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3006     // Carry on here...
3007   %}
3008 
3009   enc_class FMul_ST_reg( eRegFPR src1 ) %{
         // Multiplies the FPU top-of-stack by register src1; nothing is popped.
3010     // Operand was loaded from memory into fp ST (stack top)
3011     // FMUL   ST,$src  /* D8 C8+i */
3012     emit_opcode(cbuf, 0xD8);
3013     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3014   %}
3015 
3016   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
         // Adds register src2 to the FPU top-of-stack; nothing is popped.
3017     // FADD   ST,src2  /* D8 C0+i */
3018     emit_opcode(cbuf, 0xD8);
3019     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3020     //could use FADDP  src2,fpST  /* DE C0+i */
3021   %}
3022 
3023   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
         // Adds the FPU top-of-stack into register src2 and pops the stack.
3024     // FADDP  src2,ST  /* DE C0+i */
3025     emit_opcode(cbuf, 0xDE);
3026     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3027   %}
3028 
3029   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
         // Computes (ST - src1) / src2 in the FPU top-of-stack; nothing is popped.
3030     // Operand has been loaded into fp ST (stack top)
3031       // FSUB   ST,$src1   /* D8 E0+i */
3032       emit_opcode(cbuf, 0xD8);
3033       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3034 
3035       // FDIV   ST,$src2   /* D8 F0+i */
3036       emit_opcode(cbuf, 0xD8);
3037       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3038   %}
3039 
3040   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
         // As emitted: ST = (ST + src1) * src2; nothing is popped.
3041     // Operand was loaded from memory into fp ST (stack top)
3042     // FADD   ST,$src  /* D8 C0+i */
3043     emit_opcode(cbuf, 0xD8);
3044     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3045 
3046     // FMUL  ST,src2  /* D8 C8+i */
3047     emit_opcode(cbuf, 0xD8);
3048     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3049   %}
3050 
3051 
3052   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
         // As emitted: ST = ST + src1, then src2 = src2 * ST with the FPU stack
         // popped, so the result is left in src2 rather than on the stack top.
3053     // Operand was loaded from memory into fp ST (stack top)
3054     // FADD   ST,$src  /* D8 C0+i */
3055     emit_opcode(cbuf, 0xD8);
3056     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3057 
3058     // FMULP  src2,ST  /* DE C8+i */
3059     emit_opcode(cbuf, 0xDE);
3060     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3061   %}
3062 
3063   // Atomic 64-bit load: FILD the long from $mem, then FISTP it into stack slot $dst
3064   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3065     // Gather the pieces of the memory operand before emitting anything
3066     const int mem_base  = $mem$$base;
3067     const int mem_index = $mem$$index;
3068     const int mem_scale = $mem$$scale;
3069     const int mem_disp  = $mem$$disp;
3070     const relocInfo::relocType mem_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3071     emit_opcode(cbuf,0xDF);                                    // FILD  m64int is DF /5
3072     encode_RegMem(cbuf, 0x05, mem_base, mem_index, mem_scale, mem_disp, mem_reloc);
3073     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );       // FISTP m64int is DF /7
3074   %}
3075 
3076   // Volatile long store.  A 64-bit store must be atomic, so the value is
3077   // pushed onto the FP stack from its stack slot and written with one 64-bit
3078   // FISTP.  The target address has to be probed before the store (for
3079   // null-ptr checks), so the memory operand is used twice in the encoding.
3080   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3081     const int mem_base  = $mem$$base;
3082     const int mem_index = $mem$$index;
3083     const int mem_scale = $mem$$scale;
3084     const int mem_disp  = $mem$$disp;
3085     const relocInfo::relocType mem_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3086     // FILD qword [ESP+src]   (DF /5)
3087     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3088     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3089     emit_opcode(cbuf,0xDF);           // FISTP qword $mem   (DF /7)
3090     encode_RegMem(cbuf, 0x07, mem_base, mem_index, mem_scale, mem_disp, mem_reloc);
3091   %}
3092 
3093   // Safepoint Poll.  This polls the safepoint page, and causes an
3094   // exception if it is not readable. Unfortunately, it kills the condition code
3095   // in the process
3096   // We current use TESTL [spp],EDI
3097   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3098 
3099   enc_class Safepoint_Poll() %{
3100     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);  // record a poll_type relocation at the current insts mark
3101     emit_opcode(cbuf,0x85);                                     // 0x85 = TEST r/m32, r32
3102     emit_rm (cbuf, 0x0, 0x7, 0x5);                              // ModRM: mod=0, reg=7 (EDI), rm=5 (absolute disp32, no base)
3103     emit_d32(cbuf, (intptr_t)os::get_polling_page());           // disp32 = address of the polling page
3104   %}
3105 %}
3106 
3107 
3108 //----------FRAME--------------------------------------------------------------
3109 // Definition of frame structure and management information.
3110 //
3111 //  S T A C K   L A Y O U T    Allocators stack-slot number
3112 //                             |   (to get allocators register number
3113 //  G  Owned by    |        |  v    add OptoReg::stack0())
3114 //  r   CALLER     |        |
3115 //  o     |        +--------+      pad to even-align allocators stack-slot
3116 //  w     V        |  pad0  |        numbers; owned by CALLER
3117 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3118 //  h     ^        |   in   |  5
3119 //        |        |  args  |  4   Holes in incoming args owned by SELF
3120 //  |     |        |        |  3
3121 //  |     |        +--------+
3122 //  V     |        | old out|      Empty on Intel, window on Sparc
3123 //        |    old |preserve|      Must be even aligned.
3124 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3125 //        |        |   in   |  3   area for Intel ret address
3126 //     Owned by    |preserve|      Empty on Sparc.
3127 //       SELF      +--------+
3128 //        |        |  pad2  |  2   pad to align old SP
3129 //        |        +--------+  1
3130 //        |        | locks  |  0
3131 //        |        +--------+----> OptoReg::stack0(), even aligned
3132 //        |        |  pad1  | 11   pad to align new SP
3133 //        |        +--------+
3134 //        |        |        | 10
3135 //        |        | spills |  9   spills
3136 //        V        |        |  8   (pad0 slot for callee)
3137 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3138 //        ^        |  out   |  7
3139 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3140 //     Owned by    +--------+
3141 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3142 //        |    new |preserve|      Must be even-aligned.
3143 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3144 //        |        |        |
3145 //
3146 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3147 //         known from SELF's arguments and the Java calling convention.
3148 //         Region 6-7 is determined per call site.
3149 // Note 2: If the calling convention leaves holes in the incoming argument
3150 //         area, those holes are owned by SELF.  Holes in the outgoing area
3151 //         are owned by the CALLEE.  Holes should not be necessary in the
3152 //         incoming area, as the Java calling convention is completely under
3153 //         the control of the AD file.  Doubles can be sorted and packed to
3154 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3155 //         varargs C calling conventions.
3156 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3157 //         even aligned with pad0 as needed.
3158 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3159 //         region 6-11 is even aligned; it may be padded out more so that
3160 //         the region from SP to FP meets the minimum stack alignment.
3161 
3162 frame %{
3163   // What direction does stack grow in (assumed to be same for C & Java)
3164   stack_direction(TOWARDS_LOW);
3165 
3166   // These two registers define part of the calling convention
3167   // between compiled code and the interpreter.
3168   inline_cache_reg(EAX);                // Inline Cache Register
3169   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3170 
3171   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3172   cisc_spilling_operand_name(indOffset32);
3173 
3174   // Number of stack slots consumed by locking an object
3175   sync_stack_slots(1);
3176 
3177   // Compiled code's Frame Pointer
3178   frame_pointer(ESP);
3179   // Interpreter stores its frame pointer in a register which is
3180   // stored to the stack by I2CAdaptors.
3181   // I2CAdaptors convert from interpreted java to compiled java.
3182   interpreter_frame_pointer(EBP);
3183 
3184   // Stack alignment requirement
3185   // Alignment size in bytes (128-bit -> 16 bytes)
3186   stack_alignment(StackAlignmentInBytes);
3187 
3188   // Number of stack slots between incoming argument block and the start of
3189   // a new frame.  The PROLOG must add this many slots to the stack.  The
3190   // EPILOG must remove this many slots.  Intel needs one slot for the
3191   // return address and one for EBP (EBP must be saved).
3192   in_preserve_stack_slots(2+VerifyStackAtCalls);  // 2 slots plus the value of VerifyStackAtCalls (presumably 0 or 1)
3193 
3194   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3195   // for calls to C.  Supports the var-args backing area for register parms.
3196   varargs_C_out_slots_killed(0);
3197 
3198   // The after-PROLOG location of the return address.  Location of
3199   // return address specifies a type (REG or STACK) and a number
3200   // representing the register number (i.e. - use a register name) or
3201   // stack slot.
3202   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3203   // Otherwise, it is above the locks and verification slot and alignment word
3204   return_addr(STACK - 1 +
3205               round_to((Compile::current()->in_preserve_stack_slots() +
3206                         Compile::current()->fixed_slots()),
3207                        stack_alignment_in_slots()));
3208 
3209   // Body of function which returns an integer array locating
3210   // arguments either in registers or in stack slots.  Passed an array
3211   // of ideal registers called "sig" and a "length" count.  Stack-slot
3212   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3213   // arguments for a CALLEE.  Incoming stack arguments are
3214   // automatically biased by the preserve_stack_slots field above.
3215   calling_convention %{
3216     // No difference between ingoing/outgoing just pass false
3217     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3218   %}
3219 
3220 
3221   // Body of function which returns an integer array locating
3222   // arguments for a call out to C, either in registers or in stack
3223   // slots.  Passed an array of ideal registers called "sig" and a
3224   // "length" count.  Stack-slot offsets are based on outgoing
3225   // arguments, i.e. a CALLER setting up arguments for a CALLEE.
3226   // The work is delegated to SharedRuntime::c_calling_convention.
3227   c_calling_convention %{
3228     // This is obviously always outgoing
3229     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3230   %}
3231 
3232   // Location of C & interpreter return values
3233   c_return_value %{
3234     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );  // the tables below are indexed directly by ideal_reg
3235     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3236     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3237 
3238     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3239     // that C functions return float and double results in XMM0.
3240     if( ideal_reg == Op_RegD && UseSSE>=2 )
3241       return OptoRegPair(XMM0b_num,XMM0_num);
3242     if( ideal_reg == Op_RegF && UseSSE>=2 )
3243       return OptoRegPair(OptoReg::Bad,XMM0_num);
3244 
3245     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);  // OptoRegPair takes (hi, lo)
3246   %}
3247 
3248   // Location of return values
3249   return_value %{
3250     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );  // the tables below are indexed directly by ideal_reg
3251     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3252     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3253     if( ideal_reg == Op_RegD && UseSSE>=2 )           // doubles come back in XMM0 only with SSE2+
3254       return OptoRegPair(XMM0b_num,XMM0_num);
3255     if( ideal_reg == Op_RegF && UseSSE>=1 )           // note: threshold is UseSSE>=1 here but UseSSE>=2 in c_return_value
3256       return OptoRegPair(OptoReg::Bad,XMM0_num);
3257     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);  // OptoRegPair takes (hi, lo)
3258   %}
3259 
3260 %}
3261 
3262 //----------ATTRIBUTES---------------------------------------------------------
3263 //----------Operand Attributes-------------------------------------------------
3264 op_attrib op_cost(0);        // Required cost attribute (default 0; operands may override it)
3265 
3266 //----------Instruction Attributes---------------------------------------------
3267 ins_attrib ins_cost(100);       // Required cost attribute
3268 ins_attrib ins_size(8);         // Required size attribute (in bits)
3269 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3270                                 // non-matching short branch variant of some
3271                                 // long branch?
3272 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3273                                 // specifies the alignment that some part of the instruction (not
3274                                 // necessarily the start) requires.  If > 1, a compute_padding()
3275                                 // function must be provided for the instruction
3276 
3277 //----------OPERANDS-----------------------------------------------------------
3278 // Operand definitions must precede instruction definitions for correct parsing
3279 // in the ADLC because operands constitute user defined types which are used in
3280 // instruction definitions.
3281 
3282 //----------Simple Operands----------------------------------------------------
3283 // Immediate Operands
3284 // Integer Immediate
3285 operand immI() %{
3286   match(ConI);                 // any integer constant; no predicate restricts the value
3287 
3288   op_cost(10);
3289   format %{ %}
3290   interface(CONST_INTER);      // exposed to instructions as a constant
3291 %}
3292 
3293 // Constant for test vs zero
3294 operand immI0() %{
3295   predicate(n->get_int() == 0);   // accept only the constant 0
3296   match(ConI);
3297 
3298   op_cost(0);                     // cheaper than the generic immI (cost 10)
3299   format %{ %}
3300   interface(CONST_INTER);
3301 %}
3302 
3303 // Constant for increment
3304 operand immI1() %{
3305   predicate(n->get_int() == 1);   // accept only the constant 1
3306   match(ConI);
3307 
3308   op_cost(0);
3309   format %{ %}
3310   interface(CONST_INTER);
3311 %}
3312 
3313 // Constant for decrement
3314 operand immI_M1() %{
3315   predicate(n->get_int() == -1);  // accept only the constant -1
3316   match(ConI);
3317 
3318   op_cost(0);
3319   format %{ %}
3320   interface(CONST_INTER);
3321 %}
3322 
3323 // Valid scale values for addressing modes
3324 operand immI2() %{
3325   predicate(0 <= n->get_int() && (n->get_int() <= 3));  // 0..3 inclusive, i.e. a two-bit scale value
3326   match(ConI);
3327 
3328   format %{ %}
3329   interface(CONST_INTER);
3330 %}
3331 
3332 operand immI8() %{
3333   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));  // value fits in a signed 8-bit immediate
3334   match(ConI);
3335 
3336   op_cost(5);
3337   format %{ %}
3338   interface(CONST_INTER);
3339 %}
3340 
3341 operand immI16() %{
3342   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));  // value fits in a signed 16-bit immediate
3343   match(ConI);
3344 
3345   op_cost(10);
3346   format %{ %}
3347   interface(CONST_INTER);
3348 %}
3349 
3350 // Int Immediate non-negative
3351 operand immU31()
3352 %{
3353   predicate(n->get_int() >= 0);   // non-negative int, i.e. the sign bit is clear
3354   match(ConI);
3355 
3356   op_cost(0);
3357   format %{ %}
3358   interface(CONST_INTER);
3359 %}
3360 
3361 // Constant for long shifts
3362 operand immI_32() %{
3363   predicate( n->get_int() == 32 );  // accept only the constant 32
3364   match(ConI);
3365 
3366   op_cost(0);
3367   format %{ %}
3368   interface(CONST_INTER);
3369 %}
3370 
3371 operand immI_1_31() %{
3372   predicate( n->get_int() >= 1 && n->get_int() <= 31 );  // 1..31 inclusive
3373   match(ConI);
3374 
3375   op_cost(0);
3376   format %{ %}
3377   interface(CONST_INTER);
3378 %}
3379 
3380 operand immI_32_63() %{
3381   predicate( n->get_int() >= 32 && n->get_int() <= 63 );  // 32..63 inclusive
3382   match(ConI);
3383   op_cost(0);
3384 
3385   format %{ %}
3386   interface(CONST_INTER);
3387 %}
3388 
3389 operand immI_1() %{
3390   predicate( n->get_int() == 1 );  // accept only the constant 1 (same test as immI1)
3391   match(ConI);
3392 
3393   op_cost(0);
3394   format %{ %}
3395   interface(CONST_INTER);
3396 %}
3397 
3398 operand immI_2() %{
3399   predicate( n->get_int() == 2 );  // accept only the constant 2
3400   match(ConI);
3401 
3402   op_cost(0);
3403   format %{ %}
3404   interface(CONST_INTER);
3405 %}
3406 
3407 operand immI_3() %{
3408   predicate( n->get_int() == 3 );  // accept only the constant 3
3409   match(ConI);
3410 
3411   op_cost(0);
3412   format %{ %}
3413   interface(CONST_INTER);
3414 %}
3415 
3416 // Pointer Immediate
3417 operand immP() %{
3418   match(ConP);                 // any pointer constant; no predicate restricts the value
3419 
3420   op_cost(10);
3421   format %{ %}
3422   interface(CONST_INTER);
3423 %}
3424 
3425 // NULL Pointer Immediate
3426 operand immP0() %{
3427   predicate( n->get_ptr() == 0 );  // accept only the null pointer constant
3428   match(ConP);
3429   op_cost(0);
3430 
3431   format %{ %}
3432   interface(CONST_INTER);
3433 %}
3434 
3435 // Long Immediate
3436 operand immL() %{
3437   match(ConL);                 // any long constant; no predicate restricts the value
3438 
3439   op_cost(20);
3440   format %{ %}
3441   interface(CONST_INTER);
3442 %}
3443 
3444 // Long Immediate zero
3445 operand immL0() %{
3446   predicate( n->get_long() == 0L );  // accept only the long constant 0
3447   match(ConL);
3448   op_cost(0);
3449 
3450   format %{ %}
3451   interface(CONST_INTER);
3452 %}
3453 
3454 // Long Immediate minus one
3455 operand immL_M1() %{
3456   predicate( n->get_long() == -1L );  // accept only the long constant -1
3457   match(ConL);
3458   op_cost(0);
3459 
3460   format %{ %}
3461   interface(CONST_INTER);
3462 %}
3463 
3464 // Long immediate from 0 to 127.
3465 // Used for a shorter form of long mul by 10.
3466 operand immL_127() %{
3467   predicate((0 <= n->get_long()) && (n->get_long() <= 127));  // 0..127 inclusive
3468   match(ConL);
3469   op_cost(0);
3470 
3471   format %{ %}
3472   interface(CONST_INTER);
3473 %}
3474 
3475 // Long Immediate: low 32-bit mask
3476 operand immL_32bits() %{
3477   predicate(n->get_long() == 0xFFFFFFFFL);  // exactly the mask with the low 32 bits set
3478   match(ConL);
3479   op_cost(0);
3480 
3481   format %{ %}
3482   interface(CONST_INTER);
3483 %}
3484 
3485 // Long Immediate that fits in a sign-extended 32-bit int
3486 operand immL32() %{
3487   predicate(n->get_long() == (int)(n->get_long()));  // true when truncating to int and widening back loses nothing
3488   match(ConL);
3489   op_cost(20);
3490 
3491   format %{ %}
3492   interface(CONST_INTER);
3493 %}
3494 
3495 //Double Immediate zero
3496 operand immDPR0() %{
3497   // Do additional (and counter-intuitive) test against NaN to work around VC++
3498   // bug that generates code such that NaNs compare equal to 0.0
3499   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3500   match(ConD);                 // NOTE(review): "== 0.0" is also true for -0.0 -- confirm users of this operand tolerate that
3501 
3502   op_cost(5);
3503   format %{ %}
3504   interface(CONST_INTER);
3505 %}
3506 
3507 // Double Immediate one
3508 operand immDPR1() %{
3509   predicate( UseSSE<=1 && n->getd() == 1.0 );  // the double constant 1.0, only when UseSSE<=1
3510   match(ConD);
3511 
3512   op_cost(5);
3513   format %{ %}
3514   interface(CONST_INTER);
3515 %}
3516 
3517 // Double Immediate
3518 operand immDPR() %{
3519   predicate(UseSSE<=1);        // any double constant, only when UseSSE<=1 (immD covers UseSSE>=2)
3520   match(ConD);
3521 
3522   op_cost(5);
3523   format %{ %}
3524   interface(CONST_INTER);
3525 %}
3526 
3527 operand immD() %{
3528   predicate(UseSSE>=2);        // any double constant, only when UseSSE>=2 (immDPR covers UseSSE<=1)
3529   match(ConD);
3530 
3531   op_cost(5);
3532   format %{ %}
3533   interface(CONST_INTER);
3534 %}
3535 
3536 // Double Immediate zero
3537 operand immD0() %{
3538   // Test the value through jlong_cast rather than with "== 0.0"; presumably
3539   // jlong_cast yields the raw bit pattern, so only +0.0 matches (not -0.0 or
3540   // NaN), sidestepping the VC++ bug where NaNs compare equal to 0.0 -- TODO confirm.
3541   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3542   match(ConD);
3543 
3544   format %{ %}
3545   interface(CONST_INTER);
3546 %}
3547 
3548 // Float Immediate zero
3549 operand immFPR0() %{
3550   predicate(UseSSE == 0 && n->getf() == 0.0F);  // float zero, only when UseSSE == 0
3551   match(ConF);                 // NOTE(review): "== 0.0F" is also true for -0.0F -- confirm users of this operand tolerate that
3552 
3553   op_cost(5);
3554   format %{ %}
3555   interface(CONST_INTER);
3556 %}
3557 
3558 // Float Immediate one
3559 operand immFPR1() %{
3560   predicate(UseSSE == 0 && n->getf() == 1.0F);  // the float constant 1.0, only when UseSSE == 0
3561   match(ConF);
3562 
3563   op_cost(5);
3564   format %{ %}
3565   interface(CONST_INTER);
3566 %}
3567 
3568 // Float Immediate
3569 operand immFPR() %{
3570   predicate( UseSSE == 0 );    // any float constant, only when UseSSE == 0 (immF covers UseSSE >= 1)
3571   match(ConF);
3572 
3573   op_cost(5);
3574   format %{ %}
3575   interface(CONST_INTER);
3576 %}
3577 
3578 // Float Immediate
3579 operand immF() %{
3580   predicate(UseSSE >= 1);      // any float constant, only when UseSSE >= 1 (immFPR covers UseSSE == 0)
3581   match(ConF);
3582 
3583   op_cost(5);
3584   format %{ %}
3585   interface(CONST_INTER);
3586 %}
3587 
3588 // Float Immediate zero.  Zero and not -0.0
3589 operand immF0() %{
3590   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );  // tested via jint_cast, presumably the raw bits, so -0.0 does not match
3591   match(ConF);
3592 
3593   op_cost(5);
3594   format %{ %}
3595   interface(CONST_INTER);
3596 %}
3597 
3598 // Immediates for special shifts (sign extend)
3599 
3600 // Constants 16 and 24
3601 operand immI_16() %{
3602   predicate( n->get_int() == 16 );  // accept only the constant 16
3603   match(ConI);
3604 
3605   format %{ %}
3606   interface(CONST_INTER);
3607 %}
3608 
3609 operand immI_24() %{
3610   predicate( n->get_int() == 24 );  // accept only the constant 24
3611   match(ConI);
3612 
3613   format %{ %}
3614   interface(CONST_INTER);
3615 %}
3616 
3617 // Constant for byte-wide masking
3618 operand immI_255() %{
3619   predicate( n->get_int() == 255 );  // accept only 255 (0xFF)
3620   match(ConI);
3621 
3622   format %{ %}
3623   interface(CONST_INTER);
3624 %}
3625 
3626 // Constant for short-wide masking
3627 operand immI_65535() %{
3628   predicate(n->get_int() == 65535);  // accept only 65535 (0xFFFF)
3629   match(ConI);
3630 
3631   format %{ %}
3632   interface(CONST_INTER);
3633 %}
3634 
3635 // Register Operands
3636 // Integer Register
3637 operand rRegI() %{
3638   constraint(ALLOC_IN_RC(int_reg));   // allocate from register class int_reg
3639   match(RegI);                        // any int-typed register value
3640   match(xRegI);                       // the more specific int register operands below are accepted too
3641   match(eAXRegI);
3642   match(eBXRegI);
3643   match(eCXRegI);
3644   match(eDXRegI);
3645   match(eDIRegI);
3646   match(eSIRegI);
3647 
3648   format %{ %}
3649   interface(REG_INTER);               // exposed to instructions as a register
3650 %}
3651 
3652 // Subset of Integer Register
3653 operand xRegI(rRegI reg) %{
3654   constraint(ALLOC_IN_RC(int_x_reg));  // allocate from class int_x_reg -- presumably EAX/EBX/ECX/EDX, judging by the match list; confirm against the class definition
3655   match(reg);                          // accepts an rRegI value
3656   match(eAXRegI);
3657   match(eBXRegI);
3658   match(eCXRegI);
3659   match(eDXRegI);
3660 
3661   format %{ %}
3662   interface(REG_INTER);
3663 %}
3664 
3665 // Special Registers
3666 operand eAXRegI(xRegI reg) %{
3667   constraint(ALLOC_IN_RC(eax_reg));   // int value pinned to register class eax_reg
3668   match(reg);                         // accepts an xRegI value
3669   match(rRegI);                       // ... or a general rRegI value
3670 
3671   format %{ "EAX" %}
3672   interface(REG_INTER);
3673 %}
3674 
3675 // Special Registers
3676 operand eBXRegI(xRegI reg) %{
3677   constraint(ALLOC_IN_RC(ebx_reg));   // int value pinned to register class ebx_reg
3678   match(reg);                         // accepts an xRegI value
3679   match(rRegI);                       // ... or a general rRegI value
3680 
3681   format %{ "EBX" %}
3682   interface(REG_INTER);
3683 %}
3684 
3685 operand eCXRegI(xRegI reg) %{
3686   constraint(ALLOC_IN_RC(ecx_reg));   // int value pinned to register class ecx_reg
3687   match(reg);                         // accepts an xRegI value
3688   match(rRegI);                       // ... or a general rRegI value
3689 
3690   format %{ "ECX" %}
3691   interface(REG_INTER);
3692 %}
3693 
3694 operand eDXRegI(xRegI reg) %{
3695   constraint(ALLOC_IN_RC(edx_reg));   // int value pinned to register class edx_reg
3696   match(reg);                         // accepts an xRegI value
3697   match(rRegI);                       // ... or a general rRegI value
3698 
3699   format %{ "EDX" %}
3700   interface(REG_INTER);
3701 %}
3702 
3703 operand eDIRegI(xRegI reg) %{
3704   constraint(ALLOC_IN_RC(edi_reg));   // int value pinned to register class edi_reg
3705   match(reg);                         // accepts an xRegI value
3706   match(rRegI);                       // ... or a general rRegI value
3707 
3708   format %{ "EDI" %}
3709   interface(REG_INTER);
3710 %}
3711 
3712 operand naxRegI() %{
3713   constraint(ALLOC_IN_RC(nax_reg));   // allocate from class nax_reg -- presumably "not EAX"; the match list omits eAXRegI (and also eBXRegI)
3714   match(RegI);
3715   match(eCXRegI);
3716   match(eDXRegI);
3717   match(eSIRegI);
3718   match(eDIRegI);
3719 
3720   format %{ %}
3721   interface(REG_INTER);
3722 %}
3723 
3724 operand nadxRegI() %{
3725   constraint(ALLOC_IN_RC(nadx_reg));  // allocate from class nadx_reg -- presumably "not EAX, not EDX"; the match list omits both
3726   match(RegI);
3727   match(eBXRegI);
3728   match(eCXRegI);
3729   match(eSIRegI);
3730   match(eDIRegI);
3731 
3732   format %{ %}
3733   interface(REG_INTER);
3734 %}
3735 
3736 operand ncxRegI() %{
3737   constraint(ALLOC_IN_RC(ncx_reg));   // allocate from class ncx_reg -- presumably "not ECX"; the match list omits eCXRegI (and also eBXRegI)
3738   match(RegI);
3739   match(eAXRegI);
3740   match(eDXRegI);
3741   match(eSIRegI);
3742   match(eDIRegI);
3743 
3744   format %{ %}
3745   interface(REG_INTER);
3746 %}
3747 
3748 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3749 // //
3750 operand eSIRegI(xRegI reg) %{
3751    constraint(ALLOC_IN_RC(esi_reg));  // int value pinned to register class esi_reg
3752    match(reg);                        // accepts an xRegI value
3753    match(rRegI);                      // ... or a general rRegI value
3754 
3755    format %{ "ESI" %}
3756    interface(REG_INTER);
3757 %}
3758 
3759 // Pointer Register
3760 operand anyRegP() %{
3761   constraint(ALLOC_IN_RC(any_reg));   // allocate from register class any_reg
3762   match(RegP);                        // any pointer-typed register value
3763   match(eAXRegP);                     // the more specific pointer register operands are accepted too
3764   match(eBXRegP);
3765   match(eCXRegP);
3766   match(eDIRegP);
3767   match(eRegP);
3768 
3769   format %{ %}
3770   interface(REG_INTER);
3771 %}
3772 
3773 operand eRegP() %{
3774   constraint(ALLOC_IN_RC(int_reg));   // pointers share register class int_reg with rRegI
3775   match(RegP);
3776   match(eAXRegP);
3777   match(eBXRegP);
3778   match(eCXRegP);
3779   match(eDIRegP);
3780 
3781   format %{ %}
3782   interface(REG_INTER);
3783 %}
3784 
3785 // On windows95, EBP is not safe to use for implicit null tests.
3786 operand eRegP_no_EBP() %{
3787   constraint(ALLOC_IN_RC(int_reg_no_ebp));  // allocate from class int_reg_no_ebp
3788   match(RegP);
3789   match(eAXRegP);
3790   match(eBXRegP);
3791   match(eCXRegP);
3792   match(eDIRegP);
3793 
3794   op_cost(100);                             // the only pointer register operand here with a non-default cost
3795   format %{ %}
3796   interface(REG_INTER);
3797 %}
3798 
3799 operand naxRegP() %{
3800   constraint(ALLOC_IN_RC(nax_reg));   // allocate from class nax_reg -- presumably "not EAX"; the match list omits eAXRegP
3801   match(RegP);
3802   match(eBXRegP);
3803   match(eDXRegP);                     // NOTE(review): eDXRegP is not defined in this part of the file -- confirm it exists elsewhere
3804   match(eCXRegP);
3805   match(eSIRegP);
3806   match(eDIRegP);
3807 
3808   format %{ %}
3809   interface(REG_INTER);
3810 %}
3811 
3812 operand nabxRegP() %{
3813   constraint(ALLOC_IN_RC(nabx_reg));  // allocate from class nabx_reg -- presumably "not EAX, not EBX"; the match list omits both
3814   match(RegP);
3815   match(eCXRegP);
3816   match(eDXRegP);
3817   match(eSIRegP);
3818   match(eDIRegP);
3819 
3820   format %{ %}
3821   interface(REG_INTER);
3822 %}
3823 
3824 operand pRegP() %{
3825   constraint(ALLOC_IN_RC(p_reg));     // allocate from class p_reg (defined elsewhere); the match list omits eAXRegP and eCXRegP
3826   match(RegP);
3827   match(eBXRegP);
3828   match(eDXRegP);
3829   match(eSIRegP);
3830   match(eDIRegP);
3831 
3832   format %{ %}
3833   interface(REG_INTER);
3834 %}
3835 
3836 // Special Registers
3837 // Return a pointer value
3838 operand eAXRegP(eRegP reg) %{
3839   constraint(ALLOC_IN_RC(eax_reg));   // pointer value pinned to register class eax_reg
3840   match(reg);                         // accepts an eRegP value
3841   format %{ "EAX" %}
3842   interface(REG_INTER);
3843 %}
3844 
3845 // Used in AtomicAdd
3846 operand eBXRegP(eRegP reg) %{
3847   constraint(ALLOC_IN_RC(ebx_reg));   // pointer value pinned to register class ebx_reg
3848   match(reg);                         // accepts an eRegP value
3849   format %{ "EBX" %}
3850   interface(REG_INTER);
3851 %}
3852 
3853 // Tail-call (interprocedural jump) to interpreter
3854 operand eCXRegP(eRegP reg) %{
3855   constraint(ALLOC_IN_RC(ecx_reg));   // pointer value pinned to register class ecx_reg
3856   match(reg);                         // accepts an eRegP value
3857   format %{ "ECX" %}
3858   interface(REG_INTER);
3859 %}
3860 
3861 operand eSIRegP(eRegP reg) %{
3862   constraint(ALLOC_IN_RC(esi_reg));   // pointer value pinned to register class esi_reg
3863   match(reg);                         // accepts an eRegP value
3864   format %{ "ESI" %}
3865   interface(REG_INTER);
3866 %}
3867 
3868 // Used in rep stosw
3869 operand eDIRegP(eRegP reg) %{
3870   constraint(ALLOC_IN_RC(edi_reg));   // pointer value pinned to register class edi_reg
3871   match(reg);                         // accepts an eRegP value
3872   format %{ "EDI" %}
3873   interface(REG_INTER);
3874 %}
3875 
3876 operand eRegL() %{
3877   constraint(ALLOC_IN_RC(long_reg));  // allocate from register class long_reg
3878   match(RegL);                        // any long-typed register value
3879   match(eADXRegL);                    // NOTE(review): eBCXRegL is not listed here -- confirm that is intended
3880 
3881   format %{ %}
3882   interface(REG_INTER);
3883 %}
3884 
3885 operand eADXRegL( eRegL reg ) %{
3886   constraint(ALLOC_IN_RC(eadx_reg));  // long value pinned to class eadx_reg (printed as the EDX:EAX pair)
3887   match(reg);                         // accepts an eRegL value
3888 
3889   format %{ "EDX:EAX" %}
3890   interface(REG_INTER);
3891 %}
3892 
3893 operand eBCXRegL( eRegL reg ) %{
3894   constraint(ALLOC_IN_RC(ebcx_reg));  // long value pinned to class ebcx_reg (printed as the EBX:ECX pair)
3895   match(reg);                         // accepts an eRegL value
3896 
3897   format %{ "EBX:ECX" %}
3898   interface(REG_INTER);
3899 %}
3900 
3901 // Special case for integer high multiply
3902 operand eADXRegL_low_only() %{
3903   constraint(ALLOC_IN_RC(eadx_reg));  // same register class as eADXRegL ...
3904   match(RegL);
3905 
3906   format %{ "EAX" %}
3907   interface(REG_INTER);               // ... but the format string above names only EAX
3908 %}
3909 
3910 // Flags register, used as output of compare instructions
3911 operand eFlagsReg() %{
3912   constraint(ALLOC_IN_RC(int_flags));  // allocate from register class int_flags
3913   match(RegFlags);
3914 
3915   format %{ "EFLAGS" %}
3916   interface(REG_INTER);
3917 %}
3918 
3919 // Flags register, used as output of FLOATING POINT compare instructions
3920 operand eFlagsRegU() %{
3921   constraint(ALLOC_IN_RC(int_flags));  // same register class as eFlagsReg; a distinct operand type for the matcher
3922   match(RegFlags);
3923 
3924   format %{ "EFLAGS_U" %}
3925   interface(REG_INTER);
3926 %}
3927 
3928 operand eFlagsRegUCF() %{
3929   constraint(ALLOC_IN_RC(int_flags));  // same register class as eFlagsReg
3930   match(RegFlags);
3931   predicate(false);                    // constant-false predicate; NOTE(review): presumably this operand is only named explicitly by instructions -- confirm
3932 
3933   format %{ "EFLAGS_U_CF" %}
3934   interface(REG_INTER);
3935 %}
3936 
3937 // Condition Code Register used by long compare
3938 operand flagsReg_long_LTGE() %{
3939   constraint(ALLOC_IN_RC(int_flags));  // flags value in class int_flags; printed as FLAGS_LTGE
3940   match(RegFlags);
3941   format %{ "FLAGS_LTGE" %}
3942   interface(REG_INTER);
3943 %}
3944 operand flagsReg_long_EQNE() %{
3945   constraint(ALLOC_IN_RC(int_flags));  // flags value in class int_flags; printed as FLAGS_EQNE
3946   match(RegFlags);
3947   format %{ "FLAGS_EQNE" %}
3948   interface(REG_INTER);
3949 %}
3950 operand flagsReg_long_LEGT() %{
3951   constraint(ALLOC_IN_RC(int_flags));  // flags value in class int_flags; printed as FLAGS_LEGT
3952   match(RegFlags);
3953   format %{ "FLAGS_LEGT" %}
3954   interface(REG_INTER);
3955 %}
3956 
3957 // Condition Code Register used by unsigned long compare
3958 operand flagsReg_ulong_LTGE() %{
3959   constraint(ALLOC_IN_RC(int_flags));  // flags value in class int_flags; printed as FLAGS_U_LTGE
3960   match(RegFlags);
3961   format %{ "FLAGS_U_LTGE" %}
3962   interface(REG_INTER);
3963 %}
3964 operand flagsReg_ulong_EQNE() %{
3965   constraint(ALLOC_IN_RC(int_flags));  // flags value in class int_flags; printed as FLAGS_U_EQNE
3966   match(RegFlags);
3967   format %{ "FLAGS_U_EQNE" %}
3968   interface(REG_INTER);
3969 %}
3970 operand flagsReg_ulong_LEGT() %{
3971   constraint(ALLOC_IN_RC(int_flags));  // flags value in class int_flags; printed as FLAGS_U_LEGT
3972   match(RegFlags);
3973   format %{ "FLAGS_U_LEGT" %}
3974   interface(REG_INTER);
3975 %}
3976 
3977 // Double register operands (FPR register classes, used when UseSSE < 2)
3978 operand regDPR() %{
3979   predicate( UseSSE < 2 );               // only when UseSSE < 2 (regD covers UseSSE>=2)
3980   constraint(ALLOC_IN_RC(fp_dbl_reg));   // allocate from register class fp_dbl_reg
3981   match(RegD);
3982   match(regDPR1);
3983   match(regDPR2);
3984   format %{ %}
3985   interface(REG_INTER);
3986 %}
3987 
3988 operand regDPR1(regDPR reg) %{
3989   predicate( UseSSE < 2 );
3990   constraint(ALLOC_IN_RC(fp_dbl_reg0));  // double pinned to class fp_dbl_reg0 (printed as FPR1)
3991   match(reg);                            // accepts a regDPR value
3992   format %{ "FPR1" %}
3993   interface(REG_INTER);
3994 %}
3995 
3996 operand regDPR2(regDPR reg) %{
3997   predicate( UseSSE < 2 );
3998   constraint(ALLOC_IN_RC(fp_dbl_reg1));  // double pinned to class fp_dbl_reg1 (printed as FPR2)
3999   match(reg);                            // accepts a regDPR value
4000   format %{ "FPR2" %}
4001   interface(REG_INTER);
4002 %}
4003 
4004 operand regnotDPR1(regDPR reg) %{
4005   predicate( UseSSE < 2 );
4006   constraint(ALLOC_IN_RC(fp_dbl_notreg0));  // class fp_dbl_notreg0 -- presumably every double FPR except the one in fp_dbl_reg0; confirm against the class definition
4007   match(reg);                               // accepts a regDPR value
4008   format %{ %}
4009   interface(REG_INTER);
4010 %}
4011 
4012 // Float register operands
4013 operand regFPR() %{
4014   predicate( UseSSE < 2 );               // NOTE(review): regF is enabled for UseSSE>=1, so both predicates hold at UseSSE==1 -- confirm intended
4015   constraint(ALLOC_IN_RC(fp_flt_reg));   // allocate from register class fp_flt_reg
4016   match(RegF);
4017   match(regFPR1);
4018   format %{ %}
4019   interface(REG_INTER);
4020 %}
4021 
4022 // Float register operands
4023 operand regFPR1(regFPR reg) %{
4024   predicate( UseSSE < 2 );
4025   constraint(ALLOC_IN_RC(fp_flt_reg0));  // float pinned to class fp_flt_reg0 (printed as FPR1)
4026   match(reg);                            // accepts a regFPR value
4027   format %{ "FPR1" %}
4028   interface(REG_INTER);
4029 %}
4030 
4031 // XMM Float register operands
4032 operand regF() %{
4033   predicate( UseSSE>=1 );                // float in an XMM register requires UseSSE>=1
4034   constraint(ALLOC_IN_RC(float_reg));    // allocate from register class float_reg
4035   match(RegF);
4036   format %{ %}
4037   interface(REG_INTER);
4038 %}
4039 
4040 // XMM Double register operands
4041 operand regD() %{
4042   predicate( UseSSE>=2 );                // double in an XMM register requires UseSSE>=2
4043   constraint(ALLOC_IN_RC(double_reg));   // allocate from register class double_reg
4044   match(RegD);
4045   format %{ %}
4046   interface(REG_INTER);
4047 %}
4048 
4049 
4050 //----------Memory Operands----------------------------------------------------
4051 // Direct Memory Operand
4052 operand direct(immP addr) %{
4053   match(addr);                // matches a bare pointer constant used as an address
4054 
4055   format %{ "[$addr]" %}
4056   interface(MEMORY_INTER) %{
4057     base(0xFFFFFFFF);         // presumably the "no base register" marker -- confirm against the encoder
4058     index(0x4);               // 0x4 = no index
4059     scale(0x0);
4060     disp($addr);              // the pointer constant itself is the displacement
4061   %}
4062 %}
4063 
4064 // Indirect Memory Operand
4065 operand indirect(eRegP reg) %{
4066   constraint(ALLOC_IN_RC(int_reg));
4067   match(reg);                 // a pointer register used directly as the address
4068 
4069   format %{ "[$reg]" %}
4070   interface(MEMORY_INTER) %{
4071     base($reg);
4072     index(0x4);               // 0x4 = no index
4073     scale(0x0);
4074     disp(0x0);                // no displacement
4075   %}
4076 %}
4077 
4078 // Indirect Memory Plus Short Offset Operand
4079 operand indOffset8(eRegP reg, immI8 off) %{
4080   match(AddP reg off);        // pointer register plus a constant that fits in 8 bits (immI8)
4081 
4082   format %{ "[$reg + $off]" %}
4083   interface(MEMORY_INTER) %{
4084     base($reg);
4085     index(0x4);               // 0x4 = no index
4086     scale(0x0);
4087     disp($off);
4088   %}
4089 %}
4090 
4091 // Indirect Memory Plus Long Offset Operand
4092 operand indOffset32(eRegP reg, immI off) %{
4093   match(AddP reg off);        // pointer register plus any int constant (immI)
4094 
4095   format %{ "[$reg + $off]" %}
4096   interface(MEMORY_INTER) %{
4097     base($reg);
4098     index(0x4);               // 0x4 = no index
4099     scale(0x0);
4100     disp($off);
4101   %}
4102 %}
4103 
4104 // Integer Register Plus Pointer-Constant Offset Operand
4105 operand indOffset32X(rRegI reg, immP off) %{
4106   match(AddP off reg);        // operands reversed vs indOffset32: pointer constant first, then an int register
4107 
4108   format %{ "[$reg + $off]" %}
4109   interface(MEMORY_INTER) %{
4110     base($reg);               // the int register serves as the base
4111     index(0x4);               // 0x4 = no index
4112     scale(0x0);
4113     disp($off);               // the pointer constant is the displacement
4114   %}
4115 %}
4116 
4117 // Indirect Memory Plus Index Register Plus Offset Operand
4118 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4119   match(AddP (AddP reg ireg) off);  // (base + index) + constant
4120 
4121   op_cost(10);
4122   format %{"[$reg + $off + $ireg]" %}
4123   interface(MEMORY_INTER) %{
4124     base($reg);
4125     index($ireg);
4126     scale(0x0);                     // index is not scaled
4127     disp($off);
4128   %}
4129 %}
4130 
4131 // Indirect Memory Plus Index Register Operand
4132 operand indIndex(eRegP reg, rRegI ireg) %{
4133   match(AddP reg ireg);       // base + index, no constant
4134 
4135   op_cost(10);
4136   format %{"[$reg + $ireg]" %}
4137   interface(MEMORY_INTER) %{
4138     base($reg);
4139     index($ireg);
4140     scale(0x0);               // index is not scaled
4141     disp(0x0);                // no displacement
4142   %}
4143 %}
4144 
4145 // // -------------------------------------------------------------------------
4146 // // 486 architecture doesn't support "scale * index + offset" with out a base
4147 // // -------------------------------------------------------------------------
4148 // // Scaled Memory Operands
4149 // // Indirect Memory Times Scale Plus Offset Operand
4150 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4151 //   match(AddP off (LShiftI ireg scale));
4152 //
4153 //   op_cost(10);
4154 //   format %{"[$off + $ireg << $scale]" %}
4155 //   interface(MEMORY_INTER) %{
4156 //     base(0x4);
4157 //     index($ireg);
4158 //     scale($scale);
4159 //     disp($off);
4160 //   %}
4161 // %}
4162 
4163 // Indirect Memory Plus Scaled Index Register Operand
4164 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4165   match(AddP reg (LShiftI ireg scale));  // base + (index << scale); immI2 limits scale to 0..3
4166 
4167   op_cost(10);
4168   format %{"[$reg + $ireg << $scale]" %}
4169   interface(MEMORY_INTER) %{
4170     base($reg);
4171     index($ireg);
4172     scale($scale);
4173     disp(0x0);                           // no displacement
4174   %}
4175 %}
4176 
4177 // Indirect Memory Plus Scaled Index Register Plus Offset Operand
4178 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4179   match(AddP (AddP reg (LShiftI ireg scale)) off);  // (base + (index << scale)) + constant
4180 
4181   op_cost(10);
4182   format %{"[$reg + $off + $ireg << $scale]" %}
4183   interface(MEMORY_INTER) %{
4184     base($reg);
4185     index($ireg);
4186     scale($scale);                                  // immI2 limits scale to 0..3
4187     disp($off);
4188   %}
4189 %}
4190 
4191 //----------Load Long Memory Operands------------------------------------------
4192 // The load-long idiom will use its address expression again after loading
4193 // the first word of the long.  If the load-long destination overlaps with
4194 // registers used in the addressing expression, the 2nd half will be loaded
4195 // from a clobbered address.  Fix this by requiring that load-long use
4196 // address registers that do not overlap with the load-long target.
4197 
4198 // load-long support
4199 operand load_long_RegP() %{
4200   constraint(ALLOC_IN_RC(esi_reg));   // address register is pinned to class esi_reg
4201   match(RegP);
4202   match(eSIRegP);
4203   op_cost(100);                       // high cost relative to the other pointer register operands
4204   format %{  %}
4205   interface(REG_INTER);
4206 %}
4207 
4208 // Indirect Memory Operand Long: address = [$reg], with $reg a load_long_RegP
4209 operand load_long_indirect(load_long_RegP reg) %{
4210   constraint(ALLOC_IN_RC(esi_reg));  // same register class restriction as load_long_RegP
4211   match(reg);
4212 
4213   format %{ "[$reg]" %}
4214   interface(MEMORY_INTER) %{
4215     base($reg);   // base register
4216     index(0x4);   // No Index (same encoding the stackSlot operands use)
4217     scale(0x0);   // No Scale
4218     disp(0x0);    // no displacement
4219   %}
4220 %}
4221 
4222 // Indirect Memory Plus Long Offset Operand: address = [$reg + $off], with $reg a load_long_RegP
4223 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4224   match(AddP reg off);  // pointer add of the base register and an int constant
4225 
4226   format %{ "[$reg + $off]" %}
4227   interface(MEMORY_INTER) %{
4228     base($reg);   // base register
4229     index(0x4);   // No Index (same encoding the stackSlot operands use)
4230     scale(0x0);   // No Scale
4231     disp($off);   // constant displacement
4232   %}
4233 %}
4234 
4235 opclass load_long_memory(load_long_indirect, load_long_indOffset32);  // the two load-long address forms defined above
4236 
4237 
4238 //----------Special Memory Operands--------------------------------------------
4239 // Stack Slot Operand - This operand is used for loading and storing temporary
4240 //                      values on the stack where a match requires a value to
4241 //                      flow through memory.  This variant wraps an sRegP; the address is [ESP + disp].
4242 operand stackSlotP(sRegP reg) %{
4243   constraint(ALLOC_IN_RC(stack_slots));  // allocated from the stack_slots register class
4244   // No match rule because this operand is only generated in matching
4245   format %{ "[$reg]" %}
4246   interface(MEMORY_INTER) %{
4247     base(0x4);   // ESP
4248     index(0x4);  // No Index
4249     scale(0x0);  // No Scale
4250     disp($reg);  // Stack Offset
4251   %}
4252 %}
4253 
4254 operand stackSlotI(sRegI reg) %{
4255   constraint(ALLOC_IN_RC(stack_slots));  // allocated from the stack_slots register class
4256   // Stack slot operand wrapping an sRegI.  No match rule because this operand is only generated in matching
4257   format %{ "[$reg]" %}
4258   interface(MEMORY_INTER) %{
4259     base(0x4);   // ESP
4260     index(0x4);  // No Index
4261     scale(0x0);  // No Scale
4262     disp($reg);  // Stack Offset
4263   %}
4264 %}
4265 
4266 operand stackSlotF(sRegF reg) %{
4267   constraint(ALLOC_IN_RC(stack_slots));  // allocated from the stack_slots register class
4268   // Stack slot operand wrapping an sRegF.  No match rule because this operand is only generated in matching
4269   format %{ "[$reg]" %}
4270   interface(MEMORY_INTER) %{
4271     base(0x4);   // ESP
4272     index(0x4);  // No Index
4273     scale(0x0);  // No Scale
4274     disp($reg);  // Stack Offset
4275   %}
4276 %}
4277 
4278 operand stackSlotD(sRegD reg) %{
4279   constraint(ALLOC_IN_RC(stack_slots));  // allocated from the stack_slots register class
4280   // Stack slot operand wrapping an sRegD.  No match rule because this operand is only generated in matching
4281   format %{ "[$reg]" %}
4282   interface(MEMORY_INTER) %{
4283     base(0x4);   // ESP
4284     index(0x4);  // No Index
4285     scale(0x0);  // No Scale
4286     disp($reg);  // Stack Offset
4287   %}
4288 %}
4289 
4290 operand stackSlotL(sRegL reg) %{
4291   constraint(ALLOC_IN_RC(stack_slots));  // allocated from the stack_slots register class
4292   // Stack slot operand wrapping an sRegL.  No match rule because this operand is only generated in matching
4293   format %{ "[$reg]" %}
4294   interface(MEMORY_INTER) %{
4295     base(0x4);   // ESP
4296     index(0x4);  // No Index
4297     scale(0x0);  // No Scale
4298     disp($reg);  // Stack Offset
4299   %}
4300 %}
4301 
4302 //----------Memory Operands - Win95 Implicit Null Variants----------------
4303 // Indirect Memory Operand, Win95-safe variant: address = [$reg]; base is an eRegP_no_EBP (presumably excludes EBP -- operand defined elsewhere)
4304 operand indirect_win95_safe(eRegP_no_EBP reg)
4305 %{
4306   constraint(ALLOC_IN_RC(int_reg));  // allocated from the int_reg register class
4307   match(reg);
4308 
4309   op_cost(100);
4310   format %{ "[$reg]" %}
4311   interface(MEMORY_INTER) %{
4312     base($reg);   // base register
4313     index(0x4);   // No Index (same encoding the stackSlot operands use)
4314     scale(0x0);   // No Scale
4315     disp(0x0);    // no displacement
4316   %}
4317 %}
4318 
4319 // Indirect Memory Plus Short Offset Operand, Win95-safe variant: address = [$reg + $off] with an immI8 offset
4320 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4321 %{
4322   match(AddP reg off);  // pointer add of the base register and a constant
4323 
4324   op_cost(100);
4325   format %{ "[$reg + $off]" %}
4326   interface(MEMORY_INTER) %{
4327     base($reg);   // base register
4328     index(0x4);   // No Index (same encoding the stackSlot operands use)
4329     scale(0x0);   // No Scale
4330     disp($off);   // constant displacement
4331   %}
4332 %}
4333 
4334 // Indirect Memory Plus Long Offset Operand, Win95-safe variant: address = [$reg + $off] with an immI offset
4335 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4336 %{
4337   match(AddP reg off);  // pointer add of the base register and a constant
4338 
4339   op_cost(100);
4340   format %{ "[$reg + $off]" %}
4341   interface(MEMORY_INTER) %{
4342     base($reg);   // base register
4343     index(0x4);   // No Index (same encoding the stackSlot operands use)
4344     scale(0x0);   // No Scale
4345     disp($off);   // constant displacement
4346   %}
4347 %}
4348 
4349 // Indirect Memory Plus Index Register Plus Offset Operand, Win95-safe variant: address = [$reg + $ireg + $off]
4350 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4351 %{
4352   match(AddP (AddP reg ireg) off);  // inner AddP is base + index, outer AddP adds the constant
4353 
4354   op_cost(100);
4355   format %{"[$reg + $off + $ireg]" %}
4356   interface(MEMORY_INTER) %{
4357     base($reg);    // base register
4358     index($ireg);  // index register
4359     scale(0x0);    // No Scale
4360     disp($off);    // constant displacement
4361   %}
4362 %}
4363 
4364 // Indirect Memory Times Scale Plus Index Register, Win95-safe variant: address = [$reg + ($ireg << $scale)]
4365 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4366 %{
4367   match(AddP reg (LShiftI ireg scale));  // same tree shape as indIndexScale
4368 
4369   op_cost(100);  // indIndexScale uses op_cost(10) for the same match
4370   format %{"[$reg + $ireg << $scale]" %}
4371   interface(MEMORY_INTER) %{
4372     base($reg);      // base register
4373     index($ireg);    // index register
4374     scale($scale);   // shift count applied to the index
4375     disp(0x0);       // no displacement
4376   %}
4377 %}
4378 
4379 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand, Win95-safe variant: address = [$reg + ($ireg << $scale) + $off]
4380 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4381 %{
4382   match(AddP (AddP reg (LShiftI ireg scale)) off);  // same tree shape as indIndexScaleOffset
4383 
4384   op_cost(100);  // indIndexScaleOffset uses op_cost(10) for the same match
4385   format %{"[$reg + $off + $ireg << $scale]" %}
4386   interface(MEMORY_INTER) %{
4387     base($reg);      // base register
4388     index($ireg);    // index register
4389     scale($scale);   // shift count applied to the index
4390     disp($off);      // constant displacement
4391   %}
4392 %}
4393 
4394 //----------Conditional Branch Operands----------------------------------------
4395 // Comparison Op  - This is the operation of the comparison, and is limited to
4396 //                  the following set of codes:
4397 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4398 //
4399 // Other attributes of the comparison, such as unsignedness, are specified
4400 // by the comparison instruction that sets a condition code flags register.
4401 // That result is represented by a flags operand whose subtype is appropriate
4402 // to the unsignedness (etc.) of the comparison.
4403 //
4404 // Later, the instruction which matches both the Comparison Op (a Bool) and
4405 // the flags (produced by the Cmp) specifies the coding of the comparison op
4406 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4407 
4408 // Comparison Code, signed compare: pairs each Bool test with a condition encoding and its mnemonic suffix
4409 operand cmpOp() %{
4410   match(Bool);
4411 
4412   format %{ "" %}
4413   interface(COND_INTER) %{
4414     equal(0x4, "e");
4415     not_equal(0x5, "ne");
4416     less(0xC, "l");            // signed orderings use the l/ge/le/g codes
4417     greater_equal(0xD, "ge");
4418     less_equal(0xE, "le");
4419     greater(0xF, "g");
4420     overflow(0x0, "o");
4421     no_overflow(0x1, "no");
4422   %}
4423 %}
4424 
4425 // Comparison Code, unsigned compare.  Used by FP also, with
4426 // C2 (unordered) turned into GT or LT already.  The other bits
4427 // C0 and C3 are turned into Carry & Zero flags.  Orderings therefore use the b/nb/be/nbe codes.
4428 operand cmpOpU() %{
4429   match(Bool);
4430 
4431   format %{ "" %}
4432   interface(COND_INTER) %{
4433     equal(0x4, "e");
4434     not_equal(0x5, "ne");
4435     less(0x2, "b");            // below
4436     greater_equal(0x3, "nb");  // not below
4437     less_equal(0x6, "be");     // below or equal
4438     greater(0x7, "nbe");       // not below or equal
4439     overflow(0x0, "o");
4440     no_overflow(0x1, "no");
4441   %}
4442 %}
4443 
4444 // Floating comparisons that don't require any fixup for the unordered case; the predicate admits only lt, ge, le and gt tests
4445 operand cmpOpUCF() %{
4446   match(Bool);
4447   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4448             n->as_Bool()->_test._test == BoolTest::ge ||
4449             n->as_Bool()->_test._test == BoolTest::le ||
4450             n->as_Bool()->_test._test == BoolTest::gt);
4451   format %{ "" %}
4452   interface(COND_INTER) %{
4453     equal(0x4, "e");           // listed for completeness; eq/ne are rejected by the predicate above
4454     not_equal(0x5, "ne");
4455     less(0x2, "b");            // same encodings as cmpOpU
4456     greater_equal(0x3, "nb");
4457     less_equal(0x6, "be");
4458     greater(0x7, "nbe");
4459     overflow(0x0, "o");
4460     no_overflow(0x1, "no");
4461   %}
4462 %}
4463 
4464 
4465 // Floating comparisons that can be fixed up with extra conditional jumps; the predicate admits only ne and eq tests
4466 operand cmpOpUCF2() %{
4467   match(Bool);
4468   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4469             n->as_Bool()->_test._test == BoolTest::eq);
4470   format %{ "" %}
4471   interface(COND_INTER) %{
4472     equal(0x4, "e");
4473     not_equal(0x5, "ne");
4474     less(0x2, "b");            // orderings listed for completeness; rejected by the predicate above
4475     greater_equal(0x3, "nb");
4476     less_equal(0x6, "be");
4477     greater(0x7, "nbe");
4478     overflow(0x0, "o");
4479     no_overflow(0x1, "no");
4480   %}
4481 %}
4482 
4483 // Comparison Code for FP conditional move; the predicate rejects overflow / no_overflow tests
4484 operand cmpOp_fcmov() %{
4485   match(Bool);
4486 
4487   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4488             n->as_Bool()->_test._test != BoolTest::no_overflow);
4489   format %{ "" %}
4490   interface(COND_INTER) %{
4491     equal        (0x0C8);  // NOTE(review): these look like FCMOVcc opcode bits rather than Jcc condition codes -- confirm against the encoder that consumes them
4492     not_equal    (0x1C8);
4493     less         (0x0C0);
4494     greater_equal(0x1C0);
4495     less_equal   (0x0D0);
4496     greater      (0x1D0);
4497     overflow(0x0, "o"); // not really supported by the instruction
4498     no_overflow(0x1, "no"); // not really supported by the instruction
4499   %}
4500 %}
4501 
4502 // Comparison Code used in long compares: signed codes with the orderings commuted (less <-> greater, less_equal <-> greater_equal)
4503 operand cmpOp_commute() %{
4504   match(Bool);
4505 
4506   format %{ "" %}
4507   interface(COND_INTER) %{
4508     equal(0x4, "e");           // equality tests are unchanged by commuting
4509     not_equal(0x5, "ne");
4510     less(0xF, "g");            // cmpOp uses this code for greater
4511     greater_equal(0xE, "le");  // cmpOp uses this code for less_equal
4512     less_equal(0xD, "ge");     // cmpOp uses this code for greater_equal
4513     greater(0xC, "l");         // cmpOp uses this code for less
4514     overflow(0x0, "o");
4515     no_overflow(0x1, "no");
4516   %}
4517 %}
4518 
4519 // Comparison Code used in unsigned long compares: unsigned codes with the orderings commuted
4520 operand cmpOpU_commute() %{
4521   match(Bool);
4522 
4523   format %{ "" %}
4524   interface(COND_INTER) %{
4525     equal(0x4, "e");           // equality tests are unchanged by commuting
4526     not_equal(0x5, "ne");
4527     less(0x7, "nbe");          // cmpOpU uses this code for greater
4528     greater_equal(0x6, "be");  // cmpOpU uses this code for less_equal
4529     less_equal(0x3, "nb");     // cmpOpU uses this code for greater_equal
4530     greater(0x2, "b");         // cmpOpU uses this code for less
4531     overflow(0x0, "o");
4532     no_overflow(0x1, "no");
4533   %}
4534 %}
4535 
4536 //----------OPERAND CLASSES----------------------------------------------------
4537 // Operand Classes are groups of operands that are used to simplify
4538 // instruction definitions by not requiring the AD writer to specify separate
4539 // instructions for every form of operand when the instruction accepts
4540 // multiple operand types with the same basic encoding and format.  The classic
4541 // case of this is memory operands.
4542 
4543 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4544                indIndex, indIndexScale, indIndexScaleOffset);  // general memory operand class; includes indOffset32X, which long_memory omits
4545 
4546 // Long memory operations are encoded in 2 instructions and a +4 offset.
4547 // This means some kind of offset is always required and you cannot use
4548 // an oop as the offset (done when working on static globals).  Same list as 'memory' minus indOffset32X.
4549 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4550                     indIndex, indIndexScale, indIndexScaleOffset);
4551 
4552 
4553 //----------PIPELINE-----------------------------------------------------------
4554 // Rules which define the behavior of the target architecture's pipeline.
4555 pipeline %{
4556 
4557 //----------ATTRIBUTES---------------------------------------------------------
4558 attributes %{
4559   variable_size_instructions;        // Variable size instructions
4560   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4561   instruction_unit_size = 1;         // Instruction sizes are expressed in units of 1 byte
4562   instruction_fetch_unit_size = 16;  // The processor fetches one line
4563   instruction_fetch_units = 1;       // of 16 bytes
4564 
4565   // List of nop instructions
4566   nops( MachNop );
4567 %}
4568 
4569 //----------RESOURCES----------------------------------------------------------
4570 // Resources are the functional units available to the machine
4571 
4572 // Generic P2/P3 pipeline
4573 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4574 // 3 instructions decoded per cycle.
4575 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4576 // 2 ALU op, only ALU0 handles mul/div instructions.  DECODE, MEM and ALU are the unions of their individual units.
4577 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4578            MS0, MS1, MEM = MS0 | MS1,
4579            BR, FPU,
4580            ALU0, ALU1, ALU = ALU0 | ALU1 );
4581 
4582 //----------PIPELINE DESCRIPTION-----------------------------------------------
4583 // Pipeline Description specifies the stages in the machine's pipeline
4584 
4585 // Generic P2/P3 pipeline: six stages, S0 through S5, referenced by the pipe_class entries below
4586 pipe_desc(S0, S1, S2, S3, S4, S5);
4587 
4588 //----------PIPELINE CLASSES---------------------------------------------------
4589 // Pipeline Classes describe the stages in which input and output are
4590 // referenced by the hardware pipeline.
4591 
4592 // Naming convention: ialu or fpu
4593 // Then: _reg
4594 // Then: _reg if there is a 2nd register
4595 // Then: _long if it's a pair of instructions implementing a long
4596 // Then: _fat if it requires the big decoder
4597 //   Or: _mem if it requires the big decoder and a memory unit.
4598 
4599 // Integer ALU reg operation (one register is both source and destination)
4600 pipe_class ialu_reg(rRegI dst) %{
4601     single_instruction;
4602     dst    : S4(write);  // result written in stage S4
4603     dst    : S3(read);   // same register read in stage S3
4604     DECODE : S0;        // any decoder
4605     ALU    : S3;        // any alu
4606 %}
4607 
4608 // Long ALU reg operation (pair of instructions on one long register)
4609 pipe_class ialu_reg_long(eRegL dst) %{
4610     instruction_count(2);
4611     dst    : S4(write);  // result written in stage S4
4612     dst    : S3(read);   // same register read in stage S3
4613     DECODE : S0(2);     // any 2 decoders
4614     ALU    : S3(2);     // both alus
4615 %}
4616 
4617 // Integer ALU reg operation using big decoder (D0 instead of any DECODE unit)
4618 pipe_class ialu_reg_fat(rRegI dst) %{
4619     single_instruction;
4620     dst    : S4(write);  // result written in stage S4
4621     dst    : S3(read);   // same register read in stage S3
4622     D0     : S0;        // big decoder only
4623     ALU    : S3;        // any alu
4624 %}
4625 
4626 // Long ALU reg operation using big decoder (pair of instructions, both through D0)
4627 pipe_class ialu_reg_long_fat(eRegL dst) %{
4628     instruction_count(2);
4629     dst    : S4(write);  // result written in stage S4
4630     dst    : S3(read);   // same register read in stage S3
4631     D0     : S0(2);     // big decoder only; twice
4632     ALU    : S3(2);     // any 2 alus
4633 %}
4634 
4635 // Integer ALU reg-reg operation (separate source and destination registers)
4636 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4637     single_instruction;
4638     dst    : S4(write);  // result written in stage S4
4639     src    : S3(read);   // source read in stage S3
4640     DECODE : S0;        // any decoder
4641     ALU    : S3;        // any alu
4642 %}
4643 
4644 // Long ALU reg-reg operation (pair of instructions)
4645 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4646     instruction_count(2);
4647     dst    : S4(write);  // result written in stage S4
4648     src    : S3(read);   // source read in stage S3
4649     DECODE : S0(2);     // any 2 decoders
4650     ALU    : S3(2);     // both alus
4651 %}
4652 
4653 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand, but no MEM unit is used)
4654 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4655     single_instruction;
4656     dst    : S4(write);  // result written in stage S4
4657     src    : S3(read);   // source read in stage S3
4658     D0     : S0;        // big decoder only
4659     ALU    : S3;        // any alu
4660 %}
4661 
4662 // Long ALU reg-reg operation using big decoder (pair of instructions, both through D0)
4663 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4664     instruction_count(2);
4665     dst    : S4(write);  // result written in stage S4
4666     src    : S3(read);   // source read in stage S3
4667     D0     : S0(2);     // big decoder only; twice
4668     ALU    : S3(2);     // both alus
4669 %}
4670 
4671 // Integer ALU reg-mem operation (memory source, register destination)
4672 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4673     single_instruction;
4674     dst    : S5(write);  // result written in stage S5, one stage later than the reg-reg classes
4675     mem    : S3(read);   // memory operand read in stage S3
4676     D0     : S0;        // big decoder only
4677     ALU    : S4;        // any alu
4678     MEM    : S3;        // any mem
4679 %}
4680 
4681 // Long ALU reg-mem operation (pair of instructions; memory operand is a load_long_memory)
4682 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4683     instruction_count(2);
4684     dst    : S5(write);  // result written in stage S5
4685     mem    : S3(read);   // memory operand read in stage S3
4686     D0     : S0(2);     // big decoder only; twice
4687     ALU    : S4(2);     // any 2 alus
4688     MEM    : S3(2);     // both mems
4689 %}
4690 
4691 // Integer mem operation (prefetch): memory operand only, no register operand and no ALU use
4692 pipe_class ialu_mem(memory mem)
4693 %{
4694     single_instruction;
4695     mem    : S3(read);   // memory operand referenced in stage S3
4696     D0     : S0;        // big decoder only
4697     MEM    : S3;        // any mem
4698 %}
4699 
4700 // Integer Store to Memory (register source)
4701 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4702     single_instruction;
4703     mem    : S3(read);   // address operand referenced in stage S3
4704     src    : S5(read);   // stored value read in stage S5
4705     D0     : S0;        // big decoder only
4706     ALU    : S4;        // any alu
4707     MEM    : S3;        // any mem
4708 %}
4709 
4710 // Long Store to Memory (pair of instructions, long register source)
4711 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4712     instruction_count(2);
4713     mem    : S3(read);   // address operand referenced in stage S3
4714     src    : S5(read);   // stored value read in stage S5
4715     D0     : S0(2);     // big decoder only; twice
4716     ALU    : S4(2);     // any 2 alus
4717     MEM    : S3(2);     // Both mems
4718 %}
4719 
4720 // Integer Store to Memory with no register source (presumably an immediate, per the class name)
4721 pipe_class ialu_mem_imm(memory mem) %{
4722     single_instruction;
4723     mem    : S3(read);   // address operand referenced in stage S3
4724     D0     : S0;        // big decoder only
4725     ALU    : S4;        // any alu
4726     MEM    : S3;        // any mem
4727 %}
4728 
4729 // Integer ALU0 reg-reg operation (restricted to ALU0 and the big decoder)
4730 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4731     single_instruction;
4732     dst    : S4(write);  // result written in stage S4
4733     src    : S3(read);   // source read in stage S3
4734     D0     : S0;        // Big decoder only
4735     ALU0   : S3;        // only alu0
4736 %}
4737 
4738 // Integer ALU0 reg-mem operation (memory source; restricted to ALU0 and the big decoder)
4739 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4740     single_instruction;
4741     dst    : S5(write);  // result written in stage S5
4742     mem    : S3(read);   // memory operand read in stage S3
4743     D0     : S0;        // big decoder only
4744     ALU0   : S4;        // ALU0 only
4745     MEM    : S3;        // any mem
4746 %}
4747 
4748 // Integer ALU reg-reg operation whose result is the flags register (cr)
4749 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4750     single_instruction;
4751     cr     : S4(write);  // flags written in stage S4
4752     src1   : S3(read);   // both sources read in stage S3
4753     src2   : S3(read);
4754     DECODE : S0;        // any decoder
4755     ALU    : S3;        // any alu
4756 %}
4757 
4758 // Integer ALU reg-imm operation whose result is the flags register (cr); only the register source is tracked
4759 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4760     single_instruction;
4761     cr     : S4(write);  // flags written in stage S4
4762     src1   : S3(read);   // register source read in stage S3
4763     DECODE : S0;        // any decoder
4764     ALU    : S3;        // any alu
4765 %}
4766 
4767 // Integer ALU reg-mem operation whose result is the flags register (cr)
4768 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4769     single_instruction;
4770     cr     : S4(write);  // flags written in stage S4
4771     src1   : S3(read);   // register source read in stage S3
4772     src2   : S3(read);   // memory source read in stage S3
4773     D0     : S0;        // big decoder only
4774     ALU    : S4;        // any alu
4775     MEM    : S3;        // any mem
4776 %}
4777 
4778 // Conditional move reg-reg: four-instruction idiom over p, q and y; only reads and decoder use are modelled
4779 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4780     instruction_count(4);
4781     y      : S4(read);   // y read in stage S4, one stage after p and q
4782     q      : S3(read);
4783     p      : S3(read);
4784     DECODE : S0(4);     // any decoder
4785 %}
4786 
4787 // Conditional move reg-reg (reads the flags register; no ALU resource is claimed)
4788 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4789     single_instruction;
4790     dst    : S4(write);  // result written in stage S4
4791     src    : S3(read);   // source read in stage S3
4792     cr     : S3(read);   // flags read in stage S3
4793     DECODE : S0;        // any decoder
4794 %}
4795 
4796 // Conditional move reg-mem (memory source; reads the flags register)
4797 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4798     single_instruction;
4799     dst    : S4(write);  // result written in stage S4
4800     src    : S3(read);   // memory source read in stage S3
4801     cr     : S3(read);   // flags read in stage S3
4802     DECODE : S0;        // any decoder
4803     MEM    : S3;        // any mem
4804 %}
4805 
4806 // Conditional move reg-reg long
4807 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4808     single_instruction;  // NOTE(review): declared single_instruction although two decoders are claimed below -- confirm intent
4809     dst    : S4(write);  // result written in stage S4
4810     src    : S3(read);   // source read in stage S3
4811     cr     : S3(read);   // flags read in stage S3
4812     DECODE : S0(2);     // any 2 decoders
4813 %}
4814 
4815 // Conditional move double reg-reg (destination is a regDPR1, source a regDPR)
4816 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4817     single_instruction;
4818     dst    : S4(write);  // result written in stage S4
4819     src    : S3(read);   // source read in stage S3
4820     cr     : S3(read);   // flags read in stage S3
4821     DECODE : S0;        // any decoder
4822 %}
4823 
4824 // Float reg operation (single register operand; two instructions)
4825 pipe_class fpu_reg(regDPR dst) %{
4826     instruction_count(2);
4827     dst    : S3(read);   // only a read of dst is modelled
4828     DECODE : S0(2);     // any 2 decoders
4829     FPU    : S3;        // the FPU
4830 %}
4831 
4832 // Float reg-reg operation (two instructions)
4833 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4834     instruction_count(2);
4835     dst    : S4(write);  // result written in stage S4
4836     src    : S3(read);   // source read in stage S3
4837     DECODE : S0(2);     // any 2 decoders
4838     FPU    : S3;        // the FPU
4839 %}
4840 
4841 // Float reg-reg-reg operation (two sources, three instructions)
4842 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4843     instruction_count(3);
4844     dst    : S4(write);  // result written in stage S4
4845     src1   : S3(read);   // both sources read in stage S3
4846     src2   : S3(read);
4847     DECODE : S0(3);     // any 3 decoders
4848     FPU    : S3(2);     // FPU claimed twice
4849 %}
4850 
4851 // Float reg-reg-reg-reg operation (three sources, four instructions)
4852 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4853     instruction_count(4);
4854     dst    : S4(write);  // result written in stage S4
4855     src1   : S3(read);   // all three sources read in stage S3
4856     src2   : S3(read);
4857     src3   : S3(read);
4858     DECODE : S0(4);     // 4 decoder uses
4859     FPU    : S3(2);     // FPU claimed twice
4860 %}
4861 
4862 // Float reg-mem-reg-reg operation (first source is a memory operand; four instructions)
4863 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4864     instruction_count(4);
4865     dst    : S4(write);  // result written in stage S4
4866     src1   : S3(read);   // memory source read in stage S3
4867     src2   : S3(read);   // register sources read in stage S3
4868     src3   : S3(read);
4869     DECODE : S1(3);     // any 3 decoders
4870     D0     : S0;        // Big decoder only
4871     FPU    : S3(2);     // FPU claimed twice
4872     MEM    : S3;        // any mem
4873 %}
4874 
4875 // Float reg-mem operation (memory source, register destination; two instructions)
4876 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4877     instruction_count(2);
4878     dst    : S5(write);  // result written in stage S5
4879     mem    : S3(read);   // memory operand read in stage S3
4880     D0     : S0;        // big decoder only
4881     DECODE : S1;        // any decoder for FPU POP
4882     FPU    : S4;        // the FPU
4883     MEM    : S3;        // any mem
4884 %}
4885 
4886 // Float reg-reg-mem operation (register and memory sources; three instructions)
4887 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4888     instruction_count(3);
4889     dst    : S5(write);  // result written in stage S5
4890     src1   : S3(read);   // register source read in stage S3
4891     mem    : S3(read);   // memory source read in stage S3
4892     D0     : S0;        // big decoder only
4893     DECODE : S1(2);     // any decoder for FPU POP
4894     FPU    : S4;        // the FPU
4895     MEM    : S3;        // any mem
4896 %}
4897 
4898 // Float mem-reg operation (register source, memory destination; two instructions)
4899 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4900     instruction_count(2);
4901     src    : S5(read);   // source register read in stage S5
4902     mem    : S3(read);   // memory operand referenced in stage S3
4903     DECODE : S0;        // any decoder for FPU PUSH
4904     D0     : S1;        // big decoder only
4905     FPU    : S4;        // the FPU
4906     MEM    : S3;        // any mem
4907 %}
4908 
4909 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4910     instruction_count(3);  // Float mem-reg-reg operation: two register sources, memory destination
4911     src1   : S3(read);   // both register sources read in stage S3
4912     src2   : S3(read);
4913     mem    : S3(read);   // memory operand referenced in stage S3
4914     DECODE : S0(2);     // any decoder for FPU PUSH
4915     D0     : S1;        // big decoder only
4916     FPU    : S4;        // the FPU
4917     MEM    : S3;        // any mem
4918 %}
4919 
4920 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4921     instruction_count(3);  // Float mem-reg-mem operation: register and memory sources, memory destination
4922     src1   : S3(read);   // register source read in stage S3
4923     src2   : S3(read);   // memory source read in stage S3
4924     mem    : S4(read);   // destination memory operand referenced in stage S4
4925     DECODE : S0;        // any decoder for FPU PUSH
4926     D0     : S0(2);     // big decoder only
4927     FPU    : S4;        // the FPU
4928     MEM    : S3(2);     // any mem
4929 %}
4930 
4931 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4932     instruction_count(2);  // Float mem-mem operation: memory source and memory destination; no FPU resource is claimed
4933     src1   : S3(read);   // memory source read in stage S3
4934     dst    : S4(read);   // destination memory operand referenced in stage S4
4935     D0     : S0(2);     // big decoder only
4936     MEM    : S3(2);     // any mem
4937 %}
4938 
4939 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4940     instruction_count(3);  // Float mem-mem-mem operation: two memory sources and a memory destination
4941     src1   : S3(read);   // both memory sources read in stage S3
4942     src2   : S3(read);
4943     dst    : S4(read);   // destination memory operand referenced in stage S4
4944     D0     : S0(3);     // big decoder only
4945     FPU    : S4;        // the FPU
4946     MEM    : S3(3);     // any mem
4947 %}
4948 
4949 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4950     instruction_count(3);  // Float mem-reg operation, presumably with a constant operand (per the class name); three instructions
4951     src1   : S4(read);   // register source read in stage S4
4952     mem    : S4(read);   // memory operand referenced in stage S4
4953     DECODE : S0;        // any decoder for FPU PUSH
4954     D0     : S0(2);     // big decoder only
4955     FPU    : S4;        // the FPU
4956     MEM    : S3(2);     // any mem
4957 %}
4958 
4959 // Float load constant (register destination only; two instructions)
4960 pipe_class fpu_reg_con(regDPR dst) %{
4961     instruction_count(2);
4962     dst    : S5(write);  // result written in stage S5
4963     D0     : S0;        // big decoder only for the load
4964     DECODE : S1;        // any decoder for FPU POP
4965     FPU    : S4;        // the FPU
4966     MEM    : S3;        // any mem
4967 %}
4968 
4969 // Float reg-reg operation with a loaded constant (three instructions)
4970 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
4971     instruction_count(3);
4972     dst    : S5(write);  // result written in stage S5
4973     src    : S3(read);   // register source read in stage S3
4974     D0     : S0;        // big decoder only for the load
4975     DECODE : S1(2);     // any decoder for FPU POP
4976     FPU    : S4;        // the FPU
4977     MEM    : S3;        // any mem
4978 %}
4979 
4980 // Unconditional branch: claims only the branch unit
4981 pipe_class pipe_jmp( label labl ) %{
4982     single_instruction;
4983     BR   : S3;           // branch unit in stage S3
4984 %}
4985 
4986 // Conditional branch: reads the flags register and claims the branch unit
4987 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
4988     single_instruction;
4989     cr    : S1(read);    // flags read in stage S1
4990     BR    : S3;          // branch unit in stage S3
4991 %}
4992 
4993 // Allocation idiom: serializing, fixed-latency class that claims decoders, a mem unit, both alus and the branch unit
4994 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
4995     instruction_count(1); force_serialization;
4996     fixed_latency(6);    // latency fixed at 6 rather than derived from the stages below
4997     heap_ptr : S3(read);  // heap pointer read in stage S3
4998     DECODE   : S0(3);     // 3 decoder uses
4999     D0       : S2;        // big decoder in stage S2
5000     MEM      : S3;        // any mem
5001     ALU      : S3(2);     // both alus
5002     dst      : S5(write); // result written in stage S5
5003     BR       : S5;        // branch unit in stage S5
5004 %}
5005 
5006 // Generic big/slow expanded idiom: 10 instructions over multiple bundles, serializing, fixed latency of 100
5007 pipe_class pipe_slow(  ) %{
5008     instruction_count(10); multiple_bundles; force_serialization;
5009     fixed_latency(100);
5010     D0  : S0(2);         // big decoder, twice
5011     MEM : S3(2);         // both mems
5012 %}
5013 
5014 // The real do-nothing guy: zero instructions, no operands, no resources
5015 pipe_class empty( ) %{
5016     instruction_count(0);
5017 %}
5018 
5019 // Define the class for the Nop node (MachNop is also the entry in the attributes nops() list)
5020 define %{
5021    MachNop = empty;  // MachNop uses the 'empty' pipe_class
5022 %}
5023 
5024 %}
5025 
5026 //----------INSTRUCTIONS-------------------------------------------------------
5027 //
5028 // match      -- States which machine-independent subtree may be replaced
5029 //               by this instruction.
5030 // ins_cost   -- The estimated cost of this instruction is used by instruction
5031 //               selection to identify a minimum cost tree of machine
5032 //               instructions that matches a tree of machine-independent
5033 //               instructions.
5034 // format     -- A string providing the disassembly for this instruction.
5035 //               The value of an instruction's operand may be inserted
5036 //               by referring to it with a '$' prefix.
5037 // opcode     -- Three instruction opcodes may be provided.  These are referred
5038 //               to within an encode class as $primary, $secondary, and $tertiary
5039 //               respectively.  The primary opcode is commonly used to
5040 //               indicate the type of machine instruction, while secondary
5041 //               and tertiary are often used for prefix options or addressing
5042 //               modes.
5043 // ins_encode -- A list of encode classes with parameters. The encode class
5044 //               name must have been defined in an 'enc_class' specification
5045 //               in the encode section of the architecture description.
5046 
5047 //----------BSWAP-Instruction--------------------------------------------------
5048 instruct bytes_reverse_int(rRegI dst) %{
5049   match(Set dst (ReverseBytesI dst));  // in-place byte reversal of an int register
5050 
5051   format %{ "BSWAP  $dst" %}
5052   opcode(0x0F, 0xC8);                  // $primary = 0x0F, $secondary = 0xC8
5053   ins_encode( OpcP, OpcSReg(dst) );    // presumably emits $primary, then $secondary combined with dst's register number -- confirm in the encode section
5054   ins_pipe( ialu_reg );
5055 %}
5056 
5057 instruct bytes_reverse_long(eRegL dst) %{
5058   match(Set dst (ReverseBytesL dst));  // in-place byte reversal of a long register pair
5059 
5060   format %{ "BSWAP  $dst.lo\n\t"
5061             "BSWAP  $dst.hi\n\t"
5062             "XCHG   $dst.lo $dst.hi" %}
5063 
5064   ins_cost(125);
5065   ins_encode( bswap_long_bytes(dst) );  // enc_class defined elsewhere; per the format above it byte-swaps each half and then exchanges the halves
5066   ins_pipe( ialu_reg_reg);
5067 %}
5068 
5069 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5070   match(Set dst (ReverseBytesUS dst));  // in-place byte reversal of an unsigned 16-bit value
5071   effect(KILL cr);                      // the flags register is declared killed
5072 
5073   format %{ "BSWAP  $dst\n\t" 
5074             "SHR    $dst,16\n\t" %}
5075   ins_encode %{
5076     __ bswapl($dst$$Register);    // reverse all four bytes; the swapped low 16 bits end up in the high half
5077     __ shrl($dst$$Register, 16);  // logical shift brings them back down, zero-filling the high half
5078   %}
5079   ins_pipe( ialu_reg );
5080 %}
5081 
5082 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5083   match(Set dst (ReverseBytesS dst));  // in-place byte reversal of a signed 16-bit value
5084   effect(KILL cr);                     // the flags register is declared killed
5085 
5086   format %{ "BSWAP  $dst\n\t" 
5087             "SAR    $dst,16\n\t" %}
5088   ins_encode %{
5089     __ bswapl($dst$$Register);    // reverse all four bytes; the swapped low 16 bits end up in the high half
5090     __ sarl($dst$$Register, 16);  // arithmetic shift brings them back down, sign-extending the result
5091   %}
5092   ins_pipe( ialu_reg );
5093 %}
5094 
5095 
5096 //---------- Zeros Count Instructions ------------------------------------------
5097 
5098 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5099   predicate(UseCountLeadingZerosInstruction);  // LZCNT form; the _bsr variant below covers the opposite setting
5100   match(Set dst (CountLeadingZerosI src));
5101   effect(KILL cr);                             // the flags register is declared killed
5102 
5103   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5104   ins_encode %{
5105     __ lzcntl($dst$$Register, $src$$Register);  // single instruction computes the whole result
5106   %}
5107   ins_pipe(ialu_reg);
5108 %}
5109 
5110 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5111   predicate(!UseCountLeadingZerosInstruction);  // BSR fallback used when the LZCNT form is disabled
5112   match(Set dst (CountLeadingZerosI src));
5113   effect(KILL cr);                              // the flags register is declared killed
5114 
5115   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5116             "JNZ    skip\n\t"
5117             "MOV    $dst, -1\n"
5118       "skip:\n\t"
5119             "NEG    $dst\n\t"
5120             "ADD    $dst, 31" %}
5121   ins_encode %{
5122     Register Rdst = $dst$$Register;
5123     Register Rsrc = $src$$Register;
5124     Label skip;
5125     __ bsrl(Rdst, Rsrc);                 // Rdst = index of the highest set bit; ZF is set when Rsrc == 0
5126     __ jccb(Assembler::notZero, skip);
5127     __ movl(Rdst, -1);                   // zero input: use index -1 so the result below becomes 32
5128     __ bind(skip);
5129     __ negl(Rdst);                       // result = (BitsPerInt - 1) - index
5130     __ addl(Rdst, BitsPerInt - 1);
5131   %}
5132   ins_pipe(ialu_reg);
5133 %}
5134 
5135 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5136   predicate(UseCountLeadingZerosInstruction);  // LZCNT form; the _bsr variant below covers the opposite setting
5137   match(Set dst (CountLeadingZerosL src));
5138   effect(TEMP dst, KILL cr);                   // dst is written before src.lo is read, so it is marked TEMP (presumably to keep it distinct from src)
5139 
5140   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5141             "JNC    done\n\t"
5142             "LZCNT  $dst, $src.lo\n\t"
5143             "ADD    $dst, 32\n"
5144       "done:" %}
5145   ins_encode %{
5146     Register Rdst = $dst$$Register;
5147     Register Rsrc = $src$$Register;
5148     Label done;
5149     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));   // count in the high word; LZCNT sets CF when its source is zero
5150     __ jccb(Assembler::carryClear, done);   // high word non-zero: its count is the answer
5151     __ lzcntl(Rdst, Rsrc);                  // high word zero: count in the low word...
5152     __ addl(Rdst, BitsPerInt);              // ...and add the 32 zero bits of the high word
5153     __ bind(done);
5154   %}
5155   ins_pipe(ialu_reg);
5156 %}
5157 
5158 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5159   predicate(!UseCountLeadingZerosInstruction);  // BSR fallback used when the LZCNT form is disabled
5160   match(Set dst (CountLeadingZerosL src));
5161   effect(TEMP dst, KILL cr);                    // dst is written before src.lo is read, so it is marked TEMP (presumably to keep it distinct from src)
5162 
5163   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5164             "JZ     msw_is_zero\n\t"
5165             "ADD    $dst, 32\n\t"
5166             "JMP    not_zero\n"
5167       "msw_is_zero:\n\t"
5168             "BSR    $dst, $src.lo\n\t"
5169             "JNZ    not_zero\n\t"
5170             "MOV    $dst, -1\n"
5171       "not_zero:\n\t"
5172             "NEG    $dst\n\t"
5173             "ADD    $dst, 63\n" %}
5174  ins_encode %{
5175     Register Rdst = $dst$$Register;
5176     Register Rsrc = $src$$Register;
5177     Label msw_is_zero;
5178     Label not_zero;
5179     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));      // index of the highest set bit in the high word; ZF set when it is zero
5180     __ jccb(Assembler::zero, msw_is_zero);
5181     __ addl(Rdst, BitsPerInt);               // convert to a bit index within the 64-bit value
5182     __ jmpb(not_zero);
5183     __ bind(msw_is_zero);
5184     __ bsrl(Rdst, Rsrc);                     // high word is zero: search the low word instead
5185     __ jccb(Assembler::notZero, not_zero);
5186     __ movl(Rdst, -1);                       // whole long is zero: use index -1 so the result below becomes 64
5187     __ bind(not_zero);
5188     __ negl(Rdst);                           // result = (BitsPerLong - 1) - index
5189     __ addl(Rdst, BitsPerLong - 1);
5190   %}
5191   ins_pipe(ialu_reg);
5192 %}
5193 
5194 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
       // TZCNT flavour of CountTrailingZerosI, enabled by
       // UseCountTrailingZerosInstruction. The flags register is declared killed.
5195   predicate(UseCountTrailingZerosInstruction);
5196   match(Set dst (CountTrailingZerosI src));
5197   effect(KILL cr);
5198 
5199   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5200   ins_encode %{
5201     Register result = $dst$$Register;
         Register value  = $src$$Register;
         __ tzcntl(result, value);
5202   %}
5203   ins_pipe(ialu_reg);
5204 %}
5205 
5206 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
       // BSF-based fallback for CountTrailingZerosI, used when
       // UseCountTrailingZerosInstruction is off. A zero source is patched
       // up explicitly to give 32.
5207   predicate(!UseCountTrailingZerosInstruction);
5208   match(Set dst (CountTrailingZerosI src));
5209   effect(KILL cr);
5210 
5211   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5212             "JNZ    done\n\t"
5213             "MOV    $dst, 32\n"
5214       "done:" %}
5215   ins_encode %{
5216     Register result = $dst$$Register;
         Register value  = $src$$Register;
5217     Label found_bit;
5218     __ bsfl(result, value);
5219     __ jccb(Assembler::notZero, found_bit);
         // Source was zero, so every one of the 32 bits counts as trailing.
5220     __ movl(result, BitsPerInt);
5221     __ bind(found_bit);
5222   %}
5223   ins_pipe(ialu_reg);
5224 %}
5225 
5226 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
       // TZCNT flavour of CountTrailingZerosL over a lo/hi register pair.
       // The low word is counted first; the high word is only examined when
       // the low word turned out to be zero.
5227   predicate(UseCountTrailingZerosInstruction);
5228   match(Set dst (CountTrailingZerosL src));
5229   effect(TEMP dst, KILL cr);
5230 
5231   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5232             "JNC    done\n\t"
5233             "TZCNT  $dst, $src.hi\n\t"
5234             "ADD    $dst, 32\n"
5235             "done:" %}
5236   ins_encode %{
5237     Register count  = $dst$$Register;
5238     Register src_lo = $src$$Register;
         Register src_hi = HIGH_FROM_LOW(src_lo);
5239     Label finished;
         // The branch relies on TZCNT leaving carry clear for a non-zero source.
5240     __ tzcntl(count, src_lo);
5241     __ jccb(Assembler::carryClear, finished);
         // Low word was zero: count the high word and add the 32 bits below it.
5242     __ tzcntl(count, src_hi);
5243     __ addl(count, BitsPerInt);
5244     __ bind(finished);
5245   %}
5246   ins_pipe(ialu_reg);
5247 %}
5248 
5249 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
       // BSF-based fallback for CountTrailingZerosL, used when
       // UseCountTrailingZerosInstruction is off. Scans the low word, then the
       // high word; an all-zero input ends up as 32 + 32 = 64.
5250   predicate(!UseCountTrailingZerosInstruction);
5251   match(Set dst (CountTrailingZerosL src));
5252   effect(TEMP dst, KILL cr);
5253 
5254   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5255             "JNZ    done\n\t"
5256             "BSF    $dst, $src.hi\n\t"
5257             "JNZ    msw_not_zero\n\t"
5258             "MOV    $dst, 32\n"
5259       "msw_not_zero:\n\t"
5260             "ADD    $dst, 32\n"
5261       "done:" %}
5262   ins_encode %{
5263     Register result = $dst$$Register;
5264     Register src_lo = $src$$Register;
         Register src_hi = HIGH_FROM_LOW(src_lo);
5265     Label high_word_has_bit;
5266     Label finished;
5267     __ bsfl(result, src_lo);
5268     __ jccb(Assembler::notZero, finished);
5269     __ bsfl(result, src_hi);
5270     __ jccb(Assembler::notZero, high_word_has_bit);
         // Neither word has a set bit: seed with 32 so the add below gives 64.
5271     __ movl(result, BitsPerInt);
5272     __ bind(high_word_has_bit);
         // Account for the 32 zero bits of the low word.
5273     __ addl(result, BitsPerInt);
5274     __ bind(finished);
5275   %}
5276   ins_pipe(ialu_reg);
5277 %}
5278 
5279 
5280 //---------- Population Count Instructions -------------------------------------
5281 
5282 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
       // Register form of PopCountI; only available when
       // UsePopCountInstruction is set. Flags are declared killed.
5283   predicate(UsePopCountInstruction);
5284   match(Set dst (PopCountI src));
5285   effect(KILL cr);
5286 
5287   format %{ "POPCNT $dst, $src" %}
5288   ins_encode %{
5289     Register bits_set = $dst$$Register;
         Register value    = $src$$Register;
         __ popcntl(bits_set, value);
5290   %}
5291   ins_pipe(ialu_reg);
5292 %}
5293 
5294 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
       // Memory form of PopCountI: folds the LoadI into the POPCNT operand.
5295   predicate(UsePopCountInstruction);
5296   match(Set dst (PopCountI (LoadI mem)));
5297   effect(KILL cr);
5298 
5299   format %{ "POPCNT $dst, $mem" %}
5300   ins_encode %{
5301     Register bits_set = $dst$$Register;
         Address  operand  = $mem$$Address;
         __ popcntl(bits_set, operand);
5302   %}
5303   ins_pipe(ialu_reg);
5304 %}
5305 
5306 // PopCountL on a lo/hi register pair. Note: Long.bitCount(long) returns an
     // int, so the result is a single int register: popcount(lo) + popcount(hi).
5307 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5308   predicate(UsePopCountInstruction);
5309   match(Set dst (PopCountL src));
5310   effect(KILL cr, TEMP tmp, TEMP dst);
5311 
5312   format %{ "POPCNT $dst, $src.lo\n\t"
5313             "POPCNT $tmp, $src.hi\n\t"
5314             "ADD    $dst, $tmp" %}
5315   ins_encode %{
         Register total    = $dst$$Register;
         Register hi_count = $tmp$$Register;
         Register src_lo   = $src$$Register;
5316     __ popcntl(total, src_lo);
5317     __ popcntl(hi_count, HIGH_FROM_LOW(src_lo));
5318     __ addl(total, hi_count);
5319   %}
5320   ins_pipe(ialu_reg);
5321 %}
5322 
5323 // PopCountL with the LoadL folded in. Note: Long.bitCount(long) returns an
     // int; the two 32-bit halves at mem and mem+4 are counted separately and summed.
5324 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5325   predicate(UsePopCountInstruction);
5326   match(Set dst (PopCountL (LoadL mem)));
5327   effect(KILL cr, TEMP tmp, TEMP dst);
5328 
5329   format %{ "POPCNT $dst, $mem\n\t"
5330             "POPCNT $tmp, $mem+4\n\t"
5331             "ADD    $dst, $tmp" %}
5332   ins_encode %{
         // The two word addresses are built by hand from the operand parts so
         // that the second one can carry the +4 displacement.
5335     Address low_word  = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5336     Address high_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
         Register total    = $dst$$Register;
         Register hi_count = $tmp$$Register;
         __ popcntl(total, low_word);
         __ popcntl(hi_count, high_word);
5337     __ addl(total, hi_count);
5338   %}
5339   ins_pipe(ialu_reg);
5340 %}
5341 
5342 
5343 //----------Load/Store/Move Instructions---------------------------------------
5344 //----------Load Instructions--------------------------------------------------
5345 // Load Byte (8-bit signed): LoadB from memory, sign-extended into an int register.
5346 instruct loadB(xRegI dst, memory mem) %{
5347   match(Set dst (LoadB mem));
5348 
5349   ins_cost(125);
5350   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5351 
5352   ins_encode %{
5353     Register target = $dst$$Register;
         Address  source = $mem$$Address;
         __ movsbl(target, source);
5354   %}
5355 
5356   ins_pipe(ialu_reg_mem);
5357 %}
5358 
5359 // Load Byte (8-bit signed) and widen it to a long held in a lo/hi register pair.
5360 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5361   match(Set dst (ConvI2L (LoadB mem)));
5362   effect(KILL cr);
5363 
5364   ins_cost(375);
5365   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5366             "MOV    $dst.hi,$dst.lo\n\t"
5367             "SAR    $dst.hi,7" %}
5368 
5369   ins_encode %{
         Register lo = $dst$$Register;
         Register hi = HIGH_FROM_LOW(lo);  // always a different register from lo
5370     __ movsbl(lo, $mem$$Address);
5371     __ movl(hi, lo);
         // The top 24+1 bits of lo are already sign bits, so shifting the copy
         // right by 7 fills hi with the sign.
5372     __ sarl(hi, 7);
5373   %}
5374 
5375   ins_pipe(ialu_reg_mem);
5376 %}
5377 
5378 // Load Unsigned Byte (8-bit unsigned): LoadUB from memory, zero-extended to int.
5379 instruct loadUB(xRegI dst, memory mem) %{
5380   match(Set dst (LoadUB mem));
5381 
5382   ins_cost(125);
5383   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5384 
5385   ins_encode %{
5386     Register target = $dst$$Register;
         Address  source = $mem$$Address;
         __ movzbl(target, source);
5387   %}
5388 
5389   ins_pipe(ialu_reg_mem);
5390 %}
5391 
5392 // Load Unsigned Byte (8-bit unsigned) into a long register pair: the low
     // half gets the zero-extended byte, the high half is cleared.
5393 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5394   match(Set dst (ConvI2L (LoadUB mem)));
5395   effect(KILL cr);
5396 
5397   ins_cost(250);
5398   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5399             "XOR    $dst.hi,$dst.hi" %}
5400 
5401   ins_encode %{
5402     Register lo = $dst$$Register;
         Register hi = HIGH_FROM_LOW(lo);
5403     __ movzbl(lo, $mem$$Address);
5404     __ xorl(hi, hi);
5405   %}
5406 
5407   ins_pipe(ialu_reg_mem);
5408 %}
5409 
5410 // Load Unsigned Byte (8-bit unsigned), AND it with an immI8 mask, and widen
     // the result to a long register pair. The high half is always zero.
5411 instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
5412   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5413   effect(KILL cr);
5414 
5415   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
5416             "XOR    $dst.hi,$dst.hi\n\t"
5417             "AND    $dst.lo,$mask" %}
5418   ins_encode %{
5419     Register lo = $dst$$Register;
         Register hi = HIGH_FROM_LOW(lo);
5420     __ movzbl(lo, $mem$$Address);
5421     __ xorl(hi, hi);
5422     __ andl(lo, $mask$$constant);
5423   %}
5424   ins_pipe(ialu_reg_mem);
5425 %}
5426 
5427 // Load Short (16-bit signed): LoadS from memory, sign-extended into an int register.
5428 instruct loadS(rRegI dst, memory mem) %{
5429   match(Set dst (LoadS mem));
5430 
5431   ins_cost(125);
5432   format %{ "MOVSX  $dst,$mem\t# short" %}
5433 
5434   ins_encode %{
5435     Register target = $dst$$Register;
         Address  source = $mem$$Address;
         __ movswl(target, source);
5436   %}
5437 
5438   ins_pipe(ialu_reg_mem);
5439 %}
5440 
5441 // Load Short (16-bit signed) narrowed to Byte (8-bit signed). The matched
     // shift-left-24 / shift-right-24 pair keeps only the sign-extended low
     // byte, so a single sign-extending byte load from the same address is emitted.
5442 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5443   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5444 
5445   ins_cost(125);
5446   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5447   ins_encode %{
5448     Register target = $dst$$Register;
         Address  source = $mem$$Address;
         __ movsbl(target, source);
5449   %}
5450   ins_pipe(ialu_reg_mem);
5451 %}
5452 
5453 // Load Short (16-bit signed) and widen it to a long held in a lo/hi register pair.
5454 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5455   match(Set dst (ConvI2L (LoadS mem)));
5456   effect(KILL cr);
5457 
5458   ins_cost(375);
5459   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5460             "MOV    $dst.hi,$dst.lo\n\t"
5461             "SAR    $dst.hi,15" %}
5462 
5463   ins_encode %{
         Register lo = $dst$$Register;
         Register hi = HIGH_FROM_LOW(lo);  // always a different register from lo
5464     __ movswl(lo, $mem$$Address);
5465     __ movl(hi, lo);
         // The top 16+1 bits of lo are already sign bits, so shifting the copy
         // right by 15 fills hi with the sign.
5466     __ sarl(hi, 15);
5467   %}
5468 
5469   ins_pipe(ialu_reg_mem);
5470 %}
5471 
5472 // Load Unsigned Short/Char (16-bit unsigned): LoadUS from memory, zero-extended to int.
5473 instruct loadUS(rRegI dst, memory mem) %{
5474   match(Set dst (LoadUS mem));
5475 
5476   ins_cost(125);
5477   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5478 
5479   ins_encode %{
5480     Register target = $dst$$Register;
         Address  source = $mem$$Address;
         __ movzwl(target, source);
5481   %}
5482 
5483   ins_pipe(ialu_reg_mem);
5484 %}
5485 
5486 // Load Unsigned Short/Char (16-bit unsigned) narrowed to Byte (8-bit signed).
     // The shift-left-24 / shift-right-24 pair discards everything but the low
     // byte, so a sign-extending byte load from the same address is emitted.
5487 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5488   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5489 
5490   ins_cost(125);
5491   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5492   ins_encode %{
5493     Register target = $dst$$Register;
         Address  source = $mem$$Address;
         __ movsbl(target, source);
5494   %}
5495   ins_pipe(ialu_reg_mem);
5496 %}
5497 
5498 // Load Unsigned Short/Char (16-bit unsigned) into a long register pair: the
     // low half gets the zero-extended value, the high half is cleared.
5499 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5500   match(Set dst (ConvI2L (LoadUS mem)));
5501   effect(KILL cr);
5502 
5503   ins_cost(250);
5504   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5505             "XOR    $dst.hi,$dst.hi" %}
5506 
5507   ins_encode %{
         Register lo = $dst$$Register;
         Register hi = HIGH_FROM_LOW(lo);
5508     __ movzwl(lo, $mem$$Address);
5509     __ xorl(hi, hi);
5510   %}
5511 
5512   ins_pipe(ialu_reg_mem);
5513 %}
5514 
5515 // Load Unsigned Short/Char (16-bit unsigned) masked with 0xFF into a long
     // register pair. The mask is folded into the load: only the low byte is
     // read (zero-extended), and the high half is cleared.
5516 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5517   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5518   effect(KILL cr);
5519 
5520   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5521             "XOR    $dst.hi,$dst.hi" %}
5522   ins_encode %{
5523     Register lo = $dst$$Register;
         Register hi = HIGH_FROM_LOW(lo);
5524     __ movzbl(lo, $mem$$Address);
5525     __ xorl(hi, hi);
5526   %}
5527   ins_pipe(ialu_reg_mem);
5528 %}
5529 
5530 // Load Unsigned Short/Char (16-bit unsigned), AND it with an immI16 mask, and
     // widen the result to a long register pair. The high half is always zero.
5531 instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
5532   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5533   effect(KILL cr);
5534 
5535   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
5536             "XOR    $dst.hi,$dst.hi\n\t"
5537             "AND    $dst.lo,$mask" %}
5538   ins_encode %{
5539     Register lo = $dst$$Register;
         Register hi = HIGH_FROM_LOW(lo);
5540     __ movzwl(lo, $mem$$Address);
5541     __ xorl(hi, hi);
5542     __ andl(lo, $mask$$constant);
5543   %}
5544   ins_pipe(ialu_reg_mem);
5545 %}
5546 
5547 // Load Integer: LoadI from memory into an int register.
5548 instruct loadI(rRegI dst, memory mem) %{
5549   match(Set dst (LoadI mem));
5550 
5551   ins_cost(125);
5552   format %{ "MOV    $dst,$mem\t# int" %}
5553 
5554   ins_encode %{
5555     Register target = $dst$$Register;
         Address  source = $mem$$Address;
         __ movl(target, source);
5556   %}
5557 
5558   ins_pipe(ialu_reg_mem);
5559 %}
5560 
5561 // Load Integer (32-bit signed) narrowed to Byte (8-bit signed). The
     // shift-left-24 / shift-right-24 pair keeps only the sign-extended low
     // byte, so a sign-extending byte load from the same address is emitted.
5562 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5563   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5564 
5565   ins_cost(125);
5566   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5567   ins_encode %{
5568     Register target = $dst$$Register;
         Address  source = $mem$$Address;
         __ movsbl(target, source);
5569   %}
5570   ins_pipe(ialu_reg_mem);
5571 %}
5572 
5573 // Load Integer (32-bit signed) narrowed to Unsigned Byte (8-bit unsigned).
     // The AND with 255 is folded into a zero-extending byte load.
5574 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5575   match(Set dst (AndI (LoadI mem) mask));
5576 
5577   ins_cost(125);
5578   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5579   ins_encode %{
5580     Register target = $dst$$Register;
         Address  source = $mem$$Address;
         __ movzbl(target, source);
5581   %}
5582   ins_pipe(ialu_reg_mem);
5583 %}
5584 
5585 // Load Integer (32-bit signed) narrowed to Short (16-bit signed). The
     // shift-left-16 / shift-right-16 pair keeps only the sign-extended low
     // half-word, so a sign-extending 16-bit load from the same address is emitted.
5586 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5587   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5588 
5589   ins_cost(125);
5590   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5591   ins_encode %{
5592     Register target = $dst$$Register;
         Address  source = $mem$$Address;
         __ movswl(target, source);
5593   %}
5594   ins_pipe(ialu_reg_mem);
5595 %}
5596 
5597 // Load Integer (32-bit signed) narrowed to Unsigned Short/Char (16-bit
     // unsigned). The AND with 65535 is folded into a zero-extending 16-bit load.
5598 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5599   match(Set dst (AndI (LoadI mem) mask));
5600 
5601   ins_cost(125);
5602   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5603   ins_encode %{
5604     Register target = $dst$$Register;
         Address  source = $mem$$Address;
         __ movzwl(target, source);
5605   %}
5606   ins_pipe(ialu_reg_mem);
5607 %}
5608 
5609 // Load Integer and sign-extend it into a long held in a lo/hi register pair.
5610 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5611   match(Set dst (ConvI2L (LoadI mem)));
5612   effect(KILL cr);
5613 
5614   ins_cost(375);
5615   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5616             "MOV    $dst.hi,$dst.lo\n\t"
5617             "SAR    $dst.hi,31" %}
5618 
5619   ins_encode %{
         Register lo = $dst$$Register;
         Register hi = HIGH_FROM_LOW(lo);  // always a different register from lo
5620     __ movl(lo, $mem$$Address);
5621     __ movl(hi, lo);
         // Arithmetic shift by 31 replicates the sign bit across all of hi.
5622     __ sarl(hi, 31);
5623   %}
5624 
5625   ins_pipe(ialu_reg_mem);
5626 %}
5627 
5628 // Load Integer masked with 0xFF into a long register pair. The mask is
     // folded into a zero-extending byte load; the high half is cleared.
5629 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5630   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5631   effect(KILL cr);
5632 
5633   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5634             "XOR    $dst.hi,$dst.hi" %}
5635   ins_encode %{
5636     Register lo = $dst$$Register;
         Register hi = HIGH_FROM_LOW(lo);
5637     __ movzbl(lo, $mem$$Address);
5638     __ xorl(hi, hi);
5639   %}
5640   ins_pipe(ialu_reg_mem);
5641 %}
5642 
5643 // Load Integer masked with 0xFFFF into a long register pair. The mask is
     // folded into a zero-extending 16-bit load; the high half is cleared.
5644 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5645   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5646   effect(KILL cr);
5647 
5648   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5649             "XOR    $dst.hi,$dst.hi" %}
5650   ins_encode %{
5651     Register lo = $dst$$Register;
         Register hi = HIGH_FROM_LOW(lo);
5652     __ movzwl(lo, $mem$$Address);
5653     __ xorl(hi, hi);
5654   %}
5655   ins_pipe(ialu_reg_mem);
5656 %}
5657 
5658 // Load Integer, AND it with a 31-bit mask (immU31), and widen to a long
     // register pair. The high half is simply cleared rather than sign-extended.
5659 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5660   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5661   effect(KILL cr);
5662 
5663   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5664             "XOR    $dst.hi,$dst.hi\n\t"
5665             "AND    $dst.lo,$mask" %}
5666   ins_encode %{
5667     Register lo = $dst$$Register;
         Register hi = HIGH_FROM_LOW(lo);
5668     __ movl(lo, $mem$$Address);
5669     __ xorl(hi, hi);
5670     __ andl(lo, $mask$$constant);
5671   %}
5672   ins_pipe(ialu_reg_mem);
5673 %}
5674 
5675 // Load Unsigned Integer into a long register pair: matches a LoadI that is
     // widened and then masked with immL_32bits, and emits a plain 32-bit load
     // into the low half plus a cleared high half.
5676 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5677   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5678   effect(KILL cr);
5679 
5680   ins_cost(250);
5681   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5682             "XOR    $dst.hi,$dst.hi" %}
5683 
5684   ins_encode %{
         Register lo = $dst$$Register;
         Register hi = HIGH_FROM_LOW(lo);
5685     __ movl(lo, $mem$$Address);
5686     __ xorl(hi, hi);
5687   %}
5688 
5689   ins_pipe(ialu_reg_mem);
5690 %}
5691 
5692 // Load Long (non-atomic) as two 32-bit moves. The first move must not
5693 // clobber the address, so the address register is restricted to ESI.
5694 instruct loadL(eRegL dst, load_long_memory mem) %{
5695   predicate(!((LoadLNode*)n)->require_atomic_access());
5696   match(Set dst (LoadL mem));
5697 
5698   ins_cost(250);
5699   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5700             "MOV    $dst.hi,$mem+4" %}
5701 
5702   ins_encode %{
         Register lo = $dst$$Register;
         Register hi = HIGH_FROM_LOW(lo);
5703     Address low_word  = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5704     Address high_word = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
         // Low half first, then the high half from displacement + 4.
5705     __ movl(lo, low_word);
5706     __ movl(hi, high_word);
5707   %}
5708 
5709   ins_pipe(ialu_reg_long_mem);
5710 %}
5711 
5712 // Volatile Load Long for UseSSE<=1.  The load must be atomic, so it is done
5713 // as a single 64-bit FILD, then stored down to the stack slot dst (FISTp) so
5714 // that it can be reloaded on the int side.
     // Applies only when the LoadL node requires atomic access.
     // NOTE(review): enc_loadL_volatile is an enc_class defined elsewhere in this
     // file; the FILD/FISTp description above comes from the format string.
5715 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5716   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5717   match(Set dst (LoadL mem));
5718 
5719   ins_cost(200);
5720   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5721             "FISTp  $dst" %}
5722   ins_encode(enc_loadL_volatile(mem,dst));
5723   ins_pipe( fpu_reg_mem );
5724 %}
5725 
5726 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
       // Atomic long load for UseSSE>=2 with a stack-slot destination: one
       // 64-bit move into an XMM temp, then one 64-bit move to the slot at
       // rsp + the dst displacement.
5727   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5728   match(Set dst (LoadL mem));
5729   effect(TEMP tmp);
5730   ins_cost(180);
5731   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5732             "MOVSD  $dst,$tmp" %}
5733   ins_encode %{
         XMMRegister scratch = $tmp$$XMMRegister;
         Address stack_slot(rsp, $dst$$disp);
5734     __ movdbl(scratch, $mem$$Address);
5735     __ movdbl(stack_slot, scratch);
5736   %}
5737   ins_pipe( pipe_slow );
5738 %}
5739 
5740 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
       // Atomic long load for UseSSE>=2 with a register-pair destination: one
       // 64-bit move into an XMM temp, which is then split into the low and
       // high 32-bit halves. Note that tmp is shifted in place.
5741   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5742   match(Set dst (LoadL mem));
5743   effect(TEMP tmp);
5744   ins_cost(160);
5745   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5746             "MOVD   $dst.lo,$tmp\n\t"
5747             "PSRLQ  $tmp,32\n\t"
5748             "MOVD   $dst.hi,$tmp" %}
5749   ins_encode %{
         XMMRegister scratch = $tmp$$XMMRegister;
         Register lo = $dst$$Register;
         Register hi = HIGH_FROM_LOW(lo);
5750     __ movdbl(scratch, $mem$$Address);
5751     __ movdl(lo, scratch);
         // Bring the upper 32 bits down so the second MOVD can extract them.
5752     __ psrlq(scratch, 32);
5753     __ movdl(hi, scratch);
5754   %}
5755   ins_pipe( pipe_slow );
5756 %}
5757 
5758 // Load Range: matches LoadRange and loads the value from memory into an int
     // register with a plain MOV (primary opcode 0x8B, as shown in the format).
     // NOTE(review): OpcP and RegMem are enc_classes defined elsewhere in this
     // file; presumably they emit the primary opcode byte and the register/memory
     // operand bytes respectively.
5759 instruct loadRange(rRegI dst, memory mem) %{
5760   match(Set dst (LoadRange mem));
5761 
5762   ins_cost(125);
5763   format %{ "MOV    $dst,$mem" %}
5764   opcode(0x8B);
5765   ins_encode( OpcP, RegMem(dst,mem));
5766   ins_pipe( ialu_reg_mem );
5767 %}
5768 
5769 
5770 // Load Pointer: matches LoadP and loads the value from memory into a pointer
     // register with a plain MOV (primary opcode 0x8B, as shown in the format).
     // NOTE(review): OpcP and RegMem are enc_classes defined elsewhere in this file.
5771 instruct loadP(eRegP dst, memory mem) %{
5772   match(Set dst (LoadP mem));
5773 
5774   ins_cost(125);
5775   format %{ "MOV    $dst,$mem" %}
5776   opcode(0x8B);
5777   ins_encode( OpcP, RegMem(dst,mem));
5778   ins_pipe( ialu_reg_mem );
5779 %}
5780 
5781 // Load Klass Pointer: matches LoadKlass and loads the value from memory into
     // a pointer register. Same MOV encoding (opcode 0x8B) as the plain pointer load.
     // NOTE(review): OpcP and RegMem are enc_classes defined elsewhere in this file.
5782 instruct loadKlass(eRegP dst, memory mem) %{
5783   match(Set dst (LoadKlass mem));
5784 
5785   ins_cost(125);
5786   format %{ "MOV    $dst,$mem" %}
5787   opcode(0x8B);
5788   ins_encode( OpcP, RegMem(dst,mem));
5789   ins_pipe( ialu_reg_mem );
5790 %}
5791 
5792 // Load Double into an x87 register (regDPR); used when UseSSE<=1. Per the
     // format, the value is pushed onto the FPU stack with FLD_D and then popped
     // into dst with FSTP.
     // NOTE(review): RMopc_Mem and Pop_Reg_DPR are enc_classes defined elsewhere
     // in this file; the 0x00 argument presumably selects the /0 opcode extension
     // named in the opcode comment.
5793 instruct loadDPR(regDPR dst, memory mem) %{
5794   predicate(UseSSE<=1);
5795   match(Set dst (LoadD mem));
5796 
5797   ins_cost(150);
5798   format %{ "FLD_D  ST,$mem\n\t"
5799             "FSTP   $dst" %}
5800   opcode(0xDD);               /* DD /0 */
5801   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5802               Pop_Reg_DPR(dst) );
5803   ins_pipe( fpu_reg_mem );
5804 %}
5805 
5806 // Load Double into an XMM register; used when UseSSE>=2 and
     // UseXmmLoadAndClearUpper is set.
5807 instruct loadD(regD dst, memory mem) %{
5808   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5809   match(Set dst (LoadD mem));
5810   ins_cost(145);
5811   format %{ "MOVSD  $dst,$mem" %}
5812   ins_encode %{
5813     XMMRegister target = $dst$$XMMRegister;
         Address     source = $mem$$Address;
         __ movdbl(target, source);
5814   %}
5815   ins_pipe( pipe_slow );
5816 %}
5817 
5818 instruct loadD_partial(regD dst, memory mem) %{
       // Load Double into an XMM register when UseSSE>=2 and
       // UseXmmLoadAndClearUpper is off. The encoding goes through the same
       // movdbl helper as the clear-upper variant; the format shows MOVLPD.
       // NOTE(review): presumably movdbl picks the actual instruction from the
       // same flag - confirm in the macro assembler.
5819   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5820   match(Set dst (LoadD mem));
5821   ins_cost(145);
5822   format %{ "MOVLPD $dst,$mem" %}
5823   ins_encode %{
5824     XMMRegister target = $dst$$XMMRegister;
         Address     source = $mem$$Address;
         __ movdbl(target, source);
5825   %}
5826   ins_pipe( pipe_slow );
5827 %}
5828 
5829 // Load Float into an XMM register (single-precision floating point),
5830 // shown as MOVSS in the format; used when UseSSE>=1.
5831 instruct loadF(regF dst, memory mem) %{
5832   predicate(UseSSE>=1);
5833   match(Set dst (LoadF mem));
5834   ins_cost(145);
5835   format %{ "MOVSS  $dst,$mem" %}
5836   ins_encode %{
5837     XMMRegister target = $dst$$XMMRegister;
         Address     source = $mem$$Address;
         __ movflt(target, source);
5838   %}
5839   ins_pipe( pipe_slow );
5840 %}
5841 
5842 // Load Float into an x87 register (regFPR); used only when UseSSE==0. Per the
     // format, the value is pushed onto the FPU stack with FLD_S and then popped
     // into dst with FSTP.
     // NOTE(review): RMopc_Mem and Pop_Reg_FPR are enc_classes defined elsewhere
     // in this file; the 0x00 argument presumably selects the /0 opcode extension
     // named in the opcode comment.
5843 instruct loadFPR(regFPR dst, memory mem) %{
5844   predicate(UseSSE==0);
5845   match(Set dst (LoadF mem));
5846 
5847   ins_cost(150);
5848   format %{ "FLD_S  ST,$mem\n\t"
5849             "FSTP   $dst" %}
5850   opcode(0xD9);               /* D9 /0 */
5851   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5852               Pop_Reg_FPR(dst) );
5853   ins_pipe( fpu_reg_mem );
5854 %}
5855 
5856 // Load Effective Address for the indOffset8 addressing operand: the address
     // expression itself is the matched value, and it is materialized into a
     // pointer register with LEA (primary opcode 0x8D).
     // NOTE(review): OpcP and RegMem are enc_classes defined elsewhere in this file.
5857 instruct leaP8(eRegP dst, indOffset8 mem) %{
5858   match(Set dst mem);
5859 
5860   ins_cost(110);
5861   format %{ "LEA    $dst,$mem" %}
5862   opcode(0x8D);
5863   ins_encode( OpcP, RegMem(dst,mem));
5864   ins_pipe( ialu_reg_reg_fat );
5865 %}
5866 
5867 instruct leaP32(eRegP dst, indOffset32 mem) %{
       // Load Effective Address for the indOffset32 addressing operand, emitted
       // as LEA (primary opcode 0x8D) into a pointer register.
5868   match(Set dst mem);
5869 
5870   ins_cost(110);
5871   format %{ "LEA    $dst,$mem" %}
5872   opcode(0x8D);
5873   ins_encode( OpcP, RegMem(dst,mem));
5874   ins_pipe( ialu_reg_reg_fat );
5875 %}
5876 
5877 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
       // Load Effective Address for the indIndexOffset addressing operand,
       // emitted as LEA (primary opcode 0x8D) into a pointer register.
5878   match(Set dst mem);
5879 
5880   ins_cost(110);
5881   format %{ "LEA    $dst,$mem" %}
5882   opcode(0x8D);
5883   ins_encode( OpcP, RegMem(dst,mem));
5884   ins_pipe( ialu_reg_reg_fat );
5885 %}
5886 
5887 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
       // Load Effective Address for the indIndexScale addressing operand,
       // emitted as LEA (primary opcode 0x8D) into a pointer register.
5888   match(Set dst mem);
5889 
5890   ins_cost(110);
5891   format %{ "LEA    $dst,$mem" %}
5892   opcode(0x8D);
5893   ins_encode( OpcP, RegMem(dst,mem));
5894   ins_pipe( ialu_reg_reg_fat );
5895 %}
5896 
5897 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
       // Load Effective Address for the indIndexScaleOffset addressing operand,
       // emitted as LEA (primary opcode 0x8D) into a pointer register.
5898   match(Set dst mem);
5899 
5900   ins_cost(110);
5901   format %{ "LEA    $dst,$mem" %}
5902   opcode(0x8D);
5903   ins_encode( OpcP, RegMem(dst,mem));
5904   ins_pipe( ialu_reg_reg_fat );
5905 %}
5906 
5907 // Load Constant: materializes an int immediate (immI) into an int register,
     // shown as a MOV in the format.
     // NOTE(review): LdImmI is an enc_class defined elsewhere in this file.
5908 instruct loadConI(rRegI dst, immI src) %{
5909   match(Set dst src);
5910 
5911   format %{ "MOV    $dst,$src" %}
5912   ins_encode( LdImmI(dst, src) );
5913   ins_pipe( ialu_reg_fat );
5914 %}
5915 
5916 // Load Constant zero: clears the register by XORing it with itself instead
     // of moving an immediate. XOR modifies the flags, hence KILL cr. The explicit
     // ins_cost(50) is stated only on this rule, not on the general int constant load.
5917 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5918   match(Set dst src);
5919   effect(KILL cr);
5920 
5921   ins_cost(50);
5922   format %{ "XOR    $dst,$dst" %}
5923   opcode(0x33);  /* 33 /r: XOR r32, r/m32 */
5924   ins_encode( OpcP, RegReg( dst, dst ) );
5925   ins_pipe( ialu_reg );
5926 %}
5927 
5928 instruct loadConP(eRegP dst, immP src) %{
       // Load a pointer constant (immP) into a pointer register, shown as a MOV
       // in the format.
       // NOTE(review): LdImmP is an enc_class defined elsewhere in this file;
       // whether it consumes the 0xB8 opcode declared below is not visible here.
5929   match(Set dst src);
5930 
5931   format %{ "MOV    $dst,$src" %}
5932   opcode(0xB8);  /* + rd */
5933   ins_encode( LdImmP(dst, src) );
5934   ins_pipe( ialu_reg_fat );
5935 %}
5936 
5937 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
       // Load a long constant (immL) into a lo/hi register pair, one half at a
       // time as shown in the format. Flags are declared killed.
       // NOTE(review): LdImmL_Lo and LdImmL_Hi are enc_classes defined elsewhere
       // in this file; the reason for KILL cr is presumably found there.
5938   match(Set dst src);
5939   effect(KILL cr);
5940   ins_cost(200);
5941   format %{ "MOV    $dst.lo,$src.lo\n\t"
5942             "MOV    $dst.hi,$src.hi" %}
5943   opcode(0xB8);
5944   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5945   ins_pipe( ialu_reg_long_fat );
5946 %}
5947 
5948 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
       // Load the long constant zero: both halves of the register pair are
       // cleared with XOR (opcode 0x33 for each half). XOR modifies the flags,
       // hence KILL cr. Lower stated cost (150) than the general long constant
       // load (200).
       // NOTE(review): RegReg_Lo and RegReg_Hi are enc_classes defined elsewhere
       // in this file.
5949   match(Set dst src);
5950   effect(KILL cr);
5951   ins_cost(150);
5952   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5953             "XOR    $dst.hi,$dst.hi" %}
5954   opcode(0x33,0x33);
5955   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5956   ins_pipe( ialu_reg_long );
5957 %}
5958 
5959 // Load a float constant from the constant table into an x87 register.
     // Whether this rule may be used is decided by the predicate on operand immFPR().
5960 instruct loadConFPR(regFPR dst, immFPR con) %{
5961   match(Set dst con);
5962   ins_cost(125);
5963   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
5964             "FSTP   $dst" %}
5965   ins_encode %{
         const int target_fpr = $dst$$reg;
         // Push the constant, then pop it into the destination register.
5966     __ fld_s($constantaddress($con));
5967     __ fstp_d(target_fpr);
5968   %}
5969   ins_pipe(fpu_reg_con);
5970 %}
5971 
5972 // Load the float constant matched by immFPR0 into an x87 register using FLDZ.
     // Whether this rule may be used is decided by the predicate on operand immFPR0().
5973 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
5974   match(Set dst con);
5975   ins_cost(125);
5976   format %{ "FLDZ   ST\n\t"
5977             "FSTP   $dst" %}
5978   ins_encode %{
         const int target_fpr = $dst$$reg;
         // Push the constant, then pop it into the destination register.
5979     __ fldz();
5980     __ fstp_d(target_fpr);
5981   %}
5982   ins_pipe(fpu_reg_con);
5983 %}
5984 
5985 // Load the float constant matched by immFPR1 into an x87 register using FLD1.
     // Whether this rule may be used is decided by the predicate on operand immFPR1().
5986 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
5987   match(Set dst con);
5988   ins_cost(125);
5989   format %{ "FLD1   ST\n\t"
5990             "FSTP   $dst" %}
5991   ins_encode %{
         const int target_fpr = $dst$$reg;
         // Push the constant, then pop it into the destination register.
5992     __ fld1();
5993     __ fstp_d(target_fpr);
5994   %}
5995   ins_pipe(fpu_reg_con);
5996 %}
5997 
5998 // Load a float constant from the constant table into an XMM register.
     // Whether this rule may be used is decided by the predicate on operand immF().
5999 instruct loadConF(regF dst, immF con) %{
6000   match(Set dst con);
6001   ins_cost(125);
6002   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6003   ins_encode %{
         XMMRegister target = $dst$$XMMRegister;
6004     __ movflt(target, $constantaddress($con));
6005   %}
6006   ins_pipe(pipe_slow);
6007 %}
6008 
6009 // Load float 0.0 into an XMM register by XORing the register with itself.
     // Whether this rule may be used is decided by the predicate on operand immF0().
6010 instruct loadConF0(regF dst, immF0 src) %{
6011   match(Set dst src);
6012   ins_cost(100);
6013   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6014   ins_encode %{
6015     XMMRegister target = $dst$$XMMRegister;
         __ xorps(target, target);
6016   %}
6017   ins_pipe(pipe_slow);
6018 %}
6019 
6020 // Load a double constant from the constant table into an x87 register.
     // Whether this rule may be used is decided by the predicate on operand immDPR().
6021 instruct loadConDPR(regDPR dst, immDPR con) %{
6022   match(Set dst con);
6023   ins_cost(125);
6024 
6025   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6026             "FSTP   $dst" %}
6027   ins_encode %{
         const int target_fpr = $dst$$reg;
         // Push the constant, then pop it into the destination register.
6028     __ fld_d($constantaddress($con));
6029     __ fstp_d(target_fpr);
6030   %}
6031   ins_pipe(fpu_reg_con);
6032 %}
6033 
6034 // Load the double constant matched by immDPR0 into an x87 register using FLDZ.
     // Whether this rule may be used is decided by the predicate on operand immDPR0().
6035 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6036   match(Set dst con);
6037   ins_cost(125);
6038 
6039   format %{ "FLDZ   ST\n\t"
6040             "FSTP   $dst" %}
6041   ins_encode %{
         const int target_fpr = $dst$$reg;
         // Push the constant, then pop it into the destination register.
6042     __ fldz();
6043     __ fstp_d(target_fpr);
6044   %}
6045   ins_pipe(fpu_reg_con);
6046 %}
6047 
6048 // Load the double constant matched by immDPR1 into an x87 register using FLD1.
     // Whether this rule may be used is decided by the predicate on operand immDPR1().
6049 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6050   match(Set dst con);
6051   ins_cost(125);
6052 
6053   format %{ "FLD1   ST\n\t"
6054             "FSTP   $dst" %}
6055   ins_encode %{
         const int target_fpr = $dst$$reg;
         // Push the constant, then pop it into the destination register.
6056     __ fld1();
6057     __ fstp_d(target_fpr);
6058   %}
6059   ins_pipe(fpu_reg_con);
6060 %}
6061 
6062 // Load a double constant from the constant table into an XMM register.
     // Whether this rule may be used is decided by the predicate on operand immD().
6063 instruct loadConD(regD dst, immD con) %{
6064   match(Set dst con);
6065   ins_cost(125);
6066   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6067   ins_encode %{
         XMMRegister target = $dst$$XMMRegister;
6068     __ movdbl(target, $constantaddress($con));
6069   %}
6070   ins_pipe(pipe_slow);
6071 %}
6072 
6073 // Load double 0.0 into an XMM register by XORing the register with itself.
     // Whether this rule may be used is decided by the predicate on operand immD0().
6074 instruct loadConD0(regD dst, immD0 src) %{
6075   match(Set dst src);
6076   ins_cost(100);
6077   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6078   ins_encode %{
6079     XMMRegister target = $dst$$XMMRegister;
         __ xorpd(target, target);
6080   %}
6081   ins_pipe( pipe_slow );
6082 %}
6083 
6084 // Load Stack Slot (int): moves an int from a stack slot operand into an int
     // register with a plain MOV (primary opcode 0x8B).
     // NOTE(review): OpcP and RegMem are enc_classes defined elsewhere in this file.
6085 instruct loadSSI(rRegI dst, stackSlotI src) %{
6086   match(Set dst src);
6087   ins_cost(125);
6088 
6089   format %{ "MOV    $dst,$src" %}
6090   opcode(0x8B);
6091   ins_encode( OpcP, RegMem(dst,src));
6092   ins_pipe( ialu_reg_mem );
6093 %}
6094 
6095 instruct loadSSL(eRegL dst, stackSlotL src) %{
       // Load Stack Slot (long): moves a long from a stack slot operand into a
       // lo/hi register pair using two MOVs (opcode 0x8B for each half).
       // The format names the destination halves as dst.lo / dst.hi and the
       // source words as src / src+4, matching the notation used by the plain
       // long load; dst is the register pair and src is the memory operand.
       // NOTE(review): OpcP, OpcS, RegMem and RegMem_Hi are enc_classes defined
       // elsewhere in this file. The pipe class name reads like the store
       // direction (the plain long load uses ialu_reg_long_mem) - confirm
       // whether that is intended.
6096   match(Set dst src);
6097 
6098   ins_cost(200);
6099   format %{ "MOV    $dst.lo,$src\n\t"
6100             "MOV    $dst.hi,$src+4" %}
6101   opcode(0x8B, 0x8B);
6102   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6103   ins_pipe( ialu_mem_long_reg );
6104 %}
6105 
6106 // Load Stack Slot (pointer): moves a pointer from a stack slot operand into
     // a pointer register with a plain MOV (primary opcode 0x8B).
     // NOTE(review): OpcP and RegMem are enc_classes defined elsewhere in this file.
6107 instruct loadSSP(eRegP dst, stackSlotP src) %{
6108   match(Set dst src);
6109   ins_cost(125);
6110 
6111   format %{ "MOV    $dst,$src" %}
6112   opcode(0x8B);
6113   ins_encode( OpcP, RegMem(dst,src));
6114   ins_pipe( ialu_reg_mem );
6115 %}
6116 
6117 // Load Stack Slot (float): moves a float from a stack slot operand into an
     // x87 register. Per the format, the value is pushed with FLD_S and popped
     // into dst with FSTP.
     // NOTE(review): RMopc_Mem_no_oop and Pop_Reg_FPR are enc_classes defined
     // elsewhere in this file.
6118 instruct loadSSF(regFPR dst, stackSlotF src) %{
6119   match(Set dst src);
6120   ins_cost(125);
6121 
6122   format %{ "FLD_S  $src\n\t"
6123             "FSTP   $dst" %}
6124   opcode(0xD9);               /* D9 /0, FLD m32real */
6125   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6126               Pop_Reg_FPR(dst) );
6127   ins_pipe( fpu_reg_mem );
6128 %}
6129 
6130 // Load Stack Slot (double): moves a double from a stack slot operand into an
     // x87 register. Per the format, the value is pushed with FLD_D and popped
     // into dst with FSTP.
     // NOTE(review): RMopc_Mem_no_oop and Pop_Reg_DPR are enc_classes defined
     // elsewhere in this file.
6131 instruct loadSSD(regDPR dst, stackSlotD src) %{
6132   match(Set dst src);
6133   ins_cost(125);
6134 
6135   format %{ "FLD_D  $src\n\t"
6136             "FSTP   $dst" %}
6137   opcode(0xDD);               /* DD /0, FLD m64real */
6138   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6139               Pop_Reg_DPR(dst) );
6140   ins_pipe( fpu_reg_mem );
6141 %}
6142 
6143 // Prefetch instructions.
6144 // Must be safe to execute with invalid address (cannot fault).
6145 
6146 instruct prefetchr0( memory mem ) %{
       // PrefetchRead placeholder for the case UseSSE==0 without 3DNow! prefetch
       // support: the node is matched but nothing is emitted (empty encoding,
       // declared size 0, cost 0).
6147   predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
6148   match(PrefetchRead mem);
6149   ins_cost(0);
6150   size(0);
6151   format %{ "PREFETCHR (non-SSE is empty encoding)" %}
6152   ins_encode();
6153   ins_pipe(empty);
6154 %}
6155 
6156 instruct prefetchr( memory mem ) %{
       // PrefetchRead emitted as PREFETCHR. Selected either when UseSSE==0 and
       // 3DNow! prefetch is supported, or whenever ReadPrefetchInstr==3. The
       // parentheses only spell out the existing operator precedence.
6157   predicate((UseSSE==0 && VM_Version::supports_3dnow_prefetch()) || ReadPrefetchInstr==3);
6158   match(PrefetchRead mem);
6159   ins_cost(100);
6160 
6161   format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
6162   ins_encode %{
6163     Address target = $mem$$Address;
         __ prefetchr(target);
6164   %}
6165   ins_pipe(ialu_mem);
6166 %}
6167 
6168 instruct prefetchrNTA( memory mem ) %{
       // PrefetchRead emitted as PREFETCHNTA; chosen when UseSSE>=1 and
       // ReadPrefetchInstr==0.
6169   predicate(UseSSE>=1 && ReadPrefetchInstr==0);
6170   match(PrefetchRead mem);
6171   ins_cost(100);
6172 
6173   format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
6174   ins_encode %{
6175     Address target = $mem$$Address;
         __ prefetchnta(target);
6176   %}
6177   ins_pipe(ialu_mem);
6178 %}
6179 
6180 instruct prefetchrT0( memory mem ) %{
       // PrefetchRead variant chosen when UseSSE>=1 and ReadPrefetchInstr==1;
       // emits prefetcht0 on the memory operand.
6181   predicate(UseSSE>=1 && ReadPrefetchInstr==1);
6182   match(PrefetchRead mem);
6183   ins_cost(100);
6184 
6185   format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
6186   ins_encode %{
6187     __ prefetcht0($mem$$Address);
6188   %}
6189   ins_pipe(ialu_mem);
6190 %}
6191 
6192 instruct prefetchrT2( memory mem ) %{
       // PrefetchRead variant chosen when UseSSE>=1 and ReadPrefetchInstr==2;
       // emits prefetcht2 on the memory operand.
6193   predicate(UseSSE>=1 && ReadPrefetchInstr==2);
6194   match(PrefetchRead mem);
6195   ins_cost(100);
6196 
6197   format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
6198   ins_encode %{
6199     __ prefetcht2($mem$$Address);
6200   %}
6201   ins_pipe(ialu_mem);
6202 %}
6203 
6204 instruct prefetchw0( memory mem ) %{
       // PrefetchWrite placeholder used when UseSSE==0 and
       // VM_Version::supports_3dnow_prefetch() is false: zero cost, zero size
       // and an empty encoding, so no instruction bytes are produced.
6205   predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
6206   match(PrefetchWrite mem);
6207   ins_cost(0);
6208   size(0);
6209   format %{ "Prefetch (non-SSE is empty encoding)" %}
6210   ins_encode();
6211   ins_pipe(empty);
6212 %}
6213 
6214 instruct prefetchw( memory mem ) %{
       // PrefetchWrite for UseSSE==0 when
       // VM_Version::supports_3dnow_prefetch() holds; emits prefetchw on the
       // memory operand.
6215   predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch());
6216   match( PrefetchWrite mem );
6217   ins_cost(100);
6218 
6219   format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
6220   ins_encode %{
6221     __ prefetchw($mem$$Address);
6222   %}
6223   ins_pipe(ialu_mem);
6224 %}
6225 
6226 instruct prefetchwNTA( memory mem ) %{
       // PrefetchWrite for any UseSSE>=1 configuration; the predicate consults
       // no selector flag, and the encoding always emits prefetchnta.
6227   predicate(UseSSE>=1);
6228   match(PrefetchWrite mem);
6229   ins_cost(100);
6230 
6231   format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
6232   ins_encode %{
6233     __ prefetchnta($mem$$Address);
6234   %}
6235   ins_pipe(ialu_mem);
6236 %}
6237 
6238 // Prefetch instructions for allocation.
6239 
6240 instruct prefetchAlloc0( memory mem ) %{
       // PrefetchAllocation placeholder used when UseSSE==0 and
       // AllocatePrefetchInstr!=3: zero cost, zero size and an empty encoding.
6241   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6242   match(PrefetchAllocation mem);
6243   ins_cost(0);
6244   size(0);
6245   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6246   ins_encode();
6247   ins_pipe(empty);
6248 %}
6249 
6250 instruct prefetchAlloc( memory mem ) %{
       // PrefetchAllocation variant chosen when AllocatePrefetchInstr==3
       // (the predicate does not look at UseSSE); emits prefetchw.
6251   predicate(AllocatePrefetchInstr==3);
6252   match( PrefetchAllocation mem );
6253   ins_cost(100);
6254 
6255   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6256   ins_encode %{
6257     __ prefetchw($mem$$Address);
6258   %}
6259   ins_pipe(ialu_mem);
6260 %}
6261 
6262 instruct prefetchAllocNTA( memory mem ) %{
       // PrefetchAllocation variant chosen when UseSSE>=1 and
       // AllocatePrefetchInstr==0; emits prefetchnta.
6263   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6264   match(PrefetchAllocation mem);
6265   ins_cost(100);
6266 
6267   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6268   ins_encode %{
6269     __ prefetchnta($mem$$Address);
6270   %}
6271   ins_pipe(ialu_mem);
6272 %}
6273 
6274 instruct prefetchAllocT0( memory mem ) %{
       // PrefetchAllocation variant chosen when UseSSE>=1 and
       // AllocatePrefetchInstr==1; emits prefetcht0.
6275   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6276   match(PrefetchAllocation mem);
6277   ins_cost(100);
6278 
6279   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6280   ins_encode %{
6281     __ prefetcht0($mem$$Address);
6282   %}
6283   ins_pipe(ialu_mem);
6284 %}
6285 
6286 instruct prefetchAllocT2( memory mem ) %{
       // PrefetchAllocation variant chosen when UseSSE>=1 and
       // AllocatePrefetchInstr==2; emits prefetcht2.
6287   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6288   match(PrefetchAllocation mem);
6289   ins_cost(100);
6290 
6291   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6292   ins_encode %{
6293     __ prefetcht2($mem$$Address);
6294   %}
6295   ins_pipe(ialu_mem);
6296 %}
6297 
6298 //----------Store Instructions-------------------------------------------------
6299 
6300 // Store Byte
6301 instruct storeB(memory mem, xRegI src) %{
       // Store a byte (StoreB) from an xRegI register to memory. Printed as
       // MOV8 and encoded with opcode 0x88 plus RegMem(src, mem).
6302   match(Set mem (StoreB mem src));
6303 
6304   ins_cost(125);
6305   format %{ "MOV8   $mem,$src" %}
6306   opcode(0x88);
6307   ins_encode( OpcP, RegMem( src, mem ) );
6308   ins_pipe( ialu_mem_reg );
6309 %}
6310 
6311 // Store Char/Short
6312 instruct storeC(memory mem, rRegI src) %{
       // Store a 16-bit char/short (StoreC) from an integer register to memory.
       // opcode() supplies 0x89 and 0x66, and the encoding lists OpcS ahead of
       // OpcP -- presumably so the 0x66 byte is emitted as a prefix before 0x89
       // (confirm against the OpcS/OpcP enc_class definitions).
6313   match(Set mem (StoreC mem src));
6314 
6315   ins_cost(125);
6316   format %{ "MOV16  $mem,$src" %}
6317   opcode(0x89, 0x66);
6318   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6319   ins_pipe( ialu_mem_reg );
6320 %}
6321 
6322 // Store Integer
6323 instruct storeI(memory mem, rRegI src) %{
       // Store a 32-bit integer (StoreI) from a register to memory. Printed as
       // MOV and encoded with opcode 0x89 plus RegMem(src, mem).
6324   match(Set mem (StoreI mem src));
6325 
6326   ins_cost(125);
6327   format %{ "MOV    $mem,$src" %}
6328   opcode(0x89);
6329   ins_encode( OpcP, RegMem( src, mem ) );
6330   ins_pipe( ialu_mem_reg );
6331 %}
6332 
6333 // Store Long
6334 instruct storeL(long_memory mem, eRegL src) %{
       // Non-atomic 64-bit store: only selected when the StoreL node does not
       // require atomic access. Per the format, the low half goes to $mem and
       // the high half to $mem+4 as two separate MOVs (opcode 0x89 for each).
6335   predicate(!((StoreLNode*)n)->require_atomic_access());
6336   match(Set mem (StoreL mem src));
6337 
6338   ins_cost(200);
6339   format %{ "MOV    $mem,$src.lo\n\t"
6340             "MOV    $mem+4,$src.hi" %}
6341   opcode(0x89, 0x89);
6342   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6343   ins_pipe( ialu_mem_long_reg );
6344 %}
6345 
6346 // Store Long to Integer
6347 instruct storeL2I(memory mem, eRegL src) %{
       // Folds a long-to-int conversion into the store: matches StoreI of
       // (ConvL2I src) and emits a single movl of $src$$Register, which the
       // format labels as $src.lo. No ins_cost is given for this rule.
6348   match(Set mem (StoreI mem (ConvL2I src)));
6349 
6350   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6351   ins_encode %{
6352     __ movl($mem$$Address, $src$$Register);
6353   %}
6354   ins_pipe(ialu_mem_reg);
6355 %}
6356 
6357 // Volatile Store Long.  Must be atomic, so move it into
6358 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6359 // target address before the store (for null-ptr checks)
6360 // so the memory operand is used twice in the encoding.
6361 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
       // Atomic long store for UseSSE<=1, taken only when the StoreL node
       // requires atomic access. The source is a long stack slot and the flags
       // register is killed. Encoding: opcode 0x3B with RegMem(EAX, mem) forms
       // the CMP probe shown in the format, then enc_storeL_volatile(mem,src)
       // (defined outside this section) supplies the FILD/FISTp part.
6362   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6363   match(Set mem (StoreL mem src));
6364   effect( KILL cr );
6365   ins_cost(400);
6366   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6367             "FILD   $src\n\t"
6368             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6369   opcode(0x3B);
6370   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6371   ins_pipe( fpu_reg_mem );
6372 %}
6373 
6374 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
       // Atomic long store for UseSSE>=2 when the long lives in a stack slot.
       // Sequence: CMP against $mem first (the format labels it the implicit
       // null-check probe), load the slot at [rsp + $src$$disp] into the XMM
       // temp, then write the temp to $mem with a single 64-bit move.
6375   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6376   match(Set mem (StoreL mem src));
6377   effect( TEMP tmp, KILL cr );
6378   ins_cost(380);
6379   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6380             "MOVSD  $tmp,$src\n\t"
6381             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6382   ins_encode %{
6383     __ cmpl(rax, $mem$$Address);
6384     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6385     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6386   %}
6387   ins_pipe( pipe_slow );
6388 %}
6389 
6390 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
       // Atomic long store for UseSSE>=2 when the long is held in registers.
       // After the CMP probe, the low word is moved into $tmp and the high word
       // (HIGH_FROM_LOW of $src) into $tmp2; PUNPCKLDQ merges them in $tmp and
       // a single 64-bit move writes the result to $mem. Both XMM operands are
       // TEMPs and the flags register is killed.
6391   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6392   match(Set mem (StoreL mem src));
6393   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6394   ins_cost(360);
6395   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6396             "MOVD   $tmp,$src.lo\n\t"
6397             "MOVD   $tmp2,$src.hi\n\t"
6398             "PUNPCKLDQ $tmp,$tmp2\n\t"
6399             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6400   ins_encode %{
6401     __ cmpl(rax, $mem$$Address);
6402     __ movdl($tmp$$XMMRegister, $src$$Register);
6403     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6404     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6405     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6406   %}
6407   ins_pipe( pipe_slow );
6408 %}
6409 
6410 // Store Pointer; for storing unknown oops and raw pointers
6411 instruct storeP(memory mem, anyRegP src) %{
       // Store a pointer (StoreP) from any pointer register to memory. Printed
       // as MOV and encoded with opcode 0x89 plus RegMem(src, mem).
6412   match(Set mem (StoreP mem src));
6413 
6414   ins_cost(125);
6415   format %{ "MOV    $mem,$src" %}
6416   opcode(0x89);
6417   ins_encode( OpcP, RegMem( src, mem ) );
6418   ins_pipe( ialu_mem_reg );
6419 %}
6420 
6421 // Store Integer Immediate
6422 instruct storeImmI(memory mem, immI src) %{
       // Store a 32-bit integer immediate to memory (StoreI with an immI
       // source). Encoded as C7 /0 on the memory operand followed by the
       // 32-bit constant (Con32).
6423   match(Set mem (StoreI mem src));
6424 
6425   ins_cost(150);
6426   format %{ "MOV    $mem,$src" %}
6427   opcode(0xC7);               /* C7 /0 */
6428   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6429   ins_pipe( ialu_mem_imm );
6430 %}
6431 
6432 // Store Short/Char Immediate
6433 instruct storeImmI16(memory mem, immI16 src) %{
       // Store a 16-bit immediate to memory (StoreC), enabled only when
       // UseStoreImmI16 is set. Uses the same C7 /0 opcode as the 32-bit
       // immediate store, preceded by SizePrefix and followed by a 16-bit
       // constant (Con16).
6434   predicate(UseStoreImmI16);
6435   match(Set mem (StoreC mem src));
6436 
6437   ins_cost(150);
6438   format %{ "MOV16  $mem,$src" %}
6439   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6440   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6441   ins_pipe( ialu_mem_imm );
6442 %}
6443 
6444 // Store Pointer Immediate; null pointers or constant oops that do not
6445 // need card-mark barriers.
6446 instruct storeImmP(memory mem, immP src) %{
       // Store a pointer immediate to memory (StoreP with an immP source).
       // Encoded exactly like the 32-bit integer immediate store: C7 /0 on the
       // memory operand followed by Con32(src).
6447   match(Set mem (StoreP mem src));
6448 
6449   ins_cost(150);
6450   format %{ "MOV    $mem,$src" %}
6451   opcode(0xC7);               /* C7 /0 */
6452   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6453   ins_pipe( ialu_mem_imm );
6454 %}
6455 
6456 // Store Byte Immediate
6457 instruct storeImmB(memory mem, immI8 src) %{
       // Store an 8-bit immediate to memory (StoreB with an immI8 source).
       // Encoded as C6 /0 on the memory operand followed by Con8or32(src).
6458   match(Set mem (StoreB mem src));
6459 
6460   ins_cost(150);
6461   format %{ "MOV8   $mem,$src" %}
6462   opcode(0xC6);               /* C6 /0 */
6463   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6464   ins_pipe( ialu_mem_imm );
6465 %}
6466 
6467 // Store CMS card-mark Immediate
6468 instruct storeImmCM(memory mem, immI8 src) %{
       // Card-mark store of an 8-bit immediate (StoreCM). The opcode and
       // encoding list are the same as storeImmB's: C6 /0 on the memory
       // operand followed by Con8or32(src); only the matched node differs.
6469   match(Set mem (StoreCM mem src));
6470 
6471   ins_cost(150);
6472   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6473   opcode(0xC6);               /* C6 /0 */
6474   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6475   ins_pipe( ialu_mem_imm );
6476 %}
6477 
6478 // Store Double
6479 instruct storeDPR( memory mem, regDPR1 src) %{
       // Store a double from the regDPR1 operand to memory when UseSSE<=1.
       // Printed as FST_D; opcode 0xDD (/2 per the comment), encoded through
       // enc_FPR_store(mem,src).
6480   predicate(UseSSE<=1);
6481   match(Set mem (StoreD mem src));
6482 
6483   ins_cost(100);
6484   format %{ "FST_D  $mem,$src" %}
6485   opcode(0xDD);       /* DD /2 */
6486   ins_encode( enc_FPR_store(mem,src) );
6487   ins_pipe( fpu_mem_reg );
6488 %}
6489 
6490 // Store double does rounding on x86
6491 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
       // Matches StoreD of (RoundDouble src) when UseSSE<=1, so the RoundDouble
       // node is absorbed into the store. Opcode, encoding and cost are the
       // same as the plain FST_D store of a regDPR1 source.
6492   predicate(UseSSE<=1);
6493   match(Set mem (StoreD mem (RoundDouble src)));
6494 
6495   ins_cost(100);
6496   format %{ "FST_D  $mem,$src\t# round" %}
6497   opcode(0xDD);       /* DD /2 */
6498   ins_encode( enc_FPR_store(mem,src) );
6499   ins_pipe( fpu_mem_reg );
6500 %}
6501 
6502 // Store XMM register to memory (double-precision floating points)
6503 // MOVSD instruction
6504 instruct storeD(memory mem, regD src) %{
       // Store a double from an XMM register to memory when UseSSE>=2, via the
       // assembler's movdbl(); printed as MOVSD. Cost 95.
6505   predicate(UseSSE>=2);
6506   match(Set mem (StoreD mem src));
6507   ins_cost(95);
6508   format %{ "MOVSD  $mem,$src" %}
6509   ins_encode %{
6510     __ movdbl($mem$$Address, $src$$XMMRegister);
6511   %}
6512   ins_pipe( pipe_slow );
6513 %}
6514 
6515 // Store XMM register to memory (single-precision floating point)
6516 // MOVSS instruction
6517 instruct storeF(memory mem, regF src) %{
       // Store a float from an XMM register to memory when UseSSE>=1, via the
       // assembler's movflt(); printed as MOVSS. Cost 95.
6518   predicate(UseSSE>=1);
6519   match(Set mem (StoreF mem src));
6520   ins_cost(95);
6521   format %{ "MOVSS  $mem,$src" %}
6522   ins_encode %{
6523     __ movflt($mem$$Address, $src$$XMMRegister);
6524   %}
6525   ins_pipe( pipe_slow );
6526 %}
6527 
6528 // Store Float
6529 instruct storeFPR( memory mem, regFPR1 src) %{
       // Store a float from the regFPR1 operand to memory when UseSSE==0.
       // Printed as FST_S; opcode 0xD9 (/2 per the comment), encoded through
       // enc_FPR_store(mem,src).
6530   predicate(UseSSE==0);
6531   match(Set mem (StoreF mem src));
6532 
6533   ins_cost(100);
6534   format %{ "FST_S  $mem,$src" %}
6535   opcode(0xD9);       /* D9 /2 */
6536   ins_encode( enc_FPR_store(mem,src) );
6537   ins_pipe( fpu_mem_reg );
6538 %}
6539 
6540 // Store Float does rounding on x86
6541 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
       // Matches StoreF of (RoundFloat src) when UseSSE==0, so the RoundFloat
       // node is absorbed into the store. Opcode, encoding and cost are the
       // same as the plain FST_S store of a regFPR1 source.
6542   predicate(UseSSE==0);
6543   match(Set mem (StoreF mem (RoundFloat src)));
6544 
6545   ins_cost(100);
6546   format %{ "FST_S  $mem,$src\t# round" %}
6547   opcode(0xD9);       /* D9 /2 */
6548   ins_encode( enc_FPR_store(mem,src) );
6549   ins_pipe( fpu_mem_reg );
6550 %}
6551 
6552 // Store Float from a double source: the float store does the D->F rounding on x86
6553 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
       // Matches StoreF of (ConvD2F src) when UseSSE<=1: the source is a
       // double in regDPR1 and the conversion is absorbed into the FST_S
       // store (opcode 0xD9, /2 per the comment).
6554   predicate(UseSSE<=1);
6555   match(Set mem (StoreF mem (ConvD2F src)));
6556 
6557   ins_cost(100);
6558   format %{ "FST_S  $mem,$src\t# D-round" %}
6559   opcode(0xD9);       /* D9 /2 */
6560   ins_encode( enc_FPR_store(mem,src) );
6561   ins_pipe( fpu_mem_reg );
6562 %}
6563 
6564 // Store immediate Float value (it is faster than store from FPU register)
6565 // The instruction usage is guarded by predicate in operand immFPR().
6566 instruct storeFPR_imm( memory mem, immFPR src) %{
       // Store a float constant (immFPR operand) using an integer immediate
       // MOV: C7 /0 on the memory operand followed by Con32FPR_as_bits(src).
       // Cost 50, lower than the register-source float stores in this section.
6567   match(Set mem (StoreF mem src));
6568 
6569   ins_cost(50);
6570   format %{ "MOV    $mem,$src\t# store float" %}
6571   opcode(0xC7);               /* C7 /0 */
6572   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6573   ins_pipe( ialu_mem_imm );
6574 %}
6575 
6576 // Store immediate Float value (it is faster than store from XMM register)
6577 // The instruction usage is guarded by predicate in operand immF().
6578 instruct storeF_imm( memory mem, immF src) %{
       // Store a float constant (immF operand) using an integer immediate
       // MOV: C7 /0 on the memory operand followed by Con32F_as_bits(src).
       // Cost 50, lower than the register-source float stores in this section.
6579   match(Set mem (StoreF mem src));
6580 
6581   ins_cost(50);
6582   format %{ "MOV    $mem,$src\t# store float" %}
6583   opcode(0xC7);               /* C7 /0 */
6584   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6585   ins_pipe( ialu_mem_imm );
6586 %}
6587 
6588 // Store Integer to stack slot
6589 instruct storeSSI(stackSlotI dst, rRegI src) %{
       // Store an integer register into an int stack slot. Matches the bare
       // "Set dst src" form; printed as MOV, opcode 0x89, encoded through
       // OpcPRegSS(dst, src).
6590   match(Set dst src);
6591 
6592   ins_cost(100);
6593   format %{ "MOV    $dst,$src" %}
6594   opcode(0x89);
6595   ins_encode( OpcPRegSS( dst, src ) );
6596   ins_pipe( ialu_mem_reg );
6597 %}
6598 
6599 // Store Pointer to stack slot
6600 instruct storeSSP(stackSlotP dst, eRegP src) %{
       // Store a pointer register into a pointer stack slot. Matches the bare
       // "Set dst src" form; printed as MOV, opcode 0x89, encoded through
       // OpcPRegSS(dst, src).
6601   match(Set dst src);
6602 
6603   ins_cost(100);
6604   format %{ "MOV    $dst,$src" %}
6605   opcode(0x89);
6606   ins_encode( OpcPRegSS( dst, src ) );
6607   ins_pipe( ialu_mem_reg );
6608 %}
6609 
6610 // Store Long to stack slot
6611 instruct storeSSL(stackSlotL dst, eRegL src) %{
       // Store a long register pair into a long stack slot. Per the format,
       // the low half goes to $dst and the high half to $dst+4 as two MOVs
       // (opcode 0x89 for each); cost 200.
6612   match(Set dst src);
6613 
6614   ins_cost(200);
6615   format %{ "MOV    $dst,$src.lo\n\t"
6616             "MOV    $dst+4,$src.hi" %}
6617   opcode(0x89, 0x89);
6618   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6619   ins_pipe( ialu_mem_long_reg );
6620 %}
6621 
6622 //----------MemBar Instructions-----------------------------------------------
6623 // Memory barrier flavors
6624 
6625 instruct membar_acquire() %{
       // Matches both MemBarAcquire and LoadFence. Declared with size 0 and an
       // empty encoding, so no instruction bytes are produced for either node.
6626   match(MemBarAcquire);
6627   match(LoadFence);
6628   ins_cost(400);
6629 
6630   size(0);
6631   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6632   ins_encode();
6633   ins_pipe(empty);
6634 %}
6635 
6636 instruct membar_acquire_lock() %{
       // Matches MemBarAcquireLock with zero cost, zero size and an empty
       // encoding. The format string gives the reason: a CMPXCHG in the
       // preceding FastLock.
6637   match(MemBarAcquireLock);
6638   ins_cost(0);
6639 
6640   size(0);
6641   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6642   ins_encode( );
6643   ins_pipe(empty);
6644 %}
6645 
6646 instruct membar_release() %{
       // Matches both MemBarRelease and StoreFence. Declared with size 0 and
       // an empty encoding, so no instruction bytes are produced for either.
6647   match(MemBarRelease);
6648   match(StoreFence);
6649   ins_cost(400);
6650 
6651   size(0);
6652   format %{ "MEMBAR-release ! (empty encoding)" %}
6653   ins_encode( );
6654   ins_pipe(empty);
6655 %}
6656 
6657 instruct membar_release_lock() %{
       // Matches MemBarReleaseLock with zero cost, zero size and an empty
       // encoding. The format string gives the reason: a FastUnlock follows.
6658   match(MemBarReleaseLock);
6659   ins_cost(0);
6660 
6661   size(0);
6662   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6663   ins_encode( );
6664   ins_pipe(empty);
6665 %}
6666 
6667 instruct membar_volatile(eFlagsReg cr) %{
       // Matches MemBarVolatile and kills the flags register. The encoding
       // always calls membar(Assembler::StoreLoad); the format is a template
       // whose printed text depends on os::is_MP() (a LOCK ADDL on the stack
       // top for MP, otherwise "empty encoding"). What membar() actually emits
       // in the non-MP case is decided inside the assembler -- not visible here.
6668   match(MemBarVolatile);
6669   effect(KILL cr);
6670   ins_cost(400);
6671 
6672   format %{ 
6673     $$template
6674     if (os::is_MP()) {
6675       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6676     } else {
6677       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6678     }
6679   %}
6680   ins_encode %{
6681     __ membar(Assembler::StoreLoad);
6682   %}
6683   ins_pipe(pipe_slow);
6684 %}
6685 
6686 instruct unnecessary_membar_volatile() %{
       // Zero-cost, zero-size MemBarVolatile with an empty encoding, applicable
       // only when Matcher::post_store_load_barrier(n) returns true. Presumably
       // its cost of 0 makes it win over the cost-400 MemBarVolatile rule
       // whenever the predicate holds -- confirm against the matcher's rule
       // selection.
6687   match(MemBarVolatile);
6688   predicate(Matcher::post_store_load_barrier(n));
6689   ins_cost(0);
6690 
6691   size(0);
6692   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6693   ins_encode( );
6694   ins_pipe(empty);
6695 %}
6696 
6697 instruct membar_storestore() %{
       // Matches MemBarStoreStore with zero cost, zero size and an empty
       // encoding, so no instruction bytes are produced.
6698   match(MemBarStoreStore);
6699   ins_cost(0);
6700 
6701   size(0);
6702   format %{ "MEMBAR-storestore (empty encoding)" %}
6703   ins_encode( );
6704   ins_pipe(empty);
6705 %}
6706 
6707 //----------Move Instructions--------------------------------------------------
6708 instruct castX2P(eAXRegP dst, eAXRegI src) %{
       // Reinterpret an integer as a pointer (CastX2P). Source and destination
       // use the eAXRegI / eAXRegP operand classes and the encoding is empty,
       // so the cast costs nothing and produces no code.
6709   match(Set dst (CastX2P src));
6710   format %{ "# X2P  $dst, $src" %}
6711   ins_encode( /*empty encoding*/ );
6712   ins_cost(0);
6713   ins_pipe(empty);
6714 %}
6715 
6716 instruct castP2X(rRegI dst, eRegP src ) %{
       // Reinterpret a pointer as an integer (CastP2X). Unlike the X2P cast
       // this one has a real encoding: enc_Copy(dst, src), printed as a MOV,
       // with cost 50.
6717   match(Set dst (CastP2X src));
6718   ins_cost(50);
6719   format %{ "MOV    $dst, $src\t# CastP2X" %}
6720   ins_encode( enc_Copy( dst, src) );
6721   ins_pipe( ialu_reg_reg );
6722 %}
6723 
6724 //----------Conditional Move---------------------------------------------------
6725 // Conditional move
6726 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
       // Integer conditional move for CPUs where VM_Version::supports_cmov()
       // is false. Emulated with a short conditional jump (jccb) on the
       // inverted condition ($cop$$cmpcode ^ 1) that skips a plain movl.
6727   predicate(!VM_Version::supports_cmov() );
6728   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6729   ins_cost(200);
6730   format %{ "J$cop,us skip\t# signed cmove\n\t"
6731             "MOV    $dst,$src\n"
6732       "skip:" %}
6733   ins_encode %{
6734     Label Lskip;
6735     // Invert sense of branch from sense of CMOV
6736     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6737     __ movl($dst$$Register, $src$$Register);
6738     __ bind(Lskip);
6739   %}
6740   ins_pipe( pipe_cmov_reg );
6741 %}
6742 
6743 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
       // Unsigned-compare counterpart (cmpOpU / eFlagsRegU) of the branch-based
       // integer conditional move for CPUs where VM_Version::supports_cmov()
       // is false: a short jump on the inverted condition skips a plain movl.
6744   predicate(!VM_Version::supports_cmov() );
6745   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6746   ins_cost(200);
6747   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6748             "MOV    $dst,$src\n"
6749       "skip:" %}
6750   ins_encode %{
6751     Label Lskip;
6752     // Invert sense of branch from sense of CMOV
6753     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6754     __ movl($dst$$Register, $src$$Register);
6755     __ bind(Lskip);
6756   %}
6757   ins_pipe( pipe_cmov_reg );
6758 %}
6759 
6760 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
       // Register-to-register integer CMOV for signed compares, available when
       // VM_Version::supports_cmov() is true. opcode() gives 0x0F,0x40 and the
       // encoding is enc_cmov(cop) followed by RegReg(dst, src).
6761   predicate(VM_Version::supports_cmov() );
6762   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6763   ins_cost(200);
6764   format %{ "CMOV$cop $dst,$src" %}
6765   opcode(0x0F,0x40);
6766   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6767   ins_pipe( pipe_cmov_reg );
6768 %}
6769 
6770 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
       // Register-to-register integer CMOV for unsigned compares (cmpOpU /
       // eFlagsRegU), available when VM_Version::supports_cmov() is true.
       // Same opcode pair (0x0F,0x40) and encoding list as the signed form.
6771   predicate(VM_Version::supports_cmov() );
6772   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6773   ins_cost(200);
6774   format %{ "CMOV$cop $dst,$src" %}
6775   opcode(0x0F,0x40);
6776   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6777   ins_pipe( pipe_cmov_reg );
6778 %}
6779 
6780 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
       // Integer CMOV for the cmpOpUCF / eFlagsRegUCF operand classes. It has
       // no encoding of its own: the expand rule forwards all four operands to
       // cmovI_regU.
6781   predicate(VM_Version::supports_cmov() );
6782   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6783   ins_cost(200);
6784   expand %{
6785     cmovI_regU(cop, cr, dst, src);
6786   %}
6787 %}
6788 
6789 // Conditional move
6790 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
       // Integer CMOV with a memory source for signed compares: the LoadI of
       // src is folded into the instruction. Requires
       // VM_Version::supports_cmov(); cost 250; opcode pair 0x0F,0x40 with
       // enc_cmov(cop) and RegMem(dst, src).
6791   predicate(VM_Version::supports_cmov() );
6792   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6793   ins_cost(250);
6794   format %{ "CMOV$cop $dst,$src" %}
6795   opcode(0x0F,0x40);
6796   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6797   ins_pipe( pipe_cmov_mem );
6798 %}
6799 
6800 // Conditional move
6801 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
       // Integer CMOV with a memory source for unsigned compares (cmpOpU /
       // eFlagsRegU): the LoadI of src is folded into the instruction.
       // Requires VM_Version::supports_cmov(); cost 250.
6802   predicate(VM_Version::supports_cmov() );
6803   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6804   ins_cost(250);
6805   format %{ "CMOV$cop $dst,$src" %}
6806   opcode(0x0F,0x40);
6807   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6808   ins_pipe( pipe_cmov_mem );
6809 %}
6810 
6811 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
       // Memory-source integer CMOV for the cmpOpUCF / eFlagsRegUCF operand
       // classes. It has no encoding of its own: the expand rule forwards all
       // four operands to cmovI_memU.
6812   predicate(VM_Version::supports_cmov() );
6813   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6814   ins_cost(250);
6815   expand %{
6816     cmovI_memU(cop, cr, dst, src);
6817   %}
6818 %}
6819 
6820 // Conditional move
6821 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
       // Register-to-register pointer CMOV for signed compares, available when
       // VM_Version::supports_cmov() is true. Opcode pair 0x0F,0x40 with
       // enc_cmov(cop) and RegReg(dst, src); cost 200.
6822   predicate(VM_Version::supports_cmov() );
6823   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6824   ins_cost(200);
6825   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6826   opcode(0x0F,0x40);
6827   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6828   ins_pipe( pipe_cmov_reg );
6829 %}
6830 
6831 // Conditional move (non-P6 version)
6832 // Note:  a CMoveP is generated for  stubs and native wrappers
6833 //        regardless of whether we are on a P6, so we
6834 //        emulate a cmov here
6835 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
       // Branch-based emulation of a pointer conditional move. It carries no
       // predicate, so it is always a candidate, but its cost (300) is higher
       // than the real CMOV rule's 200. Encoding: enc_cmov_branch(cop, 0x2)
       // followed by a MOV (opcode 0x8b) via RegReg(dst, src); the 0x2 is
       // presumably the byte length of the MOV being skipped -- confirm against
       // the enc_cmov_branch definition.
6836   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6837   ins_cost(300);
6838   format %{ "Jn$cop   skip\n\t"
6839           "MOV    $dst,$src\t# pointer\n"
6840       "skip:" %}
6841   opcode(0x8b);
6842   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6843   ins_pipe( pipe_cmov_reg );
6844 %}
6845 
6846 // Conditional move
6847 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
       // Register-to-register pointer CMOV for unsigned compares (cmpOpU /
       // eFlagsRegU), available when VM_Version::supports_cmov() is true.
       // Same opcode pair (0x0F,0x40) and encoding list as the signed form.
6848   predicate(VM_Version::supports_cmov() );
6849   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6850   ins_cost(200);
6851   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6852   opcode(0x0F,0x40);
6853   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6854   ins_pipe( pipe_cmov_reg );
6855 %}
6856 
6857 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
       // Pointer CMOV for the cmpOpUCF / eFlagsRegUCF operand classes. It has
       // no encoding of its own: the expand rule forwards all four operands to
       // cmovP_regU.
6858   predicate(VM_Version::supports_cmov() );
6859   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6860   ins_cost(200);
6861   expand %{
6862     cmovP_regU(cop, cr, dst, src);
6863   %}
6864 %}
6865 
6866 // DISABLED: Requires the ADLC to emit a bottom_type call that
6867 // correctly meets the two pointer arguments; one is an incoming
6868 // register but the other is a memory operand.  ALSO appears to
6869 // be buggy with implicit null checks.
6870 //
6871 //// Conditional move
6872 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6873 //  predicate(VM_Version::supports_cmov() );
6874 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6875 //  ins_cost(250);
6876 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6877 //  opcode(0x0F,0x40);
6878 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6879 //  ins_pipe( pipe_cmov_mem );
6880 //%}
6881 //
6882 //// Conditional move
6883 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6884 //  predicate(VM_Version::supports_cmov() );
6885 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6886 //  ins_cost(250);
6887 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6888 //  opcode(0x0F,0x40);
6889 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6890 //  ins_pipe( pipe_cmov_mem );
6891 //%}
6892 
6893 // Conditional move
6894 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
       // Conditional move of a double between FPU registers for unsigned flag
       // results when UseSSE<=1. The destination is the regDPR1 operand, the
       // condition uses the cmpOp_fcmov operand class, and the encoding is
       // enc_cmov_dpr(cop,src) with opcode 0xDA.
6895   predicate(UseSSE<=1);
6896   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6897   ins_cost(200);
6898   format %{ "FCMOV$cop $dst,$src\t# double" %}
6899   opcode(0xDA);
6900   ins_encode( enc_cmov_dpr(cop,src) );
6901   ins_pipe( pipe_cmovDPR_reg );
6902 %}
6903 
6904 // Conditional move
6905 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
       // Conditional move of a float between FPU registers for unsigned flag
       // results when UseSSE==0. It reuses the double rule's encoding class
       // (enc_cmov_dpr) and pipeline class (pipe_cmovDPR_reg) with opcode 0xDA.
6906   predicate(UseSSE==0);
6907   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6908   ins_cost(200);
6909   format %{ "FCMOV$cop $dst,$src\t# float" %}
6910   opcode(0xDA);
6911   ins_encode( enc_cmov_dpr(cop,src) );
6912   ins_pipe( pipe_cmovDPR_reg );
6913 %}
6914 
6915 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6916 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
       // Branch-based conditional move of a double for signed compares when
       // UseSSE<=1. Encoding: enc_cmov_branch(cop, 0x4), then Push_Reg_DPR(src)
       // and a DD-opcode store into dst via RegOpc(dst). The 0x4 is presumably
       // the byte length of the skipped push/store pair -- confirm against the
       // enc_cmov_branch definition.
6917   predicate(UseSSE<=1);
6918   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6919   ins_cost(200);
6920   format %{ "Jn$cop   skip\n\t"
6921             "MOV    $dst,$src\t# double\n"
6922       "skip:" %}
6923   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6924   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6925   ins_pipe( pipe_cmovDPR_reg );
6926 %}
6927 
6928 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6929 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
       // Branch-based conditional move of a float for signed compares when
       // UseSSE==0. Encoding: enc_cmov_branch(cop, 0x4), then Push_Reg_FPR(src)
       // and a DD-opcode store into dst via RegOpc(dst). The 0x4 is presumably
       // the byte length of the skipped push/store pair -- confirm against the
       // enc_cmov_branch definition.
6930   predicate(UseSSE==0);
6931   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6932   ins_cost(200);
6933   format %{ "Jn$cop    skip\n\t"
6934             "MOV    $dst,$src\t# float\n"
6935       "skip:" %}
6936   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6937   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6938   ins_pipe( pipe_cmovDPR_reg );
6939 %}
6940 
6941 // No CMOVE with SSE/SSE2
6942 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
       // Conditional move of a float in XMM registers for signed compares
       // (UseSSE>=1). Implemented as a short jump (jccb) on the inverted
       // condition ($cop$$cmpcode ^ 1) that skips a movflt from src to dst.
6943   predicate (UseSSE>=1);
6944   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6945   ins_cost(200);
6946   format %{ "Jn$cop   skip\n\t"
6947             "MOVSS  $dst,$src\t# float\n"
6948       "skip:" %}
6949   ins_encode %{
6950     Label skip;
6951     // Invert sense of branch from sense of CMOV
6952     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6953     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6954     __ bind(skip);
6955   %}
6956   ins_pipe( pipe_slow );
6957 %}
6958 
6959 // No CMOVE with SSE/SSE2
6960 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
       // Conditional move of a double in XMM registers for signed compares
       // (UseSSE>=2). Implemented as a short jump (jccb) on the inverted
       // condition ($cop$$cmpcode ^ 1) that skips a movdbl from src to dst.
       // The format annotates the move as "# double" to agree with the MOVSD
       // mnemonic and the regD operands.
6961   predicate (UseSSE>=2);
6962   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6963   ins_cost(200);
6964   format %{ "Jn$cop   skip\n\t"
6965             "MOVSD  $dst,$src\t# double\n"
6966       "skip:" %}
6967   ins_encode %{
6968     Label skip;
6969     // Invert sense of branch from sense of CMOV
6970     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6971     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6972     __ bind(skip);
6973   %}
6974   ins_pipe( pipe_slow );
6975 %}
6976 
6977 // unsigned version
6978 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
       // Conditional move of a float in XMM registers for unsigned compares
       // (cmpOpU / eFlagsRegU, UseSSE>=1). Implemented as a short jump on the
       // inverted condition that skips a movflt from src to dst.
6979   predicate (UseSSE>=1);
6980   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6981   ins_cost(200);
6982   format %{ "Jn$cop   skip\n\t"
6983             "MOVSS  $dst,$src\t# float\n"
6984       "skip:" %}
6985   ins_encode %{
6986     Label skip;
6987     // Invert sense of branch from sense of CMOV
6988     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6989     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6990     __ bind(skip);
6991   %}
6992   ins_pipe( pipe_slow );
6993 %}
6994 
6995 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
       // XMM float conditional move for the cmpOpUCF / eFlagsRegUCF operand
       // classes (UseSSE>=1). It has no encoding of its own: the expand rule
       // forwards all four operands to fcmovF_regU.
6996   predicate (UseSSE>=1);
6997   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6998   ins_cost(200);
6999   expand %{
7000     fcmovF_regU(cop, cr, dst, src);
7001   %}
7002 %}
7003 
7004 // unsigned version
7005 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
       // Conditional move of a double in XMM registers for unsigned compares
       // (cmpOpU / eFlagsRegU, UseSSE>=2). Implemented as a short jump (jccb)
       // on the inverted condition ($cop$$cmpcode ^ 1) that skips a movdbl
       // from src to dst. The format annotates the move as "# double" to agree
       // with the MOVSD mnemonic and the regD operands.
7006   predicate (UseSSE>=2);
7007   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7008   ins_cost(200);
7009   format %{ "Jn$cop   skip\n\t"
7010             "MOVSD  $dst,$src\t# double\n"
7011       "skip:" %}
7012   ins_encode %{
7013     Label skip;
7014     // Invert sense of branch from sense of CMOV
7015     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7016     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7017     __ bind(skip);
7018   %}
7019   ins_pipe( pipe_slow );
7020 %}
7021 
7022 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
       // XMM double conditional move for the cmpOpUCF / eFlagsRegUCF operand
       // classes (UseSSE>=2). It has no encoding of its own: the expand rule
       // forwards all four operands to fcmovD_regU.
7023   predicate (UseSSE>=2);
7024   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7025   ins_cost(200);
7026   expand %{
7027     fcmovD_regU(cop, cr, dst, src);
7028   %}
7029 %}
7030 
7031 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
       // Long conditional move for signed compares when
       // VM_Version::supports_cmov() is true. Per the format it is two CMOVs
       // under the same condition, one for the low halves and one for the
       // high halves (RegReg_Lo2 / RegReg_Hi2).
7032   predicate(VM_Version::supports_cmov() );
7033   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7034   ins_cost(200);
7035   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7036             "CMOV$cop $dst.hi,$src.hi" %}
7037   opcode(0x0F,0x40);
7038   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7039   ins_pipe( pipe_cmov_reg_long );
7040 %}
7041 
7042 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
       // Long conditional move for unsigned compares (cmpOpU / eFlagsRegU)
       // when VM_Version::supports_cmov() is true. Per the format it is two
       // CMOVs under the same condition, one for the low halves and one for
       // the high halves (RegReg_Lo2 / RegReg_Hi2).
7043   predicate(VM_Version::supports_cmov() );
7044   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7045   ins_cost(200);
7046   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7047             "CMOV$cop $dst.hi,$src.hi" %}
7048   opcode(0x0F,0x40);
7049   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7050   ins_pipe( pipe_cmov_reg_long );
7051 %}
7052 
7053 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
       // Long conditional move for the cmpOpUCF / eFlagsRegUCF operand
       // classes. It has no encoding of its own: the expand rule forwards all
       // four operands to cmovL_regU.
7054   predicate(VM_Version::supports_cmov() );
7055   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7056   ins_cost(200);
7057   expand %{
7058     cmovL_regU(cop, cr, dst, src);
7059   %}
7060 %}
7061 
7062 //----------Arithmetic Instructions--------------------------------------------
7063 //----------Addition Instructions----------------------------------------------
7064 
7065 // Integer Addition Instructions
7066 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
       // Two-operand integer add, dst += src, register to register. Kills the
       // flags register; declared as 2 bytes: opcode 0x03 plus RegReg(dst, src).
7067   match(Set dst (AddI dst src));
7068   effect(KILL cr);
7069 
7070   size(2);
7071   format %{ "ADD    $dst,$src" %}
7072   opcode(0x03);
7073   ins_encode( OpcP, RegReg( dst, src) );
7074   ins_pipe( ialu_reg_reg );
7075 %}
7076 
7077 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
       // Integer add of an immediate, dst += src. Kills the flags register.
       // opcode() gives 0x81 with /0; the encoding is OpcSErm(dst, src)
       // followed by Con8or32(src) -- presumably choosing a short immediate
       // form when the constant fits in 8 bits (confirm against those
       // enc_class definitions).
7078   match(Set dst (AddI dst src));
7079   effect(KILL cr);
7080 
7081   format %{ "ADD    $dst,$src" %}
7082   opcode(0x81, 0x00); /* /0 id */
7083   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7084   ins_pipe( ialu_reg );
7085 %}
7086 
7087 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
       // Integer add of the immI1 constant expressed as INC, enabled only when
       // UseIncDec is set. Kills the flags register; declared as 1 byte, built
       // from base opcode 0x40 via Opc_plus(primary, dst).
7088   predicate(UseIncDec);
7089   match(Set dst (AddI dst src));
7090   effect(KILL cr);
7091 
7092   size(1);
7093   format %{ "INC    $dst" %}
7094   opcode(0x40); /*  */
7095   ins_encode( Opc_plus( primary, dst ) );
7096   ins_pipe( ialu_reg );
7097 %}
7098 
7099 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
       // Three-operand integer add of a register and an immediate using LEA:
       // dst = src0 + src1, with dst separate from src0. The rule declares no
       // flags operand or KILL effect, unlike the ADD forms; cost 110.
7100   match(Set dst (AddI src0 src1));
7101   ins_cost(110);
7102 
7103   format %{ "LEA    $dst,[$src0 + $src1]" %}
7104   opcode(0x8D); /* 0x8D /r */
7105   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7106   ins_pipe( ialu_reg_reg );
7107 %}
7108 
7109 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
       // Three-operand pointer add of a register and an immediate using LEA:
       // dst = src0 + src1, with dst separate from src0. The rule declares no
       // flags operand or KILL effect, unlike the ADD forms; cost 110.
7110   match(Set dst (AddP src0 src1));
7111   ins_cost(110);
7112 
7113   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7114   opcode(0x8D); /* 0x8D /r */
7115   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7116   ins_pipe( ialu_reg_reg );
7117 %}
7118 
7119 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
       // Integer add of the immI_M1 constant expressed as DEC, enabled only
       // when UseIncDec is set. Kills the flags register; declared as 1 byte,
       // built from base opcode 0x48 via Opc_plus(primary, dst).
7120   predicate(UseIncDec);
7121   match(Set dst (AddI dst src));
7122   effect(KILL cr);
7123 
7124   size(1);
7125   format %{ "DEC    $dst" %}
7126   opcode(0x48); /*  */
7127   ins_encode( Opc_plus( primary, dst ) );
7128   ins_pipe( ialu_reg );
7129 %}
7130 
7131 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
       // In-place pointer add: matches AddP of a pointer register with an int
       // register offset (dst = dst + src). Same opcode/encoding shape as
       // addI_eReg; flags are declared killed.
7132   match(Set dst (AddP dst src));
7133   effect(KILL cr);
7134 
7135   size(2);
7136   format %{ "ADD    $dst,$src" %}
7137   opcode(0x03);
7138   ins_encode( OpcP, RegReg( dst, src) );
7139   ins_pipe( ialu_reg_reg );
7140 %}
7141 
7142 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
       // In-place add of an int constant to a pointer register
       // (dst = dst + src). Uses the same encoders as addI_eReg_imm; flags are
       // declared killed.
7143   match(Set dst (AddP dst src));
7144   effect(KILL cr);
7145 
7146   format %{ "ADD    $dst,$src" %}
7147   opcode(0x81,0x00); /* Opcode 81 /0 id */
       // The RegImm encoding below is disabled; the OpcSErm/Con8or32 pair is the
       // encoding actually used.
7148   // ins_encode( RegImm( dst, src) );
7149   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7150   ins_pipe( ialu_reg );
7151 %}
7152 
7153 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
       // Folds an int load into the add: matches AddI of a register with a
       // LoadI from memory, so no separate load is needed (dst = dst + [src]).
       // Flags are declared killed.
7154   match(Set dst (AddI dst (LoadI src)));
7155   effect(KILL cr);
7156 
7157   ins_cost(125);
7158   format %{ "ADD    $dst,$src" %}
7159   opcode(0x03);
7160   ins_encode( OpcP, RegMem( dst, src) );
7161   ins_pipe( ialu_reg_mem );
7162 %}
7163 
7164 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
       // Read-modify-write add: matches a StoreI to dst whose value is
       // (LoadI dst) + src, i.e. load, add and store all use the same memory
       // operand. Emitted as ADD with a memory destination (opcode 01 /r).
7165   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7166   effect(KILL cr);
7167 
7168   ins_cost(150);
7169   format %{ "ADD    $dst,$src" %}
7170   opcode(0x01);  /* Opcode 01 /r */
       // Note the operand order: the register is passed first, the memory operand second.
7171   ins_encode( OpcP, RegMem( src, dst ) );
7172   ins_pipe( ialu_mem_reg );
7173 %}
7174 
7175 // Add Memory with Immediate
7176 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
       // Read-modify-write add of an int constant to a memory location: matches
       // a StoreI to dst whose value is (LoadI dst) + src. Flags are declared killed.
       // NOTE(review): OpcSE/Con8or32 presumably pick the sign-extended 8-bit
       // immediate form when the constant fits -- confirm in the encode block.
7177   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7178   effect(KILL cr);
7179 
7180   ins_cost(125);
7181   format %{ "ADD    $dst,$src" %}
7182   opcode(0x81);               /* Opcode 81 /0 id */
7183   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7184   ins_pipe( ialu_mem_imm );
7185 %}
7186 
7187 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
       // Read-modify-write increment of a memory int: same match shape as
       // addI_mem_imm but restricted to an immI1 operand and emitted as
       // INC (opcode FF /0). Flags are declared killed.
       // NOTE(review): unlike incI_eReg, this rule has no UseIncDec predicate --
       // confirm that is intentional.
7188   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7189   effect(KILL cr);
7190 
7191   ins_cost(125);
7192   format %{ "INC    $dst" %}
7193   opcode(0xFF);               /* Opcode FF /0 */
7194   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7195   ins_pipe( ialu_mem_imm );
7196 %}
7197 
7198 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
       // Read-modify-write decrement of a memory int: same match shape as
       // addI_mem_imm but restricted to an immI_M1 operand and emitted as
       // DEC (opcode FF /1). Flags are declared killed.
       // NOTE(review): unlike decI_eReg, this rule has no UseIncDec predicate --
       // confirm that is intentional.
7199   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7200   effect(KILL cr);
7201 
7202   ins_cost(125);
7203   format %{ "DEC    $dst" %}
7204   opcode(0xFF);               /* Opcode FF /1 */
7205   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7206   ins_pipe( ialu_mem_imm );
7207 %}
7208 
7209 
7210 instruct checkCastPP( eRegP dst ) %{
       // Matches a CheckCastPP node in place (input and result share dst).
       // The encoding is empty and the declared size is 0, so the rule emits no
       // machine code; only the format text is printed.
7211   match(Set dst (CheckCastPP dst));
7212 
7213   size(0);
7214   format %{ "#checkcastPP of $dst" %}
7215   ins_encode( /*empty encoding*/ );
7216   ins_pipe( empty );
7217 %}
7218 
7219 instruct castPP( eRegP dst ) %{
       // Matches a CastPP node in place (input and result share dst).
       // The encoding is empty, so the rule emits no machine code.
7220   match(Set dst (CastPP dst));
7221   format %{ "#castPP of $dst" %}
7222   ins_encode( /*empty encoding*/ );
7223   ins_pipe( empty );
7224 %}
7225 
7226 instruct castII( rRegI dst ) %{
       // Matches a CastII node in place (input and result share dst).
       // The encoding is empty and the cost is explicitly 0.
7227   match(Set dst (CastII dst));
7228   format %{ "#castII of $dst" %}
7229   ins_encode( /*empty encoding*/ );
7230   ins_cost(0);
7231   ins_pipe( empty );
7232 %}
7233 
7234 
7235 // Load-locked - same as a regular pointer load when used with compare-swap
7236 instruct loadPLocked(eRegP dst, memory mem) %{
       // Matches LoadPLocked and emits a plain MOV load (opcode 0x8B) of the
       // pointer from mem into dst; no lock prefix or flags effect is declared.
7237   match(Set dst (LoadPLocked mem));
7238 
7239   ins_cost(125);
7240   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7241   opcode(0x8B);
7242   ins_encode( OpcP, RegMem(dst,mem));
7243   ins_pipe( ialu_reg_mem );
7244 %}
7245 
7246 // Conditional-store of the updated heap-top.
7247 // Used during allocation of the shared heap.
7248 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7249 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
       // The result of this rule is the flags register (Set cr ...): the
       // expected old value is pinned to EAX and the instruction is encoded as
       // lock_prefix + 0F B1 (CMPXCHG) on heap_top_ptr with newval.
7250   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7251   // EAX is killed if there is contention, but then it's also unused.
7252   // In the common case of no contention, EAX holds the new oop address.
7253   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7254   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7255   ins_pipe( pipe_cmpxchg );
7256 %}
7257 
7258 // Conditional-store of an int value.
7259 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7260 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
       // Int conditional store producing the flags register as its result.
       // The expected value is pinned to EAX and declared killed.
       // Encoded as lock_prefix + 0F B1 (CMPXCHG) on mem with newval.
7261   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7262   effect(KILL oldval);
7263   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7264   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7265   ins_pipe( pipe_cmpxchg );
7266 %}
7267 
7268 // Conditional-store of a long value.
7269 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7270 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
       // Long conditional store producing the flags register as its result.
       // The expected value lives in the EDX:EAX pair (declared killed) and the
       // new value in the EBX/ECX pair; implemented with CMPXCHG8 bracketed by
       // two EBX<->ECX swaps.
7271   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7272   effect(KILL oldval);
7273   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7274             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7275             "XCHG   EBX,ECX"
7276   %}
7277   ins_encode %{
7278     // Note: we need to swap rbx, and rcx before and after the
7279     //       cmpxchg8 instruction because the instruction uses
7280     //       rcx as the high order word of the new value to store but
7281     //       our register encoding uses rbx.
7282     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
         // The lock prefix is only emitted when running on a multiprocessor.
7283     if( os::is_MP() )
7284       __ lock();
7285     __ cmpxchg8($mem$$Address);
         // Swap back so EBX/ECX hold their original contents afterwards.
7286     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7287   %}
7288   ins_pipe( pipe_cmpxchg );
7289 %}
7290 
7291 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7292 
7293 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
       // Long compare-and-swap producing an int result in res (per the format:
       // 1 on success, 0 on failure). Only selectable when
       // VM_Version::supports_cx8() is true. The address is pinned to ESI, the
       // expected value to EDX:EAX (declared killed) and the new value to the
       // EBX/ECX pair; flags are declared killed.
7294   predicate(VM_Version::supports_cx8());
7295   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7296   effect(KILL cr, KILL oldval);
7297   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7298             "MOV    $res,0\n\t"
7299             "JNE,s  fail\n\t"
7300             "MOV    $res,1\n"
7301           "fail:" %}
7302   ins_encode( enc_cmpxchg8(mem_ptr),
7303               enc_flags_ne_to_boolean(res) );
7304   ins_pipe( pipe_cmpxchg );
7305 %}
7306 
7307 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
       // Pointer compare-and-swap without a Shenandoah barrier, producing an int
       // result in res (per the format: 1 on success, 0 on failure).
       // The predicate is the exact complement of compareAndSwapP_shenandoah's:
       // this rule applies when Shenandoah or its CAS barrier is disabled, or
       // when n->in(3)->in(1) has the null pointer type.
       // NOTE(review): n->in(3)->in(1) is presumably the expected value (oldval)
       // under the Binary node -- confirm against the node layout.
7308   predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
7309   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7310   effect(KILL cr, KILL oldval);
7311   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7312             "MOV    $res,0\n\t"
7313             "JNE,s  fail\n\t"
7314             "MOV    $res,1\n"
7315           "fail:" %}
7316   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7317   ins_pipe( pipe_cmpxchg );
7318 %}
7319 
7320 instruct compareAndSwapP_shenandoah(rRegI res,
7321                                     memory mem_ptr,
7322                                     eRegP tmp1, eRegP tmp2,
7323                                     eAXRegP oldval, eCXRegP newval,
7324                                     eFlagsReg cr)
7325 %{
       // Pointer compare-and-swap routed through the Shenandoah barrier-set
       // assembler. Applies only when UseShenandoahGC and ShenandoahCASBarrier
       // are both set and n->in(3)->in(1) does not have the null pointer type
       // (the complement of compareAndSwapP's predicate).
       // Two extra pointer registers are reserved as temporaries; flags and
       // oldval are declared killed.
7326   predicate(UseShenandoahGC && ShenandoahCASBarrier && n->in(3)->in(1)->bottom_type() != TypePtr::NULL_PTR);
7327   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7328   effect(TEMP tmp1, TEMP tmp2, KILL cr, KILL oldval);
7329 
7330   format %{ "shenandoah_cas_oop $mem_ptr,$newval" %}
7331 
7332   ins_encode %{
         // All code generation is delegated to cmpxchg_oop; res, the address,
         // both values and both temporaries are passed straight through.
7333     ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(&_masm,
7334       $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register,
7335       false, // swap
7336       $tmp1$$Register, $tmp2$$Register
7337     );
7338   %}
7339   ins_pipe( pipe_cmpxchg );
7340 %}
7341 
7342 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
       // Int compare-and-swap producing an int result in res (per the format:
       // 1 on success, 0 on failure). The expected value is pinned to EAX
       // (declared killed) and the new value to ECX; flags are declared killed.
       // Uses the same encoders as compareAndSwapP.
7343   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7344   effect(KILL cr, KILL oldval);
7345   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7346             "MOV    $res,0\n\t"
7347             "JNE,s  fail\n\t"
7348             "MOV    $res,1\n"
7349           "fail:" %}
7350   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7351   ins_pipe( pipe_cmpxchg );
7352 %}
7353 
7354 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
       // GetAndAddI variant for the case where the fetched value is not used
       // (predicate: result_not_used()). Because no result is needed, a plain
       // ADDL of the constant to memory is emitted instead of XADD; the result
       // operand is a dummy. Flags are declared killed.
7355   predicate(n->as_LoadStore()->result_not_used());
7356   match(Set dummy (GetAndAddI mem add));
7357   effect(KILL cr);
7358   format %{ "ADDL  [$mem],$add" %}
7359   ins_encode %{
         // The lock prefix is only emitted when running on a multiprocessor.
7360     if (os::is_MP()) { __ lock(); }
7361     __ addl($mem$$Address, $add$$constant);
7362   %}
7363   ins_pipe( pipe_cmpxchg );
7364 %}
7365 
7366 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
       // GetAndAddI with a used result: newval is both the addend input and the
       // result register, and the operation is emitted as XADDL on mem.
       // Flags are declared killed.
7367   match(Set newval (GetAndAddI mem newval));
7368   effect(KILL cr);
7369   format %{ "XADDL  [$mem],$newval" %}
7370   ins_encode %{
         // The lock prefix is only emitted when running on a multiprocessor.
7371     if (os::is_MP()) { __ lock(); }
7372     __ xaddl($mem$$Address, $newval$$Register);
7373   %}
7374   ins_pipe( pipe_cmpxchg );
7375 %}
7376 
7377 instruct xchgI( memory mem, rRegI newval) %{
       // GetAndSetI: newval is both the value to store and the result register;
       // emitted as a single XCHGL between the register and mem.
       // No lock prefix is emitted and no flags effect is declared
       // (XCHG with a memory operand is implicitly locked on x86).
7378   match(Set newval (GetAndSetI mem newval));
7379   format %{ "XCHGL  $newval,[$mem]" %}
7380   ins_encode %{
7381     __ xchgl($newval$$Register, $mem$$Address);
7382   %}
7383   ins_pipe( pipe_cmpxchg );
7384 %}
7385 
7386 instruct xchgP( memory mem, pRegP newval) %{
       // GetAndSetP: pointer counterpart of xchgI. newval is both the value to
       // store and the result register; emitted as a single XCHGL with mem.
       // No lock prefix is emitted and no flags effect is declared.
7387   match(Set newval (GetAndSetP mem newval));
7388   format %{ "XCHGL  $newval,[$mem]" %}
7389   ins_encode %{
7390     __ xchgl($newval$$Register, $mem$$Address);
7391   %}
7392   ins_pipe( pipe_cmpxchg );
7393 %}
7394 
7395 //----------Subtraction Instructions-------------------------------------------
7396 
7397 // Integer Subtraction Instructions
7398 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
       // Register-register int subtract in two-operand form (dst = dst - src),
       // opcode 0x2B. Flags are declared killed; the declared size is 2 bytes.
7399   match(Set dst (SubI dst src));
7400   effect(KILL cr);
7401 
7402   size(2);
7403   format %{ "SUB    $dst,$src" %}
7404   opcode(0x2B);
7405   ins_encode( OpcP, RegReg( dst, src) );
7406   ins_pipe( ialu_reg_reg );
7407 %}
7408 
7409 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
       // In-place subtract of an int constant from a register (dst = dst - src),
       // based on the 0x81 /5 SUB form. Flags are declared killed.
       // Uses the same encoders as addI_eReg_imm with a different /digit.
7410   match(Set dst (SubI dst src));
7411   effect(KILL cr);
7412 
7413   format %{ "SUB    $dst,$src" %}
7414   opcode(0x81,0x05);  /* Opcode 81 /5 */
       // The RegImm encoding below is disabled; the OpcSErm/Con8or32 pair is the
       // encoding actually used.
7415   // ins_encode( RegImm( dst, src) );
7416   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7417   ins_pipe( ialu_reg );
7418 %}
7419 
7420 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
       // Folds an int load into the subtract: matches SubI of a register with a
       // LoadI from memory (dst = dst - [src]). Flags are declared killed.
7421   match(Set dst (SubI dst (LoadI src)));
7422   effect(KILL cr);
7423 
7424   ins_cost(125);
7425   format %{ "SUB    $dst,$src" %}
7426   opcode(0x2B);
7427   ins_encode( OpcP, RegMem( dst, src) );
7428   ins_pipe( ialu_reg_mem );
7429 %}
7430 
7431 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
       // Read-modify-write subtract: matches a StoreI to dst whose value is
       // (LoadI dst) - src. Emitted as SUB with a memory destination
       // (opcode 29 /r). Flags are declared killed.
7432   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7433   effect(KILL cr);
7434 
7435   ins_cost(150);
7436   format %{ "SUB    $dst,$src" %}
7437   opcode(0x29);  /* Opcode 29 /r */
       // Note the operand order: the register is passed first, the memory operand second.
7438   ins_encode( OpcP, RegMem( src, dst ) );
7439   ins_pipe( ialu_mem_reg );
7440 %}
7441 
7442 // Subtract from a pointer
7443 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
       // Recognizes a pointer plus a negated int, AddP dst (0 - src), and emits
       // it as a single SUB dst,src. The immI0 operand exists only to match the
       // zero in the SubI; it is not passed to the encoding.
7444   match(Set dst (AddP dst (SubI zero src)));
7445   effect(KILL cr);
7446 
7447   size(2);
7448   format %{ "SUB    $dst,$src" %}
7449   opcode(0x2B);
7450   ins_encode( OpcP, RegReg( dst, src) );
7451   ins_pipe( ialu_reg_reg );
7452 %}
7453 
7454 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
       // Int negation: matches (0 - dst) written back to dst and emits NEG
       // (opcode F7 /3). The immI0 operand exists only to match the zero; it is
       // not passed to the encoding. Flags are declared killed.
7455   match(Set dst (SubI zero dst));
7456   effect(KILL cr);
7457 
7458   size(2);
7459   format %{ "NEG    $dst" %}
7460   opcode(0xF7,0x03);  // Opcode F7 /3
7461   ins_encode( OpcP, RegOpc( dst ) );
7462   ins_pipe( ialu_reg );
7463 %}
7464 
7465 //----------Multiplication/Division Instructions-------------------------------
7466 // Integer Multiplication Instructions
7467 // Multiply Register
7468 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
       // Register-register int multiply in two-operand form (dst = dst * src).
       // Flags are declared killed; the declared size is 3 bytes.
7469   match(Set dst (MulI dst src));
7470   effect(KILL cr);
7471 
7472   size(3);
7473   ins_cost(300);
7474   format %{ "IMUL   $dst,$src" %}
       // NOTE(review): primary is 0xAF and secondary is 0x0F; OpcS is listed before
       // OpcP below, so the bytes presumably come out as 0F AF -- confirm in the
       // encoders.
7475   opcode(0xAF, 0x0F);
7476   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7477   ins_pipe( ialu_reg_reg_alu0 );
7478 %}
7479 
7480 // Multiply 32-bit Immediate
7481 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
       // Three-operand int multiply by a constant (dst = src * imm), based on
       // the 0x69 /r IMUL form; dst need not equal src. Flags are declared killed.
       // NOTE(review): OpcSE/Con8or32 presumably pick the 8-bit immediate form
       // when the constant fits -- confirm in the encode block.
7482   match(Set dst (MulI src imm));
7483   effect(KILL cr);
7484 
7485   ins_cost(300);
7486   format %{ "IMUL   $dst,$src,$imm" %}
7487   opcode(0x69);  /* 69 /r id */
7488   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7489   ins_pipe( ialu_reg_reg_alu0 );
7490 %}
7491 
7492 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
       // Materializes an immL32 long constant into an eADXRegL_low_only operand;
       // per the format text only the low word (EAX) is loaded.
       // This operand class is consumed by the mulI_imm_high and
       // mulI_imm_RShift_high rules, which compensate for the inflated cost set here.
7493   match(Set dst src);
7494   effect(KILL cr);
7495 
7496   // Note that this is artificially increased to make it more expensive than loadConL
7497   ins_cost(250);
7498   format %{ "MOV    EAX,$src\t// low word only" %}
7499   opcode(0xB8);
7500   ins_encode( LdImmL_Lo(dst, src) );
7501   ins_pipe( ialu_reg_fat );
7502 %}
7503 
7504 // Multiply by 32-bit Immediate, taking the shifted high order results
7505 //  (special case for shift by 32)
7506 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
       // Matches (int)(((long)src1 * src2) >> cnt) with an immI_32 shift count
       // and produces the result in EDX. Flags are declared killed.
7507   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
       // The predicate walks ConvL2I -> RShiftL -> MulL to the multiply's second
       // input and requires it to be a long constant within [min_jint, max_jint].
7508   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7509              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7510              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7511   effect(USE src1, KILL cr);
7512 
7513   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7514   ins_cost(0*100 + 1*400 - 150);
7515   format %{ "IMUL   EDX:EAX,$src1" %}
7516   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7517   ins_pipe( pipe_slow );
7518 %}
7519 
7520 // Multiply by 32-bit Immediate, taking the shifted high order results
7521 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
       // Same pattern as mulI_imm_high but with an immI_32_63 shift count; per
       // the format, the high word in EDX is additionally shifted right
       // arithmetically by cnt-32. Costed 100 higher than mulI_imm_high.
7522   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
       // The predicate walks ConvL2I -> RShiftL -> MulL to the multiply's second
       // input and requires it to be a long constant within [min_jint, max_jint].
7523   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7524              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7525              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7526   effect(USE src1, KILL cr);
7527 
7528   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7529   ins_cost(1*100 + 1*400 - 150);
7530   format %{ "IMUL   EDX:EAX,$src1\n\t"
7531             "SAR    EDX,$cnt-32" %}
7532   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7533   ins_pipe( pipe_slow );
7534 %}
7535 
7536 // Multiply Memory 32-bit Immediate
7537 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
       // Three-operand int multiply of a memory operand by a constant:
       // folds the LoadI into IMUL (dst = [src] * imm), opcode 0x69 /r.
       // Flags are declared killed.
7538   match(Set dst (MulI (LoadI src) imm));
7539   effect(KILL cr);
7540 
7541   ins_cost(300);
7542   format %{ "IMUL   $dst,$src,$imm" %}
7543   opcode(0x69);  /* 69 /r id */
7544   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7545   ins_pipe( ialu_reg_mem_alu0 );
7546 %}
7547 
7548 // Multiply Memory
7549 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
       // Two-operand int multiply of a register by a memory operand: folds the
       // LoadI into IMUL (dst = dst * [src]). Same opcode pair as mulI_eReg but
       // with a register/memory operand encoding. Flags are declared killed.
7550   match(Set dst (MulI dst (LoadI src)));
7551   effect(KILL cr);
7552 
7553   ins_cost(350);
7554   format %{ "IMUL   $dst,$src" %}
7555   opcode(0xAF, 0x0F);
7556   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7557   ins_pipe( ialu_reg_mem_alu0 );
7558 %}
7559 
7560 // Multiply Register Int to Long
7561 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
       // Signed 32x32 -> 64 bit multiply: both inputs are ints widened to long,
       // so the product is formed directly into the EDX:EAX pair.
       // src is pinned to EAX; src1 is an nadxRegI operand (NOTE(review):
       // presumably any int register other than EAX/EDX -- confirm the class).
7562   // Basic Idea: long = (long)int * (long)int
7563   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7564   effect(DEF dst, USE src, USE src1, KILL flags);
7565 
7566   ins_cost(300);
7567   format %{ "IMUL   $dst,$src1" %}
7568 
7569   ins_encode( long_int_multiply( dst, src1 ) );
7570   ins_pipe( ialu_reg_reg_alu0 );
7571 %}
7572 
7573 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
       // Unsigned 32x32 -> 64 bit multiply: both inputs are ints widened to
       // long and masked with the same immL_32bits constant, so the product is
       // formed with MUL into the EDX:EAX pair. Flags are declared killed.
7574   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7575   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7576   effect(KILL flags);
7577 
7578   ins_cost(300);
7579   format %{ "MUL    $dst,$src1" %}
7580 
7581   ins_encode( long_uint_multiply(dst, src1) );
7582   ins_pipe( ialu_reg_reg_alu0 );
7583 %}
7584 
7585 // Multiply Register Long
7586 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
       // General 64x64 -> 64 bit multiply of EDX:EAX by a long register pair,
       // in place. One int temporary is reserved for the two cross products;
       // flags are declared killed. In the comments below x is dst and y is src.
7587   match(Set dst (MulL dst src));
7588   effect(KILL cr, TEMP tmp);
7589   ins_cost(4*100+3*400);
7590 // Basic idea: lo(result) = lo(x_lo * y_lo)
7591 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7592   format %{ "MOV    $tmp,$src.lo\n\t"
7593             "IMUL   $tmp,EDX\n\t"
7594             "MOV    EDX,$src.hi\n\t"
7595             "IMUL   EDX,EAX\n\t"
7596             "ADD    $tmp,EDX\n\t"
7597             "MUL    EDX:EAX,$src.lo\n\t"
7598             "ADD    EDX,$tmp" %}
7599   ins_encode( long_multiply( dst, src, tmp ) );
7600   ins_pipe( pipe_slow );
7601 %}
7602 
7603 // Multiply Register Long where the left operand's high 32 bits are zero
7604 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
       // Cheaper long multiply selected when is_operand_hi32_zero() holds for the
       // left input (n->in(1)): the x_hi * y_lo cross product is skipped.
       // In the comments below x is dst (EDX:EAX) and y is src.
7605   predicate(is_operand_hi32_zero(n->in(1)));
7606   match(Set dst (MulL dst src));
7607   effect(KILL cr, TEMP tmp);
7608   ins_cost(2*100+2*400);
7609 // Basic idea: lo(result) = lo(x_lo * y_lo)
7610 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7611   format %{ "MOV    $tmp,$src.hi\n\t"
7612             "IMUL   $tmp,EAX\n\t"
7613             "MUL    EDX:EAX,$src.lo\n\t"
7614             "ADD    EDX,$tmp" %}
7615   ins_encode %{
         // tmp = y_hi * x_lo (low 32 bits only)
7616     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7617     __ imull($tmp$$Register, rax);
         // EDX:EAX = x_lo * y_lo (full unsigned 64-bit product)
7618     __ mull($src$$Register);
         // fold the cross product into the high word
7619     __ addl(rdx, $tmp$$Register);
7620   %}
7621   ins_pipe( pipe_slow );
7622 %}
7623 
7624 // Multiply Register Long where the right operand's high 32 bits are zero
7625 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
       // Cheaper long multiply selected when is_operand_hi32_zero() holds for the
       // right input (n->in(2)): the x_lo * y_hi cross product is skipped.
       // In the comments below x is dst (EDX:EAX) and y is src.
7626   predicate(is_operand_hi32_zero(n->in(2)));
7627   match(Set dst (MulL dst src));
7628   effect(KILL cr, TEMP tmp);
7629   ins_cost(2*100+2*400);
7630 // Basic idea: lo(result) = lo(x_lo * y_lo)
7631 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7632   format %{ "MOV    $tmp,$src.lo\n\t"
7633             "IMUL   $tmp,EDX\n\t"
7634             "MUL    EDX:EAX,$src.lo\n\t"
7635             "ADD    EDX,$tmp" %}
7636   ins_encode %{
         // tmp = y_lo * x_hi (low 32 bits only)
7637     __ movl($tmp$$Register, $src$$Register);
7638     __ imull($tmp$$Register, rdx);
         // EDX:EAX = x_lo * y_lo (full unsigned 64-bit product)
7639     __ mull($src$$Register);
         // fold the cross product into the high word
7640     __ addl(rdx, $tmp$$Register);
7641   %}
7642   ins_pipe( pipe_slow );
7643 %}
7644 
7645 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7646 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
       // Cheapest long multiply, selected when is_operand_hi32_zero() holds for
       // both inputs: a single unsigned MUL of the low words yields the whole
       // 64-bit result in EDX:EAX, so no temporary is needed.
7647   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7648   match(Set dst (MulL dst src));
7649   effect(KILL cr);
7650   ins_cost(1*400);
7651 // Basic idea: lo(result) = lo(x_lo * y_lo)
7652 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7653   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7654   ins_encode %{
7655     __ mull($src$$Register);
7656   %}
7657   ins_pipe( pipe_slow );
7658 %}
7659 
7660 // Multiply Register Long by small constant
7661 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
       // In-place long multiply of EDX:EAX by an immL_127 constant operand.
       // One int temporary holds the high-word cross product; flags are declared
       // killed and the declared size is 12 bytes.
       // NOTE(review): the fixed size presumably relies on the constant fitting an
       // 8-bit immediate in the IMUL -- confirm against long_multiply_con.
7662   match(Set dst (MulL dst src));
7663   effect(KILL cr, TEMP tmp);
7664   ins_cost(2*100+2*400);
7665   size(12);
7666 // Basic idea: lo(result) = lo(src * EAX)
7667 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7668   format %{ "IMUL   $tmp,EDX,$src\n\t"
7669             "MOV    EDX,$src\n\t"
7670             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7671             "ADD    EDX,$tmp" %}
7672   ins_encode( long_multiply_con( dst, src, tmp ) );
7673   ins_pipe( pipe_slow );
7674 %}
7675 
7676 // Integer DIV with Register
7677 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
       // Int division with the dividend and quotient in EAX and the divisor
       // pinned to ECX; EDX and the flags are declared killed.
       // Per the format, the sequence guards the 0x80000000 / -1 case: when EAX
       // is 0x80000000 and ECX is -1 the IDIV is skipped (EDX is zeroed, EAX is
       // left as is); otherwise CDQ + IDIV run. The declared size is 26 bytes.
7678   match(Set rax (DivI rax div));
7679   effect(KILL rdx, KILL cr);
7680   size(26);
7681   ins_cost(30*100+10*100);
7682   format %{ "CMP    EAX,0x80000000\n\t"
7683             "JNE,s  normal\n\t"
7684             "XOR    EDX,EDX\n\t"
7685             "CMP    ECX,-1\n\t"
7686             "JE,s   done\n"
7687     "normal: CDQ\n\t"
7688             "IDIV   $div\n\t"
7689     "done:"        %}
7690   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7691   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7692   ins_pipe( ialu_reg_reg_alu0 );
7693 %}
7694 
7695 // Divide Register Long
7696 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
       // General long division, result in EDX:EAX. Per the format this is not
       // inline code but a call: both operands are pushed (hi word first) and
       // SharedRuntime::ldiv is called, then 16 bytes of stack are popped.
       // ECX, EBX and the flags are declared killed; the cost is set very high.
7697   match(Set dst (DivL src1 src2));
7698   effect( KILL cr, KILL cx, KILL bx );
7699   ins_cost(10000);
7700   format %{ "PUSH   $src1.hi\n\t"
7701             "PUSH   $src1.lo\n\t"
7702             "PUSH   $src2.hi\n\t"
7703             "PUSH   $src2.lo\n\t"
7704             "CALL   SharedRuntime::ldiv\n\t"
7705             "ADD    ESP,16" %}
7706   ins_encode( long_div(src1,src2) );
7707   ins_pipe( pipe_slow );
7708 %}
7709 
7710 // Integer DIVMOD with Register, both quotient and mod results
7711 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
       // Combined int divide/modulo: matches a DivModI node (no "Set", so no
       // single result operand is named) and uses the same guarded
       // CDQ + IDIV sequence and encoding as divI_eReg.
       // NOTE(review): presumably EAX receives the quotient and EDX the
       // remainder, which is why neither is declared killed -- confirm.
7712   match(DivModI rax div);
7713   effect(KILL cr);
7714   size(26);
7715   ins_cost(30*100+10*100);
7716   format %{ "CMP    EAX,0x80000000\n\t"
7717             "JNE,s  normal\n\t"
7718             "XOR    EDX,EDX\n\t"
7719             "CMP    ECX,-1\n\t"
7720             "JE,s   done\n"
7721     "normal: CDQ\n\t"
7722             "IDIV   $div\n\t"
7723     "done:"        %}
7724   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7725   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7726   ins_pipe( pipe_slow );
7727 %}
7728 
7729 // Integer MOD with Register
7730 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
       // Int remainder with the dividend in EAX, the divisor pinned to ECX and
       // the result in EDX; EAX and the flags are declared killed.
       // The encoding (cdq_enc, OpcP, RegOpc) and the 26-byte size are identical
       // to divI_eReg, so the emitted code is the same guarded sequence: when
       // EAX is 0x80000000 and ECX is -1 the IDIV is skipped with EDX zeroed.
       // The format lists that whole sequence so the printed assembly matches
       // what is actually emitted (a bare CDQ + IDIV would not be 26 bytes).
7731   match(Set rdx (ModI rax div));
7732   effect(KILL rax, KILL cr);
7733 
7734   size(26);
7735   ins_cost(300);
7736   format %{ "CMP    EAX,0x80000000\n\t"
                 "JNE,s  normal\n\t"
                 "XOR    EDX,EDX\n\t"
                 "CMP    ECX,-1\n\t"
                 "JE,s   done\n"
         "normal: CDQ\n\t"
7737             "IDIV   $div\n\t"
         "done:"        %}
7738   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7739   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7740   ins_pipe( ialu_reg_reg_alu0 );
7741 %}
7742 
7743 // Remainder Register Long
7744 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
       // General long remainder, result in EDX:EAX. Per the format this is a
       // call: both operands are pushed (hi word first) and SharedRuntime::lrem
       // is called, then 16 bytes of stack are popped.
       // ECX, EBX and the flags are declared killed; the cost is set very high.
7745   match(Set dst (ModL src1 src2));
7746   effect( KILL cr, KILL cx, KILL bx );
7747   ins_cost(10000);
7748   format %{ "PUSH   $src1.hi\n\t"
7749             "PUSH   $src1.lo\n\t"
7750             "PUSH   $src2.hi\n\t"
7751             "PUSH   $src2.lo\n\t"
7752             "CALL   SharedRuntime::lrem\n\t"
7753             "ADD    ESP,16" %}
7754   ins_encode( long_mod(src1,src2) );
7755   ins_pipe( pipe_slow );
7756 %}
7757 
7758 // Divide Register Long (no special case since divisor != -1)
7759 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
       // In-place division of the long in EDX:EAX by an immL32 constant, done
       // inline with unsigned 32-bit DIVs by abs(imm) instead of a runtime call.
       // Two int temporaries are reserved: tmp holds abs(imm), tmp2 holds the
       // high word of the quotient. Flags are declared killed.
       // The encoder asserts that the constant is not 0, -1 or min_jint.
7760   match(Set dst (DivL dst imm));
7761   effect( TEMP tmp, TEMP tmp2, KILL cr );
7762   ins_cost(1000);
7763   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7764             "XOR    $tmp2,$tmp2\n\t"
7765             "CMP    $tmp,EDX\n\t"
7766             "JA,s   fast\n\t"
7767             "MOV    $tmp2,EAX\n\t"
7768             "MOV    EAX,EDX\n\t"
7769             "MOV    EDX,0\n\t"
7770             "JLE,s  pos\n\t"
7771             "LNEG   EAX : $tmp2\n\t"
7772             "DIV    $tmp # unsigned division\n\t"
7773             "XCHG   EAX,$tmp2\n\t"
7774             "DIV    $tmp\n\t"
7775             "LNEG   $tmp2 : EAX\n\t"
7776             "JMP,s  done\n"
7777     "pos:\n\t"
7778             "DIV    $tmp\n\t"
7779             "XCHG   EAX,$tmp2\n"
7780     "fast:\n\t"
7781             "DIV    $tmp\n"
7782     "done:\n\t"
7783             "MOV    EDX,$tmp2\n\t"
7784             "NEG    EDX:EAX # if $imm < 0" %}
7785   ins_encode %{
7786     int con = (int)$imm$$constant;
7787     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
         // Divide by the magnitude; the sign of the constant is applied at the end.
7788     int pcon = (con > 0) ? con : -con;
7789     Label Lfast, Lpos, Ldone;
7790 
7791     __ movl($tmp$$Register, pcon);
         // tmp2 is the quotient's high word; it stays 0 on the fast path.
7792     __ xorl($tmp2$$Register,$tmp2$$Register);
7793     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7794     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7795 
         // Slow path: divide the high word first, then the low word.
         // The MOVs below do not modify the flags set by the CMP above, so the
         // following branch still tests that comparison.
7796     __ movl($tmp2$$Register, $dst$$Register); // save
7797     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7798     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7799     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7800 
7801     // Negative dividend.
7802     // convert value to positive to use unsigned division
7803     __ lneg($dst$$Register, $tmp2$$Register);
7804     __ divl($tmp$$Register);
7805     __ xchgl($dst$$Register, $tmp2$$Register);
7806     __ divl($tmp$$Register);
7807     // revert result back to negative
7808     __ lneg($tmp2$$Register, $dst$$Register);
7809     __ jmpb(Ldone);
7810 
7811     __ bind(Lpos);
7812     __ divl($tmp$$Register); // Use unsigned division
7813     __ xchgl($dst$$Register, $tmp2$$Register);
7814     // Fall through for final divide, tmp2 has 32 bit hi result
7815 
7816     __ bind(Lfast);
7817     // fast path: src is positive
7818     __ divl($tmp$$Register); // Use unsigned division
7819 
7820     __ bind(Ldone);
         // Move the quotient's high word into place, then negate the 64-bit
         // result if the constant divisor was negative.
7821     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7822     if (con < 0) {
7823       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7824     }
7825   %}
7826   ins_pipe( pipe_slow );
7827 %}
7828 
7829 // Remainder Register Long (remainder fit into 32 bits)
7830 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
       // In-place remainder of the long in EDX:EAX by an immL32 constant, done
       // inline with unsigned 32-bit DIVs by abs(imm) instead of a runtime call.
       // Two int temporaries are reserved: tmp holds abs(imm), tmp2 saves the
       // dividend's low word. Flags are declared killed.
       // The encoder asserts that the constant is not 0, -1 or min_jint. The
       // 32-bit remainder ends up in EAX and is sign-extended into EDX.
7831   match(Set dst (ModL dst imm));
7832   effect( TEMP tmp, TEMP tmp2, KILL cr );
7833   ins_cost(1000);
7834   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7835             "CMP    $tmp,EDX\n\t"
7836             "JA,s   fast\n\t"
7837             "MOV    $tmp2,EAX\n\t"
7838             "MOV    EAX,EDX\n\t"
7839             "MOV    EDX,0\n\t"
7840             "JLE,s  pos\n\t"
7841             "LNEG   EAX : $tmp2\n\t"
7842             "DIV    $tmp # unsigned division\n\t"
7843             "MOV    EAX,$tmp2\n\t"
7844             "DIV    $tmp\n\t"
7845             "NEG    EDX\n\t"
7846             "JMP,s  done\n"
7847     "pos:\n\t"
7848             "DIV    $tmp\n\t"
7849             "MOV    EAX,$tmp2\n"
7850     "fast:\n\t"
7851             "DIV    $tmp\n"
7852     "done:\n\t"
7853             "MOV    EAX,EDX\n\t"
7854             "SAR    EDX,31\n\t" %}
7855   ins_encode %{
7856     int con = (int)$imm$$constant;
7857     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
         // Only the magnitude of the constant is used; con's sign is never
         // consulted again in this encoder.
7858     int pcon = (con > 0) ? con : -con;
7859     Label  Lfast, Lpos, Ldone;
7860 
7861     __ movl($tmp$$Register, pcon);
7862     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7863     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7864 
         // Slow path: reduce the high word first, then the low word.
         // The MOVs below do not modify the flags set by the CMP above, so the
         // following branch still tests that comparison.
7865     __ movl($tmp2$$Register, $dst$$Register); // save
7866     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7867     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7868     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7869 
7870     // Negative dividend.
7871     // convert value to positive to use unsigned division
7872     __ lneg($dst$$Register, $tmp2$$Register);
7873     __ divl($tmp$$Register);
7874     __ movl($dst$$Register, $tmp2$$Register);
7875     __ divl($tmp$$Register);
7876     // revert remainder back to negative
7877     __ negl(HIGH_FROM_LOW($dst$$Register));
7878     __ jmpb(Ldone);
7879 
7880     __ bind(Lpos);
7881     __ divl($tmp$$Register);
7882     __ movl($dst$$Register, $tmp2$$Register);
7883 
7884     __ bind(Lfast);
7885     // fast path: src is positive
7886     __ divl($tmp$$Register);
7887 
7888     __ bind(Ldone);
         // DIV leaves the remainder in EDX: copy it to the low word and
         // sign-extend it into the high word.
7889     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7890     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7891 
7892   %}
7893   ins_pipe( pipe_slow );
7894 %}
7895 
7896 // Integer Shift Instructions
7897 // Shift Left by one
7898 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
       // In-place int left shift with an immI1 count, using the shift-by-one
       // form (opcode D1 /4), which carries no immediate byte.
       // Flags are declared killed; the declared size is 2 bytes.
7899   match(Set dst (LShiftI dst shift));
7900   effect(KILL cr);
7901 
7902   size(2);
7903   format %{ "SHL    $dst,$shift" %}
7904   opcode(0xD1, 0x4);  /* D1 /4 */
7905   ins_encode( OpcP, RegOpc( dst ) );
7906   ins_pipe( ialu_reg );
7907 %}
7908 
7909 // Shift Left by 8-bit immediate
7910 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
       // In-place int left shift by an 8-bit immediate count (opcode C1 /4 ib).
       // Flags are declared killed; the declared size is 3 bytes.
7911   match(Set dst (LShiftI dst shift));
7912   effect(KILL cr);
7913 
7914   size(3);
7915   format %{ "SHL    $dst,$shift" %}
7916   opcode(0xC1, 0x4);  /* C1 /4 ib */
7917   ins_encode( RegOpcImm( dst, shift) );
7918   ins_pipe( ialu_reg );
7919 %}
7920 
7921 // Shift Left by variable
7922 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
       // In-place int left shift by a variable count (opcode D3 /4). The count
       // operand is pinned to ECX and is not passed to the encoding, since this
       // opcode form takes its count implicitly. Flags are declared killed.
7923   match(Set dst (LShiftI dst shift));
7924   effect(KILL cr);
7925 
7926   size(2);
7927   format %{ "SHL    $dst,$shift" %}
7928   opcode(0xD3, 0x4);  /* D3 /4 */
7929   ins_encode( OpcP, RegOpc( dst ) );
7930   ins_pipe( ialu_reg_reg );
7931 %}
7932 
7933 // Arithmetic shift right by one
7934 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
       // In-place int arithmetic right shift with an immI1 count, using the
       // shift-by-one form (opcode D1 /7), which carries no immediate byte.
       // Flags are declared killed; the declared size is 2 bytes.
7935   match(Set dst (RShiftI dst shift));
7936   effect(KILL cr);
7937 
7938   size(2);
7939   format %{ "SAR    $dst,$shift" %}
7940   opcode(0xD1, 0x7);  /* D1 /7 */
7941   ins_encode( OpcP, RegOpc( dst ) );
7942   ins_pipe( ialu_reg );
7943 %}
7944 
7945 // Arithmetic shift right of a memory operand by one
7946 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
       // Read-modify-write form of sarI_eReg_1: matches a StoreI to dst whose
       // value is (LoadI dst) >> shift with an immI1 count, and emits the
       // shift-by-one opcode (D1 /7) on the memory operand.
       // Flags are declared killed.
7947   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7948   effect(KILL cr);
7949   format %{ "SAR    $dst,$shift" %}
7950   opcode(0xD1, 0x7);  /* D1 /7 */
7951   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7952   ins_pipe( ialu_mem_imm );
7953 %}
7954 
7955 // Arithmetic Shift Right by 8-bit immediate
7956 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
       // In-place int arithmetic right shift by an 8-bit immediate count
       // (opcode C1 /7 ib). Flags are declared killed; the declared size is 3 bytes.
       // This rule has only register and immediate operands, so it uses the
       // register pipeline class ialu_reg, matching the identically shaped
       // salI_eReg_imm and shrI_eReg_imm rules; the memory class ialu_mem_imm
       // belongs to the sarI_mem_* rules.
7957   match(Set dst (RShiftI dst shift));
7958   effect(KILL cr);
7959 
7960   size(3);
7961   format %{ "SAR    $dst,$shift" %}
7962   opcode(0xC1, 0x7);  /* C1 /7 ib */
7963   ins_encode( RegOpcImm( dst, shift ) );
7964   ins_pipe( ialu_reg );
7965 %}
7966 
7967 // Arithmetic Shift Right of a memory operand by 8-bit immediate
7968 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
       // Read-modify-write form of sarI_eReg_imm: matches a StoreI to dst whose
       // value is (LoadI dst) >> shift with an 8-bit immediate count
       // (opcode C1 /7 ib on the memory operand). Flags are declared killed.
       // NOTE(review): Con8or32 presumably emits a single byte here because the
       // operand is an immI8 -- confirm in the encoder.
7969   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7970   effect(KILL cr);
7971 
7972   format %{ "SAR    $dst,$shift" %}
7973   opcode(0xC1, 0x7);  /* C1 /7 ib */
7974   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7975   ins_pipe( ialu_mem_imm );
7976 %}
7977 
7978 // Arithmetic Shift Right by variable (the count is the eCXRegI operand)
7979 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7980   match(Set dst (RShiftI dst shift));
7981   effect(KILL cr);
7982   // Only $dst is passed to the encoding (D3 /7); $shift is constrained by its operand class, not encoded.
7983   size(2);
7984   format %{ "SAR    $dst,$shift" %}
7985   opcode(0xD3, 0x7);  /* D3 /7 */
7986   ins_encode( OpcP, RegOpc( dst ) );
7987   ins_pipe( ialu_reg_reg );
7988 %}
7989 
7990 // Logical shift right by one (register destination)
7991 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7992   match(Set dst (URShiftI dst shift));
7993   effect(KILL cr);
7994   // The immI1 operand only selects this rule; the D1 /5 encoding below emits no immediate byte.
7995   size(2);
7996   format %{ "SHR    $dst,$shift" %}
7997   opcode(0xD1, 0x5);  /* D1 /5 */
7998   ins_encode( OpcP, RegOpc( dst ) );
7999   ins_pipe( ialu_reg );
8000 %}
8001 
8002 // Logical Shift Right by 8-bit immediate (register destination)
8003 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8004   match(Set dst (URShiftI dst shift));
8005   effect(KILL cr);
8006   // In-place unsigned shift of $dst; the flags operand is declared killed.
8007   size(3);
8008   format %{ "SHR    $dst,$shift" %}
8009   opcode(0xC1, 0x5);  /* C1 /5 ib */
8010   ins_encode( RegOpcImm( dst, shift) );
8011   ins_pipe( ialu_reg );
8012 %}
8013 
8014 
8015 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8016 // This idiom is used by the compiler for the i2b bytecode.
8017 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8018   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8019   // The whole shift pair is replaced by one MOVSX (movsbl); this rule declares no flags effect.
8020   size(3);
8021   format %{ "MOVSX  $dst,$src :8" %}
8022   ins_encode %{
8023     __ movsbl($dst$$Register, $src$$Register);
8024   %}
8025   ins_pipe(ialu_reg_reg);
8026 %}
8027 
8028 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8029 // This idiom is used by the compiler for the i2s bytecode.
8030 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8031   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8032   // The whole shift pair is replaced by one MOVSX (movswl); this rule declares no flags effect.
8033   size(3);
8034   format %{ "MOVSX  $dst,$src :16" %}
8035   ins_encode %{
8036     __ movswl($dst$$Register, $src$$Register);
8037   %}
8038   ins_pipe(ialu_reg_reg);
8039 %}
8040 
8041 
8042 // Logical Shift Right by variable (the count is the eCXRegI operand)
8043 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8044   match(Set dst (URShiftI dst shift));
8045   effect(KILL cr);
8046   // Only $dst is passed to the encoding (D3 /5); $shift is constrained by its operand class, not encoded.
8047   size(2);
8048   format %{ "SHR    $dst,$shift" %}
8049   opcode(0xD3, 0x5);  /* D3 /5 */
8050   ins_encode( OpcP, RegOpc( dst ) );
8051   ins_pipe( ialu_reg_reg );
8052 %}
8053 
8054 
8055 //----------Logical Instructions-----------------------------------------------
8056 //----------Integer Logical Instructions---------------------------------------
8057 // And Instructions
8058 // And Register with Register: $dst = $dst & $src, two-byte encoding (opcode 0x23 plus register pair)
8059 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8060   match(Set dst (AndI dst src));
8061   effect(KILL cr);
8062   // $dst is both an input and the result; the flags operand is declared killed.
8063   size(2);
8064   format %{ "AND    $dst,$src" %}
8065   opcode(0x23);
8066   ins_encode( OpcP, RegReg( dst, src) );
8067   ins_pipe( ialu_reg_reg );
8068 %}
8069 
8070 // And Register with Immediate
8071 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8072   match(Set dst (AndI dst src));
8073   effect(KILL cr);
8074   // No size() is declared here; the immediate is emitted through Con8or32.
8075   format %{ "AND    $dst,$src" %}
8076   opcode(0x81,0x04);  /* Opcode 81 /4 */
8077   // ins_encode( RegImm( dst, src) );
8078   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8079   ins_pipe( ialu_reg );
8080 %}
8081 
8082 // And Register with Memory: folds the LoadI of $src into the AND
8083 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8084   match(Set dst (AndI dst (LoadI src)));
8085   effect(KILL cr);
8086   // Same opcode (0x23) as the register-register rule, with a memory second operand.
8087   ins_cost(125);
8088   format %{ "AND    $dst,$src" %}
8089   opcode(0x23);
8090   ins_encode( OpcP, RegMem( dst, src) );
8091   ins_pipe( ialu_reg_mem );
8092 %}
8093 
8094 // And Memory with Register: matches load, AND with $src, and store back to the same $dst
8095 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8096   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8097   effect(KILL cr);
8098   // Note the operand order in RegMem below: the register $src comes first, the memory $dst second.
8099   ins_cost(150);
8100   format %{ "AND    $dst,$src" %}
8101   opcode(0x21);  /* Opcode 21 /r */
8102   ins_encode( OpcP, RegMem( src, dst ) );
8103   ins_pipe( ialu_mem_reg );
8104 %}
8105 
8106 // And Memory with Immediate: matches load, AND with a constant, and store back to the same $dst
8107 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8108   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8109   effect(KILL cr);
8110   // Encoded in three parts: opcode chosen from $src, memory operand with /4, then the constant.
8111   ins_cost(125);
8112   format %{ "AND    $dst,$src" %}
8113   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8114   // ins_encode( MemImm( dst, src) );
8115   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8116   ins_pipe( ialu_mem_imm );
8117 %}
8118 
8119 // BMI1 instructions (each rule in this group is guarded by predicate(UseBMI1Instructions))
8120 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8121   match(Set dst (AndI (XorI src1 minus_1) src2));
8122   predicate(UseBMI1Instructions);
8123   effect(KILL cr);
8124   // Matches (src1 XOR minus_1) AND src2; immI_M1 is presumably the constant -1, making this ~src1 & src2.
8125   format %{ "ANDNL  $dst, $src1, $src2" %}
8126   // Three-operand form: $dst is a separate result register, not tied to either source.
8127   ins_encode %{
8128     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8129   %}
8130   ins_pipe(ialu_reg);
8131 %}
8132 
8133 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8134   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8135   predicate(UseBMI1Instructions);
8136   effect(KILL cr);
8137   // Memory variant of andnI_rReg_rReg_rReg: the second source is loaded directly by ANDN.
8138   ins_cost(125);
8139   format %{ "ANDNL  $dst, $src1, $src2" %}
8140   // $src2 is passed as an Address, so the LoadI is folded into the instruction.
8141   ins_encode %{
8142     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8143   %}
8144   ins_pipe(ialu_reg_mem);
8145 %}
8146 
8147 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8148   match(Set dst (AndI (SubI imm_zero src) src));
8149   predicate(UseBMI1Instructions);
8150   effect(KILL cr);
8151   // Matches (imm_zero - src) AND src, i.e. src & -src assuming immI0 is the constant 0; emitted as one BLSI.
8152   format %{ "BLSIL  $dst, $src" %}
8153   // The same $src operand appears twice in the match tree above.
8154   ins_encode %{
8155     __ blsil($dst$$Register, $src$$Register);
8156   %}
8157   ins_pipe(ialu_reg);
8158 %}
8159 
8160 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8161   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8162   predicate(UseBMI1Instructions);
8163   effect(KILL cr);
8164   // Memory variant of blsiI_rReg_rReg: both LoadI nodes in the match tree name the same $src.
8165   ins_cost(125);
8166   format %{ "BLSIL  $dst, $src" %}
8167   // A single BLSI with an Address source is emitted.
8168   ins_encode %{
8169     __ blsil($dst$$Register, $src$$Address);
8170   %}
8171   ins_pipe(ialu_reg_mem);
8172 %}
8173 
8174 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8175 %{
8176   match(Set dst (XorI (AddI src minus_1) src));
8177   predicate(UseBMI1Instructions);
8178   effect(KILL cr);
8179   // Matches (src + minus_1) XOR src, i.e. (src - 1) ^ src assuming immI_M1 is -1; emitted as one BLSMSK.
8180   format %{ "BLSMSKL $dst, $src" %}
8181   // The same $src operand appears twice in the match tree above.
8182   ins_encode %{
8183     __ blsmskl($dst$$Register, $src$$Register);
8184   %}
8185 
8186   ins_pipe(ialu_reg);
8187 %}
8188 
8189 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8190 %{
8191   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8192   predicate(UseBMI1Instructions);
8193   effect(KILL cr);
8194   // Memory variant of blsmskI_rReg_rReg: both LoadI nodes in the match tree name the same $src.
8195   ins_cost(125);
8196   format %{ "BLSMSKL $dst, $src" %}
8197   // A single BLSMSK with an Address source is emitted.
8198   ins_encode %{
8199     __ blsmskl($dst$$Register, $src$$Address);
8200   %}
8201 
8202   ins_pipe(ialu_reg_mem);
8203 %}
8204 
8205 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8206 %{
8207   match(Set dst (AndI (AddI src minus_1) src) );
8208   predicate(UseBMI1Instructions);
8209   effect(KILL cr);
8210   // Matches (src + minus_1) AND src, i.e. (src - 1) & src assuming immI_M1 is -1; emitted as one BLSR.
8211   format %{ "BLSRL  $dst, $src" %}
8212   // The same $src operand appears twice in the match tree above.
8213   ins_encode %{
8214     __ blsrl($dst$$Register, $src$$Register);
8215   %}
8216 
8217   ins_pipe(ialu_reg);
8218 %}
8219 
8220 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8221 %{
8222   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8223   predicate(UseBMI1Instructions);
8224   effect(KILL cr);
8225   // Memory variant of blsrI_rReg_rReg: both LoadI nodes in the match tree name the same $src.
8226   ins_cost(125);
8227   format %{ "BLSRL  $dst, $src" %}
8228   // A single BLSR with an Address source is emitted.
8229   ins_encode %{
8230     __ blsrl($dst$$Register, $src$$Address);
8231   %}
8232 
8233   ins_pipe(ialu_reg_mem);
8234 %}
8235 
8236 // Or Instructions
8237 // Or Register with Register: $dst = $dst | $src, two-byte encoding (opcode 0x0B plus register pair)
8238 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8239   match(Set dst (OrI dst src));
8240   effect(KILL cr);
8241   // $dst is both an input and the result; the flags operand is declared killed.
8242   size(2);
8243   format %{ "OR     $dst,$src" %}
8244   opcode(0x0B);
8245   ins_encode( OpcP, RegReg( dst, src) );
8246   ins_pipe( ialu_reg_reg );
8247 %}
8248 
8249 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8250   match(Set dst (OrI dst (CastP2X src)));
8251   effect(KILL cr);
8252   // Same encoding as orI_eReg; the CastP2X of the pointer operand is absorbed and emits no code of its own.
8253   size(2);
8254   format %{ "OR     $dst,$src" %}
8255   opcode(0x0B);
8256   ins_encode( OpcP, RegReg( dst, src) );
8257   ins_pipe( ialu_reg_reg );
8258 %}
8259 
8260 
8261 // Or Register with Immediate
8262 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8263   match(Set dst (OrI dst src));
8264   effect(KILL cr);
8265   // No size() is declared here; the immediate is emitted through Con8or32.
8266   format %{ "OR     $dst,$src" %}
8267   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8268   // ins_encode( RegImm( dst, src) );
8269   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8270   ins_pipe( ialu_reg );
8271 %}
8272 
8273 // Or Register with Memory: folds the LoadI of $src into the OR
8274 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8275   match(Set dst (OrI dst (LoadI src)));
8276   effect(KILL cr);
8277   // Same opcode (0x0B) as the register-register rule, with a memory second operand.
8278   ins_cost(125);
8279   format %{ "OR     $dst,$src" %}
8280   opcode(0x0B);
8281   ins_encode( OpcP, RegMem( dst, src) );
8282   ins_pipe( ialu_reg_mem );
8283 %}
8284 
8285 // Or Memory with Register: matches load, OR with $src, and store back to the same $dst
8286 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8287   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8288   effect(KILL cr);
8289   // Note the operand order in RegMem below: the register $src comes first, the memory $dst second.
8290   ins_cost(150);
8291   format %{ "OR     $dst,$src" %}
8292   opcode(0x09);  /* Opcode 09 /r */
8293   ins_encode( OpcP, RegMem( src, dst ) );
8294   ins_pipe( ialu_mem_reg );
8295 %}
8296 
8297 // Or Memory with Immediate: matches load, OR with a constant, and store back to the same $dst
8298 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8299   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8300   effect(KILL cr);
8301   // Encoded in three parts: opcode chosen from $src, memory operand with /1, then the constant.
8302   ins_cost(125);
8303   format %{ "OR     $dst,$src" %}
8304   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8305   // ins_encode( MemImm( dst, src) );
8306   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8307   ins_pipe( ialu_mem_imm );
8308 %}
8309 
8310 // ROL/ROR
8311 // ROL expand: rules with no match clause; they are named from the expand blocks of the matching rules below.
8312 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8313   effect(USE_DEF dst, USE shift, KILL cr);
8314   // Rotate left by one: D1 /0 encodes only $dst; the count operand is not emitted.
8315   format %{ "ROL    $dst, $shift" %}
8316   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8317   ins_encode( OpcP, RegOpc( dst ));
8318   ins_pipe( ialu_reg );
8319 %}
8320 
8321 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8322   effect(USE_DEF dst, USE shift, KILL cr);
8323   // Rotate left by an 8-bit immediate; expansion target only (no match clause).
8324   format %{ "ROL    $dst, $shift" %}
8325   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8326   ins_encode( RegOpcImm(dst, shift) );
8327   ins_pipe(ialu_reg);
8328 %}
8329 
8330 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8331   effect(USE_DEF dst, USE shift, KILL cr);
8332   // Rotate left by a variable count; $dst uses the ncxRegI class (presumably excluding ECX, which holds the count).
8333   format %{ "ROL    $dst, $shift" %}
8334   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8335   ins_encode(OpcP, RegOpc(dst));
8336   ins_pipe( ialu_reg_reg );
8337 %}
8338 // end of ROL expand
8339 
8340 // ROL 32bit by one once: matches (dst << lshift) | (dst >>> rshift) with lshift typed immI1 and rshift immI_M1
8341 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8342   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8343   // NOTE(review): pairing 1 with -1 relies on shift counts being taken mod 32 (-1 acting as 31) - confirm.
8344   expand %{
8345     rolI_eReg_imm1(dst, lshift, cr);
8346   %}
8347 %}
8348 
8349 // ROL 32bit var by imm8 once: only when the two constant shift counts sum to a multiple of 32
8350 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8351   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8352   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8353   // The predicate reads the constant inputs of both shift nodes; only $lshift is passed on to the rotate.
8354   expand %{
8355     rolI_eReg_imm8(dst, lshift, cr);
8356   %}
8357 %}
8358 
8359 // ROL 32bit var by var once: right-shift count written as (zero - shift)
8360 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8361   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8362   // The whole OrI/shift/SubI tree expands to a single variable-count rotate.
8363   expand %{
8364     rolI_eReg_CL(dst, shift, cr);
8365   %}
8366 %}
8367 
8368 // ROL 32bit var by var once: right-shift count written as (c32 - shift)
8369 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8370   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8371   // Same expansion as rolI_eReg_Var_C0; only the constant in the SubI differs.
8372   expand %{
8373     rolI_eReg_CL(dst, shift, cr);
8374   %}
8375 %}
8376 
8377 // ROR expand: rules with no match clause; they are named from the expand blocks of the matching rules below.
8378 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8379   effect(USE_DEF dst, USE shift, KILL cr);
8380   // Rotate right by one: D1 /1 encodes only $dst; the count operand is not emitted.
8381   format %{ "ROR    $dst, $shift" %}
8382   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8383   ins_encode( OpcP, RegOpc( dst ) );
8384   ins_pipe( ialu_reg );
8385 %}
8386 
8387 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8388   effect (USE_DEF dst, USE shift, KILL cr);
8389   // Rotate right by an 8-bit immediate; expansion target only (no match clause).
8390   format %{ "ROR    $dst, $shift" %}
8391   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8392   ins_encode( RegOpcImm(dst, shift) );
8393   ins_pipe( ialu_reg );
8394 %}
8395 
8396 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8397   effect(USE_DEF dst, USE shift, KILL cr);
8398   // Rotate right by a variable count; $dst uses the ncxRegI class (presumably excluding ECX, which holds the count).
8399   format %{ "ROR    $dst, $shift" %}
8400   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8401   ins_encode(OpcP, RegOpc(dst));
8402   ins_pipe( ialu_reg_reg );
8403 %}
8404 // end of ROR expand
8405 
8406 // ROR right once: matches (dst >>> rshift) | (dst << lshift) with rshift typed immI1 and lshift immI_M1
8407 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8408   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8409   // NOTE(review): pairing 1 with -1 relies on shift counts being taken mod 32 (-1 acting as 31) - confirm.
8410   expand %{
8411     rorI_eReg_imm1(dst, rshift, cr);
8412   %}
8413 %}
8414 
8415 // ROR 32bit by immI8 once: only when the two constant shift counts sum to a multiple of 32
8416 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8417   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8418   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8419   // The predicate reads the constant inputs of both shift nodes; only $rshift is passed on to the rotate.
8420   expand %{
8421     rorI_eReg_imm8(dst, rshift, cr);
8422   %}
8423 %}
8424 
8425 // ROR 32bit var by var once: left-shift count written as (zero - shift)
8426 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8427   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8428   // The whole OrI/shift/SubI tree expands to a single variable-count rotate.
8429   expand %{
8430     rorI_eReg_CL(dst, shift, cr);
8431   %}
8432 %}
8433 
8434 // ROR 32bit var by var once: left-shift count written as (c32 - shift)
8435 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8436   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8437   // Same expansion as rorI_eReg_Var_C0; only the constant in the SubI differs.
8438   expand %{
8439     rorI_eReg_CL(dst, shift, cr);
8440   %}
8441 %}
8442 
8443 // Xor Instructions
8444 // Xor Register with Register: $dst = $dst ^ $src, two-byte encoding (opcode 0x33 plus register pair)
8445 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8446   match(Set dst (XorI dst src));
8447   effect(KILL cr);
8448   // $dst is both an input and the result; the flags operand is declared killed.
8449   size(2);
8450   format %{ "XOR    $dst,$src" %}
8451   opcode(0x33);
8452   ins_encode( OpcP, RegReg( dst, src) );
8453   ins_pipe( ialu_reg_reg );
8454 %}
8455 
8456 // Xor Register with Immediate -1: emitted as NOT
8457 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8458   match(Set dst (XorI dst imm));  // no flags operand: unlike the other XOR rules, nothing is declared killed
8459   // The constant is not encoded; notl() takes only the register.
8460   size(2);
8461   format %{ "NOT    $dst" %}  
8462   ins_encode %{
8463      __ notl($dst$$Register);
8464   %}
8465   ins_pipe( ialu_reg );
8466 %}
8467 
8468 // Xor Register with Immediate
8469 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8470   match(Set dst (XorI dst src));
8471   effect(KILL cr);
8472   // No size() is declared here; the immediate is emitted through Con8or32.
8473   format %{ "XOR    $dst,$src" %}
8474   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8475   // ins_encode( RegImm( dst, src) );
8476   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8477   ins_pipe( ialu_reg );
8478 %}
8479 
8480 // Xor Register with Memory: folds the LoadI of $src into the XOR
8481 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8482   match(Set dst (XorI dst (LoadI src)));
8483   effect(KILL cr);
8484   // Same opcode (0x33) as the register-register rule, with a memory second operand.
8485   ins_cost(125);
8486   format %{ "XOR    $dst,$src" %}
8487   opcode(0x33);
8488   ins_encode( OpcP, RegMem(dst, src) );
8489   ins_pipe( ialu_reg_mem );
8490 %}
8491 
8492 // Xor Memory with Register: matches load, XOR with $src, and store back to the same $dst
8493 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8494   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8495   effect(KILL cr);
8496   // Note the operand order in RegMem below: the register $src comes first, the memory $dst second.
8497   ins_cost(150);
8498   format %{ "XOR    $dst,$src" %}
8499   opcode(0x31);  /* Opcode 31 /r */
8500   ins_encode( OpcP, RegMem( src, dst ) );
8501   ins_pipe( ialu_mem_reg );
8502 %}
8503 
8504 // Xor Memory with Immediate: matches load, XOR with a constant, and store back to the same $dst
8505 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8506   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8507   effect(KILL cr);
8508   // Encoded in three parts: opcode chosen from $src, memory operand with /6, then the constant.
8509   ins_cost(125);
8510   format %{ "XOR    $dst,$src" %}
8511   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8512   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8513   ins_pipe( ialu_mem_imm );
8514 %}
8515 
8516 //----------Convert Int to Boolean---------------------------------------------
8517 // Helper with no match clause: plain register move, used as the first step of convI2B's expansion.
8518 instruct movI_nocopy(rRegI dst, rRegI src) %{
8519   effect( DEF dst, USE src );
8520   format %{ "MOV    $dst,$src" %}
8521   ins_encode( enc_Copy( dst, src) );
8522   ins_pipe( ialu_reg_reg );
8523 %}
8524 
8525 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8526   effect( USE_DEF dst, USE src, KILL cr );
8527   // Helper with no match clause; convI2B runs it after copying src into dst. NEG then ADC leaves dst = 0 or 1.
8528   size(4);
8529   format %{ "NEG    $dst\n\t"
8530             "ADC    $dst,$src" %}
8531   ins_encode( neg_reg(dst),
8532               OpcRegReg(0x13,dst,src) );
8533   ins_pipe( ialu_reg_reg_long );
8534 %}
8535 
8536 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8537   match(Set dst (Conv2B src));
8538   // Int to boolean: expands to a copy of src into dst followed by the NEG/ADC pair in ci2b.
8539   expand %{
8540     movI_nocopy(dst,src);
8541     ci2b(dst,src,cr);
8542   %}
8543 %}
8544 
8545 instruct movP_nocopy(rRegI dst, eRegP src) %{
8546   effect( DEF dst, USE src );  // no match clause: pointer-to-int register move, first step of convP2B's expansion
8547   format %{ "MOV    $dst,$src" %}
8548   ins_encode( enc_Copy( dst, src) );
8549   ins_pipe( ialu_reg_reg );
8550 %}
8551 
8552 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8553   effect( USE_DEF dst, USE src, KILL cr );  // no match clause: pointer-source twin of ci2b, second step of convP2B
8554   format %{ "NEG    $dst\n\t"
8555             "ADC    $dst,$src" %}
8556   ins_encode( neg_reg(dst),
8557               OpcRegReg(0x13,dst,src) );
8558   ins_pipe( ialu_reg_reg_long );
8559 %}
8560 
8561 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8562   match(Set dst (Conv2B src));
8563   // Pointer to boolean: expands to a copy of src into dst followed by the NEG/ADC pair in cp2b.
8564   expand %{
8565     movP_nocopy(dst,src);
8566     cp2b(dst,src,cr);
8567   %}
8568 %}
8569 
8570 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8571   match(Set dst (CmpLTMask p q));
8572   effect(KILL cr);
8573   ins_cost(400);
8574   // Branch-free mask: dst is zeroed, SETlt writes 0/1 from the signed compare, NEG turns that into 0/-1.
8575   // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
8576   format %{ "XOR    $dst,$dst\n\t"
8577             "CMP    $p,$q\n\t"
8578             "SETlt  $dst\n\t"
8579             "NEG    $dst" %}
8580   ins_encode %{
8581     Register Rp = $p$$Register;
8582     Register Rq = $q$$Register;
8583     Register Rd = $dst$$Register;
8584     // No label is declared: the sequence below contains no branch.
8585     __ xorl(Rd, Rd);   // zero dst before the compare, since XOR itself writes the flags
8586     __ cmpl(Rp, Rq);
8587     __ setb(Assembler::less, Rd);
8588     __ negl(Rd);
8589   %}
8590 
8591   ins_pipe(pipe_slow);
8592 %}
8593 
8594 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8595   match(Set dst (CmpLTMask dst zero));
8596   effect(DEF dst, KILL cr);
8597   ins_cost(100);
8598   // Compare against zero: an arithmetic shift right by 31 replicates the sign bit, giving 0 or -1 in place.
8599   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8600   ins_encode %{
8601   __ sarl($dst$$Register, 31);
8602   %}
8603   ins_pipe(ialu_reg);
8604 %}
8605 
8606 /* better to save a register than avoid a branch */
8607 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8608   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8609   effect(KILL cr);
8610   ins_cost(400);
8611   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8612             "JGE    done\n\t"
8613             "ADD    $p,$y\n"
8614             "done:  " %}
8615   ins_encode %{
8616     Register Rp = $p$$Register;
8617     Register Rq = $q$$Register;
8618     Register Ry = $y$$Register;
8619     Label done;
8620     __ subl(Rp, Rq);                          // p = p - q; the flags from SUB drive the branch
8621     __ jccb(Assembler::greaterEqual, done);   // p >= q: mask would be 0, so y is not added
8622     __ addl(Rp, Ry);                          // p < q: mask would be -1, so add y
8623     __ bind(done);
8624   %}
8625   // Computes (p - q) + ((p < q) ? y : 0) in place in p, with a short branch instead of a mask register.
8626   ins_pipe(pipe_cmplt);
8627 %}
8628 
8629 /* better to save a register than avoid a branch */
8630 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8631   match(Set y (AndI (CmpLTMask p q) y));
8632   effect(KILL cr);
8633   // Computes y = (p < q) ? y : 0 in place in y, with a short branch instead of a mask register.
8634   ins_cost(300);
8635 
8636   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8637             "JLT      done\n\t"
8638             "XORL     $y, $y\n"
8639             "done:  " %}
8640   ins_encode %{
8641     Register Rp = $p$$Register;
8642     Register Rq = $q$$Register;
8643     Register Ry = $y$$Register;
8644     Label done;
8645     __ cmpl(Rp, Rq);
8646     __ jccb(Assembler::less, done);   // p < q: mask would be -1, y is kept
8647     __ xorl(Ry, Ry);                  // otherwise the mask would be 0, so y is cleared
8648     __ bind(done);
8649   %}
8650 
8651   ins_pipe(pipe_cmplt);
8652 %}
8653 
8654 /* If I enable this, I encourage spilling in the inner loop of compress.
8655 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8656   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8657 */
8658 //----------Overflow Math Instructions-----------------------------------------
8659 // Each rule in this section matches (Set cr (Overflow* ...)): its result is the flags operand.
8660 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8661 %{
8662   match(Set cr (OverflowAddI op1 op2));
8663   effect(DEF cr, USE_KILL op1, USE op2);
8664   // The ADD is performed for its flags; it overwrites $op1, hence USE_KILL on op1.
8665   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8666 
8667   ins_encode %{
8668     __ addl($op1$$Register, $op2$$Register);
8669   %}
8670   ins_pipe(ialu_reg_reg);
8671 %}
8672 
8673 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8674 %{
8675   match(Set cr (OverflowAddI op1 op2));
8676   effect(DEF cr, USE_KILL op1, USE op2);
8677   // Immediate variant of overflowAddI_eReg: the ADD overwrites $op1, hence USE_KILL on op1.
8678   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8679 
8680   ins_encode %{
8681     __ addl($op1$$Register, $op2$$constant);
8682   %}
8683   ins_pipe(ialu_reg_reg);
8684 %}
8685 
8686 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8687 %{
8688   match(Set cr (OverflowSubI op1 op2));
8689   // Emitted as CMP, so neither operand is written and no effect() clause is declared.
8690   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8691   ins_encode %{
8692     __ cmpl($op1$$Register, $op2$$Register);
8693   %}
8694   ins_pipe(ialu_reg_reg);
8695 %}
8696 
8697 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8698 %{
8699   match(Set cr (OverflowSubI op1 op2));
8700   // Immediate variant of overflowSubI_rReg: emitted as CMP against the constant; $op1 is not written.
8701   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8702   ins_encode %{
8703     __ cmpl($op1$$Register, $op2$$constant);
8704   %}
8705   ins_pipe(ialu_reg_reg);
8706 %}
8707 
8708 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8709 %{
8710   match(Set cr (OverflowSubI zero op2));
8711   effect(DEF cr, USE_KILL op2);
8712   // Subtraction from the zero operand is emitted as NEG, which overwrites $op2 (hence USE_KILL).
8713   format %{ "NEG    $op2\t# overflow check int" %}
8714   ins_encode %{
8715     __ negl($op2$$Register);
8716   %}
8717   ins_pipe(ialu_reg_reg);
8718 %}
8719 
8720 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8721 %{
8722   match(Set cr (OverflowMulI op1 op2));
8723   effect(DEF cr, USE_KILL op1, USE op2);
8724   // The two-operand IMUL is performed for its flags; it overwrites $op1, hence USE_KILL on op1.
8725   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8726   ins_encode %{
8727     __ imull($op1$$Register, $op2$$Register);
8728   %}
8729   ins_pipe(ialu_reg_reg_alu0);
8730 %}
8731 
8732 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8733 %{
8734   match(Set cr (OverflowMulI op1 op2));
8735   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8736   // The three-operand IMUL writes the product to the scratch register $tmp, so $op1 is only read.
8737   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8738   ins_encode %{
8739     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8740   %}
8741   ins_pipe(ialu_reg_reg_alu0);
8742 %}
8743 
8744 //----------Long Instructions------------------------------------------------
8745 // Add Long Register with Register: ADD on the low halves, then ADC on the high halves to carry across
8746 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8747   match(Set dst (AddL dst src));
8748   effect(KILL cr);
8749   ins_cost(200);
8750   format %{ "ADD    $dst.lo,$src.lo\n\t"
8751             "ADC    $dst.hi,$src.hi" %}
8752   opcode(0x03, 0x13);  // primary opcode for the low-half ADD, secondary for the high-half ADC
8753   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8754   ins_pipe( ialu_reg_reg_long );
8755 %}
8756 
8757 // Add Long Register with Immediate: the constant is split into low and high halves (ADD then ADC)
8758 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8759   match(Set dst (AddL dst src));
8760   effect(KILL cr);
8761   format %{ "ADD    $dst.lo,$src.lo\n\t"
8762             "ADC    $dst.hi,$src.hi" %}
8763   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8764   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8765   ins_pipe( ialu_reg_long );
8766 %}
8767 
8768 // Add Long Register with Memory: low half from $mem, high half from $mem+4 (per the format below)
8769 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8770   match(Set dst (AddL dst (LoadL mem)));
8771   effect(KILL cr);
8772   ins_cost(125);
8773   format %{ "ADD    $dst.lo,$mem\n\t"
8774             "ADC    $dst.hi,$mem+4" %}
8775   opcode(0x03, 0x13);
8776   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );  // primary+low half, then secondary+high half
8777   ins_pipe( ialu_reg_long_mem );
8778 %}
8779 
8780 // Subtract Long Register with Register: SUB on the low halves, then SBB on the high halves to borrow across
8781 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8782   match(Set dst (SubL dst src));
8783   effect(KILL cr);
8784   ins_cost(200);
8785   format %{ "SUB    $dst.lo,$src.lo\n\t"
8786             "SBB    $dst.hi,$src.hi" %}
8787   opcode(0x2B, 0x1B);  // primary opcode for the low-half SUB, secondary for the high-half SBB
8788   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8789   ins_pipe( ialu_reg_reg_long );
8790 %}
8791 
8792 // Subtract Long Register with Immediate: the constant is split into low and high halves (SUB then SBB)
8793 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8794   match(Set dst (SubL dst src));
8795   effect(KILL cr);
8796   format %{ "SUB    $dst.lo,$src.lo\n\t"
8797             "SBB    $dst.hi,$src.hi" %}
8798   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8799   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8800   ins_pipe( ialu_reg_long );
8801 %}
8802 
8803 // Subtract Long Register with Memory: low half from $mem, high half from $mem+4 (per the format below)
8804 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8805   match(Set dst (SubL dst (LoadL mem)));
8806   effect(KILL cr);
8807   ins_cost(125);
8808   format %{ "SUB    $dst.lo,$mem\n\t"
8809             "SBB    $dst.hi,$mem+4" %}
8810   opcode(0x2B, 0x1B);
8811   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );  // primary+low half, then secondary+high half
8812   ins_pipe( ialu_reg_long_mem );
8813 %}
8814 
8815 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8816   match(Set dst (SubL zero dst));  // long negation, written in the ideal graph as (zero - dst)
8817   effect(KILL cr);
8818   ins_cost(300);
8819   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8820   ins_encode( neg_long(dst) );  // neg_long is defined elsewhere; presumably emits the three instructions shown in format
8821   ins_pipe( ialu_reg_reg_long );
8822 %}
8823 
8824 // And Long Register with Register: independent ANDs on the low and high halves
8825 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8826   match(Set dst (AndL dst src));
8827   effect(KILL cr);
8828   format %{ "AND    $dst.lo,$src.lo\n\t"
8829             "AND    $dst.hi,$src.hi" %}
8830   opcode(0x23,0x23);  // the same opcode is given as both primary and secondary
8831   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8832   ins_pipe( ialu_reg_reg_long );
8833 %}
8834 
8835 // And Long Register with Immediate: the constant is split into low and high halves, one AND each
8836 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8837   match(Set dst (AndL dst src));
8838   effect(KILL cr);
8839   format %{ "AND    $dst.lo,$src.lo\n\t"
8840             "AND    $dst.hi,$src.hi" %}
8841   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8842   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8843   ins_pipe( ialu_reg_long );
8844 %}
8845 
8846 // And Long Register with Memory: low half from $mem, high half from $mem+4 (per the format below)
8847 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8848   match(Set dst (AndL dst (LoadL mem)));
8849   effect(KILL cr);
8850   ins_cost(125);
8851   format %{ "AND    $dst.lo,$mem\n\t"
8852             "AND    $dst.hi,$mem+4" %}
8853   opcode(0x23, 0x23);
8854   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );  // primary+low half, then secondary+high half
8855   ins_pipe( ialu_reg_long_mem );
8856 %}
8857 
8858 // BMI1 instructions on longs: each 64-bit operation is built from 32-bit BMI1 instructions on the two halves
8859 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8860   match(Set dst (AndL (XorL src1 minus_1) src2));
8861   predicate(UseBMI1Instructions);
8862   effect(KILL cr, TEMP dst);
8863   // TEMP dst: the low half of dst is written before the high halves of the sources are read.
8864   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8865             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8866          %}
8867   // $x$$Register names the low half; HIGH_FROM_LOW() derives the register holding the high half.
8868   ins_encode %{
8869     Register Rdst = $dst$$Register;
8870     Register Rsrc1 = $src1$$Register;
8871     Register Rsrc2 = $src2$$Register;
8872     __ andnl(Rdst, Rsrc1, Rsrc2);
8873     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8874   %}
8875   ins_pipe(ialu_reg_reg_long);
8876 %}
8877 
8878 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8879   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8880   predicate(UseBMI1Instructions);
8881   effect(KILL cr, TEMP dst);
8882   // Memory variant of andnL_eReg_eReg_eReg: the second source is read as two 32-bit words.
8883   ins_cost(125);
8884   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8885             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8886          %}
8887   // The high word's address is rebuilt from the operand's components with the displacement advanced by 4.
8888   ins_encode %{
8889     Register Rdst = $dst$$Register;
8890     Register Rsrc1 = $src1$$Register;
8891     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8892 
8893     __ andnl(Rdst, Rsrc1, $src2$$Address);
8894     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8895   %}
8896   ins_pipe(ialu_reg_mem);
8897 %}
8898 
8899 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8900   match(Set dst (AndL (SubL imm_zero src) src));
8901   predicate(UseBMI1Instructions);
8902   effect(KILL cr, TEMP dst);
8903   // Long src & -src from 32-bit BLSI: if the low half yields a non-zero result the high half stays 0.
8904   format %{ "MOVL   $dst.hi, 0\n\t"
8905             "BLSIL  $dst.lo, $src.lo\n\t"
8906             "JNZ    done\n\t"
8907             "BLSIL  $dst.hi, $src.hi\n"
8908             "done:"
8909          %}
8910   // Otherwise (low result zero) the answer lies in the high half and a second BLSI computes it.
8911   ins_encode %{
8912     Label done;
8913     Register Rdst = $dst$$Register;
8914     Register Rsrc = $src$$Register;
8915     __ movl(HIGH_FROM_LOW(Rdst), 0);     // high half preset before BLSI sets the flags tested below
8916     __ blsil(Rdst, Rsrc);
8917     __ jccb(Assembler::notZero, done);   // branch on the flags left by the low-half BLSI
8918     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8919     __ bind(done);
8920   %}
8921   ins_pipe(ialu_reg);
8922 %}
8923 
8924 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8925   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8926   predicate(UseBMI1Instructions);
8927   effect(KILL cr, TEMP dst);
8928   // Memory variant of blsiL_eReg_eReg: the source long is read as two 32-bit words at $src and $src+4.
8929   ins_cost(125);
8930   format %{ "MOVL   $dst.hi, 0\n\t"
8931             "BLSIL  $dst.lo, $src\n\t"
8932             "JNZ    done\n\t"
8933             "BLSIL  $dst.hi, $src+4\n"
8934             "done:"
8935          %}
8936   // The high word's address is rebuilt from the operand's components with the displacement advanced by 4.
8937   ins_encode %{
8938     Label done;
8939     Register Rdst = $dst$$Register;
8940     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8941 
8942     __ movl(HIGH_FROM_LOW(Rdst), 0);     // high half preset before BLSI sets the flags tested below
8943     __ blsil(Rdst, $src$$Address);
8944     __ jccb(Assembler::notZero, done);   // low result non-zero: high half stays 0
8945     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8946     __ bind(done);
8947   %}
8948   ins_pipe(ialu_reg_mem);
8949 %}
8950 
8951 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8952 %{
8953   match(Set dst (XorL (AddL src minus_1) src));
8954   predicate(UseBMI1Instructions);
8955   effect(KILL cr, TEMP dst);
8956   // Long (src - 1) ^ src from 32-bit BLSMSK: the high half is only computed when the low-half op sets carry.
8957   format %{ "MOVL    $dst.hi, 0\n\t"
8958             "BLSMSKL $dst.lo, $src.lo\n\t"
8959             "JNC     done\n\t"
8960             "BLSMSKL $dst.hi, $src.hi\n"
8961             "done:"
8962          %}
8963   // NOTE(review): relies on BLSMSK setting CF exactly when its source is zero - confirm against the ISA manual.
8964   ins_encode %{
8965     Label done;
8966     Register Rdst = $dst$$Register;
8967     Register Rsrc = $src$$Register;
8968     __ movl(HIGH_FROM_LOW(Rdst), 0);       // high half preset; kept when the branch below is taken
8969     __ blsmskl(Rdst, Rsrc);
8970     __ jccb(Assembler::carryClear, done);  // carry clear: high half stays 0
8971     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8972     __ bind(done);
8973   %}
8974 
8975   ins_pipe(ialu_reg);
8976 %}
8977 
// 64-bit BLSMSK with the source in memory: matches (x + -1) ^ x where both
// uses of x are loads from the same address.  The high half of the source is
// read from $src+4.
instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) %{
  predicate(UseBMI1Instructions);
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  effect(KILL cr, TEMP dst);
  ins_cost(125);

  format %{ "MOVL    $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC     done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Register dst_lo = $dst$$Register;
    Register dst_hi = HIGH_FROM_LOW(dst_lo);
    Address  mem_lo = $src$$Address;
    Address  mem_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    Label    finished;

    // High half defaults to 0 and is recomputed only on carry.
    __ movl(dst_hi, 0);
    __ blsmskl(dst_lo, mem_lo);
    __ jccb(Assembler::carryClear, finished);
    __ blsmskl(dst_hi, mem_hi);
    __ bind(finished);
  %}

  ins_pipe(ialu_reg_mem);
%}
9006 
// 64-bit BLSR on a register pair: matches (x + -1) & x.  The high half is
// first copied from the source unchanged and is only run through BLSR when
// the low-half instruction leaves the carry flag set.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) %{
  predicate(UseBMI1Instructions);
  match(Set dst (AndL (AddL src minus_1) src) );
  effect(KILL cr, TEMP dst);

  format %{ "MOVL   $dst.hi, $src.hi\n\t"
            "BLSRL  $dst.lo, $src.lo\n\t"
            "JNC    done\n\t"
            "BLSRL  $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Register dst_lo = $dst$$Register;
    Register dst_hi = HIGH_FROM_LOW(dst_lo);
    Register src_lo = $src$$Register;
    Register src_hi = HIGH_FROM_LOW(src_lo);
    Label    finished;

    // Start from an unmodified high half.
    __ movl(dst_hi, src_hi);
    __ blsrl(dst_lo, src_lo);
    __ jccb(Assembler::carryClear, finished);
    __ blsrl(dst_hi, src_hi);
    __ bind(finished);
  %}

  ins_pipe(ialu_reg);
%}
9033 
// 64-bit BLSR with the source in memory: matches (x + -1) & x where both uses
// of x are loads from the same address.  The high half of the source is read
// from $src+4.
instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) %{
  predicate(UseBMI1Instructions);
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  effect(KILL cr, TEMP dst);
  ins_cost(125);

  format %{ "MOVL   $dst.hi, $src+4\n\t"
            "BLSRL  $dst.lo, $src\n\t"
            "JNC    done\n\t"
            "BLSRL  $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Register dst_lo = $dst$$Register;
    Register dst_hi = HIGH_FROM_LOW(dst_lo);
    Address  mem_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    Address  mem_lo = $src$$Address;
    Label    finished;

    // Start from an unmodified high half loaded from memory.
    __ movl(dst_hi, mem_hi);
    __ blsrl(dst_lo, mem_lo);
    __ jccb(Assembler::carryClear, finished);
    __ blsrl(dst_hi, mem_hi);
    __ bind(finished);
  %}

  ins_pipe(ialu_reg_mem);
%}
9061 
// Or Long Register with Register
// dst |= src for a 64-bit value kept as a lo/hi pair of 32-bit registers:
// one OR per half, as shown in the format.  dst is both input and result.
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);  // the flags register is declared clobbered
  format %{ "OR     $dst.lo,$src.lo\n\t"
            "OR     $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  // Low halves are encoded first, then the high halves.
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}
9072 
// Or Long Register with Immediate
// dst |= constant for a 64-bit value in a register pair; the format shows
// the low and high 32 bits of the immediate applied to the matching halves.
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);  // the flags register is declared clobbered
  format %{ "OR     $dst.lo,$src.lo\n\t"
            "OR     $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}
9083 
// Or Long Register with Memory
// dst |= 64-bit value loaded from mem; the low half uses $mem and the high
// half uses $mem+4, as shown in the format.
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);  // the flags register is declared clobbered
  ins_cost(125);
  format %{ "OR     $dst.lo,$mem\n\t"
            "OR     $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  // Primary opcode + low-half operands, then secondary opcode + high-half operands.
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
9095 
// Xor Long Register with Register
// dst ^= src for a 64-bit value kept as a lo/hi pair of 32-bit registers:
// one XOR per half, as shown in the format.  dst is both input and result.
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);  // the flags register is declared clobbered
  format %{ "XOR    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  // Low halves are encoded first, then the high halves.
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}
9106 
// Xor Long Register with Immediate -1
// XOR with an all-ones constant is emitted as a NOT of each 32-bit half.
// No flags effect is declared for this instruct.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT    $dst.lo\n\t"
            "NOT    $dst.hi" %}
  ins_encode %{
    Register lo_half = $dst$$Register;
    Register hi_half = HIGH_FROM_LOW(lo_half);
    __ notl(lo_half);
    __ notl(hi_half);
  %}
  ins_pipe( ialu_reg_long );
%}
9118 
// Xor Long Register with Immediate
// dst ^= constant for a 64-bit value in a register pair; the format shows
// the low and high 32 bits of the immediate applied to the matching halves.
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);  // the flags register is declared clobbered
  format %{ "XOR    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}
9129 
// Xor Long Register with Memory
// dst ^= 64-bit value loaded from mem; the low half uses $mem and the high
// half uses $mem+4, as shown in the format.
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);  // the flags register is declared clobbered
  ins_cost(125);
  format %{ "XOR    $dst.lo,$mem\n\t"
            "XOR    $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  // Primary opcode + low-half operands, then secondary opcode + high-half operands.
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
9141 
// Shift Left Long by 1
// Doubles the 64-bit value with an ADD on the low half followed by an ADC on
// the high half, so the bit shifted out of the low half is carried upwards.
// Only selected when UseNewLongLShift is set.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  predicate(UseNewLongLShift);
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register lo_half = $dst$$Register;
    Register hi_half = HIGH_FROM_LOW(lo_half);
    __ addl(lo_half, lo_half);
    __ adcl(hi_half, hi_half);
  %}
  ins_pipe( ialu_reg_long );
%}
9156 
// Shift Left Long by 2
// Two consecutive ADD/ADC doubling steps on the lo/hi register pair.
// Only selected when UseNewLongLShift is set.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  predicate(UseNewLongLShift);
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi\n\t"
            "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register lo_half = $dst$$Register;
    Register hi_half = HIGH_FROM_LOW(lo_half);
    // Each ADD/ADC pair shifts the 64-bit value left by one bit.
    for (int step = 0; step < 2; step++) {
      __ addl(lo_half, lo_half);
      __ adcl(hi_half, hi_half);
    }
  %}
  ins_pipe( ialu_reg_long );
%}
9175 
// Shift Left Long by 3
// Three consecutive ADD/ADC doubling steps on the lo/hi register pair.
// Only selected when UseNewLongLShift is set.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  predicate(UseNewLongLShift);
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi\n\t"
            "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi\n\t"
            "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register lo_half = $dst$$Register;
    Register hi_half = HIGH_FROM_LOW(lo_half);
    // Each ADD/ADC pair shifts the 64-bit value left by one bit.
    for (int step = 0; step < 3; step++) {
      __ addl(lo_half, lo_half);
      __ adcl(hi_half, hi_half);
    }
  %}
  ins_pipe( ialu_reg_long );
%}
9198 
// Shift Left Long by 1-31
// Constant left shift that stays within one 32-bit word: SHLD moves the top
// bits of the low half into the high half, then SHL shifts the low half.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);  // the flags register is declared clobbered
  ins_cost(200);
  format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
            "SHL    $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}
9210 
// Shift Left Long by 32-63
// Constant left shift of at least one full word: per the format, the low
// half is moved into the high half, shifted by the remaining cnt-32 bits,
// and the low half is cleared.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);  // the flags register is declared clobbered
  ins_cost(300);
  format %{ "MOV    $dst.hi,$dst.lo\n"
          "\tSHL    $dst.hi,$cnt-32\n"
          "\tXOR    $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}
9223 
// Shift Left Long by variable
// The shift count is an eCXRegI operand.  Per the format, bit 5 of the count
// (value 32) is tested first: if set, the low half is moved into the high
// half and cleared; the SHLD/SHL pair then applies the count to both halves.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);  // the flags register is declared clobbered
  ins_cost(500+200);
  size(17);         // declared size of the emitted sequence
  format %{ "TEST   $shift,32\n\t"
            "JEQ,s  small\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "XOR    $dst.lo,$dst.lo\n"
    "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
            "SHL    $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9239 
// Shift Right Long (logical, URShiftL) by 1-31
// Constant unsigned right shift that stays within one 32-bit word: SHRD
// moves the low bits of the high half into the low half, then SHR shifts
// the high half.
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);  // the flags register is declared clobbered
  ins_cost(200);
  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
            "SHR    $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}
9251 
// Shift Right Long (logical, URShiftL) by 32-63
// Constant unsigned right shift of at least one full word: per the format,
// the high half is moved into the low half, shifted by the remaining cnt-32
// bits, and the high half is cleared.
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);  // the flags register is declared clobbered
  ins_cost(300);
  format %{ "MOV    $dst.lo,$dst.hi\n"
          "\tSHR    $dst.lo,$cnt-32\n"
          "\tXOR    $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}
9264 
// Shift Right Long (logical, URShiftL) by variable
// The shift count is an eCXRegI operand.  Per the format, bit 5 of the count
// (value 32) is tested first: if set, the high half is moved into the low
// half and cleared; the SHRD/SHR pair then applies the count to both halves.
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);  // the flags register is declared clobbered
  ins_cost(600);
  size(17);         // declared size of the emitted sequence
  format %{ "TEST   $shift,32\n\t"
            "JEQ,s  small\n\t"
            "MOV    $dst.lo,$dst.hi\n\t"
            "XOR    $dst.hi,$dst.hi\n"
    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
            "SHR    $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9280 
// Shift Right arithmetic Long (RShiftL) by 1-31
// Constant signed right shift that stays within one 32-bit word: SHRD moves
// the low bits of the high half into the low half, then SAR shifts the high
// half.
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);  // the flags register is declared clobbered
  ins_cost(200);
  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
            "SAR    $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}
9292 
// Shift Right arithmetic Long (RShiftL) by 32-63
// Constant signed right shift of at least one full word: per the format, the
// high half is moved into the low half and shifted by the remaining cnt-32
// bits, and the high half is filled with the sign via SAR 31.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);  // the flags register is declared clobbered
  ins_cost(300);
  format %{ "MOV    $dst.lo,$dst.hi\n"
          "\tSAR    $dst.lo,$cnt-32\n"
          "\tSAR    $dst.hi,31" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}
9305 
// Shift Right arithmetic Long by variable
// The shift count is an eCXRegI operand.  Per the format, bit 5 of the count
// (value 32) is tested first: if set, the high half is moved into the low
// half and replaced by its sign (SAR 31); the SHRD/SAR pair then applies the
// count to both halves.
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);  // the flags register is declared clobbered
  ins_cost(600);
  size(18);         // declared size of the emitted sequence
  format %{ "TEST   $shift,32\n\t"
            "JEQ,s  small\n\t"
            "MOV    $dst.lo,$dst.hi\n\t"
            "SAR    $dst.hi,31\n"
    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
            "SAR    $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9321 
9322 
9323 //----------Double Instructions------------------------------------------------
9324 // Double Math
9325 
9326 // Compare & branch
9327 
// P6 version of the x87 double compare (CmpD), sets condition codes in EFLAGS.
// Selected only when the CPU supports CMOV and UseSSE <= 1.  src1 is pushed
// onto the FPU stack and compared against src2 with FUCOMIP; per the format,
// the NaN (parity set) case is patched up through AH/SAHF, which is why rax
// is declared killed.
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction\n\t"
            "JNP    exit\n\t"
            "MOV    ah,1       // saw a NaN, set CF\n\t"
            "SAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}
9346 
// x87 double compare producing an eFlagsRegUCF result.  Same predicate, cost
// and FLD/FUCOMIP sequence as cmpDPR_cc_P6, but without the NaN fix-up step
// and without killing rax.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}
9358 
// Compare & branch
// x87 double compare (CmpD) for UseSSE <= 1 without the CMOV requirement of
// the P6 variants (and with a higher cost).  Per the format, the FPU status
// word is copied to AX, an unordered result is treated as "less than", and
// SAHF transfers the outcome into EFLAGS; rax is therefore declared killed.
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD    $src1\n\t"
            "FCOMp  $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST   AX,0x400\n\t"
            "JZ,s   flags\n\t"
            "MOV    AH,1\t# unordered treat as LT\n"
    "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}
9378 
// Compare vs zero into -1,0,1
// Three-way x87 double compare (CmpD3) of src1 against the constant 0.0,
// producing an integer result in dst.  The opcode bytes are emitted as
// secondary then primary (D9 E4), followed by an FPU pop and the shared
// CmpF_Result sequence.  rax and the flags are declared killed.
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD  $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
9392 
// Compare into -1,0,1
// Three-way x87 double compare (CmpD3) of two FPU registers, producing an
// integer result in dst via the shared CmpF_Result sequence.  rax and the
// flags are declared killed.
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD  $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
9406 
// Double compare on XMM registers (UseSSE >= 2) that sets condition codes in
// EFLAGS.  UCOMISD is followed by the shared fix-up sequence; the format
// lists that fix-up as the PUSHF/AND/POPF path taken when a NaN was seen.
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));
  predicate(UseSSE>=2);
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s   exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND     [rsp], #0xffffff2b\n\t"
            "POPF\n"
    "exit:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}
9424 
// Double compare on XMM registers (UseSSE >= 2) producing an eFlagsRegUCF
// result: a bare UCOMISD with no fix-up sequence.
instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));
  predicate(UseSSE>=2);
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
  %}
  ins_pipe( pipe_slow );
%}
9435 
// Double compare of an XMM register against a double loaded from memory
// (UseSSE >= 2), setting condition codes in EFLAGS.  UCOMISD is followed by
// the shared fix-up sequence; the format lists that fix-up as the
// PUSHF/AND/POPF path taken when a NaN was seen.
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));
  predicate(UseSSE>=2);
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s   exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND     [rsp], #0xffffff2b\n\t"
            "POPF\n"
    "exit:" %}
  ins_encode %{
    XMMRegister lhs     = $src1$$XMMRegister;
    Address     rhs_mem = $src2$$Address;
    __ ucomisd(lhs, rhs_mem);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}
9453 
// Double compare of an XMM register against a double loaded from memory
// (UseSSE >= 2) producing an eFlagsRegUCF result: a bare UCOMISD with no
// fix-up sequence.
instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));
  predicate(UseSSE>=2);
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    XMMRegister lhs     = $src1$$XMMRegister;
    Address     rhs_mem = $src2$$Address;
    __ ucomisd(lhs, rhs_mem);
  %}
  ins_pipe( pipe_slow );
%}
9464 
// Compare into -1,0,1 in XMM
// Three-way double compare (CmpD3) of two XMM registers with an integer
// result in dst.  The format shows the sequence: dst starts as -1 and stays
// -1 for the unordered (JP) and below (JB) cases; otherwise SETNE/MOVZB
// yields 0 or 1.
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  match(Set dst (CmpD3 src1 src2));
  predicate(UseSSE>=2);
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV     $dst, #-1\n\t"
            "JP,s    done\n\t"
            "JB,s    done\n\t"
            "SETNE   $dst\n\t"
            "MOVZB   $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs = $src1$$XMMRegister;
    XMMRegister rhs = $src2$$XMMRegister;
    __ ucomisd(lhs, rhs);
    Register result = $dst$$Register;
    emit_cmpfp3(_masm, result);
  %}
  ins_pipe( pipe_slow );
%}
9484 
// Compare into -1,0,1 in XMM and memory
// Three-way double compare (CmpD3) of an XMM register against a double
// loaded from memory, with an integer result in dst.  The format shows the
// sequence: dst starts as -1 and stays -1 for the unordered (JP) and below
// (JB) cases; otherwise SETNE/MOVZB yields 0 or 1.
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  predicate(UseSSE>=2);
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV     $dst, #-1\n\t"
            "JP,s    done\n\t"
            "JB,s    done\n\t"
            "SETNE   $dst\n\t"
            "MOVZB   $dst, $dst\n"
    "done:" %}
  ins_encode %{
    XMMRegister lhs     = $src1$$XMMRegister;
    Address     rhs_mem = $src2$$Address;
    __ ucomisd(lhs, rhs_mem);
    Register result = $dst$$Register;
    emit_cmpfp3(_masm, result);
  %}
  ins_pipe( pipe_slow );
%}
9504 
9505 
// x87 double subtract, two-address form: dst = dst - src (UseSSE <= 1).
// src is pushed onto the FPU stack, then a subtract-and-pop into dst is
// emitted.
instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD    $src\n\t"
            "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
9518 
// x87 double subtract whose result is rounded to double precision by storing
// it to a stack slot: matches RoundDouble(src1 - src2) with a stackSlotD
// destination (UseSSE <= 1).  src2 is pushed first, combined with src1, and
// the result is popped to memory.
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD    $src2\n\t"
            "DSUB   ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
9532 
9533 
// x87 double subtract with a memory operand: dst = dst - [src]
// (UseSSE <= 1).  The tertiary opcode loads the double from memory onto the
// FPU stack; the primary/secondary pair then subtracts and pops into dst.
instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD    $src\n\t"
            "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
9546 
// x87 double absolute value (UseSSE <= 1).  Both operands are of class
// regDPR1, so no register operands need to be encoded: the instruction is
// just the two opcode bytes, emitted secondary-first (D9 E1).
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
9556 
// x87 double negate (UseSSE <= 1).  Both operands are of class regDPR1, so
// no register operands need to be encoded: the instruction is just the two
// opcode bytes, emitted secondary-first (D9 E0).
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
9566 
// x87 double add, two-address form: dst = dst + src (UseSSE <= 1).
// src is pushed onto the FPU stack, then added into dst using the DE-group
// opcode declared below.
instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD    $src\n\t"
            "DADD   $dst,ST" %}
  size(4);  // declared size of the emitted sequence
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
9579 
9580 
// x87 double add whose result is rounded to double precision by storing it
// to a stack slot: matches RoundDouble(src1 + src2) with a stackSlotD
// destination (UseSSE <= 1).
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD    $src2\n\t"
            "DADD   ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
9594 
9595 
// x87 double add with a memory operand: dst = dst + [src] (UseSSE <= 1).
// The tertiary opcode loads the double from memory onto the FPU stack; the
// primary/secondary pair then adds and pops into dst.
instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD    $src\n\t"
            "DADDp  $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
9608 
// add-to-memory
// Read-modify-write double add (UseSSE <= 1): matches a StoreD to dst of
// RoundDouble([dst] + src).  The encoding spells out its opcode bytes
// literally: DD /0 on dst (load), D8 with src (add), then DD /3 on dst
// (store).
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D  $dst\n\t"
            "DADD   ST,$src\n\t"
            "FST_D  $dst" %}
  opcode(0xDD, 0x0);
  // NOTE(review): set_instruction_start is defined elsewhere; it is placed
  // just before the final store -- confirm its purpose against its enc_class.
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}
9625 
// x87 double add of the constant 1.0 (immDPR1 operand): dst = dst + 1.0
// (UseSSE <= 1).  The constant is produced with FLD1 rather than loaded from
// the constant table.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  match(Set dst (AddD dst con));
  predicate(UseSSE<=1);
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp  $dst,ST" %}
  ins_encode %{
    const int dst_slot = $dst$$reg;
    __ fld1();
    __ faddp(dst_slot);
  %}
  ins_pipe(fpu_reg);
%}
9638 
// x87 double add of a general constant: dst = dst + con (UseSSE <= 1).
// The predicate excludes the constants 0.0 and 1.0; the value is loaded from
// the constant table and then added-and-popped into dst.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  match(Set dst (AddD dst con));
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp  $dst,ST" %}
  ins_encode %{
    const int dst_slot = $dst$$reg;
    __ fld_d($constantaddress($con));
    __ faddp(dst_slot);
  %}
  ins_pipe(fpu_reg_mem);
%}
9651 
// x87 double add of a general constant with rounding through memory:
// matches RoundDouble(src + con) into a stack slot (UseSSE <= 1).  The
// predicate excludes the constants 0.0 and 1.0.  The constant is loaded from
// the constant table, src is added to it, and the result is popped to the
// rsp-relative slot of dst.
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  match(Set dst (RoundDouble (AddD src con)));
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD   ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    const int src_slot = $src$$reg;
    Address   round_slot(rsp, $dst$$disp);
    __ fld_d($constantaddress($con));
    __ fadd(src_slot);
    __ fstp_d(round_slot);
  %}
  ins_pipe(fpu_mem_reg_con);
%}
9666 
// x87 double multiply, two-address form: dst = dst * src (UseSSE <= 1).
// src is pushed onto the FPU stack, then a multiply-and-pop into dst is
// emitted.
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD    $src\n\t"
            "DMULp  $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
9678 
// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
// Applies only to methods compiled as strict (see the predicate).  dst must
// be of class regDPR1 and src of class regnotDPR1.
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (MulD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double multiplies

  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  // bias1 scaling, the actual multiply by src, then bias2 rescaling.
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
9705 
// x87 double multiply by a general constant: dst = dst * con (UseSSE <= 1).
// The predicate excludes the constants 0.0 and 1.0; the value is loaded from
// the constant table and then multiplied-and-popped into dst.
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  match(Set dst (MulD dst con));
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp  $dst,ST" %}
  ins_encode %{
    const int dst_slot = $dst$$reg;
    __ fld_d($constantaddress($con));
    __ fmulp(dst_slot);
  %}
  ins_pipe(fpu_reg_mem);
%}
9718 
9719 
// x87 double multiply with a memory operand: dst = dst * [src]
// (UseSSE <= 1).  The tertiary opcode loads the double from memory onto the
// FPU stack; the primary/secondary pair then multiplies and pops into dst.
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D  $src\n\t"
            "DMULp  $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
9731 
//
// Cisc-alternate to reg-reg multiply
// Three-operand form: dst = src * [mem] (UseSSE <= 1).  The double is loaded
// from memory onto the FPU stack, multiplied by src, and popped into dst.
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D  $mem\n\t"
            "DMUL   ST,$src\n\t"
            "FSTP_D $dst" %}
  // The tertiary opcode performs the LoadD and must therefore be DD /0
  // (FLD m64fp).  D9 /0 is FLD m32fp, which would read only a 32-bit float
  // from the double's memory location.
  opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */  /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
9747 
9748 
// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2.  This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
// Computes src2 = (src0 * src1) + src2 on the x87 stack (UseSSE <= 1).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DADDp  $src2,ST" %}
  ins_cost(250);
  // NOTE(review): none of the encoders listed below is an OpcP/OpcS/Opcode
  // entry, so this opcode value may be unused -- confirm against the
  // enc_class definitions.
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}
9766 
9767 
// MACRO3 -- subDPR a mulDPR
// Two-address form like addDPR_mulDPR_reg: computes
// src2 = (src0 * src1) - src2 on the x87 stack (UseSSE <= 1).  The final
// step is a reverse-subtract-and-pop (DSUBRp in the format) emitted as the
// literal bytes DE, E0+src2.
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}
9781 
9782 
// x87 double divide, two-address form: dst = dst / src (UseSSE <= 1).
// src is pushed onto the FPU stack, then a divide-and-pop into dst is
// emitted.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD    $src\n\t"
            "FDIVp  $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
9795 
// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// Applies only to methods compiled as strict (see the predicate).  dst must
// be of class regDPR1 and src of class regnotDPR1.
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Single predicate: the former extra "UseSSE<=1" clause was redundant with
  // (and weaker than) this one.
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides

  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "FDIVp  $dst,ST\n\t"
            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  // bias1 scaling, the actual divide by src, then bias2 rescaling.
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
9823 
// x87 double divide whose result is rounded to double precision by storing
// it to a stack slot: matches RoundDouble(src1 / src2) with a stackSlotD
// destination.  The predicate excludes strict methods.
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD    $src1\n\t"
            "FDIV   ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
9836 
9837 
// x87 double remainder, two-address form: dst = dst mod src (UseSSE <= 1).
// Both operands are pushed, the shared emitModDPR() sequence runs, and the
// result is moved back into dst.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}
9851 
// Double remainder for XMM operands (UseSSE >= 2): dst = src0 mod src1.
// Per the format, both operands are spilled through an 8-byte stack
// temporary onto the x87 stack, FPREM is repeated while the parity flag
// stays set after FNSTSW/SAHF, and the result travels back to dst through
// the same temporary.  rax and the flags are declared killed.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB    ESP,8\t # DMOD\n"
          "\tMOVSD  [ESP+0],$src1\n"
          "\tFLD_D  [ESP+0]\n"
          "\tMOVSD  [ESP+0],$src0\n"
          "\tFLD_D  [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD  $dst,[ESP+0]\n"
          "\tADD    ESP,8\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}
9876 
// x87 double sine (UseSSE <= 1).  Both operands are of class regDPR1, so no
// register operands need to be encoded: the instruction is just the two
// opcode bytes D9 FE.
instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (SinD src));
  ins_cost(1800);
  format %{ "DSIN   $dst" %}
  opcode(0xD9, 0xFE);
  ins_encode( OpcP, OpcS );
  ins_pipe( pipe_slow );
%}
9886 
// Sine for an XMM operand, in place: dst = sin(dst), UseSSE>=2.
// Encoding: Push_SrcD(dst), the bytes D9 FE (FSIN), then Push_ResultD(dst).
// EFLAGS is killed because of the ESP adjustment done by those helpers.
9887 instruct sinD_reg(regD dst, eFlagsReg cr) %{
9888   predicate (UseSSE>=2);
9889   match(Set dst (SinD dst));
9890   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9891   ins_cost(1800);
9892   format %{ "DSIN   $dst" %}
9893   opcode(0xD9, 0xFE);
9894   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9895   ins_pipe( pipe_slow );
9896 %}
9897 
// x87 cosine (CosD) for UseSSE<=1.  Only the two opcode bytes D9 FF (FCOS) are
// emitted; no push/pop is encoded, so the regDPR1 operand is used in place.
9898 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
9899   predicate (UseSSE<=1);
9900   match(Set dst (CosD src));
9901   ins_cost(1800);
9902   format %{ "DCOS   $dst" %}
9903   opcode(0xD9, 0xFF);
9904   ins_encode( OpcP, OpcS );
9905   ins_pipe( pipe_slow );
9906 %}
9907 
// Cosine for an XMM operand, in place: dst = cos(dst), UseSSE>=2.
// Encoding: Push_SrcD(dst), the bytes D9 FF (FCOS), then Push_ResultD(dst).
// EFLAGS is killed because of the ESP adjustment done by those helpers.
9908 instruct cosD_reg(regD dst, eFlagsReg cr) %{
9909   predicate (UseSSE>=2);
9910   match(Set dst (CosD dst));
9911   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9912   ins_cost(1800);
9913   format %{ "DCOS   $dst" %}
9914   opcode(0xD9, 0xFF);
9915   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9916   ins_pipe( pipe_slow );
9917 %}
9918 
// x87 tangent (TanD) for UseSSE<=1, operating in place on the regDPR1 operand.
// FPTAN leaves an extra 1.0 on top of the x87 stack; the trailing
// "fstp st" (DD D8) discards it so only the tangent remains.
9919 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9920   predicate (UseSSE<=1);
9921   match(Set dst(TanD src));
9922   format %{ "DTAN   $dst" %}
9923   ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9924               Opcode(0xDD), Opcode(0xD8));   // fstp st
9925   ins_pipe( pipe_slow );
9926 %}
9927 
// Tangent for an XMM operand, in place: dst = tan(dst), UseSSE>=2.
// The value is pushed with Push_SrcD, FPTAN is emitted, the extra 1.0 that
// FPTAN pushes is dropped with "fstp st", and Push_ResultD writes dst back.
9928 instruct tanD_reg(regD dst, eFlagsReg cr) %{
9929   predicate (UseSSE>=2);
9930   match(Set dst(TanD dst));
9931   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9932   format %{ "DTAN   $dst" %}
9933   ins_encode( Push_SrcD(dst),
9934               Opcode(0xD9), Opcode(0xF2),    // fptan
9935               Opcode(0xDD), Opcode(0xD8),   // fstp st
9936               Push_ResultD(dst) );
9937   ins_pipe( pipe_slow );
9938 %}
9939 
// x87 two-operand arctangent (AtanD dst src), two-address on dst, UseSSE<=1.
// Encoding: push src, then emit D9 F3 (FPATAN) followed by RegOpc(dst).
// The format mnemonic is DATAN, in line with DSIN/DCOS/DTAN used by the
// sibling rules (it previously read "DATA").
9940 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9941   predicate (UseSSE<=1);
9942   match(Set dst(AtanD dst src));
9943   format %{ "DATAN  $dst,$src" %}
9944   opcode(0xD9, 0xF3);
9945   ins_encode( Push_Reg_DPR(src),
9946               OpcP, OpcS, RegOpc(dst) );
9947   ins_pipe( pipe_slow );
9948 %}
9949 
// Two-operand arctangent for XMM operands (AtanD dst src), UseSSE>=2.
// Encoding: Push_SrcD(src), D9 F3 (FPATAN), Push_ResultD(dst).
// The format mnemonic is DATAN, in line with DSIN/DCOS/DTAN used by the
// sibling rules (it previously read "DATA").
// NOTE(review): only $src is pushed to the x87 stack although the match rule
// has two inputs (dst, src) and FPATAN consumes two stack operands -- confirm
// that this rule is either unreachable or that dst is supplied elsewhere.
9950 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9951   predicate (UseSSE>=2);
9952   match(Set dst(AtanD dst src));
9953   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9954   format %{ "DATAN  $dst,$src" %}
9955   opcode(0xD9, 0xF3);
9956   ins_encode( Push_SrcD(src),
9957               OpcP, OpcS, Push_ResultD(dst) );
9958   ins_pipe( pipe_slow );
9959 %}
9960 
// x87 double square root: dst = sqrt(src) (SqrtD), UseSSE<=1.
// The opcode pair is declared as (0xFA, 0xD9) and emitted as OpcS then OpcP,
// i.e. the byte sequence D9 FA (FSQRT), between a push of src and a pop to dst.
9961 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9962   predicate (UseSSE<=1);
9963   match(Set dst (SqrtD src));
9964   format %{ "DSQRT  $dst,$src" %}
9965   opcode(0xFA, 0xD9);
9966   ins_encode( Push_Reg_DPR(src),
9967               OpcS, OpcP, Pop_Reg_DPR(dst) );
9968   ins_pipe( pipe_slow );
9969 %}
9970 
// x87 power function: Y = X ** Y (PowD), UseSSE<=1.  Y is a regDPR1 operand
// and is both an input and the result.  EAX, EDX, ECX and EFLAGS are killed.
9971 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9972   predicate (UseSSE<=1);
9973   match(Set Y (PowD X Y));  // Raise X to the Yth power
9974   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9975   format %{ "fast_pow $X $Y -> $Y  // KILL $rax, $rcx, $rdx" %}
9976   ins_encode %{
    // Reserve 8 bytes of stack for the duration of fast_pow()
    // (presumably scratch space for the routine -- confirm in MacroAssembler).
9977     __ subptr(rsp, 8);
    // Push a copy of X on top of the x87 stack (register number is 1-based).
9978     __ fld_s($X$$reg - 1);
9979     __ fast_pow();
9980     __ addptr(rsp, 8);
9981   %}
9982   ins_pipe( pipe_slow );
9983 %}
9984 
// Power function for XMM operands: dst = src0 ** src1 (PowD), UseSSE>=2.
// The inputs are transferred to the x87 stack through an 8-byte stack
// temporary, fast_pow() is run, and the result returns the same way.
// EAX, EDX, ECX and EFLAGS are killed.
9985 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9986   predicate (UseSSE>=2);
9987   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
9988   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9989   format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
9990   ins_encode %{
9991     __ subptr(rsp, 8);
    // Exponent (src1) is loaded first, then the base (src0) on top of it.
9992     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
9993     __ fld_d(Address(rsp, 0));
9994     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
9995     __ fld_d(Address(rsp, 0));
9996     __ fast_pow();
    // Pop the x87 result through the temporary into the XMM destination.
9997     __ fstp_d(Address(rsp, 0));
9998     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9999     __ addptr(rsp, 8);
10000   %}
10001   ins_pipe( pipe_slow );
10002 %}
10003 
10004 
// x87 exponential, in place on the regDPR1 operand: dpr1 = exp(dpr1) (ExpD),
// UseSSE<=1.  The whole computation is the fast_exp() macro-assembler
// routine; EAX, ECX, EDX and EFLAGS are declared killed.
10005 instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
10006   predicate (UseSSE<=1);
10007   match(Set dpr1 (ExpD dpr1));
10008   effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
10009   format %{ "fast_exp $dpr1 -> $dpr1  // KILL $rax, $rcx, $rdx" %}
10010   ins_encode %{
10011     __ fast_exp();
10012   %}
10013   ins_pipe( pipe_slow );
10014 %}
10015 
// Exponential for XMM operands: dst = exp(src) (ExpD), UseSSE>=2.
// src is moved to the x87 stack through an 8-byte stack temporary,
// fast_exp() is run, and the result returns the same way into dst.
// EAX, ECX, EDX and EFLAGS are killed.
// The format prints "input -> output" (i.e. $src -> $dst), matching powD_reg.
10016 instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
10017   predicate (UseSSE>=2);
10018   match(Set dst (ExpD src));
10019   effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
10020   format %{ "fast_exp $src -> $dst  // KILL $rax, $rcx, $rdx" %}
10021   ins_encode %{
10022     __ subptr(rsp, 8);
10023     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10024     __ fld_d(Address(rsp, 0));
10025     __ fast_exp();
10026     __ fstp_d(Address(rsp, 0));
10027     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
10028     __ addptr(rsp, 8);
10029   %}
10030   ins_pipe( pipe_slow );
10031 %}
10032 
// x87 base-10 logarithm (Log10D), UseSSE<=1, in place on the regDPR1 operand.
// Computed as log_10(2) * log_2(x) with the three-instruction sequence below.
10033 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
10034   predicate (UseSSE<=1);
10035   // The source Double operand on FPU stack
10036   match(Set dst (Log10D src));
10037   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10038   // fxch         ; swap ST(0) with ST(1)
10039   // fyl2x        ; compute log_10(2) * log_2(x)
10040   format %{ "FLDLG2 \t\t\t#Log10\n\t"
10041             "FXCH   \n\t"
10042             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
10043          %}
10044   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
10045               Opcode(0xD9), Opcode(0xC9),   // fxch
10046               Opcode(0xD9), Opcode(0xF1));  // fyl2x
10047 
10048   ins_pipe( pipe_slow );
10049 %}
10050 
// Base-10 logarithm for XMM operands: dst = log10(src) (Log10D), UseSSE>=2.
// log_10(2) is pushed first and src is pushed on top of it by Push_SrcD, so
// no FXCH is needed before FYL2X.  Push_ResultD then writes the result to dst.
// The format lists only the two x87 instructions, not the push/pop helpers.
10051 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
10052   predicate (UseSSE>=2);
10053   effect(KILL cr);
10054   match(Set dst (Log10D src));
10055   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10056   // fyl2x        ; compute log_10(2) * log_2(x)
10057   format %{ "FLDLG2 \t\t\t#Log10\n\t"
10058             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
10059          %}
10060   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
10061               Push_SrcD(src),
10062               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10063               Push_ResultD(dst));
10064 
10065   ins_pipe( pipe_slow );
10066 %}
10067 
// x87 natural logarithm (LogD), UseSSE<=1, in place on the regDPR1 operand.
// Computed as log_e(2) * log_2(x) with the three-instruction sequence below.
10068 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
10069   predicate (UseSSE<=1);
10070   // The source Double operand on FPU stack
10071   match(Set dst (LogD src));
10072   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10073   // fxch         ; swap ST(0) with ST(1)
10074   // fyl2x        ; compute log_e(2) * log_2(x)
10075   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10076             "FXCH   \n\t"
10077             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
10078          %}
10079   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10080               Opcode(0xD9), Opcode(0xC9),   // fxch
10081               Opcode(0xD9), Opcode(0xF1));  // fyl2x
10082 
10083   ins_pipe( pipe_slow );
10084 %}
10085 
// Natural logarithm for XMM operands: dst = log(src) (LogD), UseSSE>=2.
// log_e(2) is pushed first and src is pushed on top of it by Push_SrcD, so
// no FXCH is needed before FYL2X.  Push_ResultD then writes the result to dst.
// The format lists only the two x87 instructions, not the push/pop helpers.
10086 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
10087   predicate (UseSSE>=2);
10088   effect(KILL cr);
10089   // The source and result Double operands in XMM registers
10090   match(Set dst (LogD src));
10091   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10092   // fyl2x        ; compute log_e(2) * log_2(x)
10093   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10094             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
10095          %}
10096   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10097               Push_SrcD(src),
10098               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10099               Push_ResultD(dst));
10100   ins_pipe( pipe_slow );
10101 %}
10102 
10103 //-------------Float Instructions-------------------------------
10104 // Float Math
10105 
10106 // Code for float compare:
10107 //     fcompp();
10108 //     fwait(); fnstsw_ax();
10109 //     sahf();
10110 //     movl(dst, unordered_result);
10111 //     jcc(Assembler::parity, exit);
10112 //     movl(dst, less_result);
10113 //     jcc(Assembler::below, exit);
10114 //     movl(dst, equal_result);
10115 //     jcc(Assembler::equal, exit);
10116 //     movl(dst, greater_result);
10117 //   exit:
10118 
10119 // P6 version of float compare, sets condition codes in EFLAGS
// x87 float compare into unsigned-style EFLAGS (eFlagsRegU), selected when
// the CPU supports CMOV and UseSSE==0.  FUCOMIP writes EFLAGS directly; the
// cmpF_P6_fixup encoding then handles the unordered case, which per the
// format is turned into "less than" by setting CF through AH/SAHF.
// EAX is killed by that fixup.
10120 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10121   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10122   match(Set cr (CmpF src1 src2));
10123   effect(KILL rax);
10124   ins_cost(150);
10125   format %{ "FLD    $src1\n\t"
10126             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10127             "JNP    exit\n\t"
10128             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10129             "SAHF\n"
10130      "exit:\tNOP               // avoid branch to branch" %}
10131   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10132   ins_encode( Push_Reg_DPR(src1),
10133               OpcP, RegOpc(src2),
10134               cmpF_P6_fixup );
10135   ins_pipe( pipe_slow );
10136 %}
10137 
// Cheaper sibling of cmpFPR_cc_P6 (cost 100 instead of 150) that produces an
// eFlagsRegUCF result: the same FLD/FUCOMIP pair, but with no NaN fixup and
// therefore no EAX kill.
10138 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10139   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10140   match(Set cr (CmpF src1 src2));
10141   ins_cost(100);
10142   format %{ "FLD    $src1\n\t"
10143             "FUCOMIP ST,$src2  // P6 instruction" %}
10144   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10145   ins_encode( Push_Reg_DPR(src1),
10146               OpcP, RegOpc(src2));
10147   ins_pipe( pipe_slow );
10148 %}
10149 
10150 
10151 // Compare & branch
// x87 float compare into EFLAGS for UseSSE==0 with no CMOV requirement.
// It has a higher cost (200) than the P6 rules above.  The x87
// status word is fetched into AX and transferred with SAHF; per the format,
// an unordered result is rewritten so that it reads as "less than".
// EAX is killed.
10152 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10153   predicate(UseSSE == 0);
10154   match(Set cr (CmpF src1 src2));
10155   effect(KILL rax);
10156   ins_cost(200);
10157   format %{ "FLD    $src1\n\t"
10158             "FCOMp  $src2\n\t"
10159             "FNSTSW AX\n\t"
10160             "TEST   AX,0x400\n\t"
10161             "JZ,s   flags\n\t"
10162             "MOV    AH,1\t# unordered treat as LT\n"
10163     "flags:\tSAHF" %}
10164   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10165   ins_encode( Push_Reg_DPR(src1),
10166               OpcP, RegOpc(src2),
10167               fpu_flags);
10168   ins_pipe( pipe_slow );
10169 %}
10170 
10171 // Compare vs zero into -1,0,1
// Three-way x87 float compare of src1 against the constant zero operand
// (CmpF3), result in integer register dst, UseSSE==0.  The opcode pair is
// emitted as OpcS then OpcP, i.e. bytes D9 E4 (FTST); the pushed copy is then
// popped and CmpF_Result materializes the value.  EAX and EFLAGS are killed.
10172 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10173   predicate(UseSSE == 0);
10174   match(Set dst (CmpF3 src1 zero));
10175   effect(KILL cr, KILL rax);
10176   ins_cost(280);
10177   format %{ "FTSTF  $dst,$src1" %}
10178   opcode(0xE4, 0xD9);
10179   ins_encode( Push_Reg_DPR(src1),
10180               OpcS, OpcP, PopFPU,
10181               CmpF_Result(dst));
10182   ins_pipe( pipe_slow );
10183 %}
10184 
10185 // Compare into -1,0,1
// Three-way x87 float compare of two registers (CmpF3), result in integer
// register dst, UseSSE==0.  Pushes src1, emits the D8 /3 compare-and-pop
// against src2, then CmpF_Result(dst).  EAX and EFLAGS are killed.
10186 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10187   predicate(UseSSE == 0);
10188   match(Set dst (CmpF3 src1 src2));
10189   effect(KILL cr, KILL rax);
10190   ins_cost(300);
10191   format %{ "FCMPF  $dst,$src1,$src2" %}
10192   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10193   ins_encode( Push_Reg_DPR(src1),
10194               OpcP, RegOpc(src2),
10195               CmpF_Result(dst));
10196   ins_pipe( pipe_slow );
10197 %}
10198 
10199 // float compare and set condition codes in EFLAGS by XMM regs
// SSE float compare of two XMM registers into eFlagsRegU, UseSSE>=1.
// UCOMISS is followed by emit_cmpfp_fixup(), whose effect is spelled out in
// the format: when the parity flag reports an unordered result, EFLAGS is
// rewritten via PUSHF / AND / POPF.
10200 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10201   predicate(UseSSE>=1);
10202   match(Set cr (CmpF src1 src2));
10203   ins_cost(145);
10204   format %{ "UCOMISS $src1,$src2\n\t"
10205             "JNP,s   exit\n\t"
10206             "PUSHF\t# saw NaN, set CF\n\t"
10207             "AND     [rsp], #0xffffff2b\n\t"
10208             "POPF\n"
10209     "exit:" %}
10210   ins_encode %{
10211     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10212     emit_cmpfp_fixup(_masm);
10213   %}
10214   ins_pipe( pipe_slow );
10215 %}
10216 
// Cheaper sibling of cmpF_cc (cost 100 instead of 145) producing an
// eFlagsRegUCF result: a bare UCOMISS with no NaN fixup.
10217 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10218   predicate(UseSSE>=1);
10219   match(Set cr (CmpF src1 src2));
10220   ins_cost(100);
10221   format %{ "UCOMISS $src1,$src2" %}
10222   ins_encode %{
10223     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10224   %}
10225   ins_pipe( pipe_slow );
10226 %}
10227 
10228 // float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpF_cc: the LoadF of src2 is folded into UCOMISS.
// UseSSE>=1; the same emit_cmpfp_fixup() NaN handling follows the compare.
10229 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10230   predicate(UseSSE>=1);
10231   match(Set cr (CmpF src1 (LoadF src2)));
10232   ins_cost(165);
10233   format %{ "UCOMISS $src1,$src2\n\t"
10234             "JNP,s   exit\n\t"
10235             "PUSHF\t# saw NaN, set CF\n\t"
10236             "AND     [rsp], #0xffffff2b\n\t"
10237             "POPF\n"
10238     "exit:" %}
10239   ins_encode %{
10240     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10241     emit_cmpfp_fixup(_masm);
10242   %}
10243   ins_pipe( pipe_slow );
10244 %}
10245 
// Memory-operand form of cmpF_ccCF: a bare UCOMISS against a folded LoadF,
// producing an eFlagsRegUCF result with no NaN fixup.  UseSSE>=1.
10246 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10247   predicate(UseSSE>=1);
10248   match(Set cr (CmpF src1 (LoadF src2)));
10249   ins_cost(100);
10250   format %{ "UCOMISS $src1,$src2" %}
10251   ins_encode %{
10252     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10253   %}
10254   ins_pipe( pipe_slow );
10255 %}
10256 
10257 // Compare into -1,0,1 in XMM
// Three-way SSE float compare (CmpF3) of two XMM registers into integer
// register dst, UseSSE>=1.  After UCOMISS, emit_cmpfp3() produces the value;
// per the format, dst is -1 for unordered or below, otherwise SETNE/MOVZB
// yields 0 for equal and 1 for greater.  EFLAGS is killed.
10258 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10259   predicate(UseSSE>=1);
10260   match(Set dst (CmpF3 src1 src2));
10261   effect(KILL cr);
10262   ins_cost(255);
10263   format %{ "UCOMISS $src1, $src2\n\t"
10264             "MOV     $dst, #-1\n\t"
10265             "JP,s    done\n\t"
10266             "JB,s    done\n\t"
10267             "SETNE   $dst\n\t"
10268             "MOVZB   $dst, $dst\n"
10269     "done:" %}
10270   ins_encode %{
10271     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10272     emit_cmpfp3(_masm, $dst$$Register);
10273   %}
10274   ins_pipe( pipe_slow );
10275 %}
10276 
10277 // Compare into -1,0,1 in XMM and memory
// Memory-operand form of cmpF_reg: three-way compare (CmpF3) of an XMM
// register against a folded LoadF, result in integer register dst.
// UseSSE>=1; EFLAGS is killed.
10278 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10279   predicate(UseSSE>=1);
10280   match(Set dst (CmpF3 src1 (LoadF src2)));
10281   effect(KILL cr);
10282   ins_cost(275);
10283   format %{ "UCOMISS $src1, $src2\n\t"
10284             "MOV     $dst, #-1\n\t"
10285             "JP,s    done\n\t"
10286             "JB,s    done\n\t"
10287             "SETNE   $dst\n\t"
10288             "MOVZB   $dst, $dst\n"
10289     "done:" %}
10290   ins_encode %{
10291     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10292     emit_cmpfp3(_masm, $dst$$Register);
10293   %}
10294   ins_pipe( pipe_slow );
10295 %}
10296 
10297 // Spill to obtain 24-bit precision
// x87 float subtract dst = src1 - src2 (SubF) for compilations where
// select_24_bit_instr() is true: the result is popped to a float stack slot
// (Pop_Mem_FPR) rather than kept in a register.  UseSSE==0 only.
10298 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10299   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10300   match(Set dst (SubF src1 src2));
10301 
10302   format %{ "FSUB   $dst,$src1 - $src2" %}
10303   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10304   ins_encode( Push_Reg_FPR(src1),
10305               OpcReg_FPR(src2),
10306               Pop_Mem_FPR(dst) );
10307   ins_pipe( fpu_mem_reg_reg );
10308 %}
10309 //
10310 // This instruction does not round to 24-bits
// x87 float subtract, two-address register form: dst = dst - src (SubF).
// Used when UseSSE==0 and select_24_bit_instr() is false.  src is pushed and
// the DE /5 subtract-and-pop form folds it into dst.
10311 instruct subFPR_reg(regFPR dst, regFPR src) %{
10312   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10313   match(Set dst (SubF dst src));
10314 
10315   format %{ "FSUB   $dst,$src" %}
10316   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10317   ins_encode( Push_Reg_FPR(src),
10318               OpcP, RegOpc(dst) );
10319   ins_pipe( fpu_reg_reg );
10320 %}
10321 
10322 // Spill to obtain 24-bit precision
// x87 float add dst = src1 + src2 (AddF) for compilations where
// select_24_bit_instr() is true: the sum is popped to a float stack slot.
// Note the encoding pushes src2 and adds src1 to it.  UseSSE==0 only.
10323 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10324   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10325   match(Set dst (AddF src1 src2));
10326 
10327   format %{ "FADD   $dst,$src1,$src2" %}
10328   opcode(0xD8, 0x0); /* D8 C0+i */
10329   ins_encode( Push_Reg_FPR(src2),
10330               OpcReg_FPR(src1),
10331               Pop_Mem_FPR(dst) );
10332   ins_pipe( fpu_mem_reg_reg );
10333 %}
10334 //
10335 // This instruction does not round to 24-bits
// x87 float add, two-address register form: dst = dst + src (AddF).
// Used when UseSSE==0 and select_24_bit_instr() is false.  src is pushed and
// the DE /0 add-and-pop form folds it into dst.
10336 instruct addFPR_reg(regFPR dst, regFPR src) %{
10337   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10338   match(Set dst (AddF dst src));
10339 
10340   format %{ "FLD    $src\n\t"
10341             "FADDp  $dst,ST" %}
10342   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10343   ins_encode( Push_Reg_FPR(src),
10344               OpcP, RegOpc(dst) );
10345   ins_pipe( fpu_reg_reg );
10346 %}
10347 
// x87 float absolute value (AbsF), in place on the regFPR1 operand, UseSSE==0.
// The opcode pair is emitted as OpcS then OpcP, i.e. bytes D9 E1 (FABS).
10348 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10349   predicate(UseSSE==0);
10350   match(Set dst (AbsF src));
10351   ins_cost(100);
10352   format %{ "FABS" %}
10353   opcode(0xE1, 0xD9);
10354   ins_encode( OpcS, OpcP );
10355   ins_pipe( fpu_reg_reg );
10356 %}
10357 
// x87 float negate (NegF), in place on the regFPR1 operand, UseSSE==0.
// The opcode pair is emitted as OpcS then OpcP, i.e. bytes D9 E0 (FCHS).
10358 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10359   predicate(UseSSE==0);
10360   match(Set dst (NegF src));
10361   ins_cost(100);
10362   format %{ "FCHS" %}
10363   opcode(0xE0, 0xD9);
10364   ins_encode( OpcS, OpcP );
10365   ins_pipe( fpu_reg_reg );
10366 %}
10367 
10368 // Cisc-alternate to addFPR_reg
10369 // Spill to obtain 24-bit precision
// 24-bit float add with a folded load: dst = src1 + LoadF(src2).
// The memory operand is loaded with D9 /0 (tertiary opcode), src1 is added
// with D8 C0+i, and the sum is popped to the float stack slot dst.
10370 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10371   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10372   match(Set dst (AddF src1 (LoadF src2)));
10373 
10374   format %{ "FLD    $src2\n\t"
10375             "FADD   ST,$src1\n\t"
10376             "FSTP_S $dst" %}
10377   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10378   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10379               OpcReg_FPR(src1),
10380               Pop_Mem_FPR(dst) );
10381   ins_pipe( fpu_mem_reg_mem );
10382 %}
10383 //
10384 // Cisc-alternate to addFPR_reg
10385 // This instruction does not round to 24-bits
// Two-address float add with a folded load: dst = dst + LoadF(src).
// The memory operand is loaded with D9 /0 (tertiary opcode) and then folded
// into dst with the DE C0+i add-and-pop form.
10386 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10387   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10388   match(Set dst (AddF dst (LoadF src)));
10389 
10390   format %{ "FADD   $dst,$src" %}
10391   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10392   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10393               OpcP, RegOpc(dst) );
10394   ins_pipe( fpu_reg_mem );
10395 %}
10396 
10397 // // Following two instructions for _222_mpegaudio
10398 // Spill to obtain 24-bit precision
// 24-bit float add where the first AddF input is a memory operand:
// src1 is loaded with D9 /0, src2 is added with D8 C0+i, and the sum is
// popped to the float stack slot dst.  Unlike addFPR24_reg_mem, the match
// rule names src1 directly rather than wrapping it in LoadF.
10399 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10400   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10401   match(Set dst (AddF src1 src2));
10402 
10403   format %{ "FADD   $dst,$src1,$src2" %}
10404   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10405   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10406               OpcReg_FPR(src2),
10407               Pop_Mem_FPR(dst) );
10408   ins_pipe( fpu_mem_reg_mem );
10409 %}
10410 
10411 // Cisc-spill variant
10412 // Spill to obtain 24-bit precision
// 24-bit float add with both inputs in memory, matching AddF src1 (LoadF src2).
// src2 is loaded with D9 /0, then src1 is added straight from memory with
// D8 /0, and the sum is popped to the float stack slot dst.
// set_instruction_start is emitted between the two instructions.
10413 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10414   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10415   match(Set dst (AddF src1 (LoadF src2)));
10416 
10417   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10418   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10419   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10420               set_instruction_start,
10421               OpcP, RMopc_Mem(secondary,src1),
10422               Pop_Mem_FPR(dst) );
10423   ins_pipe( fpu_mem_mem_mem );
10424 %}
10425 
10426 // Spill to obtain 24-bit precision
// 24-bit float add with both inputs in memory, matching plain AddF src1 src2.
// The encoding is the same as addFPR24_mem_cisc: load src2 (D9 /0), add src1
// from memory (D8 /0), pop the sum to the float stack slot dst.
10427 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10428   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10429   match(Set dst (AddF src1 src2));
10430 
10431   format %{ "FADD   $dst,$src1,$src2" %}
10432   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10433   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10434               set_instruction_start,
10435               OpcP, RMopc_Mem(secondary,src1),
10436               Pop_Mem_FPR(dst) );
10437   ins_pipe( fpu_mem_mem_mem );
10438 %}
10439 
10440 
10441 // Spill to obtain 24-bit precision
// 24-bit float add of a register and a float constant: dst = src + con.
// The constant is read from the constant table ($constantaddress) and the
// sum is stored as a single-precision value to the stack slot at
// [rsp + $dst$$disp].
10442 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10443   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10444   match(Set dst (AddF src con));
10445   format %{ "FLD    $src\n\t"
10446             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10447             "FSTP_S $dst"  %}
10448   ins_encode %{
10449     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10450     __ fadd_s($constantaddress($con));
10451     __ fstp_s(Address(rsp, $dst$$disp));
10452   %}
10453   ins_pipe(fpu_mem_reg_con);
10454 %}
10455 //
10456 // This instruction does not round to 24-bits
// Float add of a register and a float constant with a register result:
// dst = src + con, used when select_24_bit_instr() is false.  The constant is
// read from the constant table and the sum is popped into register dst.
10457 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10458   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10459   match(Set dst (AddF src con));
10460   format %{ "FLD    $src\n\t"
10461             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10462             "FSTP   $dst"  %}
10463   ins_encode %{
10464     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10465     __ fadd_s($constantaddress($con));
10466     __ fstp_d($dst$$reg);
10467   %}
10468   ins_pipe(fpu_reg_reg_con);
10469 %}
10470 
10471 // Spill to obtain 24-bit precision
// x87 float multiply dst = src1 * src2 (MulF) for compilations where
// select_24_bit_instr() is true: push src1, multiply by src2 (D8 C8+i), and
// pop the product to the float stack slot dst.  UseSSE==0 only.
10472 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10473   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10474   match(Set dst (MulF src1 src2));
10475 
10476   format %{ "FLD    $src1\n\t"
10477             "FMUL   $src2\n\t"
10478             "FSTP_S $dst"  %}
10479   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10480   ins_encode( Push_Reg_FPR(src1),
10481               OpcReg_FPR(src2),
10482               Pop_Mem_FPR(dst) );
10483   ins_pipe( fpu_mem_reg_reg );
10484 %}
10485 //
10486 // This instruction does not round to 24-bits
// x87 float multiply with a register result: dst = src1 * src2 (MulF), used
// when UseSSE==0 and select_24_bit_instr() is false.
// The encoding pushes src2, multiplies by src1 (D8 C8+i) and pops the product
// into register dst.  The format mirrors exactly that sequence: the store is
// a register FSTP (as in mulFPR_reg_imm / mulFPR_reg_load1), not the
// single-precision memory store FSTP_S used by the 24-bit variant.
10487 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10488   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10489   match(Set dst (MulF src1 src2));
10490 
10491   format %{ "FLD    $src2\n\t"
10492             "FMUL   $src1\n\t"
10493             "FSTP   $dst"  %}
10494   opcode(0xD8, 0x1); /* D8 C8+i */
10495   ins_encode( Push_Reg_FPR(src2),
10496               OpcReg_FPR(src1),
10497               Pop_Reg_FPR(dst) );
10498   ins_pipe( fpu_reg_reg_reg );
10499 %}
10500 
10501 
10502 // Spill to obtain 24-bit precision
10503 // Cisc-alternate to reg-reg multiply
// 24-bit float multiply with a folded load: dst = src1 * LoadF(src2).
// src2 is loaded with D9 /0 (tertiary opcode), multiplied by src1 with
// D8 C8+i, and the product is popped to the float stack slot dst.
10504 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10505   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10506   match(Set dst (MulF src1 (LoadF src2)));
10507 
10508   format %{ "FLD_S  $src2\n\t"
10509             "FMUL   $src1\n\t"
10510             "FSTP_S $dst"  %}
10511   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10512   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10513               OpcReg_FPR(src1),
10514               Pop_Mem_FPR(dst) );
10515   ins_pipe( fpu_mem_reg_mem );
10516 %}
10517 //
10518 // This instruction does not round to 24-bits
10519 // Cisc-alternate to reg-reg multiply
// Float multiply with a folded load and a register result:
// dst = src1 * LoadF(src2), used when select_24_bit_instr() is false.
// src2 is loaded with D9 /0, multiplied by src1, and popped into dst.
10520 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10521   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10522   match(Set dst (MulF src1 (LoadF src2)));
10523 
10524   format %{ "FMUL   $dst,$src1,$src2" %}
10525   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10526   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10527               OpcReg_FPR(src1),
10528               Pop_Reg_FPR(dst) );
10529   ins_pipe( fpu_reg_reg_mem );
10530 %}
10531 
10532 // Spill to obtain 24-bit precision
// 24-bit float multiply with both inputs in memory: dst = src1 * src2.
// src2 is loaded with D9 /0, multiplied by src1 directly from memory with
// D8 /1, and the product is popped to the float stack slot dst.
// set_instruction_start is emitted between the two instructions.
10533 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10534   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10535   match(Set dst (MulF src1 src2));
10536 
10537   format %{ "FMUL   $dst,$src1,$src2" %}
10538   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10539   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10540               set_instruction_start,
10541               OpcP, RMopc_Mem(secondary,src1),
10542               Pop_Mem_FPR(dst) );
10543   ins_pipe( fpu_mem_mem_mem );
10544 %}
10545 
10546 // Spill to obtain 24-bit precision
// 24-bit float multiply of a register by a float constant: dst = src * con.
// The constant is read from the constant table ($constantaddress) and the
// product is stored as a single-precision value to the stack slot at
// [rsp + $dst$$disp].
10547 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10548   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10549   match(Set dst (MulF src con));
10550 
10551   format %{ "FLD    $src\n\t"
10552             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10553             "FSTP_S $dst"  %}
10554   ins_encode %{
10555     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10556     __ fmul_s($constantaddress($con));
10557     __ fstp_s(Address(rsp, $dst$$disp));
10558   %}
10559   ins_pipe(fpu_mem_reg_con);
10560 %}
10561 //
10562 // This instruction does not round to 24-bits
// Float multiply of a register by a float constant with a register result:
// dst = src * con, used when select_24_bit_instr() is false.  The constant is
// read from the constant table and the product is popped into register dst.
10563 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10564   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10565   match(Set dst (MulF src con));
10566 
10567   format %{ "FLD    $src\n\t"
10568             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10569             "FSTP   $dst"  %}
10570   ins_encode %{
10571     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10572     __ fmul_s($constantaddress($con));
10573     __ fstp_d($dst$$reg);
10574   %}
10575   ins_pipe(fpu_reg_reg_con);
10576 %}
10577 
10578 
10579 //
10580 // MACRO1 -- subsume unshared load into mulFPR
10581 // This instruction does not round to 24-bits
// Float multiply whose FIRST input is a folded load: dst = LoadF(mem1) * src.
// mem1 is loaded with D9 /0 (tertiary opcode), multiplied by src with
// D8 C8+i, and the product is popped into register dst.
10582 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10583   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10584   match(Set dst (MulF (LoadF mem1) src));
10585 
10586   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10587             "FMUL   ST,$src\n\t"
10588             "FSTP   $dst" %}
10589   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10590   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10591               OpcReg_FPR(src),
10592               Pop_Reg_FPR(dst) );
10593   ins_pipe( fpu_reg_reg_mem );
10594 %}
10595 //
10596 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10597 // This instruction does not round to 24-bits
// Fused pattern dst = (LoadF(mem1) * src1) + src2, matched as a single rule
// with an explicit cost of 95.  mem1 is loaded with D9 /0, multiplied by src1
// and added to src2 on the top of the x87 stack, then popped into dst.
10598 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10599   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10600   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10601   ins_cost(95);
10602 
10603   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10604             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10605             "FADD   ST,$src2\n\t"
10606             "FSTP   $dst" %}
10607   opcode(0xD9); /* LoadF D9 /0 */
10608   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10609               FMul_ST_reg(src1),
10610               FAdd_ST_reg(src2),
10611               Pop_Reg_FPR(dst) );
10612   ins_pipe( fpu_reg_mem_reg_reg );
10613 %}
10614 
10615 // MACRO3 -- addFPR a mulFPR
10616 // This instruction does not round to 24-bits.  It is a '2-address'
10617 // instruction in that the result goes back to src2.  This eliminates
10618 // a move from the macro; possibly the register allocator will have
10619 // to add it back (and maybe not).
// Fused register pattern src2 = (src0 * src1) + src2.  src0 is pushed,
// multiplied by src1 on the top of the x87 stack, and the product is folded
// into src2 with an add-and-pop (FAddP_reg_ST).
// NOTE(review): the declared opcode(0xD9) does not appear to be referenced by
// any of the encodings listed here (no OpcP/OpcS) -- confirm it is unused.
10620 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10621   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10622   match(Set src2 (AddF (MulF src0 src1) src2));
10623 
10624   format %{ "FLD    $src0     ===MACRO3===\n\t"
10625             "FMUL   ST,$src1\n\t"
10626             "FADDP  $src2,ST" %}
10627   opcode(0xD9); /* LoadF D9 /0 */
10628   ins_encode( Push_Reg_FPR(src0),
10629               FMul_ST_reg(src1),
10630               FAddP_reg_ST(src2) );
10631   ins_pipe( fpu_reg_reg_reg );
10632 %}
10633 
10634 // MACRO4 -- divFPR subFPR
10635 // This instruction does not round to 24-bits
// Fused pattern dst = (src2 - src1) / src3.  src2 is pushed, the combined
// subFPR_divFPR_encode(src1, src3) encoding performs the subtract and the
// divide on the top of the x87 stack, and the result is popped into dst.
10636 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10637   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10638   match(Set dst (DivF (SubF src2 src1) src3));
10639 
10640   format %{ "FLD    $src2   ===MACRO4===\n\t"
10641             "FSUB   ST,$src1\n\t"
10642             "FDIV   ST,$src3\n\t"
10643             "FSTP  $dst" %}
10644   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10645   ins_encode( Push_Reg_FPR(src2),
10646               subFPR_divFPR_encode(src1,src3),
10647               Pop_Reg_FPR(dst) );
10648   ins_pipe( fpu_reg_reg_reg_reg );
10649 %}
10650 
10651 // Spill to obtain 24-bit precision
// x87 float divide dst = src1 / src2 (DivF) for compilations where
// select_24_bit_instr() is true: push src1, divide by src2 (D8 F0+i), and
// pop the quotient to the float stack slot dst.  UseSSE==0 only.
10652 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10653   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10654   match(Set dst (DivF src1 src2));
10655 
10656   format %{ "FDIV   $dst,$src1,$src2" %}
10657   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10658   ins_encode( Push_Reg_FPR(src1),
10659               OpcReg_FPR(src2),
10660               Pop_Mem_FPR(dst) );
10661   ins_pipe( fpu_mem_reg_reg );
10662 %}
10663 //
10664 // This instruction does not round to 24-bits
// x87 float divide, two-address register form: dst = dst / src (DivF).
// Used when UseSSE==0 and select_24_bit_instr() is false.  src is pushed and
// the DE /7 divide-and-pop form folds it into dst.
10665 instruct divFPR_reg(regFPR dst, regFPR src) %{
10666   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10667   match(Set dst (DivF dst src));
10668 
10669   format %{ "FDIV   $dst,$src" %}
10670   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10671   ins_encode( Push_Reg_FPR(src),
10672               OpcP, RegOpc(dst) );
10673   ins_pipe( fpu_reg_reg );
10674 %}
10675 
10676 
10677 // Spill to obtain 24-bit precision
// x87 float remainder dst = src1 % src2 (ModF) for compilations where
// select_24_bit_instr() is true.  It reuses the double-remainder encodings
// (Push_Reg_Mod_DPR / emitModDPR / Push_Result_Mod_DPR) and then pops the
// result to the float stack slot dst.  EAX and EFLAGS are killed.
10678 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10679   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10680   match(Set dst (ModF src1 src2));
10681   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10682 
10683   format %{ "FMOD   $dst,$src1,$src2" %}
10684   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10685               emitModDPR(),
10686               Push_Result_Mod_DPR(src2),
10687               Pop_Mem_FPR(dst));
10688   ins_pipe( pipe_slow );
10689 %}
10690 //
10691 // This instruction does not round to 24-bits
// x87 float remainder, two-address register form: dst = dst % src (ModF),
// used when UseSSE==0 and select_24_bit_instr() is false.  It reuses the
// double-remainder encodings and pops the result into register dst.
// EAX and EFLAGS are killed.
10692 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10693   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10694   match(Set dst (ModF dst src));
10695   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10696 
10697   format %{ "FMOD   $dst,$src" %}
10698   ins_encode(Push_Reg_Mod_DPR(dst, src),
10699               emitModDPR(),
10700               Push_Result_Mod_DPR(src),
10701               Pop_Reg_FPR(dst));
10702   ins_pipe( pipe_slow );
10703 %}
10704 
// Float remainder for XMM operands: dst = src0 % src1 (ModF), UseSSE>=1.
// As the format shows, both inputs are moved through a 4-byte stack temporary
// onto the x87 stack (src1 first, then src0), FPREM is repeated while the JP
// branch is taken, and the result travels back through the temporary into
// $dst before the remaining x87 entry is popped (PopFPU).
// EAX and EFLAGS are declared killed.
10705 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10706   predicate(UseSSE>=1);
10707   match(Set dst (ModF src0 src1));
10708   effect(KILL rax, KILL cr);
10709   format %{ "SUB    ESP,4\t # FMOD\n"
10710           "\tMOVSS  [ESP+0],$src1\n"
10711           "\tFLD_S  [ESP+0]\n"
10712           "\tMOVSS  [ESP+0],$src0\n"
10713           "\tFLD_S  [ESP+0]\n"
10714      "loop:\tFPREM\n"
10715           "\tFWAIT\n"
10716           "\tFNSTSW AX\n"
10717           "\tSAHF\n"
10718           "\tJP     loop\n"
10719           "\tFSTP_S [ESP+0]\n"
10720           "\tMOVSS  $dst,[ESP+0]\n"
10721           "\tADD    ESP,4\n"
10722           "\tFSTP   ST0\t # Restore FPU Stack"
10723     %}
10724   ins_cost(250);
10725   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10726   ins_pipe( pipe_slow );
10727 %}
10728 
10729 
10730 //----------Arithmetic Conversion Instructions---------------------------------
10731 // The conversion operations are all alphabetically sorted.  Please keep it that way!
10732 
10733 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
        // RoundFloat for UseSSE==0: stores the regFPR source into a float stack
        // slot (format: FST_S).  Also used as the expansion target of
        // convDPR2FPR_reg.
10734   predicate(UseSSE==0);
10735   match(Set dst (RoundFloat src));
10736   ins_cost(125);
10737   format %{ "FST_S  $dst,$src\t# F-round" %}
10738   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10739   ins_pipe( fpu_mem_reg );
10740 %}
10741 
10742 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
        // RoundDouble for UseSSE<=1: stores the regDPR source into a double stack
        // slot (format: FST_D).  Also used as the expansion target of
        // convFPR2D_reg.
10743   predicate(UseSSE<=1);
10744   match(Set dst (RoundDouble src));
10745   ins_cost(125);
10746   format %{ "FST_D  $dst,$src\t# D-round" %}
10747   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10748   ins_pipe( fpu_mem_reg );
10749 %}
10750 
10751 // Force rounding to 24-bit precision and 8-bit exponent
10752 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
        // ConvD2F for UseSSE==0 with the result in a float stack slot.  It has no
        // encoding of its own; it expands into roundFloat_mem_reg(dst, src).
10753   predicate(UseSSE==0);
10754   match(Set dst (ConvD2F src));
10755   format %{ "FST_S  $dst,$src\t# F-round" %}
10756   expand %{
10757     roundFloat_mem_reg(dst,src);
10758   %}
10759 %}
10760 
10761 // Force rounding to 24-bit precision and 8-bit exponent
10762 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
        // ConvD2F for UseSSE==1: the source is a regDPR and the result a regF.
        // The value is stored as a 32-bit float into a 4-byte scratch slot on the
        // stack and reloaded with MOVSS.
        // NOTE(review): cr is killed presumably because the SUB/ADD on ESP
        // modify EFLAGS -- confirm.
10763   predicate(UseSSE==1);
10764   match(Set dst (ConvD2F src));
10765   effect( KILL cr );
10766   format %{ "SUB    ESP,4\n\t"
10767             "FST_S  [ESP],$src\t# F-round\n\t"
10768             "MOVSS  $dst,[ESP]\n\t"
10769             "ADD ESP,4" %}
10770   ins_encode %{
10771     __ subptr(rsp, 4);
          // If src is not FPR1L, load it first and use the popping store so the
          // extra entry is removed again; otherwise store without popping.
10772     if ($src$$reg != FPR1L_enc) {
10773       __ fld_s($src$$reg-1);
10774       __ fstp_s(Address(rsp, 0));
10775     } else {
10776       __ fst_s(Address(rsp, 0));
10777     }
10778     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10779     __ addptr(rsp, 4);
10780   %}
10781   ins_pipe( pipe_slow );
10782 %}
10783 
10784 // Force rounding double precision to single precision
10785 instruct convD2F_reg(regF dst, regD src) %{
        // ConvD2F for UseSSE>=2: a single CVTSD2SS between XMM registers.
10786   predicate(UseSSE>=2);
10787   match(Set dst (ConvD2F src));
10788   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10789   ins_encode %{
10790     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10791   %}
10792   ins_pipe( pipe_slow );
10793 %}
10794 
10795 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
        // ConvF2D for UseSSE==0: register-to-register transfer from a regFPR to a
        // regDPR, encoded by Pop_Reg_Reg_DPR.
10796   predicate(UseSSE==0);
10797   match(Set dst (ConvF2D src));
10798   format %{ "FST_S  $dst,$src\t# D-round" %}
10799   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10800   ins_pipe( fpu_reg_reg );
10801 %}
10802 
10803 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
        // ConvF2D for UseSSE==1 with a regFPR source and a double stack-slot
        // result.  It has no encoding of its own; it expands into
        // roundDouble_mem_reg(dst, src).
10804   predicate(UseSSE==1);
10805   match(Set dst (ConvF2D src));
10806   format %{ "FST_D  $dst,$src\t# D-round" %}
10807   expand %{
10808     roundDouble_mem_reg(dst,src);
10809   %}
10810 %}
10811 
10812 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
        // ConvF2D for UseSSE==1: the float lives in an XMM register (regF) and the
        // double result goes to a regDPR.  The value is written to a 4-byte
        // scratch slot on the stack with MOVSS, loaded with FLD_S, and then
        // stored-and-popped into $dst.
        // NOTE(review): cr is killed presumably because the SUB/ADD on ESP
        // modify EFLAGS -- confirm.
10813   predicate(UseSSE==1);
10814   match(Set dst (ConvF2D src));
10815   effect( KILL cr );
10816   format %{ "SUB    ESP,4\n\t"
10817             "MOVSS  [ESP],$src\n\t"
10818             "FLD_S  [ESP]\n\t"
10819             "ADD    ESP,4\n\t"
10820             "FSTP   $dst\t# D-round" %}
10821   ins_encode %{
10822     __ subptr(rsp, 4);
10823     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10824     __ fld_s(Address(rsp, 0));
10825     __ addptr(rsp, 4);
10826     __ fstp_d($dst$$reg);
10827   %}
10828   ins_pipe( pipe_slow );
10829 %}
10830 
10831 instruct convF2D_reg(regD dst, regF src) %{
        // ConvF2D for UseSSE>=2: a single CVTSS2SD between XMM registers.
10832   predicate(UseSSE>=2);
10833   match(Set dst (ConvF2D src));
10834   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10835   ins_encode %{
10836     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10837   %}
10838   ins_pipe( pipe_slow );
10839 %}
10840 
10841 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10842 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
        // ConvD2I for UseSSE<=1 with the source in a regDPR.  The result is
        // pinned to EAX (eAXRegI); EDX (tmp) and EFLAGS are killed.
        // Per the format: the value is pushed, the FPU control word is switched
        // to truncation, FISTP stores the integer to the stack, the control word
        // is restored and the result is popped into EAX.  Only when EAX equals
        // 0x80000000 is the slow path taken, which reloads the source and calls
        // d2i_wrapper.  The actual bytes come from the Push_Reg_DPR and
        // DPR2I_encoding enc_classes.
10843   predicate(UseSSE<=1);
10844   match(Set dst (ConvD2I src));
10845   effect( KILL tmp, KILL cr );
10846   format %{ "FLD    $src\t# Convert double to int \n\t"
10847             "FLDCW  trunc mode\n\t"
10848             "SUB    ESP,4\n\t"
10849             "FISTp  [ESP + #0]\n\t"
10850             "FLDCW  std/24-bit mode\n\t"
10851             "POP    EAX\n\t"
10852             "CMP    EAX,0x80000000\n\t"
10853             "JNE,s  fast\n\t"
10854             "FLD_D  $src\n\t"
10855             "CALL   d2i_wrapper\n"
10856       "fast:" %}
10857   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10858   ins_pipe( pipe_slow );
10859 %}
10860 
10861 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10862 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
        // ConvD2I for UseSSE>=2 with the source in an XMM register.  The result
        // is pinned to EAX (eAXRegI); EDX (tmp) and EFLAGS are killed.
        // Fast path: CVTTSD2SI.  If the result equals 0x80000000 the slow path
        // runs: the source is copied through an 8-byte stack scratch slot onto
        // the FPU stack and StubRoutines::d2i_wrapper() is called.
10863   predicate(UseSSE>=2);
10864   match(Set dst (ConvD2I src));
10865   effect( KILL tmp, KILL cr );
10866   format %{ "CVTTSD2SI $dst, $src\n\t"
10867             "CMP    $dst,0x80000000\n\t"
10868             "JNE,s  fast\n\t"
10869             "SUB    ESP, 8\n\t"
10870             "MOVSD  [ESP], $src\n\t"
10871             "FLD_D  [ESP]\n\t"
10872             "ADD    ESP, 8\n\t"
10873             "CALL   d2i_wrapper\n"
10874       "fast:" %}
10875   ins_encode %{
10876     Label fast;
10877     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
          // Any result other than 0x80000000 is final; skip the slow path.
10878     __ cmpl($dst$$Register, 0x80000000);
10879     __ jccb(Assembler::notEqual, fast);
          // Slow path: hand the original double to the wrapper on the FPU stack.
10880     __ subptr(rsp, 8);
10881     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10882     __ fld_d(Address(rsp, 0));
10883     __ addptr(rsp, 8);
10884     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10885     __ bind(fast);
10886   %}
10887   ins_pipe( pipe_slow );
10888 %}
10889 
10890 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
        // ConvD2L for UseSSE<=1 with the source in a regDPR.  The 64-bit result
        // uses the eADXRegL operand; the format pops its low half into EAX and
        // its high half into EDX.  EFLAGS is killed.
        // Per the format: the value is pushed, the FPU control word is switched
        // to truncation, FISTP stores 8 bytes to the stack, and the control word
        // is restored.  The slow path (reload the source, call d2l_wrapper) is
        // taken only when EDX == 0x80000000 and EAX == 0.
10891   predicate(UseSSE<=1);
10892   match(Set dst (ConvD2L src));
10893   effect( KILL cr );
10894   format %{ "FLD    $src\t# Convert double to long\n\t"
10895             "FLDCW  trunc mode\n\t"
10896             "SUB    ESP,8\n\t"
10897             "FISTp  [ESP + #0]\n\t"
10898             "FLDCW  std/24-bit mode\n\t"
10899             "POP    EAX\n\t"
10900             "POP    EDX\n\t"
10901             "CMP    EDX,0x80000000\n\t"
10902             "JNE,s  fast\n\t"
10903             "TEST   EAX,EAX\n\t"
10904             "JNE,s  fast\n\t"
10905             "FLD    $src\n\t"
10906             "CALL   d2l_wrapper\n"
10907       "fast:" %}
10908   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10909   ins_pipe( pipe_slow );
10910 %}
10911 
10912 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10913 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
        // ConvD2L for UseSSE>=2 with the source in an XMM register.  The 64-bit
        // result is popped into EAX (low half) and EDX (high half); EFLAGS is
        // killed.
        // The double is moved through an 8-byte stack slot onto the FPU stack,
        // converted with FISTP under the truncating control word, and the
        // control word is then restored.  The slow path (re-push the source,
        // call d2l_wrapper) is taken only when EDX == 0x80000000 and EAX == 0.
10914   predicate (UseSSE>=2);
10915   match(Set dst (ConvD2L src));
10916   effect( KILL cr );
10917   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10918             "MOVSD  [ESP],$src\n\t"
10919             "FLD_D  [ESP]\n\t"
10920             "FLDCW  trunc mode\n\t"
10921             "FISTp  [ESP + #0]\n\t"
10922             "FLDCW  std/24-bit mode\n\t"
10923             "POP    EAX\n\t"
10924             "POP    EDX\n\t"
10925             "CMP    EDX,0x80000000\n\t"
10926             "JNE,s  fast\n\t"
10927             "TEST   EAX,EAX\n\t"
10928             "JNE,s  fast\n\t"
10929             "SUB    ESP,8\n\t"
10930             "MOVSD  [ESP],$src\n\t"
10931             "FLD_D  [ESP]\n\t"
10932             "ADD    ESP,8\n\t"
10933             "CALL   d2l_wrapper\n"
10934       "fast:" %}
10935   ins_encode %{
10936     Label fast;
          // Move the XMM double onto the FPU stack via an 8-byte scratch slot;
          // the same slot then receives the 64-bit integer result.
10937     __ subptr(rsp, 8);
10938     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10939     __ fld_d(Address(rsp, 0));
10940     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10941     __ fistp_d(Address(rsp, 0));
10942     // Restore the rounding mode, mask the exception
10943     if (Compile::current()->in_24_bit_fp_mode()) {
10944       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10945     } else {
10946       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10947     }
10948     // Load the converted long, adjust CPU stack
10949     __ pop(rax);
10950     __ pop(rdx);
          // Only the bit pattern 0x80000000:00000000 triggers the slow path.
10951     __ cmpl(rdx, 0x80000000);
10952     __ jccb(Assembler::notEqual, fast);
10953     __ testl(rax, rax);
10954     __ jccb(Assembler::notEqual, fast);
          // Slow path: hand the original double to the wrapper on the FPU stack.
10955     __ subptr(rsp, 8);
10956     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10957     __ fld_d(Address(rsp, 0));
10958     __ addptr(rsp, 8);
10959     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10960     __ bind(fast);
10961   %}
10962   ins_pipe( pipe_slow );
10963 %}
10964 
10965 // Convert a double to an int.  Java semantics require we do complex
10966 // manglations in the corner cases.  So we set the rounding mode to
10967 // 'zero', store the darned double down as an int, and reset the
10968 // rounding mode to 'nearest'.  The hardware stores a flag value down
10969 // if we would overflow or converted a NAN; we check for this
10970 // and go the slow path if needed.
10971 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
        // ConvF2I for UseSSE==0 with the source in a regFPR.  The result is
        // pinned to EAX (eAXRegI); EDX (tmp) and EFLAGS are killed.
        // The sequence in the format is the same as for the double-to-int
        // conversion, and the double encoding class DPR2I_encoding is reused.
10972   predicate(UseSSE==0);
10973   match(Set dst (ConvF2I src));
10974   effect( KILL tmp, KILL cr );
10975   format %{ "FLD    $src\t# Convert float to int \n\t"
10976             "FLDCW  trunc mode\n\t"
10977             "SUB    ESP,4\n\t"
10978             "FISTp  [ESP + #0]\n\t"
10979             "FLDCW  std/24-bit mode\n\t"
10980             "POP    EAX\n\t"
10981             "CMP    EAX,0x80000000\n\t"
10982             "JNE,s  fast\n\t"
10983             "FLD    $src\n\t"
10984             "CALL   d2i_wrapper\n"
10985       "fast:" %}
10986   // DPR2I_encoding works for FPR2I
10987   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10988   ins_pipe( pipe_slow );
10989 %}
10990 
10991 // Convert a float in xmm to an int reg.
10992 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
        // ConvF2I for UseSSE>=1 with the source in an XMM register.  The result
        // is pinned to EAX (eAXRegI); EDX (tmp) and EFLAGS are killed.
        // Fast path: CVTTSS2SI.  If the result equals 0x80000000 the slow path
        // runs: the float is copied through a 4-byte stack scratch slot onto the
        // FPU stack and StubRoutines::d2i_wrapper() is called.
10993   predicate(UseSSE>=1);
10994   match(Set dst (ConvF2I src));
10995   effect( KILL tmp, KILL cr );
10996   format %{ "CVTTSS2SI $dst, $src\n\t"
10997             "CMP    $dst,0x80000000\n\t"
10998             "JNE,s  fast\n\t"
10999             "SUB    ESP, 4\n\t"
11000             "MOVSS  [ESP], $src\n\t"
11001             "FLD    [ESP]\n\t"
11002             "ADD    ESP, 4\n\t"
11003             "CALL   d2i_wrapper\n"
11004       "fast:" %}
11005   ins_encode %{
11006     Label fast;
11007     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
          // Any result other than 0x80000000 is final; skip the slow path.
11008     __ cmpl($dst$$Register, 0x80000000);
11009     __ jccb(Assembler::notEqual, fast);
          // Slow path: hand the original float to the wrapper on the FPU stack.
11010     __ subptr(rsp, 4);
11011     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11012     __ fld_s(Address(rsp, 0));
11013     __ addptr(rsp, 4);
11014     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11015     __ bind(fast);
11016   %}
11017   ins_pipe( pipe_slow );
11018 %}
11019 
11020 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
        // ConvF2L for UseSSE==0 with the source in a regFPR.  The 64-bit result
        // is popped into EAX (low half) and EDX (high half) per the format;
        // EFLAGS is killed.  The sequence is the same as for double-to-long, and
        // the double encoding class DPR2L_encoding is reused.
11021   predicate(UseSSE==0);
11022   match(Set dst (ConvF2L src));
11023   effect( KILL cr );
11024   format %{ "FLD    $src\t# Convert float to long\n\t"
11025             "FLDCW  trunc mode\n\t"
11026             "SUB    ESP,8\n\t"
11027             "FISTp  [ESP + #0]\n\t"
11028             "FLDCW  std/24-bit mode\n\t"
11029             "POP    EAX\n\t"
11030             "POP    EDX\n\t"
11031             "CMP    EDX,0x80000000\n\t"
11032             "JNE,s  fast\n\t"
11033             "TEST   EAX,EAX\n\t"
11034             "JNE,s  fast\n\t"
11035             "FLD    $src\n\t"
11036             "CALL   d2l_wrapper\n"
11037       "fast:" %}
11038   // DPR2L_encoding works for FPR2L
11039   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
11040   ins_pipe( pipe_slow );
11041 %}
11042 
11043 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11044 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
        // ConvF2L for UseSSE>=1 with the source in an XMM register.  The 64-bit
        // result is popped into EAX (low half) and EDX (high half); EFLAGS is
        // killed.
        // The float is moved through an 8-byte stack slot onto the FPU stack
        // (only the low 4 bytes are written by MOVSS), converted with FISTP under
        // the truncating control word, and the control word is then restored.
        // The slow path (re-push the source through a 4-byte slot, call
        // d2l_wrapper) is taken only when EDX == 0x80000000 and EAX == 0.
11045   predicate (UseSSE>=1);
11046   match(Set dst (ConvF2L src));
11047   effect( KILL cr );
11048   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
11049             "MOVSS  [ESP],$src\n\t"
11050             "FLD_S  [ESP]\n\t"
11051             "FLDCW  trunc mode\n\t"
11052             "FISTp  [ESP + #0]\n\t"
11053             "FLDCW  std/24-bit mode\n\t"
11054             "POP    EAX\n\t"
11055             "POP    EDX\n\t"
11056             "CMP    EDX,0x80000000\n\t"
11057             "JNE,s  fast\n\t"
11058             "TEST   EAX,EAX\n\t"
11059             "JNE,s  fast\n\t"
11060             "SUB    ESP,4\n\t"
11061             "MOVSS  [ESP],$src\n\t"
11062             "FLD_S  [ESP]\n\t"
11063             "ADD    ESP,4\n\t"
11064             "CALL   d2l_wrapper\n"
11065       "fast:" %}
11066   ins_encode %{
11067     Label fast;
          // Reserve 8 bytes: the float goes in, the 64-bit integer comes out.
11068     __ subptr(rsp, 8);
11069     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11070     __ fld_s(Address(rsp, 0));
11071     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11072     __ fistp_d(Address(rsp, 0));
11073     // Restore the rounding mode, mask the exception
11074     if (Compile::current()->in_24_bit_fp_mode()) {
11075       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11076     } else {
11077       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11078     }
11079     // Load the converted long, adjust CPU stack
11080     __ pop(rax);
11081     __ pop(rdx);
          // Only the bit pattern 0x80000000:00000000 triggers the slow path.
11082     __ cmpl(rdx, 0x80000000);
11083     __ jccb(Assembler::notEqual, fast);
11084     __ testl(rax, rax);
11085     __ jccb(Assembler::notEqual, fast);
          // Slow path: hand the original float to the wrapper on the FPU stack.
11086     __ subptr(rsp, 4);
11087     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11088     __ fld_s(Address(rsp, 0));
11089     __ addptr(rsp, 4);
11090     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11091     __ bind(fast);
11092   %}
11093   ins_pipe( pipe_slow );
11094 %}
11095 
11096 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
        // ConvI2D for UseSSE<=1 with the int in a stack slot: FILD the slot, then
        // store-and-pop into the regDPR destination.
11097   predicate( UseSSE<=1 );
11098   match(Set dst (ConvI2D src));
11099   format %{ "FILD   $src\n\t"
11100             "FSTP   $dst" %}
11101   opcode(0xDB, 0x0);  /* DB /0 */
11102   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11103   ins_pipe( fpu_reg_mem );
11104 %}
11105 
11106 instruct convI2D_reg(regD dst, rRegI src) %{
        // ConvI2D for UseSSE>=2 when UseXmmI2D is off: a single CVTSI2SD from a
        // general register.  convXI2D_reg covers the UseXmmI2D case.
11107   predicate( UseSSE>=2 && !UseXmmI2D );
11108   match(Set dst (ConvI2D src));
11109   format %{ "CVTSI2SD $dst,$src" %}
11110   ins_encode %{
11111     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11112   %}
11113   ins_pipe( pipe_slow );
11114 %}
11115 
11116 instruct convI2D_mem(regD dst, memory mem) %{
        // ConvI2D of a loaded int (ConvI2D (LoadI mem)) for UseSSE>=2: the load is
        // folded into CVTSI2SD with a memory operand.
11117   predicate( UseSSE>=2 );
11118   match(Set dst (ConvI2D (LoadI mem)));
11119   format %{ "CVTSI2SD $dst,$mem" %}
11120   ins_encode %{
11121     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11122   %}
11123   ins_pipe( pipe_slow );
11124 %}
11125 
11126 instruct convXI2D_reg(regD dst, rRegI src)
11127 %{
        // ConvI2D for UseSSE>=2 when UseXmmI2D is on: the int is first moved into
        // the destination XMM register with MOVD and then converted in place with
        // CVTDQ2PD.
11128   predicate( UseSSE>=2 && UseXmmI2D );
11129   match(Set dst (ConvI2D src));
11130
11131   format %{ "MOVD  $dst,$src\n\t"
11132             "CVTDQ2PD $dst,$dst\t# i2d" %}
11133   ins_encode %{
11134     __ movdl($dst$$XMMRegister, $src$$Register);
11135     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11136   %}
11137   ins_pipe(pipe_slow); // XXX
11138 %}
11139 
11140 instruct convI2DPR_mem(regDPR dst, memory mem) %{
        // ConvI2D of a loaded int (ConvI2D (LoadI mem)) for UseSSE<=1 when 24-bit
        // instructions are not selected: FILD directly from memory, then
        // store-and-pop into the regDPR destination.
11141   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11142   match(Set dst (ConvI2D (LoadI mem)));
11143   format %{ "FILD   $mem\n\t"
11144             "FSTP   $dst" %}
11145   opcode(0xDB);      /* DB /0 */
        // OpcP emits the primary opcode; the /0 digit is passed to RMopc_Mem.
11146   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11147               Pop_Reg_DPR(dst));
11148   ins_pipe( fpu_reg_mem );
11149 %}
11150 
11151 // Convert a byte to a float; no rounding step needed.
11152 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
        // ConvI2F for UseSSE==0 restricted by the predicate to inputs of the form
        // (AndI x 255), i.e. the int has been masked with the constant 255.
        // The result is popped into a register (Pop_Reg_FPR) with no store to
        // memory.
11153   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11154   match(Set dst (ConvI2F src));
11155   format %{ "FILD   $src\n\t"
11156             "FSTP   $dst" %}
11157
11158   opcode(0xDB, 0x0);  /* DB /0 */
11159   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11160   ins_pipe( fpu_reg_mem );
11161 %}
11162 
11163 // In 24-bit mode, force exponent rounding by storing back out
11164 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
        // ConvI2F for UseSSE==0 when 24-bit instructions are selected: FILD from
        // the int stack slot and store-and-pop to a float stack slot (FSTP_S),
        // so the result passes through memory.
11165   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11166   match(Set dst (ConvI2F src));
11167   ins_cost(200);
11168   format %{ "FILD   $src\n\t"
11169             "FSTP_S $dst" %}
11170   opcode(0xDB, 0x0);  /* DB /0 */
11171   ins_encode( Push_Mem_I(src),
11172               Pop_Mem_FPR(dst));
11173   ins_pipe( fpu_mem_mem );
11174 %}
11175 
11176 // In 24-bit mode, force exponent rounding by storing back out
11177 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
        // Memory-operand variant of convI2FPR_SSF: matches
        // (ConvI2F (LoadI mem)) for UseSSE==0 with 24-bit instructions selected,
        // folding the load into FILD and storing the result to a float stack slot.
11178   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11179   match(Set dst (ConvI2F (LoadI mem)));
11180   ins_cost(200);
11181   format %{ "FILD   $mem\n\t"
11182             "FSTP_S $dst" %}
11183   opcode(0xDB);  /* DB /0 */
        // OpcP emits the primary opcode; the /0 digit is passed to RMopc_Mem.
11184   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11185               Pop_Mem_FPR(dst));
11186   ins_pipe( fpu_mem_mem );
11187 %}
11188 
11189 // This instruction does not round to 24-bits
11190 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
        // ConvI2F for UseSSE==0 when 24-bit instructions are NOT selected: FILD
        // from the int stack slot and store-and-pop into a register
        // (Pop_Reg_FPR), without a store to memory.
11191   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11192   match(Set dst (ConvI2F src));
11193   format %{ "FILD   $src\n\t"
11194             "FSTP   $dst" %}
11195   opcode(0xDB, 0x0);  /* DB /0 */
11196   ins_encode( Push_Mem_I(src),
11197               Pop_Reg_FPR(dst));
11198   ins_pipe( fpu_reg_mem );
11199 %}
11200 
11201 // This instruction does not round to 24-bits
11202 instruct convI2FPR_mem(regFPR dst, memory mem) %{
        // Memory-operand variant of convI2FPR_reg: matches
        // (ConvI2F (LoadI mem)) for UseSSE==0 without 24-bit instructions,
        // folding the load into FILD and popping the result into a register.
11203   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11204   match(Set dst (ConvI2F (LoadI mem)));
11205   format %{ "FILD   $mem\n\t"
11206             "FSTP   $dst" %}
11207   opcode(0xDB);      /* DB /0 */
        // OpcP emits the primary opcode; the /0 digit is passed to RMopc_Mem.
11208   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11209               Pop_Reg_FPR(dst));
11210   ins_pipe( fpu_reg_mem );
11211 %}
11212 
11213 // Convert an int to a float in xmm; no rounding step needed.
11214 instruct convI2F_reg(regF dst, rRegI src) %{
        // ConvI2F with the result in an XMM register, using a single CVTSI2SS.
        // Selected when UseSSE==1, or when UseSSE>=2 and UseXmmI2F is off
        // (convXI2F_reg covers UseSSE>=2 with UseXmmI2F on).  The grouping of the
        // predicate is written out explicitly; it matches the default C++
        // precedence of && over ||.
11215   predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11216   match(Set dst (ConvI2F src));
11217   format %{ "CVTSI2SS $dst, $src" %}
11218   ins_encode %{
11219     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11220   %}
11221   ins_pipe( pipe_slow );
11222 %}
11223 
11224  instruct convXI2F_reg(regF dst, rRegI src)
11225 %{
        // ConvI2F for UseSSE>=2 when UseXmmI2F is on: the int is first moved into
        // the destination XMM register with MOVD and then converted in place with
        // CVTDQ2PS.
11226   predicate( UseSSE>=2 && UseXmmI2F );
11227   match(Set dst (ConvI2F src));
11228
11229   format %{ "MOVD  $dst,$src\n\t"
11230             "CVTDQ2PS $dst,$dst\t# i2f" %}
11231   ins_encode %{
11232     __ movdl($dst$$XMMRegister, $src$$Register);
11233     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11234   %}
11235   ins_pipe(pipe_slow); // XXX
11236 %}
11237 
11238 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
        // Sign-extending ConvI2L.  Per the format, src is copied into both halves
        // of the long register pair and the high half is arithmetically shifted
        // right by 31, leaving it filled with the sign bit.  EFLAGS is killed.
11239   match(Set dst (ConvI2L src));
11240   effect(KILL cr);
11241   ins_cost(375);
11242   format %{ "MOV    $dst.lo,$src\n\t"
11243             "MOV    $dst.hi,$src\n\t"
11244             "SAR    $dst.hi,31" %}
11245   ins_encode(convert_int_long(dst,src));
11246   ins_pipe( ialu_reg_reg_long );
11247 %}
11248 
11249 // Zero-extend convert int to long
11250 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
        // Zero-extending int-to-long: matches (AndL (ConvI2L src) mask) where mask
        // is an immL_32bits operand.  src is copied into the low half and the
        // high half is cleared by XOR-ing it with itself.  EFLAGS is killed.
11251   match(Set dst (AndL (ConvI2L src) mask) );
11252   effect( KILL flags );
11253   ins_cost(250);
11254   format %{ "MOV    $dst.lo,$src\n\t"
11255             "XOR    $dst.hi,$dst.hi" %}
11256   opcode(0x33); // XOR
11257   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11258   ins_pipe( ialu_reg_reg_long );
11259 %}
11260 
11261 // Zero-extend long
11262 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
        // Zero-extends the low 32 bits of a long: matches (AndL src mask) where
        // mask is an immL_32bits operand.  The low half of src is copied into the
        // low half of dst and the high half of dst is cleared by XOR-ing it with
        // itself.  EFLAGS is killed.
11263   match(Set dst (AndL src mask) );
11264   effect( KILL flags );
11265   ins_cost(250);
11266   format %{ "MOV    $dst.lo,$src.lo\n\t"
11267             "XOR    $dst.hi,$dst.hi" %}
11268   opcode(0x33); // XOR
11269   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11270   ins_pipe( ialu_reg_reg_long );
11271 %}
11272 
11273 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
        // ConvL2D for UseSSE<=1.  Per the format, both halves of the long are
        // pushed on the stack, loaded with FILD, the stack is released, and the
        // result is stored-and-popped into a double stack slot.  EFLAGS is killed.
11274   predicate (UseSSE<=1);
11275   match(Set dst (ConvL2D src));
11276   effect( KILL cr );
11277   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11278             "PUSH   $src.lo\n\t"
11279             "FILD   ST,[ESP + #0]\n\t"
11280             "ADD    ESP,8\n\t"
11281             "FSTP_D $dst\t# D-round" %}
11282   opcode(0xDF, 0x5);  /* DF /5 */
11283   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11284   ins_pipe( pipe_slow );
11285 %}
11286 
11287 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
        // ConvL2D for UseSSE>=2 with the result in an XMM register.  Per the
        // format, both halves of the long are pushed, loaded with FILD_D, stored
        // back to the same stack location as a double, moved into $dst with
        // MOVSD, and the 8 bytes of stack are then released.  EFLAGS is killed.
11288   predicate (UseSSE>=2);
11289   match(Set dst (ConvL2D src));
11290   effect( KILL cr );
11291   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11292             "PUSH   $src.lo\n\t"
11293             "FILD_D [ESP]\n\t"
11294             "FSTP_D [ESP]\n\t"
11295             "MOVSD  $dst,[ESP]\n\t"
11296             "ADD    ESP,8" %}
11297   opcode(0xDF, 0x5);  /* DF /5 */
11298   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11299   ins_pipe( pipe_slow );
11300 %}
11301 
11302 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
        // ConvL2F for UseSSE>=1 with the result in an XMM register.  Per the
        // format, both halves of the long are pushed, loaded with FILD_D, stored
        // back to the stack as a single float, moved into $dst with MOVSS, and
        // the 8 bytes of stack are then released.  EFLAGS is killed.
11303   predicate (UseSSE>=1);
11304   match(Set dst (ConvL2F src));
11305   effect( KILL cr );
11306   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11307             "PUSH   $src.lo\n\t"
11308             "FILD_D [ESP]\n\t"
11309             "FSTP_S [ESP]\n\t"
11310             "MOVSS  $dst,[ESP]\n\t"
11311             "ADD    ESP,8" %}
11312   opcode(0xDF, 0x5);  /* DF /5 */
        // NOTE(review): the 0x8 passed to Push_ResultF presumably is the stack
        // amount released by the "ADD ESP,8" in the format -- confirm against the
        // enc_class definition.
11313   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11314   ins_pipe( pipe_slow );
11315 %}
11316 
11317 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
        // ConvL2F with the result in a float stack slot.  Per the format, both
        // halves of the long are pushed, loaded with FILD, the stack is released,
        // and the result is stored-and-popped to $dst as a single float.
        // EFLAGS is killed.
        // NOTE(review): unlike convL2F_reg this rule carries no UseSSE
        // predicate; presumably selection relies on the stackSlotF vs. regF
        // destination operand -- confirm.
11318   match(Set dst (ConvL2F src));
11319   effect( KILL cr );
11320   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11321             "PUSH   $src.lo\n\t"
11322             "FILD   ST,[ESP + #0]\n\t"
11323             "ADD    ESP,8\n\t"
11324             "FSTP_S $dst\t# F-round" %}
11325   opcode(0xDF, 0x5);  /* DF /5 */
11326   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11327   ins_pipe( pipe_slow );
11328 %}
11329 
11330 instruct convL2I_reg( rRegI dst, eRegL src ) %{
        // ConvL2I: copies the low half of the long register pair into dst
        // (format: MOV $dst,$src.lo).  No flags are killed.
11331   match(Set dst (ConvL2I src));
11332   effect( DEF dst, USE src );
11333   format %{ "MOV    $dst,$src.lo" %}
11334   ins_encode(enc_CopyL_Lo(dst,src));
11335   ins_pipe( ialu_reg_reg );
11336 %}
11337 
11338 
11339 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
        // MoveF2I from a float stack slot to an int register: a plain 32-bit
        // load from [ESP + disp of src].  No predicate, so it does not depend on
        // UseSSE.
11340   match(Set dst (MoveF2I src));
11341   effect( DEF dst, USE src );
11342   ins_cost(100);
11343   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11344   ins_encode %{
11345     __ movl($dst$$Register, Address(rsp, $src$$disp));
11346   %}
11347   ins_pipe( ialu_reg_mem );
11348 %}
11349 
11350 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
        // MoveF2I for UseSSE==0 from a regFPR to an int stack slot, encoded by
        // Pop_Mem_Reg_FPR (format: FST_S).
11351   predicate(UseSSE==0);
11352   match(Set dst (MoveF2I src));
11353   effect( DEF dst, USE src );
11354
11355   ins_cost(125);
11356   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11357   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11358   ins_pipe( fpu_mem_reg );
11359 %}
11360 
11361 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
        // MoveF2I for UseSSE>=1 from an XMM register to an int stack slot: a
        // MOVSS store to [ESP + disp of dst].
11362   predicate(UseSSE>=1);
11363   match(Set dst (MoveF2I src));
11364   effect( DEF dst, USE src );
11365
11366   ins_cost(95);
11367   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11368   ins_encode %{
11369     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11370   %}
11371   ins_pipe( pipe_slow );
11372 %}
11373 
11374 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
        // MoveF2I for UseSSE>=2 directly from an XMM register to an int register
        // with MOVD.  It has the lowest cost (85) of the MoveF2I rules in this
        // group.
11375   predicate(UseSSE>=2);
11376   match(Set dst (MoveF2I src));
11377   effect( DEF dst, USE src );
11378   ins_cost(85);
11379   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11380   ins_encode %{
11381     __ movdl($dst$$Register, $src$$XMMRegister);
11382   %}
11383   ins_pipe( pipe_slow );
11384 %}
11385 
11386 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
        // MoveI2F from an int register to a float stack slot: a plain 32-bit
        // store to [ESP + disp of dst].  No predicate, so it does not depend on
        // UseSSE.
11387   match(Set dst (MoveI2F src));
11388   effect( DEF dst, USE src );
11389
11390   ins_cost(100);
11391   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11392   ins_encode %{
11393     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11394   %}
11395   ins_pipe( ialu_mem_reg );
11396 %}
11397 
11398 
11399 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
        // MoveI2F for UseSSE==0 from an int stack slot to a regFPR: the slot is
        // loaded as a 32-bit real (FLD m32real, D9 /0) and stored-and-popped into
        // $dst.
11400   predicate(UseSSE==0);
11401   match(Set dst (MoveI2F src));
11402   effect(DEF dst, USE src);
11403
11404   ins_cost(125);
11405   format %{ "FLD_S  $src\n\t"
11406             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11407   opcode(0xD9);               /* D9 /0, FLD m32real */
11408   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11409               Pop_Reg_FPR(dst) );
11410   ins_pipe( fpu_reg_mem );
11411 %}
11412 
11413 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
        // MoveI2F for UseSSE>=1 from an int stack slot to an XMM register: a
        // MOVSS load from [ESP + disp of src].
11414   predicate(UseSSE>=1);
11415   match(Set dst (MoveI2F src));
11416   effect( DEF dst, USE src );
11417
11418   ins_cost(95);
11419   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11420   ins_encode %{
11421     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11422   %}
11423   ins_pipe( pipe_slow );
11424 %}
11425 
11426 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
        // MoveI2F for UseSSE>=2 directly from an int register to an XMM register
        // with MOVD.  It has the lowest cost (85) of the MoveI2F rules in this
        // group.
11427   predicate(UseSSE>=2);
11428   match(Set dst (MoveI2F src));
11429   effect( DEF dst, USE src );
11430
11431   ins_cost(85);
11432   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11433   ins_encode %{
11434     __ movdl($dst$$XMMRegister, $src$$Register);
11435   %}
11436   ins_pipe( pipe_slow );
11437 %}
11438 
11439 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
        // MoveD2L from a double stack slot to a long register pair: two 32-bit
        // loads (opcode 0x8B each), the low half from $src and the high half
        // from $src+4.  No predicate, so it does not depend on UseSSE.
11440   match(Set dst (MoveD2L src));
11441   effect(DEF dst, USE src);
11442
11443   ins_cost(250);
11444   format %{ "MOV    $dst.lo,$src\n\t"
11445             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11446   opcode(0x8B, 0x8B);
        // OpcP/OpcS emit the primary/secondary opcode given above.
11447   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11448   ins_pipe( ialu_mem_long_reg );
11449 %}
11450 
11451 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
        // MoveD2L for UseSSE<=1 from a regDPR to a long stack slot, encoded by
        // Pop_Mem_Reg_DPR (format: FST_D).
11452   predicate(UseSSE<=1);
11453   match(Set dst (MoveD2L src));
11454   effect(DEF dst, USE src);
11455
11456   ins_cost(125);
11457   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11458   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11459   ins_pipe( fpu_mem_reg );
11460 %}
11461 
11462 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
        // MoveD2L for UseSSE>=2 from an XMM register to a long stack slot: a
        // MOVSD store to [ESP + disp of dst].
11463   predicate(UseSSE>=2);
11464   match(Set dst (MoveD2L src));
11465   effect(DEF dst, USE src);
11466   ins_cost(95);
11467   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11468   ins_encode %{
11469     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11470   %}
11471   ins_pipe( pipe_slow );
11472 %}
11473 
11474 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
        // MoveD2L for UseSSE>=2 directly from an XMM register to a long register
        // pair, using a second XMM register as a temporary (TEMP tmp).
        // The low half is read with MOVD.  PSHUFLW with immediate 0x4E then
        // places the upper 32 bits of the double into the low 32 bits of tmp,
        // from where a second MOVD copies them into the high half of dst.
11475   predicate(UseSSE>=2);
11476   match(Set dst (MoveD2L src));
11477   effect(DEF dst, USE src, TEMP tmp);
11478   ins_cost(85);
11479   format %{ "MOVD   $dst.lo,$src\n\t"
11480             "PSHUFLW $tmp,$src,0x4E\n\t"
11481             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11482   ins_encode %{
11483     __ movdl($dst$$Register, $src$$XMMRegister);
11484     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
          // HIGH_FROM_LOW maps the low register of the pair to its high partner.
11485     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11486   %}
11487   ins_pipe( pipe_slow );
11488 %}
11489 
11490 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
        // MoveL2D from a long register pair to a double stack slot: two 32-bit
        // stores (opcode 0x89 each), the low half to $dst and the high half to
        // $dst+4.  No predicate, so it does not depend on UseSSE.
11491   match(Set dst (MoveL2D src));
11492   effect(DEF dst, USE src);
11493
11494   ins_cost(200);
11495   format %{ "MOV    $dst,$src.lo\n\t"
11496             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11497   opcode(0x89, 0x89);
        // OpcP/OpcS emit the primary/secondary opcode given above.
11498   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11499   ins_pipe( ialu_mem_long_reg );
11500 %}
11501 
11502 
11503 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
        // MoveL2D for UseSSE<=1 from a long stack slot to a regDPR: the slot is
        // loaded as a 64-bit real (FLD m64real, DD /0) and stored-and-popped into
        // $dst.
11504   predicate(UseSSE<=1);
11505   match(Set dst (MoveL2D src));
11506   effect(DEF dst, USE src);
11507   ins_cost(125);
11508
11509   format %{ "FLD_D  $src\n\t"
11510             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11511   opcode(0xDD);               /* DD /0, FLD m64real */
11512   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11513               Pop_Reg_DPR(dst) );
11514   ins_pipe( fpu_reg_mem );
11515 %}
11516 
11517 
11518 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
        // MoveL2D for UseSSE>=2 from a long stack slot to an XMM register when
        // UseXmmLoadAndClearUpper is on (format: MOVSD).  The encoding calls
        // movdbl(), the same call used by MoveL2D_stack_reg_sse_partial.
        // NOTE(review): presumably movdbl() itself picks MOVSD vs. MOVLPD from
        // UseXmmLoadAndClearUpper -- confirm in the macro assembler.
11519   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11520   match(Set dst (MoveL2D src));
11521   effect(DEF dst, USE src);
11522
11523   ins_cost(95);
11524   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11525   ins_encode %{
11526     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11527   %}
11528   ins_pipe( pipe_slow );
11529 %}
11530 
11531 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
        // MoveL2D for UseSSE>=2 from a long stack slot to an XMM register when
        // UseXmmLoadAndClearUpper is off (format: MOVLPD).  The encoding calls
        // movdbl(), the same call used by MoveL2D_stack_reg_sse.
        // NOTE(review): presumably movdbl() itself picks MOVSD vs. MOVLPD from
        // UseXmmLoadAndClearUpper -- confirm in the macro assembler.
11532   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11533   match(Set dst (MoveL2D src));
11534   effect(DEF dst, USE src);
11535
11536   ins_cost(95);
11537   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11538   ins_encode %{
11539     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11540   %}
11541   ins_pipe( pipe_slow );
11542 %}
11543 
11544 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
        // MoveL2D for UseSSE>=2 directly from a long register pair to an XMM
        // register.  The low half is moved into dst and the high half into tmp
        // with MOVD, then PUNPCKLDQ merges the two 32-bit values in dst.
        // dst is declared TEMP rather than DEF; it is written before the last
        // read of src (the HIGH_FROM_LOW access on the second MOVD).
11545   predicate(UseSSE>=2);
11546   match(Set dst (MoveL2D src));
11547   effect(TEMP dst, USE src, TEMP tmp);
11548   ins_cost(85);
11549   format %{ "MOVD   $dst,$src.lo\n\t"
11550             "MOVD   $tmp,$src.hi\n\t"
11551             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11552   ins_encode %{
11553     __ movdl($dst$$XMMRegister, $src$$Register);
          // HIGH_FROM_LOW maps the low register of the pair to its high partner.
11554     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11555     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11556   %}
11557   ins_pipe( pipe_slow );
11558 %}
11559 
11560 
11561 // =======================================================================
11562 // fast clearing of an array
11563 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
        // ClearArray when UseFastStosb is off.  cnt (ECX) and base (EDI) are
        // inputs that are destroyed (USE_KILL); EAX (zero) and EFLAGS are killed.
        // The format describes XOR EAX / SHL ECX,1 / REP STOS; the code itself is
        // emitted by clear_mem(), called with the same arguments as in
        // rep_fast_stosb.
        // NOTE(review): presumably clear_mem() selects the STOS variant from
        // UseFastStosb -- confirm in the macro assembler.
11564   predicate(!UseFastStosb);
11565   match(Set dummy (ClearArray cnt base));
11566   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11567   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11568             "SHL    ECX,1\t# Convert doublewords to words\n\t"
11569             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11570   ins_encode %{ 
11571     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11572   %}
11573   ins_pipe( pipe_slow );
11574 %}
11575 
11576 instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
        // ClearArray when UseFastStosb is on.  cnt (ECX) and base (EDI) are
        // inputs that are destroyed (USE_KILL); EAX (zero) and EFLAGS are killed.
        // The format describes XOR EAX / SHL ECX,3 / REP STOSB; the code itself
        // is emitted by clear_mem(), called with the same arguments as in
        // rep_stos.
        // NOTE(review): presumably clear_mem() selects the STOS variant from
        // UseFastStosb -- confirm in the macro assembler.
11577   predicate(UseFastStosb);
11578   match(Set dummy (ClearArray cnt base));
11579   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11580   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11581             "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11582             "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11583   ins_encode %{ 
11584     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11585   %}
11586   ins_pipe( pipe_slow );
11587 %}
11588 
// StrComp intrinsic: compares (str1, cnt1) against (str2, cnt2) and sets result.
// All four inputs are USE_KILL, tmp1 is a scratch XMM register, flags are killed.
11589 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11590                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11591   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11592   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11593 
11594   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11595   ins_encode %{
          // Argument order is (str1, str2, cnt1, cnt2, ...), unlike the
          // operand declaration order above.
11596     __ string_compare($str1$$Register, $str2$$Register,
11597                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11598                       $tmp1$$XMMRegister);
11599   %}
11600   ins_pipe( pipe_slow );
11601 %}
11602 
11603 // fast string equals
// StrEquals intrinsic: compares cnt elements of str1 and str2, sets result.
// str1, str2 and cnt are USE_KILL; tmp1/tmp2 are scratch XMM registers; tmp3
// and the flags are killed.
11604 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11605                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11606   match(Set result (StrEquals (Binary str1 str2) cnt));
11607   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11608 
11609   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11610   ins_encode %{
          // First argument is false here and true in array_equals
          // (presumably an "operands are arrays" flag -- confirm in the helper).
11611     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11612                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11613                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11614   %}
11615   ins_pipe( pipe_slow );
11616 %}
11617 
11618 // fast search of substring with known size.
// StrIndexOf intrinsic for a substring whose length is a compile-time constant
// (int_cnt2). Only selected when UseSSE42Intrinsics is set.
// cnt2 is not an input of the match; it is a register that is killed and handed
// to the helper together with the constant length.
11619 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11620                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11621   predicate(UseSSE42Intrinsics);
11622   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11623   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11624 
11625   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11626   ins_encode %{
          // Choose the helper by the constant substring length.
11627     int icnt2 = (int)$int_cnt2$$constant;
11628     if (icnt2 >= 8) {
11629       // IndexOf for constant substrings with size >= 8 elements
11630       // which don't need to be loaded through stack.
11631       __ string_indexofC8($str1$$Register, $str2$$Register,
11632                           $cnt1$$Register, $cnt2$$Register,
11633                           icnt2, $result$$Register,
11634                           $vec$$XMMRegister, $tmp$$Register);
11635     } else {
11636       // Small strings are loaded through stack if they cross page boundary.
11637       __ string_indexof($str1$$Register, $str2$$Register,
11638                         $cnt1$$Register, $cnt2$$Register,
11639                         icnt2, $result$$Register,
11640                         $vec$$XMMRegister, $tmp$$Register);
11641     }
11642   %}
11643   ins_pipe( pipe_slow );
11644 %}
11645 
// StrIndexOf intrinsic for a substring whose length is only known at run time
// (cnt2 is a register input). Only selected when UseSSE42Intrinsics is set.
// All four inputs are USE_KILL; vec is a scratch XMM register; tmp and the
// flags are killed.
11646 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11647                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11648   predicate(UseSSE42Intrinsics);
11649   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11650   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11651 
11652   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11653   ins_encode %{
          // (-1) is passed where string_indexof_con passes the constant length
          // (presumably meaning "length not constant" -- confirm in the helper).
11654     __ string_indexof($str1$$Register, $str2$$Register,
11655                       $cnt1$$Register, $cnt2$$Register,
11656                       (-1), $result$$Register,
11657                       $vec$$XMMRegister, $tmp$$Register);
11658   %}
11659   ins_pipe( pipe_slow );
11660 %}
11661 
11662 // fast array equals
// AryEq intrinsic: compares ary1 with ary2 and sets result.
// ary1/ary2 are USE_KILL; tmp1/tmp2 are scratch XMM registers; tmp3, tmp4 and
// the flags are killed. No element count is matched: tmp3 is passed in the
// argument slot where string_equals passes its cnt operand.
11663 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11664                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11665 %{
11666   match(Set result (AryEq ary1 ary2));
11667   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11668   //ins_cost(300);
11669 
11670   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11671   ins_encode %{
          // First argument is true here and false in string_equals.
11672     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11673                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11674                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11675   %}
11676   ins_pipe( pipe_slow );
11677 %}
11678 
11679 // encode char[] to byte[] in ISO_8859_1
// EncodeISOArray intrinsic: encodes len elements from src into dst and sets
// result. src, dst and len are USE_KILL; tmp1..tmp4 are scratch XMM registers;
// tmp5 and the flags are killed.
11680 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11681                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11682                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11683   match(Set result (EncodeISOArray src (Binary dst len)));
11684   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11685 
11686   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11687   ins_encode %{
          // All work is delegated to the encode_iso_array assembler helper.
11688     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11689                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11690                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11691   %}
11692   ins_pipe( pipe_slow );
11693 %}
11694 
11695 
11696 //----------Control Flow Instructions------------------------------------------
11697 // Signed compare Instructions
// Signed int compare, register with register: CmpI op1 op2 -> flags in cr.
// Encoded as primary opcode 0x3B followed by a reg/reg operand byte.
11698 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11699   match(Set cr (CmpI op1 op2));
11700   effect( DEF cr, USE op1, USE op2 );
11701   format %{ "CMP    $op1,$op2" %}
11702   opcode(0x3B);  /* Opcode 3B /r */
11703   ins_encode( OpcP, RegReg( op1, op2) );
11704   ins_pipe( ialu_cr_reg_reg );
11705 %}
11706 
// Signed int compare, register with immediate: CmpI op1 #op2 -> flags in cr.
// Uses opcode 0x81 with /7 extension; the OpcSErm + Con8or32 encodings
// presumably pick a sign-extended 8-bit immediate form when the constant
// fits -- confirm in the enc_class definitions.
11707 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11708   match(Set cr (CmpI op1 op2));
11709   effect( DEF cr, USE op1 );
11710   format %{ "CMP    $op1,$op2" %}
11711   opcode(0x81,0x07);  /* Opcode 81 /7 */
11712   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11713   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11714   ins_pipe( ialu_cr_reg_imm );
11715 %}
11716 
11717 // Cisc-spilled version of compI_eReg
// Signed int compare of a register with an int loaded from memory:
// CmpI op1 (LoadI op2) -> flags in cr. Same opcode as compI_eReg, but with a
// reg/mem operand encoding and a higher cost (500).
11718 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11719   match(Set cr (CmpI op1 (LoadI op2)));
11720 
11721   format %{ "CMP    $op1,$op2" %}
11722   ins_cost(500);
11723   opcode(0x3B);  /* Opcode 3B /r */
11724   ins_encode( OpcP, RegMem( op1, op2) );
11725   ins_pipe( ialu_cr_reg_mem );
11726 %}
11727 
// Signed int compare against constant zero, emitted as TEST src,src
// (opcode 0x85 with src in both register slots) rather than a CMP with 0.
11728 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11729   match(Set cr (CmpI src zero));
11730   effect( DEF cr, USE src );
11731 
11732   format %{ "TEST   $src,$src" %}
11733   opcode(0x85);
11734   ins_encode( OpcP, RegReg( src, src ) );
11735   ins_pipe( ialu_cr_reg_imm );
11736 %}
11737 
// Folds (src & con) compared with zero into a single TEST src,#con.
// Encoded as opcode 0xF7 with /0 extension followed by a 32-bit immediate.
11738 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11739   match(Set cr (CmpI (AndI src con) zero));
11740 
11741   format %{ "TEST   $src,$con" %}
11742   opcode(0xF7,0x00);
11743   ins_encode( OpcP, RegOpc(src), Con32(con) );
11744   ins_pipe( ialu_cr_reg_imm );
11745 %}
11746 
// Folds (src & mem) compared with zero into a single TEST src,mem
// (opcode 0x85 with a reg/mem operand encoding).
// NOTE(review): the match uses the memory operand directly inside AndI, with
// no explicit LoadI wrapper as in compI_eReg_mem -- confirm this is intended.
11747 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11748   match(Set cr (CmpI (AndI src mem) zero));
11749 
11750   format %{ "TEST   $src,$mem" %}
11751   opcode(0x85);
11752   ins_encode( OpcP, RegMem( src, mem ) );
11753   ins_pipe( ialu_cr_reg_mem );
11754 %}
11755 
11756 // Unsigned compare Instructions; really, same as signed except they
11757 // produce an eFlagsRegU instead of eFlagsReg.
// Unsigned int compare, register with register: CmpU op1 op2.
// Same encoding as compI_eReg; the result is typed eFlagsRegU.
11758 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11759   match(Set cr (CmpU op1 op2));
11760 
11761   format %{ "CMPu   $op1,$op2" %}
11762   opcode(0x3B);  /* Opcode 3B /r */
11763   ins_encode( OpcP, RegReg( op1, op2) );
11764   ins_pipe( ialu_cr_reg_reg );
11765 %}
11766 
// Unsigned int compare, register with immediate: CmpU op1 #op2.
// Same encoding as compI_eReg_imm; the result is typed eFlagsRegU.
11767 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11768   match(Set cr (CmpU op1 op2));
11769 
11770   format %{ "CMPu   $op1,$op2" %}
11771   opcode(0x81,0x07);  /* Opcode 81 /7 */
11772   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11773   ins_pipe( ialu_cr_reg_imm );
11774 %}
11775 
11776 // Cisc-spilled version of compU_eReg
// Unsigned int compare of a register with an int loaded from memory:
// CmpU op1 (LoadI op2). Same encoding as compI_eReg_mem, result typed
// eFlagsRegU, cost 500.
11777 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11778   match(Set cr (CmpU op1 (LoadI op2)));
11779 
11780   format %{ "CMPu   $op1,$op2" %}
11781   ins_cost(500);
11782   opcode(0x3B);  /* Opcode 3B /r */
11783   ins_encode( OpcP, RegMem( op1, op2) );
11784   ins_pipe( ialu_cr_reg_mem );
11785 %}
11786 
11787 // // Cisc-spilled version of cmpU_eReg
11788 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11789 //  match(Set cr (CmpU (LoadI op1) op2));
11790 //
11791 //  format %{ "CMPu   $op1,$op2" %}
11792 //  ins_cost(500);
11793 //  opcode(0x39);  /* Opcode 39 /r */
11794 //  ins_encode( OpcP, RegMem( op1, op2) );
11795 //%}
11796 
// Unsigned int compare against constant zero, emitted as TEST src,src.
// Same encoding as testI_reg; the result is typed eFlagsRegU.
11797 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11798   match(Set cr (CmpU src zero));
11799 
11800   format %{ "TESTu  $src,$src" %}
11801   opcode(0x85);
11802   ins_encode( OpcP, RegReg( src, src ) );
11803   ins_pipe( ialu_cr_reg_imm );
11804 %}
11805 
11806 // Unsigned pointer compare Instructions
// Pointer compare, register with register: CmpP op1 op2.
// Result is typed eFlagsRegU (pointer compares are unsigned); encoding is the
// same opcode 0x3B reg/reg form used by the int compares.
11807 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11808   match(Set cr (CmpP op1 op2));
11809 
11810   format %{ "CMPu   $op1,$op2" %}
11811   opcode(0x3B);  /* Opcode 3B /r */
11812   ins_encode( OpcP, RegReg( op1, op2) );
11813   ins_pipe( ialu_cr_reg_reg );
11814 %}
11815 
// Pointer compare, register with pointer constant: CmpP op1 #op2.
// Uses the same opcode 0x81 /7 immediate encoding as compI_eReg_imm.
11816 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11817   match(Set cr (CmpP op1 op2));
11818 
11819   format %{ "CMPu   $op1,$op2" %}
11820   opcode(0x81,0x07);  /* Opcode 81 /7 */
11821   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11822   ins_pipe( ialu_cr_reg_imm );
11823 %}
11824 
11825 // Cisc-spilled version of compP_eReg
// Pointer compare of a register with a pointer loaded from memory:
// CmpP op1 (LoadP op2). Opcode 0x3B reg/mem form, cost 500.
// compP_mem_eReg matches the same tree shape with an extra predicate and no
// explicit cost.
11826 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11827   match(Set cr (CmpP op1 (LoadP op2)));
11828 
11829   format %{ "CMPu   $op1,$op2" %}
11830   ins_cost(500);
11831   opcode(0x3B);  /* Opcode 3B /r */
11832   ins_encode( OpcP, RegMem( op1, op2) );
11833   ins_pipe( ialu_cr_reg_mem );
11834 %}
11835 
11836 // // Cisc-spilled version of cmpP_eReg
11837 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11838 //  match(Set cr (CmpP (LoadP op1) op2));
11839 //
11840 //  format %{ "CMPu   $op1,$op2" %}
11841 //  ins_cost(500);
11842 //  opcode(0x39);  /* Opcode 39 /r */
11843 //  ins_encode( OpcP, RegMem( op1, op2) );
11844 //%}
11845 
11846 // Compare raw pointer (used in out-of-heap check).
11847 // Only works because non-oop pointers must be raw pointers
11848 // and raw pointers have no anti-dependencies.
// Same match and encoding as compP_eReg_mem, but without an explicit ins_cost
// and guarded by a predicate: the address input of the LoadP
// (n->in(2)->in(2)) must have a type whose reloc() is relocInfo::none.
// Despite the "mem_eReg" name, the operand order is still (register, memory).
11849 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11850   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11851   match(Set cr (CmpP op1 (LoadP op2)));
11852 
11853   format %{ "CMPu   $op1,$op2" %}
11854   opcode(0x3B);  /* Opcode 3B /r */
11855   ins_encode( OpcP, RegMem( op1, op2) );
11856   ins_pipe( ialu_cr_reg_mem );
11857 %}
11858 
11859 //
11860 // This will generate a signed flags result. This should be ok
11861 // since any compare to a zero should be eq/neq.
// Pointer compare against the null constant (immP0), emitted as TEST src,src.
// Unlike the other pointer compares, the result is typed eFlagsReg (signed).
11862 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11863   match(Set cr (CmpP src zero));
11864 
11865   format %{ "TEST   $src,$src" %}
11866   opcode(0x85);
11867   ins_encode( OpcP, RegReg( src, src ) );
11868   ins_pipe( ialu_cr_reg_imm );
11869 %}
11870 
11871 // Cisc-spilled version of testP_reg
11872 // This will generate a signed flags result. This should be ok
11873 // since any compare to a zero should be eq/neq.
// Compares a pointer loaded from memory against zero by testing the memory
// word against an all-ones mask: opcode 0xF7 /0 with immediate 0xFFFFFFFF.
// NOTE(review): the zero operand is declared immI0 here, whereas testP_reg
// uses immP0 for the same CmpP-against-zero shape -- confirm whether this
// rule can actually match a CmpP node as written.
11874 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11875   match(Set cr (CmpP (LoadP op) zero));
11876 
11877   format %{ "TEST   $op,0xFFFFFFFF" %}
11878   ins_cost(500);
11879   opcode(0xF7);               /* Opcode F7 /0 */
11880   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11881   ins_pipe( ialu_cr_reg_imm );
11882 %}
11883 
11884 // Yanked all unsigned pointer compare operations.
11885 // Pointer compares are done with CmpP which is already unsigned.
11886 
11887 //----------Max and Min--------------------------------------------------------
11888 // Min Instructions
11889 ////
11890 //   *** Min and Max using the conditional move are slower than the
11891 //   *** branch version on a Pentium III.
11892 // // Conditional move for min
11893 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11894 //  effect( USE_DEF op2, USE op1, USE cr );
11895 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11896 //  opcode(0x4C,0x0F);
11897 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11898 //  ins_pipe( pipe_cmov_reg );
11899 //%}
11900 //
11901 //// Min Register with Register (P6 version)
11902 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11903 //  predicate(VM_Version::supports_cmov() );
11904 //  match(Set op2 (MinI op1 op2));
11905 //  ins_cost(200);
11906 //  expand %{
11907 //    eFlagsReg cr;
11908 //    compI_eReg(cr,op1,op2);
11909 //    cmovI_reg_lt(op2,op1,cr);
11910 //  %}
11911 //%}
11912 
11913 // Min Register with Register (generic version)
// Integer minimum, two-address form: dst = MinI(dst, src). Flags are killed.
// The instruction bytes come from the min_enc encoding class (defined outside
// this section).
// NOTE(review): opcode(0xCC) is declared but min_enc's use of it is not
// visible here -- presumably a placeholder; confirm.
11914 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11915   match(Set dst (MinI dst src));
11916   effect(KILL flags);
11917   ins_cost(300);
11918 
11919   format %{ "MIN    $dst,$src" %}
11920   opcode(0xCC);
11921   ins_encode( min_enc(dst,src) );
11922   ins_pipe( pipe_slow );
11923 %}
11924 
11925 // Max Register with Register
11926 //   *** Min and Max using the conditional move are slower than the
11927 //   *** branch version on a Pentium III.
11928 // // Conditional move for max
11929 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11930 //  effect( USE_DEF op2, USE op1, USE cr );
11931 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11932 //  opcode(0x4F,0x0F);
11933 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11934 //  ins_pipe( pipe_cmov_reg );
11935 //%}
11936 //
11937 // // Max Register with Register (P6 version)
11938 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11939 //  predicate(VM_Version::supports_cmov() );
11940 //  match(Set op2 (MaxI op1 op2));
11941 //  ins_cost(200);
11942 //  expand %{
11943 //    eFlagsReg cr;
11944 //    compI_eReg(cr,op1,op2);
11945 //    cmovI_reg_gt(op2,op1,cr);
11946 //  %}
11947 //%}
11948 
11949 // Max Register with Register (generic version)
// Integer maximum, two-address form: dst = MaxI(dst, src). Flags are killed.
// The instruction bytes come from the max_enc encoding class (defined outside
// this section); mirrors minI_eReg.
11950 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11951   match(Set dst (MaxI dst src));
11952   effect(KILL flags);
11953   ins_cost(300);
11954 
11955   format %{ "MAX    $dst,$src" %}
11956   opcode(0xCC);
11957   ins_encode( max_enc(dst,src) );
11958   ins_pipe( pipe_slow );
11959 %}
11960 
11961 // ============================================================================
11962 // Counted Loop limit node which represents exact final iterator value.
11963 // Note: the resulting value should fit into integer range since
11964 // counted loops have limit check on overflow.
// Computes the exact final iterator value of a counted loop (LoopLimit node).
// Per the format text:
//   limit = init + stride * ((limit - init + stride - 1) / stride)
// The subtraction and the rounding adjustment are carried out on a 64-bit
// value held in $limit_hi:$limit; $tmp first holds the sign extension of
// $init and afterwards the divisor/multiplier. The stride is a compile-time
// constant that must not be +1 or -1 (asserted below).
11965 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
11966   match(Set limit (LoopLimit (Binary init limit) stride));
11967   effect(TEMP limit_hi, TEMP tmp, KILL flags);
11968   ins_cost(300);
11969 
11970   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
11971   ins_encode %{
11972     int strd = (int)$stride$$constant;
11973     assert(strd != 1 && strd != -1, "sanity");
11975     // Convert limit to long (EAX:EDX)
11976     __ cdql();
11977     // Convert init to long (init:tmp)
11978     __ movl($tmp$$Register, $init$$Register);
11979     __ sarl($tmp$$Register, 31);
11980     // $limit - $init
11981     __ subl($limit$$Register, $init$$Register);
11982     __ sbbl($limit_hi$$Register, $tmp$$Register);
11983     // + ($stride - 1)
11984     if (strd > 0) {
11985       __ addl($limit$$Register, (strd - 1));
11986       __ adcl($limit_hi$$Register, 0);
11987       __ movl($tmp$$Register, strd);
11988     } else {
            // Negative stride: adjust by (stride + 1), then negate the 64-bit
            // value so the division below always uses a positive divisor.
11989       __ addl($limit$$Register, (strd + 1));
11990       __ adcl($limit_hi$$Register, -1);
11991       __ lneg($limit_hi$$Register, $limit$$Register);
11992       __ movl($tmp$$Register, -strd);
11993     }
11994     // signed division: (EAX:EDX) / pos_stride
11995     __ idivl($tmp$$Register);
11996     if (strd < 0) {
11997       // restore sign
11998       __ negl($tmp$$Register);
11999     }
12000     // (EAX) * stride
12001     __ mull($tmp$$Register);
12002     // + init (ignore upper bits)
12003     __ addl($limit$$Register, $init$$Register);
12004   %}
12005   ins_pipe( pipe_slow );
12006 %}
12007 
12008 // ============================================================================
12009 // Branch Instructions
12010 // Jump Table
// Jump-table dispatch for a Jump node: indirect jump through the constant
// table entry at ($constantaddress + switch_val). The index is used with
// scale 1, so switch_val must already be a byte offset into the table.
12011 instruct jumpXtnd(rRegI switch_val) %{
12012   match(Jump switch_val);
12013   ins_cost(350);
12014   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12015   ins_encode %{
12016     // Jump to Address(table_base + switch_reg)
12017     Address index(noreg, $switch_val$$Register, Address::times_1);
12018     __ jump(ArrayAddress($constantaddress, index));
12019   %}
12020   ins_pipe(pipe_jmp);
12021 %}
12022 
12023 // Jump Direct - Label defines a relative address from JMP+1
// Unconditional branch for Goto, long form. The declared size is 5 bytes and
// the encoding passes false to jmp() so the long form is always emitted;
// jmpDir_short is the 2-byte replacement.
12024 instruct jmpDir(label labl) %{
12025   match(Goto);
12026   effect(USE labl);
12027 
12028   ins_cost(300);
12029   format %{ "JMP    $labl" %}
12030   size(5);
12031   ins_encode %{
12032     Label* L = $labl$$label;
12033     __ jmp(*L, false); // Always long jump
12034   %}
12035   ins_pipe( pipe_jmp );
12036 %}
12037 
12038 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional branch for If on signed flags (cmpOp / eFlagsReg), long form.
// Declared size is 6 bytes; the condition code comes from $cop$$cmpcode.
12039 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12040   match(If cop cr);
12041   effect(USE labl);
12042 
12043   ins_cost(300);
12044   format %{ "J$cop    $labl" %}
12045   size(6);
12046   ins_encode %{
12047     Label* L = $labl$$label;
12048     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12049   %}
12050   ins_pipe( pipe_jcc );
12051 %}
12052 
12053 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional back-branch for CountedLoopEnd on signed flags, long form
// (6 bytes). Encoding is identical to jmpCon; only the matched node differs.
12054 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12055   match(CountedLoopEnd cop cr);
12056   effect(USE labl);
12057 
12058   ins_cost(300);
12059   format %{ "J$cop    $labl\t# Loop end" %}
12060   size(6);
12061   ins_encode %{
12062     Label* L = $labl$$label;
12063     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12064   %}
12065   ins_pipe( pipe_jcc );
12066 %}
12067 
12068 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// CountedLoopEnd branch on unsigned flags (cmpOpU / eFlagsRegU), long form
// (6 bytes).
12069 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12070   match(CountedLoopEnd cop cmp);
12071   effect(USE labl);
12072 
12073   ins_cost(300);
12074   format %{ "J$cop,u  $labl\t# Loop end" %}
12075   size(6);
12076   ins_encode %{
12077     Label* L = $labl$$label;
12078     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12079   %}
12080   ins_pipe( pipe_jcc );
12081 %}
12082 
// CountedLoopEnd branch for the cmpOpUCF / eFlagsRegUCF operand pair, long
// form (6 bytes). Same encoding as jmpLoopEndU but with a lower cost (200).
12083 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12084   match(CountedLoopEnd cop cmp);
12085   effect(USE labl);
12086 
12087   ins_cost(200);
12088   format %{ "J$cop,u  $labl\t# Loop end" %}
12089   size(6);
12090   ins_encode %{
12091     Label* L = $labl$$label;
12092     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12093   %}
12094   ins_pipe( pipe_jcc );
12095 %}
12096 
12097 // Jump Direct Conditional - using unsigned comparison
// Conditional branch for If on unsigned flags (cmpOpU / eFlagsRegU), long
// form (6 bytes).
12098 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12099   match(If cop cmp);
12100   effect(USE labl);
12101 
12102   ins_cost(300);
12103   format %{ "J$cop,u  $labl" %}
12104   size(6);
12105   ins_encode %{
12106     Label* L = $labl$$label;
12107     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12108   %}
12109   ins_pipe(pipe_jcc);
12110 %}
12111 
// Conditional branch for If with the cmpOpUCF / eFlagsRegUCF operand pair,
// long form (6 bytes). Same encoding as jmpConU but with a lower cost (200).
12112 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12113   match(If cop cmp);
12114   effect(USE labl);
12115 
12116   ins_cost(200);
12117   format %{ "J$cop,u  $labl" %}
12118   size(6);
12119   ins_encode %{
12120     Label* L = $labl$$label;
12121     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12122   %}
12123   ins_pipe(pipe_jcc);
12124 %}
12125 
// Conditional branch for If with a cmpOpUCF2 condition, long form. Only the
// equal and notEqual conditions are handled; anything else hits
// ShouldNotReachHere(). Two branches are emitted because the parity flag has
// to be consulted as well (presumably it marks an unordered floating-point
// compare -- confirm against the instructions that produce eFlagsRegUCF):
//   notEqual: jump to $labl if parity is set OR the values are not equal.
//   equal:    skip over the branch if parity is set, else jump to $labl if equal.
12126 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12127   match(If cop cmp);
12128   effect(USE labl);
12129 
12130   ins_cost(200);
12131   format %{ $$template
12132     if ($cop$$cmpcode == Assembler::notEqual) {
12133       $$emit$$"JP,u   $labl\n\t"
12134       $$emit$$"J$cop,u   $labl"
12135     } else {
12136       $$emit$$"JP,u   done\n\t"
12137       $$emit$$"J$cop,u   $labl\n\t"
12138       $$emit$$"done:"
12139     }
12140   %}
12141   ins_encode %{
12142     Label* l = $labl$$label;
12143     if ($cop$$cmpcode == Assembler::notEqual) {
12144       __ jcc(Assembler::parity, *l, false);
12145       __ jcc(Assembler::notEqual, *l, false);
12146     } else if ($cop$$cmpcode == Assembler::equal) {
            // The parity check only needs to hop over the next branch, so the
            // short jccb form is used for it.
12147       Label done;
12148       __ jccb(Assembler::parity, done);
12149       __ jcc(Assembler::equal, *l, false);
12150       __ bind(done);
12151     } else {
12152        ShouldNotReachHere();
12153     }
12154   %}
12155   ins_pipe(pipe_jcc);
12156 %}
12157 
12158 // ============================================================================
12159 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12160 // array for an instance of the superklass.  Set a hidden internal cache on a
12161 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12162 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
// Slow-path subtype check that produces its answer in $result.
// rcx and the flags are killed. The format text documents the emitted
// sequence: scan the secondary-supers array for $super, and on a hit update
// the secondary super cache and zero $result. opcode(0x1) tells the shared
// enc_PartialSubtypeCheck encoding to emit that final XOR.
12163 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12164   match(Set result (PartialSubtypeCheck sub super));
12165   effect( KILL rcx, KILL cr );
12166 
12167   ins_cost(1100);  // slightly larger than the next version
12168   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12169             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12170             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12171             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12172             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12173             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12174             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12175      "miss:\t" %}
12176 
12177   opcode(0x1); // Force a XOR of EDI
12178   ins_encode( enc_PartialSubtypeCheck() );
12179   ins_pipe( pipe_slow );
12180 %}
12181 
// Slow-path subtype check whose result is only compared against null, so the
// answer is delivered in the flags (cr) instead of a register.
// rcx and result are killed. Uses the same enc_PartialSubtypeCheck encoding as
// partialSubtypeCheck, but opcode(0x0) suppresses the final XOR of EDI; the
// cost (1000) is slightly lower so this form is preferred when it matches.
12182 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12183   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12184   effect( KILL rcx, KILL result );
12185 
12186   ins_cost(1000);
12187   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12188             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12189             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12190             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12191             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12192             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12193      "miss:\t" %}
12194 
12195   opcode(0x0);  // No need to XOR EDI
12196   ins_encode( enc_PartialSubtypeCheck() );
12197   ins_pipe( pipe_slow );
12198 %}
12199 
12200 // ============================================================================
12201 // Branch Instructions -- short offset versions
12202 //
12203 // These instructions are used to replace jumps of a long offset (the default
12204 // match) with jumps of a shorter offset.  These instructions are all tagged
12205 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12206 // match rules in general matching.  Instead, the ADLC generates a conversion
12207 // method in the MachNode which can be used to do in-place replacement of the
12208 // long variant with the shorter variant.  The compiler will determine if a
12209 // branch can be taken by the is_short_branch_offset() predicate in the machine
12210 // specific code section of the file.
12211 
12212 // Jump Direct - Label defines a relative address from JMP+1
// Short-offset replacement for jmpDir: 2-byte JMP emitted with jmpb().
// Tagged ins_short_branch(1), so it is substituted for the long form rather
// than matched directly.
12213 instruct jmpDir_short(label labl) %{
12214   match(Goto);
12215   effect(USE labl);
12216 
12217   ins_cost(300);
12218   format %{ "JMP,s  $labl" %}
12219   size(2);
12220   ins_encode %{
12221     Label* L = $labl$$label;
12222     __ jmpb(*L);
12223   %}
12224   ins_pipe( pipe_jmp );
12225   ins_short_branch(1);
12226 %}
12227 
12228 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpCon: 2-byte Jcc emitted with jccb().
12229 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12230   match(If cop cr);
12231   effect(USE labl);
12232 
12233   ins_cost(300);
12234   format %{ "J$cop,s  $labl" %}
12235   size(2);
12236   ins_encode %{
12237     Label* L = $labl$$label;
12238     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12239   %}
12240   ins_pipe( pipe_jcc );
12241   ins_short_branch(1);
12242 %}
12243 
12244 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEnd: 2-byte Jcc emitted with jccb().
12245 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12246   match(CountedLoopEnd cop cr);
12247   effect(USE labl);
12248 
12249   ins_cost(300);
12250   format %{ "J$cop,s  $labl\t# Loop end" %}
12251   size(2);
12252   ins_encode %{
12253     Label* L = $labl$$label;
12254     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12255   %}
12256   ins_pipe( pipe_jcc );
12257   ins_short_branch(1);
12258 %}
12259 
12260 // Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for jmpLoopEndU: 2-byte Jcc emitted with jccb().
12261 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12262   match(CountedLoopEnd cop cmp);
12263   effect(USE labl);
12264 
12265   ins_cost(300);
12266   format %{ "J$cop,us $labl\t# Loop end" %}
12267   size(2);
12268   ins_encode %{
12269     Label* L = $labl$$label;
12270     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12271   %}
12272   ins_pipe( pipe_jcc );
12273   ins_short_branch(1);
12274 %}
12275 
// Short-offset replacement for jmpLoopEndUCF: 2-byte Jcc emitted with jccb().
// Note the cost here is 300, while the long form jmpLoopEndUCF declares 200.
12276 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12277   match(CountedLoopEnd cop cmp);
12278   effect(USE labl);
12279 
12280   ins_cost(300);
12281   format %{ "J$cop,us $labl\t# Loop end" %}
12282   size(2);
12283   ins_encode %{
12284     Label* L = $labl$$label;
12285     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12286   %}
12287   ins_pipe( pipe_jcc );
12288   ins_short_branch(1);
12289 %}
12290 
12291 // Jump Direct Conditional - using unsigned comparison
// Short-offset replacement for jmpConU: 2-byte Jcc emitted with jccb().
12292 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12293   match(If cop cmp);
12294   effect(USE labl);
12295 
12296   ins_cost(300);
12297   format %{ "J$cop,us $labl" %}
12298   size(2);
12299   ins_encode %{
12300     Label* L = $labl$$label;
12301     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12302   %}
12303   ins_pipe( pipe_jcc );
12304   ins_short_branch(1);
12305 %}
12306 
// Short-offset replacement for jmpConUCF: 2-byte Jcc emitted with jccb().
// Note the cost here is 300, while the long form jmpConUCF declares 200.
12307 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12308   match(If cop cmp);
12309   effect(USE labl);
12310 
12311   ins_cost(300);
12312   format %{ "J$cop,us $labl" %}
12313   size(2);
12314   ins_encode %{
12315     Label* L = $labl$$label;
12316     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12317   %}
12318   ins_pipe( pipe_jcc );
12319   ins_short_branch(1);
12320 %}
12321 
// Short-offset replacement for jmpConUCF2. Same two-branch logic (parity
// check plus equal/notEqual), but every branch uses the short jccb form, for
// a declared total size of 4 bytes. Conditions other than equal/notEqual hit
// ShouldNotReachHere().
12322 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12323   match(If cop cmp);
12324   effect(USE labl);
12325 
12326   ins_cost(300);
12327   format %{ $$template
12328     if ($cop$$cmpcode == Assembler::notEqual) {
12329       $$emit$$"JP,u,s   $labl\n\t"
12330       $$emit$$"J$cop,u,s   $labl"
12331     } else {
12332       $$emit$$"JP,u,s   done\n\t"
12333       $$emit$$"J$cop,u,s  $labl\n\t"
12334       $$emit$$"done:"
12335     }
12336   %}
12337   size(4);
12338   ins_encode %{
12339     Label* l = $labl$$label;
12340     if ($cop$$cmpcode == Assembler::notEqual) {
            // Taken when parity is set or the operands differ.
12341       __ jccb(Assembler::parity, *l);
12342       __ jccb(Assembler::notEqual, *l);
12343     } else if ($cop$$cmpcode == Assembler::equal) {
            // Parity set: fall through past the equal branch.
12344       Label done;
12345       __ jccb(Assembler::parity, done);
12346       __ jccb(Assembler::equal, *l);
12347       __ bind(done);
12348     } else {
12349        ShouldNotReachHere();
12350     }
12351   %}
12352   ins_pipe(pipe_jcc);
12353   ins_short_branch(1);
12354 %}
12355 
12356 // ============================================================================
12357 // Long Compare
12358 //
12359 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12360 // is tricky.  The flavor of compare used depends on whether we are testing
12361 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12362 // The GE test is the negated LT test.  The LE test can be had by commuting
12363 // the operands (yielding a GE test) and then negating; negate again for the
12364 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12365 // NE test is negated from that.
12366 
12367 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12368 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12369 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12370 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12371 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12372 // foo match ends up with the wrong leaf.  One fix is to not match both
12373 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12374 // both forms beat the trinary form of long-compare and both are very useful
12375 // on Intel which has so few registers.
12376 
12377 // Manifest a CmpL result in an integer register.  Very painful.
12378 // This is the test to avoid.
// Three-way long compare (CmpL3) materialised in an integer register:
// $dst ends up -1, 0 or +1. Flags are killed.
// $dst is zeroed first; the high halves are compared as signed values, and
// only if they are equal are the low halves compared as unsigned values
// ("below"). The p_one / m_one paths then increment or decrement $dst.
12379 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12380   match(Set dst (CmpL3 src1 src2));
12381   effect( KILL flags );
12382   ins_cost(1000);
12383   format %{ "XOR    $dst,$dst\n\t"
12384             "CMP    $src1.hi,$src2.hi\n\t"
12385             "JLT,s  m_one\n\t"
12386             "JGT,s  p_one\n\t"
12387             "CMP    $src1.lo,$src2.lo\n\t"
12388             "JB,s   m_one\n\t"
12389             "JEQ,s  done\n"
12390     "p_one:\tINC    $dst\n\t"
12391             "JMP,s  done\n"
12392     "m_one:\tDEC    $dst\n"
12393      "done:" %}
12394   ins_encode %{
12395     Label p_one, m_one, done;
12396     __ xorptr($dst$$Register, $dst$$Register);
          // High halves decide the result unless they are equal (signed compare).
12397     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12398     __ jccb(Assembler::less,    m_one);
12399     __ jccb(Assembler::greater, p_one);
          // High halves equal: low halves decide (unsigned compare).
12400     __ cmpl($src1$$Register, $src2$$Register);
12401     __ jccb(Assembler::below,   m_one);
12402     __ jccb(Assembler::equal,   done);
12403     __ bind(p_one);
12404     __ incrementl($dst$$Register);
12405     __ jmpb(done);
12406     __ bind(m_one);
12407     __ decrementl($dst$$Register);
12408     __ bind(done);
12409   %}
12410   ins_pipe( pipe_slow );
12411 %}
12412 
12413 //======
// Long compare against zero, result in the normal flags.  Valid only for
// LT or GE tests (LE/GT are reachable by reversing the arguments).
// NOT GOOD FOR EQ/NE tests: only the high half of $src is tested.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match(Set flags (CmpL src zero));
  ins_cost(100);
  opcode(0x85);
  format %{ "TEST   $src.hi,$src.hi" %}
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}
12425 
// Long reg-reg compare, result in the normal flags.  Valid only for LT or GE
// tests (LE/GT are reachable by reversing the arguments).
// NOT GOOD FOR EQ/NE tests.  Needs an integer temp for the high-half SBB.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match(Set flags (CmpL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV    $tmp,$src1.hi\n\t"
            "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
12439 
// Long compares: reg < zero/reg OR reg >= zero/reg.
// Thin wrapper that expands to a normal conditional branch; the predicate
// restricts it to the LT and GE tests.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(If cmp flags);
  effect(USE labl);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}
12450 
12451 //======
// Unsigned long compare against zero, result in the normal flags.  Valid
// only for LT or GE tests (LE/GT are reachable by reversing the arguments).
// NOT GOOD FOR EQ/NE tests: only the high half of $src is tested.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match( Set flags (CmpUL src zero) );
  ins_cost( 100 );
  opcode( 0x85 );
  format %{ "TEST   $src.hi,$src.hi" %}
  ins_encode( OpcP, RegReg_Hi2(src, src) );
  ins_pipe( ialu_cr_reg_reg );
%}
12463 
// Unsigned long reg-reg compare, result in the normal flags.  Valid only for
// LT or GE tests (LE/GT are reachable by reversing the arguments).
// NOT GOOD FOR EQ/NE tests.  Needs an integer temp for the high-half SBB.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match( Set flags (CmpUL src1 src2) );
  effect( TEMP tmp );
  ins_cost( 300 );
  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV    $tmp,$src1.hi\n\t"
            "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode( long_cmp_flags2(src1, src2, tmp) );
  ins_pipe( ialu_cr_reg_reg );
%}
12477 
// Unsigned long compares: reg < zero/reg OR reg >= zero/reg.
// Thin wrapper that expands to a normal conditional branch; the predicate
// restricts it to the LT and GE tests.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  match( If cmp flags );
  effect( USE labl );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}
12488 
// Conditional move of a long (register source) driven by a long compare,
// LT/GE tests only.  Requires CMOV support; emits one CMOV per 32-bit half.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match( Set dst (CMoveL (Binary cmp flags) (Binary dst src)) );
  ins_cost( 400 );
  opcode( 0x0F, 0x40 );
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  ins_encode( enc_cmov(cmp), RegReg_Lo2(dst, src),    // low half
              enc_cmov(cmp), RegReg_Hi2(dst, src) );  // high half
  ins_pipe( pipe_cmov_reg_long );
%}
12500 
// Conditional move of a long loaded from memory, driven by a long compare,
// LT/GE tests only.  Requires CMOV support; emits one CMOV per 32-bit half.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match( Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))) );
  ins_cost( 500 );
  opcode( 0x0F, 0x40 );
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  ins_encode( enc_cmov(cmp), RegMem(dst, src),       // low half
              enc_cmov(cmp), RegMem_Hi(dst, src) );  // high half
  ins_pipe( pipe_cmov_reg_long );
%}
12511 
// Conditional move of an int (register source) driven by a long compare,
// LT/GE tests only.  Requires CMOV support.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(200);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst,$src" %}
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12522 
// Conditional move of an int loaded from memory, driven by a long compare,
// LT/GE tests only.  Requires CMOV support.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(250);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst,$src" %}
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}
12532 
// Compare 2 longs and CMOVE ptrs (LT/GE tests only).  Requires CMOV support.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(200);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst,$src" %}
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12543 
// Compare 2 longs and CMOVE doubles (regDPR operands, UseSSE<=1), LT/GE
// tests only.  Expands to fcmovDPR_regS.
// The two Bool-test alternatives are parenthesized so that the UseSSE guard
// applies to both of them; '&&' binds tighter than '||', so without the
// parentheses the GE test would bypass the UseSSE check.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
12553 
// Compare 2 longs and CMOVE doubles (regD operands, UseSSE>=2), LT/GE tests
// only.  Expands to fcmovD_regS.
// The two Bool-test alternatives are parenthesized so that the UseSSE guard
// applies to both of them; '&&' binds tighter than '||', so without the
// parentheses the GE test would bypass the UseSSE check.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
12563 
// Compare 2 longs and CMOVE floats (regFPR operands, UseSSE==0), LT/GE tests
// only.  Expands to fcmovFPR_regS.
// The two Bool-test alternatives are parenthesized so that the UseSSE guard
// applies to both of them; '&&' binds tighter than '||', so without the
// parentheses the GE test would bypass the UseSSE check.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
12572 
// Compare 2 longs and CMOVE floats (regF operands, UseSSE>=1), LT/GE tests
// only.  Expands to fcmovF_regS.
// The two Bool-test alternatives are parenthesized so that the UseSSE guard
// applies to both of them; '&&' binds tighter than '||', so without the
// parentheses the GE test would bypass the UseSSE check.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
12581 
12582 //======
// Long compare against zero, result in the normal flags.  Valid only for
// EQ/NE tests: the two halves are OR'ed together in a temp.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match(Set flags (CmpL src zero));
  effect( TEMP tmp );
  ins_cost( 200 );
  format %{ "MOV    $tmp,$src.lo\n\t"
            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
12593 
// Long reg-reg compare, result in the normal flags.  Valid only for EQ/NE
// tests: the high halves are compared only when the low halves are equal.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match(Set flags (CmpL src1 src2));
  ins_cost( 200+300 );
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s  skip\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
     "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}
12605 
// Long compare: reg == zero/reg OR reg != zero/reg.
// Thin wrapper that expands to a normal conditional branch; the predicate
// restricts it to the EQ and NE tests.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(If cmp flags);
  effect(USE labl);
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}
12616 
12617 //======
// Unsigned long compare against zero, result in the normal flags.  Valid
// only for EQ/NE tests: the two halves are OR'ed together in a temp.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match( Set flags (CmpUL src zero) );
  effect( TEMP tmp );
  ins_cost( 200 );
  format %{ "MOV    $tmp,$src.lo\n\t"
            "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}
12628 
// Unsigned long reg-reg compare, result in the normal flags.  Valid only for
// EQ/NE tests: the high halves are compared only when the low halves are equal.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match( Set flags (CmpUL src1 src2) );
  ins_cost( 200+300 );
  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s  skip\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
     "skip:\t" %}
  ins_encode( long_cmp_flags1(src1, src2) );
  ins_pipe( ialu_cr_reg_reg );
%}
12640 
// Unsigned long compare: reg == zero/reg OR reg != zero/reg.
// Thin wrapper that expands to a normal conditional branch; the predicate
// restricts it to the EQ and NE tests.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  match( If cmp flags );
  effect( USE labl );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}
12651 
// Conditional move of a long (register source) driven by a long compare,
// EQ/NE tests only.  Requires CMOV support; emits one CMOV per 32-bit half.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match( Set dst (CMoveL (Binary cmp flags) (Binary dst src)) );
  ins_cost( 400 );
  opcode( 0x0F, 0x40 );
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  ins_encode( enc_cmov(cmp), RegReg_Lo2(dst, src),    // low half
              enc_cmov(cmp), RegReg_Hi2(dst, src) );  // high half
  ins_pipe( pipe_cmov_reg_long );
%}
12663 
// Conditional move of a long loaded from memory, driven by a long compare,
// EQ/NE tests only.  Requires CMOV support; emits one CMOV per 32-bit half.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match( Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))) );
  ins_cost( 500 );
  opcode( 0x0F, 0x40 );
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  ins_encode( enc_cmov(cmp), RegMem(dst, src),       // low half
              enc_cmov(cmp), RegMem_Hi(dst, src) );  // high half
  ins_pipe( pipe_cmov_reg_long );
%}
12674 
// Conditional move of an int (register source) driven by a long compare,
// EQ/NE tests only.  Requires CMOV support.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(200);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst,$src" %}
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12685 
// Conditional move of an int loaded from memory, driven by a long compare,
// EQ/NE tests only.  Requires CMOV support.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(250);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst,$src" %}
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}
12695 
// Compare 2 longs and CMOVE ptrs (EQ/NE tests only).  Requires CMOV support.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(200);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst,$src" %}
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12706 
// Compare 2 longs and CMOVE doubles (regDPR operands, UseSSE<=1), EQ/NE
// tests only.  Expands to fcmovDPR_regS.
// The two Bool-test alternatives are parenthesized so that the UseSSE guard
// applies to both of them; '&&' binds tighter than '||', so without the
// parentheses the NE test would bypass the UseSSE check.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
12716 
// Compare 2 longs and CMOVE doubles (regD operands, UseSSE>=2), EQ/NE tests
// only.  Expands to fcmovD_regS.
// The two Bool-test alternatives are parenthesized so that the UseSSE guard
// applies to both of them; '&&' binds tighter than '||', so without the
// parentheses the NE test would bypass the UseSSE check.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
12726 
// Compare 2 longs and CMOVE floats (regFPR operands, UseSSE==0), EQ/NE tests
// only.  Expands to fcmovFPR_regS.
// The two Bool-test alternatives are parenthesized so that the UseSSE guard
// applies to both of them; '&&' binds tighter than '||', so without the
// parentheses the NE test would bypass the UseSSE check.
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
12735 
// Compare 2 longs and CMOVE floats (regF operands, UseSSE>=1), EQ/NE tests
// only.  Expands to fcmovF_regS.
// The two Bool-test alternatives are parenthesized so that the UseSSE guard
// applies to both of them; '&&' binds tighter than '||', so without the
// parentheses the NE test would bypass the UseSSE check.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
12744 
12745 //======
// Long compare against zero, result in the normal flags.  Valid only for
// LE or GT tests.  Like cmpL_reg_flags_LEGT, but with zero as the swapped-in
// first operand (the flags describe 0 - src); use with a commuted test.
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match(Set flags (CmpL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
12758 
// Long reg-reg compare, result in the normal flags.  Valid only for LE or GT
// tests.  Same sequence as cmpL_reg_flags_LTGE but with the operands swapped
// (note src2, src1 in the encoding), which requires a commuted test to get
// the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match(Set flags (CmpL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode(long_cmp_flags2(src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
12772 
// Long compares: reg > zero/reg OR reg <= zero/reg.
// Thin wrapper that expands to a normal conditional branch using the
// commuted compare operand; the predicate restricts it to GT and LE tests.
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  match(If cmp flags);
  effect(USE labl);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}
12784 
12785 //======
// Unsigned long compare against zero, result in the normal flags.  Valid
// only for LE or GT tests.  Like cmpUL_reg_flags_LEGT, but with zero as the
// swapped-in first operand (the flags describe 0 - src); use with a
// commuted test.
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match( Set flags (CmpUL src zero) );
  effect( TEMP tmp );
  ins_cost( 300 );
  format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}
12798 
// Unsigned long reg-reg compare, result in the normal flags.  Valid only for
// LE or GT tests.  Same sequence as cmpUL_reg_flags_LTGE but with the
// operands swapped (note src2, src1 in the encoding), which requires a
// commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match( Set flags (CmpUL src1 src2) );
  effect( TEMP tmp );
  ins_cost( 300 );
  format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode( long_cmp_flags2(src2, src1, tmp) );
  ins_pipe( ialu_cr_reg_reg );
%}
12812 
// Unsigned long compares: reg > zero/reg OR reg <= zero/reg.
// Thin wrapper that expands to a normal conditional branch using the
// commuted compare operand; the predicate restricts it to GT and LE tests.
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  match( If cmp flags );
  effect( USE labl );
  ins_cost( 300 );
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}
12824 
// Conditional move of a long (register source) driven by a long compare,
// LE/GT tests only (commuted compare operand).  Requires CMOV support;
// emits one CMOV per 32-bit half.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match( Set dst (CMoveL (Binary cmp flags) (Binary dst src)) );
  ins_cost( 400 );
  opcode( 0x0F, 0x40 );
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  ins_encode( enc_cmov(cmp), RegReg_Lo2(dst, src),    // low half
              enc_cmov(cmp), RegReg_Hi2(dst, src) );  // high half
  ins_pipe( pipe_cmov_reg_long );
%}
12836 
// Conditional move of a long loaded from memory, driven by a long compare,
// LE/GT tests only (commuted compare operand).  Requires CMOV support;
// emits one CMOV per 32-bit half.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match( Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))) );
  ins_cost( 500 );
  opcode( 0x0F, 0x40 );
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  ins_encode( enc_cmov(cmp), RegMem(dst, src),       // low half
              enc_cmov(cmp), RegMem_Hi(dst, src) );  // high half
  ins_pipe( pipe_cmov_reg_long );
%}
12847 
// Conditional move of an int (register source) driven by a long compare,
// LE/GT tests only (commuted compare operand).  Requires CMOV support.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(200);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst,$src" %}
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12858 
// Conditional move of an int loaded from memory, driven by a long compare,
// LE/GT tests only (commuted compare operand).  Requires CMOV support.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(250);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst,$src" %}
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}
12868 
// Conditional move of a pointer driven by a long compare, LE/GT tests only
// (commuted compare operand).  Requires CMOV support.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(200);
  opcode(0x0F, 0x40);
  format %{ "CMOV$cmp $dst,$src" %}
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12879 
// Compare 2 longs and CMOVE doubles (regDPR operands, UseSSE<=1), LE/GT
// tests only (commuted compare operand).  Expands to fcmovDPR_regS.
// The two Bool-test alternatives are parenthesized so that the UseSSE guard
// applies to both of them; '&&' binds tighter than '||', so without the
// parentheses the GT test would bypass the UseSSE check.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
12889 
// Compare 2 longs and CMOVE doubles (regD operands, UseSSE>=2), LE/GT tests
// only (commuted compare operand).  Expands to fcmovD_regS.
// The two Bool-test alternatives are parenthesized so that the UseSSE guard
// applies to both of them; '&&' binds tighter than '||', so without the
// parentheses the GT test would bypass the UseSSE check.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
12899 
// Compare 2 longs and CMOVE floats (regFPR operands, UseSSE==0), LE/GT tests
// only (commuted compare operand).  Expands to fcmovFPR_regS.
// The two Bool-test alternatives are parenthesized so that the UseSSE guard
// applies to both of them; '&&' binds tighter than '||', so without the
// parentheses the GT test would bypass the UseSSE check.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
12908 
12909 
// Compare 2 longs and CMOVE floats (regF operands, UseSSE>=1), LE/GT tests
// only (commuted compare operand).  Expands to fcmovF_regS.
// The two Bool-test alternatives are parenthesized so that the UseSSE guard
// applies to both of them; '&&' binds tighter than '||', so without the
// parentheses the GT test would bypass the UseSSE check.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
12918 
12919 
12920 // ============================================================================
12921 // Procedure Call/Return Instructions
// Static Java call.
// Note: ret_addr_offset() and compute_padding() depend on the code emitted
//       here; if this instruction changes they will have to be adjusted too.
instruct CallStaticJavaDirect(method meth) %{
  match( CallStaticJava );
  effect( USE meth );
  ins_cost( 300 );

  opcode( 0xE8 ); /* E8 cd */
  format %{ "CALL,static " %}
  ins_encode( pre_call_resets,
              Java_Static_Call(meth),
              call_epilog,
              post_call_FPU );
  ins_alignment( 4 );
  ins_pipe( pipe_slow );
%}
12939 
// Dynamic Java call.
// Note: ret_addr_offset() and compute_padding() depend on the code emitted
//       here; if this instruction changes they will have to be adjusted too.
instruct CallDynamicJavaDirect(method meth) %{
  match( CallDynamicJava );
  effect( USE meth );
  ins_cost( 300 );

  opcode( 0xE8 ); /* E8 cd */
  format %{ "MOV    EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  ins_encode( pre_call_resets,
              Java_Dynamic_Call(meth),
              call_epilog,
              post_call_FPU );
  ins_alignment( 4 );
  ins_pipe( pipe_slow );
%}
12958 
// Call into the runtime.
instruct CallRuntimeDirect(method meth) %{
  match( CallRuntime );
  effect( USE meth );
  ins_cost( 300 );

  opcode( 0xE8 ); /* E8 cd */
  format %{ "CALL,runtime " %}
  // FFREEs are used to clear the entries in the float stack before the call.
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime(meth),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}
12974 
// Leaf call into the runtime (no safepoint).
instruct CallLeafDirect(method meth) %{
  match( CallLeaf );
  effect( USE meth );
  ins_cost( 300 );

  opcode( 0xE8 ); /* E8 cd */
  format %{ "CALL_LEAF,runtime " %}
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime(meth),
              Verify_FPU_For_Leaf,
              post_call_FPU );
  ins_pipe( pipe_slow );
%}
12989 
// Leaf runtime call for CallLeafNoFP: the encoding is just the call itself,
// with none of the pre/post-call encodings used by CallLeafDirect.
instruct CallLeafNoFPDirect(method meth) %{
  match( CallLeafNoFP );
  effect( USE meth );
  ins_cost( 300 );

  opcode( 0xE8 ); /* E8 cd */
  format %{ "CALL_LEAF_NOFP,runtime " %}
  ins_encode( Java_To_Runtime(meth) );
  ins_pipe( pipe_slow );
%}
13000 
13001 
// Return: pop the return address and jump to it (single-byte opcode 0xC3).
instruct Ret() %{
  match( Return );
  opcode( 0xC3 );
  format %{ "RET" %}
  ins_encode( OpcP );
  ins_pipe( pipe_jmp );
%}
13011 
// Tail Call, also known as an 'interprocedural jump': jump from a runtime
// stub to Java code.  The jump target will eventually return to the caller,
// so the return address is left in place (TailJump below removes it).
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match( TailCall jump_target method_oop );
  ins_cost( 300 );
  opcode( 0xFF, 0x4 );  /* Opcode FF /4 */
  format %{ "JMP    $jump_target \t# EBX holds method oop" %}
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13024 
13025 
// Tail Jump: pop (discard) the return address, then jump to the target.
// Contrast with TailCall above, which leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match(TailJump jump_target ex_oop);
  ins_cost( 300 );
  opcode( 0xFF, 0x4 );  /* Opcode FF /4 */
  format %{ "POP    EDX\t# pop return address into dummy\n\t"
            "JMP    $jump_target " %}
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13038 
// Create exception oop.  The oop is produced by the stack-crawling runtime
// code and is already set up (in EAX) just before control reaches this
// handler, so this instruction has size 0 and emits no code.
instruct CreateException( eAXRegP ex_oop ) %{
  match( Set ex_oop (CreateEx) );

  size( 0 );
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}
13052 
13053 
// Rethrow exception.  The exception oop arrives in the first argument
// position; control is transferred to the rethrow stub with a JUMP, not a
// call.
instruct RethrowException() %{
  match( Rethrow );

  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  ins_encode( enc_rethrow );
  ins_pipe( pipe_jmp );
%}
13066 
13067 // inlined locking and unlocking
13068 
// Inlined fast-path monitor enter, selected when the current compilation
// uses RTM.  Result is left in the flags; box is used and killed, and
// tmp, scr, cx1 and cx2 are scratch registers.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match( Set cr (FastLock object box) );
  effect( TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box );
  ins_cost( 300 );
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    Register obj_reg  = $object$$Register;
    Register box_reg  = $box$$Register;
    Register tmp_reg  = $tmp$$Register;
    Register scr_reg  = $scr$$Register;
    Register cx1_reg  = $cx1$$Register;
    Register cx2_reg  = $cx2$$Register;
    // The trailing 'true' selects the RTM variant; method data and the
    // profile_rtm() flag are passed through from the current compilation.
    __ fast_lock(obj_reg, box_reg, tmp_reg, scr_reg, cx1_reg, cx2_reg,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe( pipe_slow );
%}
13084 
// Inlined fast-path monitor enter, selected when the current compilation
// does not use RTM.  Result is left in the flags; box is used and killed,
// and tmp and scr are scratch registers.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match( Set cr (FastLock object box) );
  effect( TEMP tmp, TEMP scr, USE_KILL box );
  ins_cost( 300 );
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    Register obj_reg = $object$$Register;
    Register box_reg = $box$$Register;
    Register tmp_reg = $tmp$$Register;
    Register scr_reg = $scr$$Register;
    // No extra scratch registers and no RTM counters/method data on this path.
    __ fast_lock(obj_reg, box_reg, tmp_reg, scr_reg,
                 noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe( pipe_slow );
%}
13097 
13098 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13099   match(Set cr (FastUnlock object box));  // no predicate: this single rule is used with and without RTM
13100   effect(TEMP tmp, USE_KILL box);  // tmp is scratch; box is an input that is also clobbered
13101   ins_cost(300);
13102   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13103   ins_encode %{
13104     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());  // the compilation's use_rtm() setting is passed as the last argument at emit time
13105   %}
13106   ins_pipe(pipe_slow);
13107 %}
13108 
13109 
13110 
13111 // ============================================================================
13112 // Safepoint Instruction
13113 instruct safePoint_poll(eFlagsReg cr) %{
13114   match(SafePoint);  // selected for the ideal SafePoint node
13115   effect(KILL cr);  // the flags register is clobbered by the poll
13116 
13117   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13118   // On SPARC that might be acceptable as we can generate the address with
13119   // just a sethi, saving an or.  By polling at offset 0 we can end up
13120   // putting additional pressure on index 0 of the data cache (D$).  Because
13121   // of alignment (just like the situation at hand) the lower indices tend
13122   // to see more traffic.  It'd be better to change the polling address
13123   // to offset 0 of the last cache line in the polling page.
13124 
13125   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13126   ins_cost(125);
13127   size(6) ;  // declared instruction size; presumably bytes -- must agree with what Safepoint_Poll() emits
13128   ins_encode( Safepoint_Poll() );  // encoding class defined outside this section
13129   ins_pipe( ialu_reg_mem );
13130 %}
13131 
13132 
13133 // ============================================================================
13134 // The instruction name 'tlsLoadP' is KNOWN by the ADLC and cannot be changed.
13135 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13136 // for this instruction.
13137 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13138   match(Set dst (ThreadLocal));  // selected for the ideal ThreadLocal node
13139   effect(DEF dst, KILL cr);  // dst is written; the flags register is clobbered
13140 
13141   format %{ "MOV    $dst, Thread::current()" %}
13142   ins_encode %{
13143     Register dstReg = as_Register($dst$$reg);  // turn the allocated register encoding into a Register
13144     __ get_thread(dstReg);  // presumably leaves the current thread pointer in dstReg, as the format text suggests
13145   %}
13146   ins_pipe( ialu_reg_fat );
13147 %}
13148 
13149 
13150 
13151 //----------PEEPHOLE RULES-----------------------------------------------------
13152 // These must follow all instruction definitions as they use the names
13153 // defined in the instructions definitions.
13154 //
13155 // peepmatch ( root_instr_name [preceding_instruction]* );
13156 //
13157 // peepconstraint (
13158 //   instruction_number.operand_name relational_op instruction_number.operand_name
13159 //   [, ...] );
13160 // // instruction numbers are zero-based using left to right order in peepmatch
13161 //
13162 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13163 // // provide an instruction_number.operand_name for each operand that appears
13164 // // in the replacement instruction's match rule
13165 //
13166 // ---------VM FLAGS---------------------------------------------------------
13167 //
13168 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13169 //
13170 // Each peephole rule is given an identifying number starting with zero and
13171 // increasing by one in the order seen by the parser.  An individual peephole
13172 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13173 // on the command-line.
13174 //
13175 // ---------CURRENT LIMITATIONS----------------------------------------------
13176 //
13177 // Only match adjacent instructions in same basic block
13178 // Only equality constraints
13179 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13180 // Only one replacement instruction
13181 //
13182 // ---------EXAMPLE----------------------------------------------------------
13183 //
13184 // // pertinent parts of existing instructions in architecture description
13185 // instruct movI(rRegI dst, rRegI src) %{
13186 //   match(Set dst (CopyI src));
13187 // %}
13188 //
13189 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13190 //   match(Set dst (AddI dst src));
13191 //   effect(KILL cr);
13192 // %}
13193 //
13194 // // Change (inc mov) to lea
13195 // peephole %{
13196 //   // increment preceded by register-register move
13197 //   peepmatch ( incI_eReg movI );
13198 //   // require that the destination register of the increment
13199 //   // match the destination register of the move
13200 //   peepconstraint ( 0.dst == 1.dst );
13201 //   // construct a replacement instruction that sets
13202 //   // the destination to ( move's source register + one )
13203 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13204 // %}
13205 //
13206 // Implementation no longer uses movX instructions since
13207 // machine-independent system no longer uses CopyX nodes.
13208 //
13209 // peephole %{
13210 //   peepmatch ( incI_eReg movI );
13211 //   peepconstraint ( 0.dst == 1.dst );
13212 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13213 // %}
13214 //
13215 // peephole %{
13216 //   peepmatch ( decI_eReg movI );
13217 //   peepconstraint ( 0.dst == 1.dst );
13218 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13219 // %}
13220 //
13221 // peephole %{
13222 //   peepmatch ( addI_eReg_imm movI );
13223 //   peepconstraint ( 0.dst == 1.dst );
13224 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13225 // %}
13226 //
13227 // peephole %{
13228 //   peepmatch ( addP_eReg_imm movP );
13229 //   peepconstraint ( 0.dst == 1.dst );
13230 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13231 // %}
13232 
13233 // // Change load of spilled value to only a spill
13234 // instruct storeI(memory mem, rRegI src) %{
13235 //   match(Set mem (StoreI mem src));
13236 // %}
13237 //
13238 // instruct loadI(rRegI dst, memory mem) %{
13239 //   match(Set dst (LoadI mem));
13240 // %}
13241 //
13242 peephole %{
13243   peepmatch ( loadI storeI );  // instruction 0 is the root loadI, instruction 1 the storeI that precedes it
13244   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );  // the load reads back, into the same register, what the store just wrote to the same memory operand
13245   peepreplace ( storeI( 1.mem 1.mem 1.src ) );  // keep only the store; mem is listed twice because it appears twice in storeI's match rule
13246 %}
13247 
13248 //----------SMARTSPILL RULES---------------------------------------------------
13249 // These must follow all instruction definitions as they use the names
13250 // defined in the instructions definitions.
--- EOF ---