1 // 2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // architecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 
//
// SOC = Save-On-Call: The register allocator assumes that these registers
//                     can be used without saving upon entry to the method,
//                     but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS = Always-Save:   The register allocator assumes that these registers
//                     must be saved before using them upon entry to the
//                     method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

// The 4th field is the x86 hardware encoding of the register (bits placed
// directly into ModR/M reg / r/m fields).
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Each x87 register is described as an L (low) / H (high) 32-bit half so
// that a double occupies an aligned register pair.
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
// (register pairs are laid out two OptoReg slots apart on x86_32).
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// One-time adjustment of the allocatable register masks, called at VM startup.
void reg_mask_init() {
  if (Matcher::has_predicated_vectors()) {
    // Post-loop multi-versioning expects the mask to be present in the K1
    // register; until that is fixed, the RA should not be allocating the K1
    // register. Removing it here prevents any accidental corruption of the
    // value held in K1.
    if (PostLoopMultiversioning) {
      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
    }
  }
}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Copy a 128-bit (lo,hi) constant into the first 16-byte-aligned slot at or
// below 'adr' and return that aligned address, for use as an SSE memory operand.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool entry starts one quadword past the previous so double_quadword's
// rounding-down always lands inside the buffer.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted before the call instruction proper (FPU control
// word reload and/or vzeroupper), needed to locate the return address.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All sequence, measured when it is first
// emitted; -1 until then (see the assert below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  // Leaf calls that touch no FP state skip the FPU-stack flush.
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

int MachCallNativeNode::ret_addr_offset() {
  ShouldNotCallThis();
  return -1;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M (or SIB) byte: f1 in bits 7:6, f2 in bits 5:3, f3 in bits 2:0.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a condition code into a base opcode.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
// 'offset' adjusts the relocation position relative to the instruction mark.
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Embedded oops must be valid (0 and the non-oop sentinel are allowed).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits the ModR/M + SIB + displacement for an [ESP+disp] operand after the
// given opcode, using the short (8-bit) displacement form when it fits.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

//   rRegI ereg, memory mem) %{    // emit_reg_mem
// Encode a register-to-memory addressing form (ModR/M [+ SIB] [+ disp]).
// An index encoding of 0x4 means "no index register".
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {  // base is ESP: SIB base field must also be 0x04
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Emit a 32-bit register-to-register move (MOV dst,src); a self-move emits nothing.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize the 3-way FP compare result in 'dst':
// -1 for less-than or unordered, 0 for equal, 1 for greater-than.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the method prolog; must track the layout produced by emit().
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // verified_entry() emits the whole prolog (stack bang, frame push, etc.).
  __ verified_entry(C);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the method epilog; must track the layout produced by emit().
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW  standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL   EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize (32-bit immediate)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize (sign-extended 8-bit immediate)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    // NOTE(review): 'masm' below appears unused; '__' expands to _masm — confirm.
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//=============================================================================

// Register class of a spill-copy endpoint, used to pick the move sequence.
enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  if (r->is_KRegister()) return rc_kreg;
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or just size/format) a single [ESP+offset] load/store for a spill
// copy; returns the accumulated encoding size.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                   // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Spill/reload a float or double between an XMM register (reg_lo[/reg_hi])
// and the stack slot [ESP + offset].  An adjacent reg pair (reg_lo+1 ==
// reg_hi) means a 64-bit (double) move, otherwise 32-bit (float).
// Returns the accumulated encoded size in bytes; EVEX-encoded spills can
// use compressed disp8 and cost two extra prefix bytes.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting: EVEX (UseAVX > 2) can compress the displacement to
  // one byte for suitably aligned offsets; otherwise plain disp8/disp32.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (float or double, chosen by reg-pair adjacency).
// Returns accumulated encoded size; see the prefix-size note at the bottom.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// GPR -> XMM copy via MOVD (32-bit payload only; asserted by the caller).
// Returns the instruction size (EVEX costs two extra prefix bytes).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// XMM -> GPR copy via MOVD (32-bit payload only; asserted by the caller).
// Returns the instruction size (EVEX costs two extra prefix bytes).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer GPR -> GPR copy (MOV r32, r/m32; opcode 0x8B + ModRM = 2 bytes).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(cbuf ? *cbuf : *cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 float register to [ESP + offset].  If the source is not
// already on top of the FP stack it is first pushed with FLD, then stored
// with a popping FSTP; the top-of-stack case uses a non-popping FST.
// NOTE: st_op below abuses EBX_num/EDX_num purely as reg-field encodings
// for the FSTP (3) / FST (2) ModRM reg field passed to impl_helper.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
958 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, 959 int src_hi, int dst_hi, uint ireg, outputStream* st); 960 961 void vec_spill_helper(CodeBuffer *cbuf, bool is_load, 962 int stack_offset, int reg, uint ireg, outputStream* st); 963 964 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, 965 int dst_offset, uint ireg, outputStream* st) { 966 if (cbuf) { 967 MacroAssembler _masm(cbuf); 968 switch (ireg) { 969 case Op_VecS: 970 __ pushl(Address(rsp, src_offset)); 971 __ popl (Address(rsp, dst_offset)); 972 break; 973 case Op_VecD: 974 __ pushl(Address(rsp, src_offset)); 975 __ popl (Address(rsp, dst_offset)); 976 __ pushl(Address(rsp, src_offset+4)); 977 __ popl (Address(rsp, dst_offset+4)); 978 break; 979 case Op_VecX: 980 __ movdqu(Address(rsp, -16), xmm0); 981 __ movdqu(xmm0, Address(rsp, src_offset)); 982 __ movdqu(Address(rsp, dst_offset), xmm0); 983 __ movdqu(xmm0, Address(rsp, -16)); 984 break; 985 case Op_VecY: 986 __ vmovdqu(Address(rsp, -32), xmm0); 987 __ vmovdqu(xmm0, Address(rsp, src_offset)); 988 __ vmovdqu(Address(rsp, dst_offset), xmm0); 989 __ vmovdqu(xmm0, Address(rsp, -32)); 990 break; 991 case Op_VecZ: 992 __ evmovdquq(Address(rsp, -64), xmm0, 2); 993 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 994 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 995 __ evmovdquq(xmm0, Address(rsp, -64), 2); 996 break; 997 default: 998 ShouldNotReachHere(); 999 } 1000 #ifndef PRODUCT 1001 } else { 1002 switch (ireg) { 1003 case Op_VecS: 1004 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1005 "popl [rsp + #%d]", 1006 src_offset, dst_offset); 1007 break; 1008 case Op_VecD: 1009 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1010 "popq [rsp + #%d]\n\t" 1011 "pushl [rsp + #%d]\n\t" 1012 "popq [rsp + #%d]", 1013 src_offset, dst_offset, src_offset+4, dst_offset+4); 1014 break; 1015 case Op_VecX: 1016 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1017 "movdqu xmm0, [rsp + 
#%d]\n\t" 1018 "movdqu [rsp + #%d], xmm0\n\t" 1019 "movdqu xmm0, [rsp - #16]", 1020 src_offset, dst_offset); 1021 break; 1022 case Op_VecY: 1023 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1024 "vmovdqu xmm0, [rsp + #%d]\n\t" 1025 "vmovdqu [rsp + #%d], xmm0\n\t" 1026 "vmovdqu xmm0, [rsp - #32]", 1027 src_offset, dst_offset); 1028 break; 1029 case Op_VecZ: 1030 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1031 "vmovdqu xmm0, [rsp + #%d]\n\t" 1032 "vmovdqu [rsp + #%d], xmm0\n\t" 1033 "vmovdqu xmm0, [rsp - #64]", 1034 src_offset, dst_offset); 1035 break; 1036 default: 1037 ShouldNotReachHere(); 1038 } 1039 #endif 1040 } 1041 } 1042 1043 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1044 // Get registers to move 1045 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1046 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1047 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1048 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1049 1050 enum RC src_second_rc = rc_class(src_second); 1051 enum RC src_first_rc = rc_class(src_first); 1052 enum RC dst_second_rc = rc_class(dst_second); 1053 enum RC dst_first_rc = rc_class(dst_first); 1054 1055 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1056 1057 // Generate spill code! 
1058 int size = 0; 1059 1060 if( src_first == dst_first && src_second == dst_second ) 1061 return size; // Self copy, no move 1062 1063 if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) { 1064 uint ireg = ideal_reg(); 1065 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1066 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1067 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1068 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1069 // mem -> mem 1070 int src_offset = ra_->reg2offset(src_first); 1071 int dst_offset = ra_->reg2offset(dst_first); 1072 vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); 1073 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1074 vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st); 1075 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1076 int stack_offset = ra_->reg2offset(dst_first); 1077 vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st); 1078 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1079 int stack_offset = ra_->reg2offset(src_first); 1080 vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st); 1081 } else { 1082 ShouldNotReachHere(); 1083 } 1084 return 0; 1085 } 1086 1087 // -------------------------------------- 1088 // Check for mem-mem move. push/pop to move. 
1089 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1090 if( src_second == dst_first ) { // overlapping stack copy ranges 1091 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1092 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1093 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1094 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1095 } 1096 // move low bits 1097 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1098 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1099 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1100 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1101 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1102 } 1103 return size; 1104 } 1105 1106 // -------------------------------------- 1107 // Check for integer reg-reg copy 1108 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1109 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1110 1111 // Check for integer store 1112 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1113 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1114 1115 // Check for integer load 1116 if( src_first_rc == rc_stack && dst_first_rc == rc_int ) 1117 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1118 1119 // Check for integer reg-xmm reg copy 1120 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1121 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1122 "no 64 bit integer-float reg moves" ); 1123 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1124 } 1125 // -------------------------------------- 1126 // Check for float reg-reg copy 1127 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1128 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1129 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1130 if( cbuf ) { 1131 1132 // Note the mucking with the register encode to compensate for the 0/1 1133 // indexing issue mentioned in a comment in the reg_def sections 1134 // for FPR registers many lines above here. 1135 1136 if( src_first != FPR1L_num ) { 1137 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1138 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1139 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1140 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1141 } else { 1142 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1143 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1144 } 1145 #ifndef PRODUCT 1146 } else if( !do_size ) { 1147 if( size != 0 ) st->print("\n\t"); 1148 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1149 else st->print( "FST %s", Matcher::regName[dst_first]); 1150 #endif 1151 } 1152 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1153 } 1154 1155 // Check for float store 1156 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1157 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1158 } 1159 1160 // Check for float load 1161 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1162 int offset = ra_->reg2offset(src_first); 1163 const char *op_str; 1164 int op; 1165 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1166 op_str = "FLD_D"; 1167 op = 0xDD; 1168 } else { // 32-bit load 1169 op_str = "FLD_S"; 1170 op = 0xD9; 1171 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1172 } 1173 if( cbuf ) { 1174 emit_opcode (*cbuf, op ); 1175 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1176 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1177 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1178 #ifndef PRODUCT 1179 } else if( !do_size ) { 1180 if( size != 0 ) st->print("\n\t"); 1181 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1182 #endif 1183 } 1184 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1185 return size + 3+offset_size+2; 1186 } 1187 1188 // Check for xmm reg-reg copy 1189 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1190 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1191 (src_first+1 == src_second && dst_first+1 == dst_second), 1192 "no non-adjacent float-moves" ); 1193 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1194 } 1195 1196 // Check for xmm reg-integer reg copy 1197 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1198 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1199 "no 64 bit float-integer reg moves" ); 1200 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1201 } 1202 1203 // Check for xmm store 1204 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1205 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st); 1206 } 1207 1208 // Check for float xmm load 1209 if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1210 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1211 } 1212 1213 // Copy from float reg to xmm reg 1214 if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) { 1215 
// copy to the top of stack from floating point reg 1216 // and use LEA to preserve flags 1217 if( cbuf ) { 1218 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1219 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1220 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1221 emit_d8(*cbuf,0xF8); 1222 #ifndef PRODUCT 1223 } else if( !do_size ) { 1224 if( size != 0 ) st->print("\n\t"); 1225 st->print("LEA ESP,[ESP-8]"); 1226 #endif 1227 } 1228 size += 4; 1229 1230 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1231 1232 // Copy from the temp memory to the xmm reg. 1233 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1234 1235 if( cbuf ) { 1236 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1237 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1238 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1239 emit_d8(*cbuf,0x08); 1240 #ifndef PRODUCT 1241 } else if( !do_size ) { 1242 if( size != 0 ) st->print("\n\t"); 1243 st->print("LEA ESP,[ESP+8]"); 1244 #endif 1245 } 1246 size += 4; 1247 return size; 1248 } 1249 1250 // AVX-512 opmask specific spilling. 
1251 if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) { 1252 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1253 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1254 MacroAssembler _masm(cbuf); 1255 int offset = ra_->reg2offset(src_first); 1256 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); 1257 return 0; 1258 } 1259 1260 if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) { 1261 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1262 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1263 MacroAssembler _masm(cbuf); 1264 int offset = ra_->reg2offset(dst_first); 1265 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first])); 1266 return 0; 1267 } 1268 1269 if (src_first_rc == rc_kreg && dst_first_rc == rc_int) { 1270 Unimplemented(); 1271 return 0; 1272 } 1273 1274 if (src_first_rc == rc_int && dst_first_rc == rc_kreg) { 1275 Unimplemented(); 1276 return 0; 1277 } 1278 1279 if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) { 1280 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1281 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1282 MacroAssembler _masm(cbuf); 1283 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first])); 1284 return 0; 1285 } 1286 1287 assert( size > 0, "missed a case" ); 1288 1289 // -------------------------------------------------------------------- 1290 // Check for second bits still needing moving. 
1291 if( src_second == dst_second ) 1292 return size; // Self copy; no move 1293 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1294 1295 // Check for second word int-int move 1296 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1297 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1298 1299 // Check for second word integer store 1300 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1301 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1302 1303 // Check for second word integer load 1304 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1305 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1306 1307 Unimplemented(); 1308 return 0; // Mute compiler 1309 } 1310 1311 #ifndef PRODUCT 1312 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1313 implementation( NULL, ra_, false, st ); 1314 } 1315 #endif 1316 1317 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1318 implementation( &cbuf, ra_, false, NULL ); 1319 } 1320 1321 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1322 return MachNode::size(ra_); 1323 } 1324 1325 1326 //============================================================================= 1327 #ifndef PRODUCT 1328 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1329 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1330 int reg = ra_->get_reg_first(this); 1331 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1332 } 1333 #endif 1334 1335 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1336 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1337 int reg = ra_->get_encode(this); 1338 if( offset >= 128 ) { 1339 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1340 emit_rm(cbuf, 0x2, reg, 0x04); 1341 emit_rm(cbuf, 
0x0, 0x04, ESP_enc); 1342 emit_d32(cbuf, offset); 1343 } 1344 else { 1345 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1346 emit_rm(cbuf, 0x1, reg, 0x04); 1347 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1348 emit_d8(cbuf, offset); 1349 } 1350 } 1351 1352 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1353 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1354 if( offset >= 128 ) { 1355 return 7; 1356 } 1357 else { 1358 return 4; 1359 } 1360 } 1361 1362 //============================================================================= 1363 #ifndef PRODUCT 1364 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1365 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1366 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1367 st->print_cr("\tNOP"); 1368 st->print_cr("\tNOP"); 1369 if( !OptoBreakpoint ) 1370 st->print_cr("\tNOP"); 1371 } 1372 #endif 1373 1374 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1375 MacroAssembler masm(&cbuf); 1376 #ifdef ASSERT 1377 uint insts_size = cbuf.insts_size(); 1378 #endif 1379 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1380 masm.jump_cc(Assembler::notEqual, 1381 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1382 /* WARNING these NOPs are critical so that verified entry point is properly 1383 aligned for patching by NativeJump::patch_verified_entry() */ 1384 int nops_cnt = 2; 1385 if( !OptoBreakpoint ) // Leave space for int3 1386 nops_cnt += 1; 1387 masm.nop(nops_cnt); 1388 1389 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1390 } 1391 1392 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1393 return OptoBreakpoint ? 11 : 12; 1394 } 1395 1396 1397 //============================================================================= 1398 1399 // Vector calling convention not supported. 
const bool Matcher::supports_vector_calling_convention() {
  return false;
}

OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
// this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

// Register-pressure limit for integer registers (default 6 unless
// overridden by the INTPRESSURE flag).
uint Matcher::int_pressure_limit()
{
  return (INTPRESSURE == -1) ? 6 : INTPRESSURE;
}

// Register-pressure limit for float registers (default 6 unless
// overridden by the FLOATPRESSURE flag).
uint Matcher::float_pressure_limit()
{
  return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE;
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// Recognizes two patterns: AndL with a constant whose high word is zero,
// and a ConL constant whose high word is zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams. Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword.
There are currently supported four interfaces, 1510 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1511 // operand to generate a function which returns its register number when 1512 // queried. CONST_INTER causes an operand to generate a function which 1513 // returns the value of the constant when queried. MEMORY_INTER causes an 1514 // operand to generate four functions which return the Base Register, the 1515 // Index Register, the Scale Value, and the Offset Value of the operand when 1516 // queried. COND_INTER causes an operand to generate six functions which 1517 // return the encoding code (ie - encoding bits for the instruction) 1518 // associated with each basic boolean condition for a conditional instruction. 1519 // Instructions specify two basic values for encoding. They use the 1520 // ins_encode keyword to specify their encoding class (which must be one of 1521 // the class names specified in the encoding block), and they use the 1522 // opcode keyword to specify, in order, their primary, secondary, and 1523 // tertiary opcode. Only the opcode sections which a particular instruction 1524 // needs for encoding need to be specified. 1525 encode %{ 1526 // Build emit functions for each basic byte or larger field in the intel 1527 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1528 // code in the enc_class source block. Emit functions will live in the 1529 // main source block for now. 
In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (selects 16-bit operand size).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Zero a register via MOV r32, 0 (preserves flags, unlike XOR).
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    // normal case special case
    //
    // input : rax,: dividend min_int
    // reg: divisor -1
    //
    // output: rax,: quotient (= rax, idiv reg) min_int
    // rdx: remainder (= rax, irem reg) 0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80 cmp rax,80000000h
    // 0F 85 0B 00 00 00 jne normal_case
    // 33 D2 xor rdx,edx
    // 83 F9 FF cmp rcx,0FFh
    // 0F 84 03 00 00 00 je done
    // normal_case:
    // 99 cdq
    // F7 F9 idiv rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
    // normal_case:
    emit_opcode(cbuf,0x99); // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else { // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low word of a long immediate op: opcode + ModRM + 8/32-bit immediate.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else emit_d32(cbuf,con);
  %}

  // High word of a long immediate op: uses the tertiary opcode and the
  // paired high register of dst.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else emit_d32(cbuf,con);
  %}

  enc_class OpcSReg (rRegI dst) %{ // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit value: BSWAP both halves, then exchange them.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // x87 conditional move; src encoding is 1-based (ST(i) indexing).
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  // Slow-path subtype check; register assignments are fixed by the rule.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  // Ensure the x87 stack is empty (pre-SSE2) or verified (SSE2+) before a
  // C call.  Emitted size must be identical at every use site; checked via
  // sizeof_FFree_Float_Stack_All.
  enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty. Do cleanup now.
      masm.empty_FPU_stack();
    }
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused. In SSE2+
        // mode the result needs to be removed from the FPU stack. It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
1769 __ ffree(0); 1770 } else if (rt == T_FLOAT) { 1771 __ lea(rsp, Address(rsp, -4)); 1772 __ fstp_s(Address(rsp, 0)); 1773 __ movflt(xmm0, Address(rsp, 0)); 1774 __ lea(rsp, Address(rsp, 4)); 1775 } else if (rt == T_DOUBLE) { 1776 __ lea(rsp, Address(rsp, -8)); 1777 __ fstp_d(Address(rsp, 0)); 1778 __ movdbl(xmm0, Address(rsp, 0)); 1779 __ lea(rsp, Address(rsp, 8)); 1780 } 1781 } 1782 %} 1783 1784 enc_class pre_call_resets %{ 1785 // If method sets FPU control word restore it here 1786 debug_only(int off0 = cbuf.insts_size()); 1787 if (ra_->C->in_24_bit_fp_mode()) { 1788 MacroAssembler _masm(&cbuf); 1789 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 1790 } 1791 // Clear upper bits of YMM registers when current compiled code uses 1792 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1793 MacroAssembler _masm(&cbuf); 1794 __ vzeroupper(); 1795 debug_only(int off1 = cbuf.insts_size()); 1796 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1797 %} 1798 1799 enc_class post_call_FPU %{ 1800 // If method sets FPU control word do it here also 1801 if (Compile::current()->in_24_bit_fp_mode()) { 1802 MacroAssembler masm(&cbuf); 1803 masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 1804 } 1805 %} 1806 1807 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1808 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1809 // who we intended to call. 1810 cbuf.set_insts_mark(); 1811 $$$emit8$primary; 1812 1813 if (!_method) { 1814 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1815 runtime_call_Relocation::spec(), 1816 RELOC_IMM32); 1817 } else { 1818 int method_index = resolved_method_index(cbuf); 1819 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1820 : static_call_Relocation::spec(method_index); 1821 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1822 rspec, RELOC_DISP32); 1823 // Emit stubs for static call. 1824 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1825 if (stub == NULL) { 1826 ciEnv::current()->record_failure("CodeCache is full"); 1827 return; 1828 } 1829 } 1830 %} 1831 1832 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1833 MacroAssembler _masm(&cbuf); 1834 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1835 %} 1836 1837 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1838 int disp = in_bytes(Method::from_compiled_offset()); 1839 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1840 1841 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1842 cbuf.set_insts_mark(); 1843 $$$emit8$primary; 1844 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1845 emit_d8(cbuf, disp); // Displacement 1846 1847 %} 1848 1849 // Following encoding is no longer used, but may be restored if calling 1850 // convention changes significantly. 
// Became: Xor_Reg(EBP), Java_To_Runtime( labl )
//
// enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
//   // int ic_reg     = Matcher::inline_cache_reg();
//   // int ic_encode  = Matcher::_regEncode[ic_reg];
//   // int imo_reg    = Matcher::interpreter_method_reg();
//   // int imo_encode = Matcher::_regEncode[imo_reg];
//
//   // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
//   // // so we load it immediately before the call
//   // emit_opcode(cbuf, 0x8B);  // MOV    imo_reg,ic_reg  # method_ptr
//   // emit_rm(cbuf, 0x03, imo_encode, ic_encode );  // R/M byte
//
//   // xor rbp,ebp
//   emit_opcode(cbuf, 0x33);
//   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
//
//   // CALL to interpreter.
//   cbuf.set_insts_mark();
//   $$$emit8$primary;
//   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
//                  runtime_call_Relocation::spec(), RELOC_IMM32 );
// %}

enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  $$$emit8$shift$$constant;
%}

enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(cbuf, 0xB8 + $dst$$reg);
  $$$emit32$src$$constant;
%}

enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(cbuf, $primary + $dst$$reg);
  $$$emit32$src$$constant;
%}

// Low word of a long immediate load; uses XOR when the low word is zero.
enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  int dst_enc = $dst$$reg;
  int src_con = $src$$constant & 0x0FFFFFFFFL;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(cbuf, $primary + dst_enc);
    emit_d32(cbuf, src_con);
  }
%}

// High word of a long immediate load; uses XOR when the high word is zero.
enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  int dst_enc = $dst$$reg + 2;
  int src_con = ((julong)($src$$constant)) >> 32;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(cbuf, $primary + dst_enc);
    emit_d32(cbuf, src_con);
  }
%}


// Encode a reg-reg copy.  If it is useless, then empty encoding.
enc_class enc_Copy( rRegI dst, rRegI src ) %{
  encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}

enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
  encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}

enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$secondary;
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
%}

// Like RegReg_Lo but the opcode byte is emitted elsewhere.
enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// Like RegReg_Hi but the opcode byte is emitted elsewhere.
enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
%}

enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
%}

enc_class Con32 (immI src) %{    // Con32(storeImmI)
  // Output immediate
  $$$emit32$src$$constant;
%}

enc_class Con32FPR_as_bits(immFPR src) %{    // storeF_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int    jf_as_bits = jint_cast( jf );
  emit_d32(cbuf, jf_as_bits);
%}

enc_class Con32F_as_bits(immF src) %{      // storeX_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int    jf_as_bits = jint_cast( jf );
  emit_d32(cbuf, jf_as_bits);
%}

enc_class Con16 (immI src) %{    // Con16(storeImmI)
  // Output immediate
  $$$emit16$src$$constant;
%}

enc_class Con_d32(immI src) %{
  emit_d32(cbuf,$src$$constant);
%}

enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
  // Output immediate memory reference
  emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
  emit_d32(cbuf, 0x00);
%}

enc_class lock_prefix( ) %{
  emit_opcode(cbuf,0xF0);         // [Lock]
%}

// Cmp-xchg long value.
// Note: we need to swap rbx, and rcx before and after the
//       cmpxchg8 instruction because the instruction uses
//       rcx as the high order word of the new value to store but
//       our register encoding uses rbx,.
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
  // [Lock]
  emit_opcode(cbuf,0xF0);
  // CMPXCHG8 [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xC7);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
%}

enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // CMPXCHG [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // CMPXCHGB [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB0);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // 16-bit mode
  emit_opcode(cbuf, 0x66);

  // CMPXCHGW [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Materialize the Z flag (not-equal) as a 0/1 boolean in 'res'.
// Uses full 5-byte MOV imm32 so the JNE skip distance is fixed.
enc_class enc_flags_ne_to_boolean( iRegI res ) %{
  int res_encoding = $res$$reg;

  // MOV  res,0
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 0 );
  // JNE,s  fail
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 5 );
  // MOV  res,1
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 1 );
  // fail:
%}

enc_class set_instruction_start( ) %{
  cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
%}

enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = $ereg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc();
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + 4;      // Offset is 4 further in memory
  assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
%}

// Double-width shift (SHLD/SHRD) plus single shift for long shifts < 32.
// $tertiary selects SHLD (0xA4) vs SHRD; operand order flips accordingly.
enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
  int r1, r2;
  if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,$tertiary);
  emit_rm(cbuf, 0x3, r1, r2);
  emit_d8(cbuf,$cnt$$constant);
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, r1);
  emit_d8(cbuf,$cnt$$constant);
%}

// Arithmetic long shift by 32..63: copy hi into lo, shift lo, fill hi
// with the sign via SAR 31.
enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
  emit_opcode( cbuf, 0x8B ); // Move
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
  emit_d8(cbuf,31);
%}

// Logical long shift by 32..63: move one half into the other, shift it,
// zero the vacated half.  $secondary distinguishes shift direction.
enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
  int r1, r2;
  if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

  emit_opcode( cbuf, 0x8B ); // Move r1,r2
  emit_rm(cbuf, 0x3, r1, r2);
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_opcode(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_opcode(cbuf,0x33);  // XOR r2,r2
  emit_rm(cbuf, 0x3, r2, r2);
%}

// Clone of RegMem but accepts an extra parameter to access each
// half of a double in memory; it never needs relocation info.
enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
  emit_opcode(cbuf,$opcode$$constant);
  int reg_encoding = $rm_reg$$reg;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp + $disp_for_half$$constant;
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
//
// Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
// and it never needs relocation information.
// Frequently used to move data between FPU's Stack Top and memory.
enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
%}

enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
  int reg_encoding = $dst$$reg;
  int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
  int index        = 0x04;            // 0x04 indicates no index
  int scale        = 0x00;            // 0x00 indicates no scale
  int displace     = $src1$$constant; // 0x00 indicates no displacement
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst < src around move
  emit_opcode(cbuf,0x7C);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst > src around move
  emit_opcode(cbuf,0x7F);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class enc_FPR_store(memory mem, regDPR src) %{
  // If src is FPR1, we can just FST to store it.
  // Else we need to FLD it to FPR1, then FSTP to store/pop it.
  int reg_encoding = 0x2; // Just store
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  if( $src$$reg != FPR1L_enc ) {
    reg_encoding = 0x3;  // Store & pop
    emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
  }
  cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
  emit_opcode(cbuf,$primary);
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class neg_reg(rRegI dst) %{
  // NEG $dst
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
%}

enc_class setLT_reg(eCXRegI dst) %{
  // SETLT $dst
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0x9C);
  emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
%}

// Branch-free conditional add: p += (p < q) ? y : 0, computed with
// SUB/SBB/AND/ADD using tmp as an all-ones/all-zeros mask.
enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
  int tmpReg = $tmp$$reg;

  // SUB $p,$q
  emit_opcode(cbuf,0x2B);
  emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
  // SBB $tmp,$tmp
  emit_opcode(cbuf,0x1B);
  emit_rm(cbuf, 0x3, tmpReg, tmpReg);
  // AND $tmp,$y
  emit_opcode(cbuf,0x23);
  emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
  // ADD $p,$tmp
  emit_opcode(cbuf,0x03);
  emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
%}

// Variable long shift left: handle shift >= 32 by moving lo into hi
// and clearing lo, then SHLD/SHL for the remaining 0..31 bits.
enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.hi,$dst.lo
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // CLR $dst.lo
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
  // SHLD $dst.hi,$dst.lo,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xA5);
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  // SHL $dst.lo,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
%}

// Variable long logical shift right: mirror image of shift_left_long.
enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // CLR $dst.hi
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SHR $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
%}

// Variable long arithmetic shift right: like shift_right_long but the
// vacated hi half is filled with the sign (SAR 31) instead of cleared.
enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x05);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // SAR $dst.hi,31
  emit_opcode(cbuf, 0xC1);
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
  emit_d8(cbuf, 0x1F );
// small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SAR $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
%}


// ----------------- Encodings for floating point unit -----------------
// May leave result in FPU-TOS or FPU reg depending on opcodes
enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $src$$reg );
%}

// Pop argument in FPR0 with FSTP ST(0)
enc_class PopFPU() %{
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, 0xD8 );
%}

// !!!!! equivalent to Pop_Reg_F
enc_class Pop_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
%}

enc_class strictfp_bias1( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

enc_class strictfp_bias2( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Special case for moving an integer register to a stack slot.
enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
%}

// Special case for moving a register to a stack slot.
enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  // Opcode already emitted
  emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
  emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
  emit_d32(cbuf, $dst$$disp);                  // Displacement
%}

// Push the integer in stackSlot 'src' onto FP-stack
enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
  store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
%}

// Push FPU's TOS float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
  store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
%}

// Same as Pop_Mem_F except for opcode
// Push FPU's TOS double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
  store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
%}

enc_class Pop_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
%}

// Push FPU's float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
  int pop = 0x02;
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;
  }
  store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
%}

// Push FPU's double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
  int pop = 0x02;
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;
  }
  store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
%}

// Push FPU's double to a FPU-stack-slot, and pop FPU-stack
enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
  int pop = 0xD0 - 1; // -1 since we skip FLD
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0xD8;
  }
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
%}


enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
  // load dst in FPR0
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // swap src with FPR1:
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
%}

// Spill two XMM doubles to the stack and load both onto the x87 stack.
enc_class Push_ModD_encoding(regD src0, regD src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_d(Address(rsp, 0));
  __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Spill two XMM floats to the stack and load both onto the x87 stack.
enc_class Push_ModF_encoding(regF src0, regF src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 4);
  __ movflt(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_s(Address(rsp, 0));
  __ movflt(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_s(Address(rsp, 0));
%}

// Pop the x87 TOS double through the stack into an XMM register.
enc_class Push_ResultD(regD dst) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_d(Address(rsp, 0));
  __ movdbl($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, 8);
%}

// Pop the x87 TOS float through the stack into an XMM register.
enc_class Push_ResultF(regF dst, immI d8) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_s(Address(rsp, 0));
  __ movflt($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, $d8$$constant);
%}

// Spill one XMM double to the stack and load it onto the x87 stack.
enc_class Push_SrcD(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

enc_class push_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
%}

enc_class pop_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ addptr(rsp, 8);
%}

enc_class push_xmm_to_fpr1(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

enc_class Push_Result_Mod_DPR( regDPR src) %{
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
  // // following asm replaced with Pop_Reg_F or Pop_Mem_F
  // // FSTP   FPR$dst$$reg
  // emit_opcode( cbuf, 0xDD );
  // emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class fnstsw_sahf_skip_parity() %{
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jnp  ::skip
  emit_opcode( cbuf, 0x7B );
  emit_opcode( cbuf, 0x05 );
%}

enc_class emitModDPR() %{
  // fprem must be iterative
  // :: loop
  // fprem
  emit_opcode( cbuf, 0xD9 );
  emit_opcode( cbuf, 0xF8 );
  // wait
  emit_opcode( cbuf, 0x9b );
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jp  ::loop
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0x8A );
  emit_opcode( cbuf, 0xF4 );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
%}

enc_class fpu_flags() %{
  // fnstsw_ax
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // test ax,0x0400
  emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
  emit_opcode( cbuf, 0xA9 );
  emit_d16   ( cbuf, 0x0400 );
  // // // This sequence works, but stalls for 12-16 cycles on PPro
  // // test rax,0x0400
  // emit_opcode( cbuf, 0xA9 );
  // emit_d32   ( cbuf, 0x00000400 );
  //
  // jz exit (no unordered comparison)
  emit_opcode( cbuf, 0x74 );
  emit_d8    ( cbuf, 0x02 );
  // mov ah,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // sahf
  emit_opcode( cbuf, 0x9E);
%}

enc_class cmpF_P6_fixup() %{
  // Fixup the integer flags in case comparison involved a NaN
  //
  // JNP exit (no unordered comparison, P-flag is set by NaN)
  emit_opcode( cbuf, 0x7B );
  emit_d8    ( cbuf, 0x03 );
  // MOV AH,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // SAHF
  emit_opcode( cbuf, 0x9E);
  // NOP     // target for branch to avoid branch to branch
  emit_opcode( cbuf, 0x90);
%}

// fnstsw_ax();
// sahf();
// movl(dst, nan_result);
// jcc(Assembler::parity, exit);
// movl(dst, less_result);
// jcc(Assembler::below, exit);
// movl(dst, equal_result);
// jcc(Assembler::equal, exit);
// movl(dst, greater_result);

// less_result     =  1;
// greater_result  = -1;
// equal_result    = 0;
// nan_result      = -1;

enc_class CmpF_Result(rRegI dst) %{
  // fnstsw_ax();
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // sahf
  emit_opcode( cbuf, 0x9E);
  // movl(dst, nan_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::parity, exit);
  emit_opcode( cbuf, 0x7A );
  emit_d8    ( cbuf, 0x13 );
  // movl(dst, less_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::below, exit);
  emit_opcode( cbuf, 0x72 );
  emit_d8    ( cbuf, 0x0C );
  // movl(dst, equal_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 0 );
  // jcc(Assembler::equal, exit);
  emit_opcode( cbuf, 0x74 );
  emit_d8    ( cbuf, 0x05 );
  // movl(dst, greater_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 1 );
%}


// Compare the longs and set flags
// BROKEN!  Do Not use as-is
enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
  // CMP    $src1.hi,$src2.hi
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  // JNE,s  done
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 2 );
  // CMP    $src1.lo,$src2.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
// done:
%}

// Sign-extend an int into a long register pair.
enc_class convert_int_long( regL dst, rRegI src ) %{
  // mov $dst.lo,$src
  int dst_encoding = $dst$$reg;
  int src_encoding = $src$$reg;
  encode_Copy( cbuf, dst_encoding , src_encoding );
  // mov $dst.hi,$src
  encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
  // sar $dst.hi,31
  emit_opcode( cbuf, 0xC1 );
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
  emit_d8(cbuf, 0x1F );
%}

// Push a long onto the stack, FILD it, and pop the stack temp again.
enc_class convert_long_double( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg  );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
  // pop stack
  emit_opcode(cbuf, 0x83); // add  SP, #8
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 0x8);
%}

enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
  // IMUL   EDX:EAX,$src1
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
  // SAR    EDX,$cnt-32
  int shift_count = ((int)$cnt$$constant) - 32;
  if (shift_count > 0) {
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, $dst$$reg );
    emit_d8(cbuf, shift_count);
  }
%}

// this version doesn't have add sp, 8
enc_class convert_long_double2( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg  );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
%}

enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic idea: long = (long)int * (long)int
  // IMUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src$$reg);
%}

enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  // MUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg);
%}

enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  // MOV    $tmp,$src.lo
  encode_Copy( cbuf, $tmp$$reg, $src$$reg );
  // IMUL   $tmp,EDX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MOV    EDX,$src.hi
  encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
  // IMUL   EDX,EAX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // ADD    $tmp,EDX
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MUL    EDX:EAX,$src.lo
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg );
  // ADD    EDX,ESI
  emit_opcode(
cbuf, 0x03 ); 2757 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2758 %} 2759 2760 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2761 // Basic idea: lo(result) = lo(src * y_lo) 2762 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2763 // IMUL $tmp,EDX,$src 2764 emit_opcode( cbuf, 0x6B ); 2765 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2766 emit_d8( cbuf, (int)$src$$constant ); 2767 // MOV EDX,$src 2768 emit_opcode(cbuf, 0xB8 + EDX_enc); 2769 emit_d32( cbuf, (int)$src$$constant ); 2770 // MUL EDX:EAX,EDX 2771 emit_opcode( cbuf, 0xF7 ); 2772 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2773 // ADD EDX,ESI 2774 emit_opcode( cbuf, 0x03 ); 2775 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2776 %} 2777 2778 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2779 // PUSH src1.hi 2780 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2781 // PUSH src1.lo 2782 emit_opcode(cbuf, 0x50+$src1$$reg ); 2783 // PUSH src2.hi 2784 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2785 // PUSH src2.lo 2786 emit_opcode(cbuf, 0x50+$src2$$reg ); 2787 // CALL directly to the runtime 2788 cbuf.set_insts_mark(); 2789 emit_opcode(cbuf,0xE8); // Call into runtime 2790 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2791 // Restore stack 2792 emit_opcode(cbuf, 0x83); // add SP, #framesize 2793 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2794 emit_d8(cbuf, 4*4); 2795 %} 2796 2797 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2798 // PUSH src1.hi 2799 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2800 // PUSH src1.lo 2801 emit_opcode(cbuf, 0x50+$src1$$reg ); 2802 // PUSH src2.hi 2803 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2804 // PUSH src2.lo 2805 emit_opcode(cbuf, 0x50+$src2$$reg ); 2806 // CALL directly to the runtime 2807 cbuf.set_insts_mark(); 2808 emit_opcode(cbuf,0xE8); // Call into runtime 2809 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2810 // Restore stack 2811 emit_opcode(cbuf, 0x83); // add SP, #framesize 2812 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2813 emit_d8(cbuf, 4*4); 2814 %} 2815 2816 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2817 // MOV $tmp,$src.lo 2818 emit_opcode(cbuf, 0x8B); 2819 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2820 // OR $tmp,$src.hi 2821 emit_opcode(cbuf, 0x0B); 2822 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2823 %} 2824 2825 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2826 // CMP $src1.lo,$src2.lo 2827 emit_opcode( cbuf, 0x3B ); 2828 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2829 // JNE,s skip 2830 emit_cc(cbuf, 0x70, 0x5); 2831 emit_d8(cbuf,2); 2832 // CMP $src1.hi,$src2.hi 2833 emit_opcode( cbuf, 0x3B ); 2834 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2835 %} 2836 2837 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2838 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2839 emit_opcode( cbuf, 0x3B ); 2840 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2841 // MOV $tmp,$src1.hi 2842 emit_opcode( cbuf, 0x8B ); 2843 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2844 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2845 emit_opcode( cbuf, 0x1B ); 2846 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2847 %} 2848 2849 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2850 // XOR $tmp,$tmp 2851 emit_opcode(cbuf,0x33); // XOR 2852 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2853 // CMP $tmp,$src.lo 2854 emit_opcode( cbuf, 0x3B ); 2855 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2856 // SBB $tmp,$src.hi 2857 emit_opcode( cbuf, 0x1B ); 2858 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2859 %} 2860 2861 // Sniff, sniff... 
smells like Gnu Superoptimizer 2862 enc_class neg_long( eRegL dst ) %{ 2863 emit_opcode(cbuf,0xF7); // NEG hi 2864 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2865 emit_opcode(cbuf,0xF7); // NEG lo 2866 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2867 emit_opcode(cbuf,0x83); // SBB hi,0 2868 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2869 emit_d8 (cbuf,0 ); 2870 %} 2871 2872 enc_class enc_pop_rdx() %{ 2873 emit_opcode(cbuf,0x5A); 2874 %} 2875 2876 enc_class enc_rethrow() %{ 2877 cbuf.set_insts_mark(); 2878 emit_opcode(cbuf, 0xE9); // jmp entry 2879 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2880 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2881 %} 2882 2883 2884 // Convert a double to an int. Java semantics require we do complex 2885 // manglelations in the corner cases. So we set the rounding mode to 2886 // 'zero', store the darned double down as an int, and reset the 2887 // rounding mode to 'nearest'. The hardware throws an exception which 2888 // patches up the correct value directly to the stack. 2889 enc_class DPR2I_encoding( regDPR src ) %{ 2890 // Flip to round-to-zero mode. We attempted to allow invalid-op 2891 // exceptions here, so that a NAN or other corner-case value will 2892 // thrown an exception (but normal values get converted at full speed). 2893 // However, I2C adapters and other float-stack manglers leave pending 2894 // invalid-op exceptions hanging. We would have to clear them before 2895 // enabling them and that is more expensive than just testing for the 2896 // invalid value Intel stores down in the corner cases. 2897 emit_opcode(cbuf,0xD9); // FLDCW trunc 2898 emit_opcode(cbuf,0x2D); 2899 emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); 2900 // Allocate a word 2901 emit_opcode(cbuf,0x83); // SUB ESP,4 2902 emit_opcode(cbuf,0xEC); 2903 emit_d8(cbuf,0x04); 2904 // Encoding assumes a double has been pushed into FPR0. 
2905 // Store down the double as an int, popping the FPU stack 2906 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2907 emit_opcode(cbuf,0x1C); 2908 emit_d8(cbuf,0x24); 2909 // Restore the rounding mode; mask the exception 2910 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2911 emit_opcode(cbuf,0x2D); 2912 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2913 ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() 2914 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); 2915 2916 // Load the converted int; adjust CPU stack 2917 emit_opcode(cbuf,0x58); // POP EAX 2918 emit_opcode(cbuf,0x3D); // CMP EAX,imm 2919 emit_d32 (cbuf,0x80000000); // 0x80000000 2920 emit_opcode(cbuf,0x75); // JNE around_slow_call 2921 emit_d8 (cbuf,0x07); // Size of slow_call 2922 // Push src onto stack slow-path 2923 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2924 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2925 // CALL directly to the runtime 2926 cbuf.set_insts_mark(); 2927 emit_opcode(cbuf,0xE8); // Call into runtime 2928 emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2929 // Carry on here... 2930 %} 2931 2932 enc_class DPR2L_encoding( regDPR src ) %{ 2933 emit_opcode(cbuf,0xD9); // FLDCW trunc 2934 emit_opcode(cbuf,0x2D); 2935 emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); 2936 // Allocate a word 2937 emit_opcode(cbuf,0x83); // SUB ESP,8 2938 emit_opcode(cbuf,0xEC); 2939 emit_d8(cbuf,0x08); 2940 // Encoding assumes a double has been pushed into FPR0. 2941 // Store down the double as a long, popping the FPU stack 2942 emit_opcode(cbuf,0xDF); // FISTP [ESP] 2943 emit_opcode(cbuf,0x3C); 2944 emit_d8(cbuf,0x24); 2945 // Restore the rounding mode; mask the exception 2946 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2947 emit_opcode(cbuf,0x2D); 2948 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2949 ? 
(int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() 2950 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); 2951 2952 // Load the converted int; adjust CPU stack 2953 emit_opcode(cbuf,0x58); // POP EAX 2954 emit_opcode(cbuf,0x5A); // POP EDX 2955 emit_opcode(cbuf,0x81); // CMP EDX,imm 2956 emit_d8 (cbuf,0xFA); // rdx 2957 emit_d32 (cbuf,0x80000000); // 0x80000000 2958 emit_opcode(cbuf,0x75); // JNE around_slow_call 2959 emit_d8 (cbuf,0x07+4); // Size of slow_call 2960 emit_opcode(cbuf,0x85); // TEST EAX,EAX 2961 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 2962 emit_opcode(cbuf,0x75); // JNE around_slow_call 2963 emit_d8 (cbuf,0x07); // Size of slow_call 2964 // Push src onto stack slow-path 2965 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2966 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2967 // CALL directly to the runtime 2968 cbuf.set_insts_mark(); 2969 emit_opcode(cbuf,0xE8); // Call into runtime 2970 emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2971 // Carry on here... 
2972 %} 2973 2974 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 2975 // Operand was loaded from memory into fp ST (stack top) 2976 // FMUL ST,$src /* D8 C8+i */ 2977 emit_opcode(cbuf, 0xD8); 2978 emit_opcode(cbuf, 0xC8 + $src1$$reg); 2979 %} 2980 2981 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 2982 // FADDP ST,src2 /* D8 C0+i */ 2983 emit_opcode(cbuf, 0xD8); 2984 emit_opcode(cbuf, 0xC0 + $src2$$reg); 2985 //could use FADDP src2,fpST /* DE C0+i */ 2986 %} 2987 2988 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 2989 // FADDP src2,ST /* DE C0+i */ 2990 emit_opcode(cbuf, 0xDE); 2991 emit_opcode(cbuf, 0xC0 + $src2$$reg); 2992 %} 2993 2994 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 2995 // Operand has been loaded into fp ST (stack top) 2996 // FSUB ST,$src1 2997 emit_opcode(cbuf, 0xD8); 2998 emit_opcode(cbuf, 0xE0 + $src1$$reg); 2999 3000 // FDIV 3001 emit_opcode(cbuf, 0xD8); 3002 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3003 %} 3004 3005 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3006 // Operand was loaded from memory into fp ST (stack top) 3007 // FADD ST,$src /* D8 C0+i */ 3008 emit_opcode(cbuf, 0xD8); 3009 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3010 3011 // FMUL ST,src2 /* D8 C*+i */ 3012 emit_opcode(cbuf, 0xD8); 3013 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3014 %} 3015 3016 3017 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3018 // Operand was loaded from memory into fp ST (stack top) 3019 // FADD ST,$src /* D8 C0+i */ 3020 emit_opcode(cbuf, 0xD8); 3021 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3022 3023 // FMULP src2,ST /* DE C8+i */ 3024 emit_opcode(cbuf, 0xDE); 3025 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3026 %} 3027 3028 // Atomically load the volatile long 3029 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3030 emit_opcode(cbuf,0xDF); 3031 int rm_byte_opcode = 0x05; 3032 int base = $mem$$base; 3033 int index = $mem$$index; 3034 int scale = $mem$$scale; 3035 int displace = $mem$$disp; 3036 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3037 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3038 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3039 %} 3040 3041 // Volatile Store Long. Must be atomic, so move it into 3042 // the FP TOS and then do a 64-bit FIST. Has to probe the 3043 // target address before the store (for null-ptr checks) 3044 // so the memory operand is used twice in the encoding. 3045 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3046 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3047 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3048 emit_opcode(cbuf,0xDF); 3049 int rm_byte_opcode = 0x07; 3050 int base = $mem$$base; 3051 int index = $mem$$index; 3052 int scale = $mem$$scale; 3053 int displace = $mem$$disp; 3054 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3055 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3056 %} 3057 3058 %} 3059 3060 3061 //----------FRAME-------------------------------------------------------------- 3062 // Definition of frame structure and management information. 3063 // 3064 // S T A C K L A Y O U T Allocators stack-slot number 3065 // | (to get allocators register number 3066 // G Owned by | | v add OptoReg::stack0()) 3067 // r CALLER | | 3068 // o | +--------+ pad to even-align allocators stack-slot 3069 // w V | pad0 | numbers; owned by CALLER 3070 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned 3071 // h ^ | in | 5 3072 // | | args | 4 Holes in incoming args owned by SELF 3073 // | | | | 3 3074 // | | +--------+ 3075 // V | | old out| Empty on Intel, window on Sparc 3076 // | old |preserve| Must be even aligned. 3077 // | SP-+--------+----> Matcher::_old_SP, even aligned 3078 // | | in | 3 area for Intel ret address 3079 // Owned by |preserve| Empty on Sparc. 
//      SELF     +--------+
//        |      |  pad2  |  2   pad to align old SP
//        |      +--------+  1
//        |      | locks  |  0
//        |      +--------+----> OptoReg::stack0(), even aligned
//        |      |  pad1  | 11   pad to align new SP
//        |      +--------+
//        |      |        | 10
//        |      | spills |  9   spills
//        V      |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^      |  out   |  7
//        |      |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by    +--------+
//    CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |  new |preserve|      Must be even-aligned.
//        |   SP-+--------+----> Matcher::_new_SP, even aligned
//        |      |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // NOTE(review): the RegF guard here is UseSSE>=1 whereas c_return_value
  // uses UseSSE>=2 -- this asymmetry appears deliberate (C ABI returns float
  // on the x87 stack under plain SSE) but confirm before "unifying" it.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib
ins_cost(100);               // Required cost attribute
ins_attrib ins_size(8);      // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2)
                             // specifies the alignment that some part of the instruction (not
                             // necessarily the start) requires.  If > 1, a compute_padding()
                             // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Signed 8-bit immediate (fits the sign-extended imm8 encoding)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned 8-bit immediate
operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: fits in a sign-extended 32-bit value
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Opmask (vector-mask) register operands
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K1()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K1));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K2()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K2));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K3()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K3));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K4()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K4));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K5()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K5));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K6()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K6));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K7()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K7));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register excluding EAX ("nax")
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding EAX and EDX ("nadx")
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding ECX ("ncx")
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX and EBX
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
match(reg); 3897 format %{ "EDI" %} 3898 interface(REG_INTER); 3899 %} 3900 3901 operand eRegL() %{ 3902 constraint(ALLOC_IN_RC(long_reg)); 3903 match(RegL); 3904 match(eADXRegL); 3905 3906 format %{ %} 3907 interface(REG_INTER); 3908 %} 3909 3910 operand eADXRegL( eRegL reg ) %{ 3911 constraint(ALLOC_IN_RC(eadx_reg)); 3912 match(reg); 3913 3914 format %{ "EDX:EAX" %} 3915 interface(REG_INTER); 3916 %} 3917 3918 operand eBCXRegL( eRegL reg ) %{ 3919 constraint(ALLOC_IN_RC(ebcx_reg)); 3920 match(reg); 3921 3922 format %{ "EBX:ECX" %} 3923 interface(REG_INTER); 3924 %} 3925 3926 // Special case for integer high multiply 3927 operand eADXRegL_low_only() %{ 3928 constraint(ALLOC_IN_RC(eadx_reg)); 3929 match(RegL); 3930 3931 format %{ "EAX" %} 3932 interface(REG_INTER); 3933 %} 3934 3935 // Flags register, used as output of compare instructions 3936 operand rFlagsReg() %{ 3937 constraint(ALLOC_IN_RC(int_flags)); 3938 match(RegFlags); 3939 3940 format %{ "EFLAGS" %} 3941 interface(REG_INTER); 3942 %} 3943 3944 // Flags register, used as output of compare instructions 3945 operand eFlagsReg() %{ 3946 constraint(ALLOC_IN_RC(int_flags)); 3947 match(RegFlags); 3948 3949 format %{ "EFLAGS" %} 3950 interface(REG_INTER); 3951 %} 3952 3953 // Flags register, used as output of FLOATING POINT compare instructions 3954 operand eFlagsRegU() %{ 3955 constraint(ALLOC_IN_RC(int_flags)); 3956 match(RegFlags); 3957 3958 format %{ "EFLAGS_U" %} 3959 interface(REG_INTER); 3960 %} 3961 3962 operand eFlagsRegUCF() %{ 3963 constraint(ALLOC_IN_RC(int_flags)); 3964 match(RegFlags); 3965 predicate(false); 3966 3967 format %{ "EFLAGS_U_CF" %} 3968 interface(REG_INTER); 3969 %} 3970 3971 // Condition Code Register used by long compare 3972 operand flagsReg_long_LTGE() %{ 3973 constraint(ALLOC_IN_RC(int_flags)); 3974 match(RegFlags); 3975 format %{ "FLAGS_LTGE" %} 3976 interface(REG_INTER); 3977 %} 3978 operand flagsReg_long_EQNE() %{ 3979 constraint(ALLOC_IN_RC(int_flags)); 3980 match(RegFlags); 
3981 format %{ "FLAGS_EQNE" %} 3982 interface(REG_INTER); 3983 %} 3984 operand flagsReg_long_LEGT() %{ 3985 constraint(ALLOC_IN_RC(int_flags)); 3986 match(RegFlags); 3987 format %{ "FLAGS_LEGT" %} 3988 interface(REG_INTER); 3989 %} 3990 3991 // Condition Code Register used by unsigned long compare 3992 operand flagsReg_ulong_LTGE() %{ 3993 constraint(ALLOC_IN_RC(int_flags)); 3994 match(RegFlags); 3995 format %{ "FLAGS_U_LTGE" %} 3996 interface(REG_INTER); 3997 %} 3998 operand flagsReg_ulong_EQNE() %{ 3999 constraint(ALLOC_IN_RC(int_flags)); 4000 match(RegFlags); 4001 format %{ "FLAGS_U_EQNE" %} 4002 interface(REG_INTER); 4003 %} 4004 operand flagsReg_ulong_LEGT() %{ 4005 constraint(ALLOC_IN_RC(int_flags)); 4006 match(RegFlags); 4007 format %{ "FLAGS_U_LEGT" %} 4008 interface(REG_INTER); 4009 %} 4010 4011 // Float register operands 4012 operand regDPR() %{ 4013 predicate( UseSSE < 2 ); 4014 constraint(ALLOC_IN_RC(fp_dbl_reg)); 4015 match(RegD); 4016 match(regDPR1); 4017 match(regDPR2); 4018 format %{ %} 4019 interface(REG_INTER); 4020 %} 4021 4022 operand regDPR1(regDPR reg) %{ 4023 predicate( UseSSE < 2 ); 4024 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4025 match(reg); 4026 format %{ "FPR1" %} 4027 interface(REG_INTER); 4028 %} 4029 4030 operand regDPR2(regDPR reg) %{ 4031 predicate( UseSSE < 2 ); 4032 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4033 match(reg); 4034 format %{ "FPR2" %} 4035 interface(REG_INTER); 4036 %} 4037 4038 operand regnotDPR1(regDPR reg) %{ 4039 predicate( UseSSE < 2 ); 4040 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4041 match(reg); 4042 format %{ %} 4043 interface(REG_INTER); 4044 %} 4045 4046 // Float register operands 4047 operand regFPR() %{ 4048 predicate( UseSSE < 2 ); 4049 constraint(ALLOC_IN_RC(fp_flt_reg)); 4050 match(RegF); 4051 match(regFPR1); 4052 format %{ %} 4053 interface(REG_INTER); 4054 %} 4055 4056 // Float register operands 4057 operand regFPR1(regFPR reg) %{ 4058 predicate( UseSSE < 2 ); 4059 constraint(ALLOC_IN_RC(fp_flt_reg0)); 
4060 match(reg); 4061 format %{ "FPR1" %} 4062 interface(REG_INTER); 4063 %} 4064 4065 // XMM Float register operands 4066 operand regF() %{ 4067 predicate( UseSSE>=1 ); 4068 constraint(ALLOC_IN_RC(float_reg_legacy)); 4069 match(RegF); 4070 format %{ %} 4071 interface(REG_INTER); 4072 %} 4073 4074 operand legRegF() %{ 4075 predicate( UseSSE>=1 ); 4076 constraint(ALLOC_IN_RC(float_reg_legacy)); 4077 match(RegF); 4078 format %{ %} 4079 interface(REG_INTER); 4080 %} 4081 4082 // Float register operands 4083 operand vlRegF() %{ 4084 constraint(ALLOC_IN_RC(float_reg_vl)); 4085 match(RegF); 4086 4087 format %{ %} 4088 interface(REG_INTER); 4089 %} 4090 4091 // XMM Double register operands 4092 operand regD() %{ 4093 predicate( UseSSE>=2 ); 4094 constraint(ALLOC_IN_RC(double_reg_legacy)); 4095 match(RegD); 4096 format %{ %} 4097 interface(REG_INTER); 4098 %} 4099 4100 // Double register operands 4101 operand legRegD() %{ 4102 predicate( UseSSE>=2 ); 4103 constraint(ALLOC_IN_RC(double_reg_legacy)); 4104 match(RegD); 4105 format %{ %} 4106 interface(REG_INTER); 4107 %} 4108 4109 operand vlRegD() %{ 4110 constraint(ALLOC_IN_RC(double_reg_vl)); 4111 match(RegD); 4112 4113 format %{ %} 4114 interface(REG_INTER); 4115 %} 4116 4117 //----------Memory Operands---------------------------------------------------- 4118 // Direct Memory Operand 4119 operand direct(immP addr) %{ 4120 match(addr); 4121 4122 format %{ "[$addr]" %} 4123 interface(MEMORY_INTER) %{ 4124 base(0xFFFFFFFF); 4125 index(0x4); 4126 scale(0x0); 4127 disp($addr); 4128 %} 4129 %} 4130 4131 // Indirect Memory Operand 4132 operand indirect(eRegP reg) %{ 4133 constraint(ALLOC_IN_RC(int_reg)); 4134 match(reg); 4135 4136 format %{ "[$reg]" %} 4137 interface(MEMORY_INTER) %{ 4138 base($reg); 4139 index(0x4); 4140 scale(0x0); 4141 disp(0x0); 4142 %} 4143 %} 4144 4145 // Indirect Memory Plus Short Offset Operand 4146 operand indOffset8(eRegP reg, immI8 off) %{ 4147 match(AddP reg off); 4148 4149 format %{ "[$reg + $off]" 
%} 4150 interface(MEMORY_INTER) %{ 4151 base($reg); 4152 index(0x4); 4153 scale(0x0); 4154 disp($off); 4155 %} 4156 %} 4157 4158 // Indirect Memory Plus Long Offset Operand 4159 operand indOffset32(eRegP reg, immI off) %{ 4160 match(AddP reg off); 4161 4162 format %{ "[$reg + $off]" %} 4163 interface(MEMORY_INTER) %{ 4164 base($reg); 4165 index(0x4); 4166 scale(0x0); 4167 disp($off); 4168 %} 4169 %} 4170 4171 // Indirect Memory Plus Long Offset Operand 4172 operand indOffset32X(rRegI reg, immP off) %{ 4173 match(AddP off reg); 4174 4175 format %{ "[$reg + $off]" %} 4176 interface(MEMORY_INTER) %{ 4177 base($reg); 4178 index(0x4); 4179 scale(0x0); 4180 disp($off); 4181 %} 4182 %} 4183 4184 // Indirect Memory Plus Index Register Plus Offset Operand 4185 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4186 match(AddP (AddP reg ireg) off); 4187 4188 op_cost(10); 4189 format %{"[$reg + $off + $ireg]" %} 4190 interface(MEMORY_INTER) %{ 4191 base($reg); 4192 index($ireg); 4193 scale(0x0); 4194 disp($off); 4195 %} 4196 %} 4197 4198 // Indirect Memory Plus Index Register Plus Offset Operand 4199 operand indIndex(eRegP reg, rRegI ireg) %{ 4200 match(AddP reg ireg); 4201 4202 op_cost(10); 4203 format %{"[$reg + $ireg]" %} 4204 interface(MEMORY_INTER) %{ 4205 base($reg); 4206 index($ireg); 4207 scale(0x0); 4208 disp(0x0); 4209 %} 4210 %} 4211 4212 // // ------------------------------------------------------------------------- 4213 // // 486 architecture doesn't support "scale * index + offset" with out a base 4214 // // ------------------------------------------------------------------------- 4215 // // Scaled Memory Operands 4216 // // Indirect Memory Times Scale Plus Offset Operand 4217 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4218 // match(AddP off (LShiftI ireg scale)); 4219 // 4220 // op_cost(10); 4221 // format %{"[$off + $ireg << $scale]" %} 4222 // interface(MEMORY_INTER) %{ 4223 // base(0x4); 4224 // index($ireg); 4225 // 
scale($scale); 4226 // disp($off); 4227 // %} 4228 // %} 4229 4230 // Indirect Memory Times Scale Plus Index Register 4231 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4232 match(AddP reg (LShiftI ireg scale)); 4233 4234 op_cost(10); 4235 format %{"[$reg + $ireg << $scale]" %} 4236 interface(MEMORY_INTER) %{ 4237 base($reg); 4238 index($ireg); 4239 scale($scale); 4240 disp(0x0); 4241 %} 4242 %} 4243 4244 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4245 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4246 match(AddP (AddP reg (LShiftI ireg scale)) off); 4247 4248 op_cost(10); 4249 format %{"[$reg + $off + $ireg << $scale]" %} 4250 interface(MEMORY_INTER) %{ 4251 base($reg); 4252 index($ireg); 4253 scale($scale); 4254 disp($off); 4255 %} 4256 %} 4257 4258 //----------Load Long Memory Operands------------------------------------------ 4259 // The load-long idiom will use it's address expression again after loading 4260 // the first word of the long. If the load-long destination overlaps with 4261 // registers used in the addressing expression, the 2nd half will be loaded 4262 // from a clobbered address. Fix this by requiring that load-long use 4263 // address registers that do not overlap with the load-long target. 
4264 4265 // load-long support 4266 operand load_long_RegP() %{ 4267 constraint(ALLOC_IN_RC(esi_reg)); 4268 match(RegP); 4269 match(eSIRegP); 4270 op_cost(100); 4271 format %{ %} 4272 interface(REG_INTER); 4273 %} 4274 4275 // Indirect Memory Operand Long 4276 operand load_long_indirect(load_long_RegP reg) %{ 4277 constraint(ALLOC_IN_RC(esi_reg)); 4278 match(reg); 4279 4280 format %{ "[$reg]" %} 4281 interface(MEMORY_INTER) %{ 4282 base($reg); 4283 index(0x4); 4284 scale(0x0); 4285 disp(0x0); 4286 %} 4287 %} 4288 4289 // Indirect Memory Plus Long Offset Operand 4290 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4291 match(AddP reg off); 4292 4293 format %{ "[$reg + $off]" %} 4294 interface(MEMORY_INTER) %{ 4295 base($reg); 4296 index(0x4); 4297 scale(0x0); 4298 disp($off); 4299 %} 4300 %} 4301 4302 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4303 4304 4305 //----------Special Memory Operands-------------------------------------------- 4306 // Stack Slot Operand - This operand is used for loading and storing temporary 4307 // values on the stack where a match requires a value to 4308 // flow through memory. 
4309 operand stackSlotP(sRegP reg) %{ 4310 constraint(ALLOC_IN_RC(stack_slots)); 4311 // No match rule because this operand is only generated in matching 4312 format %{ "[$reg]" %} 4313 interface(MEMORY_INTER) %{ 4314 base(0x4); // ESP 4315 index(0x4); // No Index 4316 scale(0x0); // No Scale 4317 disp($reg); // Stack Offset 4318 %} 4319 %} 4320 4321 operand stackSlotI(sRegI reg) %{ 4322 constraint(ALLOC_IN_RC(stack_slots)); 4323 // No match rule because this operand is only generated in matching 4324 format %{ "[$reg]" %} 4325 interface(MEMORY_INTER) %{ 4326 base(0x4); // ESP 4327 index(0x4); // No Index 4328 scale(0x0); // No Scale 4329 disp($reg); // Stack Offset 4330 %} 4331 %} 4332 4333 operand stackSlotF(sRegF reg) %{ 4334 constraint(ALLOC_IN_RC(stack_slots)); 4335 // No match rule because this operand is only generated in matching 4336 format %{ "[$reg]" %} 4337 interface(MEMORY_INTER) %{ 4338 base(0x4); // ESP 4339 index(0x4); // No Index 4340 scale(0x0); // No Scale 4341 disp($reg); // Stack Offset 4342 %} 4343 %} 4344 4345 operand stackSlotD(sRegD reg) %{ 4346 constraint(ALLOC_IN_RC(stack_slots)); 4347 // No match rule because this operand is only generated in matching 4348 format %{ "[$reg]" %} 4349 interface(MEMORY_INTER) %{ 4350 base(0x4); // ESP 4351 index(0x4); // No Index 4352 scale(0x0); // No Scale 4353 disp($reg); // Stack Offset 4354 %} 4355 %} 4356 4357 operand stackSlotL(sRegL reg) %{ 4358 constraint(ALLOC_IN_RC(stack_slots)); 4359 // No match rule because this operand is only generated in matching 4360 format %{ "[$reg]" %} 4361 interface(MEMORY_INTER) %{ 4362 base(0x4); // ESP 4363 index(0x4); // No Index 4364 scale(0x0); // No Scale 4365 disp($reg); // Stack Offset 4366 %} 4367 %} 4368 4369 //----------Conditional Branch Operands---------------------------------------- 4370 // Comparison Op - This is the operation of the comparison, and is limited to 4371 // the following set of codes: 4372 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) 
4373 // 4374 // Other attributes of the comparison, such as unsignedness, are specified 4375 // by the comparison instruction that sets a condition code flags register. 4376 // That result is represented by a flags operand whose subtype is appropriate 4377 // to the unsignedness (etc.) of the comparison. 4378 // 4379 // Later, the instruction which matches both the Comparison Op (a Bool) and 4380 // the flags (produced by the Cmp) specifies the coding of the comparison op 4381 // by matching a specific subtype of Bool operand below, such as cmpOpU. 4382 4383 // Comparision Code 4384 operand cmpOp() %{ 4385 match(Bool); 4386 4387 format %{ "" %} 4388 interface(COND_INTER) %{ 4389 equal(0x4, "e"); 4390 not_equal(0x5, "ne"); 4391 less(0xC, "l"); 4392 greater_equal(0xD, "ge"); 4393 less_equal(0xE, "le"); 4394 greater(0xF, "g"); 4395 overflow(0x0, "o"); 4396 no_overflow(0x1, "no"); 4397 %} 4398 %} 4399 4400 // Comparison Code, unsigned compare. Used by FP also, with 4401 // C2 (unordered) turned into GT or LT already. The other bits 4402 // C0 and C3 are turned into Carry & Zero flags. 
4403 operand cmpOpU() %{ 4404 match(Bool); 4405 4406 format %{ "" %} 4407 interface(COND_INTER) %{ 4408 equal(0x4, "e"); 4409 not_equal(0x5, "ne"); 4410 less(0x2, "b"); 4411 greater_equal(0x3, "nb"); 4412 less_equal(0x6, "be"); 4413 greater(0x7, "nbe"); 4414 overflow(0x0, "o"); 4415 no_overflow(0x1, "no"); 4416 %} 4417 %} 4418 4419 // Floating comparisons that don't require any fixup for the unordered case 4420 operand cmpOpUCF() %{ 4421 match(Bool); 4422 predicate(n->as_Bool()->_test._test == BoolTest::lt || 4423 n->as_Bool()->_test._test == BoolTest::ge || 4424 n->as_Bool()->_test._test == BoolTest::le || 4425 n->as_Bool()->_test._test == BoolTest::gt); 4426 format %{ "" %} 4427 interface(COND_INTER) %{ 4428 equal(0x4, "e"); 4429 not_equal(0x5, "ne"); 4430 less(0x2, "b"); 4431 greater_equal(0x3, "nb"); 4432 less_equal(0x6, "be"); 4433 greater(0x7, "nbe"); 4434 overflow(0x0, "o"); 4435 no_overflow(0x1, "no"); 4436 %} 4437 %} 4438 4439 4440 // Floating comparisons that can be fixed up with extra conditional jumps 4441 operand cmpOpUCF2() %{ 4442 match(Bool); 4443 predicate(n->as_Bool()->_test._test == BoolTest::ne || 4444 n->as_Bool()->_test._test == BoolTest::eq); 4445 format %{ "" %} 4446 interface(COND_INTER) %{ 4447 equal(0x4, "e"); 4448 not_equal(0x5, "ne"); 4449 less(0x2, "b"); 4450 greater_equal(0x3, "nb"); 4451 less_equal(0x6, "be"); 4452 greater(0x7, "nbe"); 4453 overflow(0x0, "o"); 4454 no_overflow(0x1, "no"); 4455 %} 4456 %} 4457 4458 // Comparison Code for FP conditional move 4459 operand cmpOp_fcmov() %{ 4460 match(Bool); 4461 4462 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 4463 n->as_Bool()->_test._test != BoolTest::no_overflow); 4464 format %{ "" %} 4465 interface(COND_INTER) %{ 4466 equal (0x0C8); 4467 not_equal (0x1C8); 4468 less (0x0C0); 4469 greater_equal(0x1C0); 4470 less_equal (0x0D0); 4471 greater (0x1D0); 4472 overflow(0x0, "o"); // not really supported by the instruction 4473 no_overflow(0x1, "no"); // not really supported 
by the instruction 4474 %} 4475 %} 4476 4477 // Comparison Code used in long compares 4478 operand cmpOp_commute() %{ 4479 match(Bool); 4480 4481 format %{ "" %} 4482 interface(COND_INTER) %{ 4483 equal(0x4, "e"); 4484 not_equal(0x5, "ne"); 4485 less(0xF, "g"); 4486 greater_equal(0xE, "le"); 4487 less_equal(0xD, "ge"); 4488 greater(0xC, "l"); 4489 overflow(0x0, "o"); 4490 no_overflow(0x1, "no"); 4491 %} 4492 %} 4493 4494 // Comparison Code used in unsigned long compares 4495 operand cmpOpU_commute() %{ 4496 match(Bool); 4497 4498 format %{ "" %} 4499 interface(COND_INTER) %{ 4500 equal(0x4, "e"); 4501 not_equal(0x5, "ne"); 4502 less(0x7, "nbe"); 4503 greater_equal(0x6, "be"); 4504 less_equal(0x3, "nb"); 4505 greater(0x2, "b"); 4506 overflow(0x0, "o"); 4507 no_overflow(0x1, "no"); 4508 %} 4509 %} 4510 4511 //----------OPERAND CLASSES---------------------------------------------------- 4512 // Operand Classes are groups of operands that are used as to simplify 4513 // instruction definitions by not requiring the AD writer to specify separate 4514 // instructions for every form of operand when the instruction accepts 4515 // multiple operand types with the same basic encoding and format. The classic 4516 // case of this is memory operands. 4517 4518 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 4519 indIndex, indIndexScale, indIndexScaleOffset); 4520 4521 // Long memory operations are encoded in 2 instructions and a +4 offset. 4522 // This means some kind of offset is always required and you cannot use 4523 // an oop as the offset (done when working on static globals). 4524 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, 4525 indIndex, indIndexScale, indIndexScaleOffset); 4526 4527 4528 //----------PIPELINE----------------------------------------------------------- 4529 // Rules which define the behavior of the target architectures pipeline. 
4530 pipeline %{ 4531 4532 //----------ATTRIBUTES--------------------------------------------------------- 4533 attributes %{ 4534 variable_size_instructions; // Fixed size instructions 4535 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle 4536 instruction_unit_size = 1; // An instruction is 1 bytes long 4537 instruction_fetch_unit_size = 16; // The processor fetches one line 4538 instruction_fetch_units = 1; // of 16 bytes 4539 4540 // List of nop instructions 4541 nops( MachNop ); 4542 %} 4543 4544 //----------RESOURCES---------------------------------------------------------- 4545 // Resources are the functional units available to the machine 4546 4547 // Generic P2/P3 pipeline 4548 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of 4549 // 3 instructions decoded per cycle. 4550 // 2 load/store ops per cycle, 1 branch, 1 FPU, 4551 // 2 ALU op, only ALU0 handles mul/div instructions. 4552 resources( D0, D1, D2, DECODE = D0 | D1 | D2, 4553 MS0, MS1, MEM = MS0 | MS1, 4554 BR, FPU, 4555 ALU0, ALU1, ALU = ALU0 | ALU1 ); 4556 4557 //----------PIPELINE DESCRIPTION----------------------------------------------- 4558 // Pipeline Description specifies the stages in the machine's pipeline 4559 4560 // Generic P2/P3 pipeline 4561 pipe_desc(S0, S1, S2, S3, S4, S5); 4562 4563 //----------PIPELINE CLASSES--------------------------------------------------- 4564 // Pipeline Classes describe the stages in which input and output are 4565 // referenced by the hardware pipeline. 4566 4567 // Naming convention: ialu or fpu 4568 // Then: _reg 4569 // Then: _reg if there is a 2nd register 4570 // Then: _long if it's a pair of instructions implementing a long 4571 // Then: _fat if it requires the big decoder 4572 // Or: _mem if it requires the big decoder and a memory unit. 
4573 4574 // Integer ALU reg operation 4575 pipe_class ialu_reg(rRegI dst) %{ 4576 single_instruction; 4577 dst : S4(write); 4578 dst : S3(read); 4579 DECODE : S0; // any decoder 4580 ALU : S3; // any alu 4581 %} 4582 4583 // Long ALU reg operation 4584 pipe_class ialu_reg_long(eRegL dst) %{ 4585 instruction_count(2); 4586 dst : S4(write); 4587 dst : S3(read); 4588 DECODE : S0(2); // any 2 decoders 4589 ALU : S3(2); // both alus 4590 %} 4591 4592 // Integer ALU reg operation using big decoder 4593 pipe_class ialu_reg_fat(rRegI dst) %{ 4594 single_instruction; 4595 dst : S4(write); 4596 dst : S3(read); 4597 D0 : S0; // big decoder only 4598 ALU : S3; // any alu 4599 %} 4600 4601 // Long ALU reg operation using big decoder 4602 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4603 instruction_count(2); 4604 dst : S4(write); 4605 dst : S3(read); 4606 D0 : S0(2); // big decoder only; twice 4607 ALU : S3(2); // any 2 alus 4608 %} 4609 4610 // Integer ALU reg-reg operation 4611 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4612 single_instruction; 4613 dst : S4(write); 4614 src : S3(read); 4615 DECODE : S0; // any decoder 4616 ALU : S3; // any alu 4617 %} 4618 4619 // Long ALU reg-reg operation 4620 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4621 instruction_count(2); 4622 dst : S4(write); 4623 src : S3(read); 4624 DECODE : S0(2); // any 2 decoders 4625 ALU : S3(2); // both alus 4626 %} 4627 4628 // Integer ALU reg-reg operation 4629 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4630 single_instruction; 4631 dst : S4(write); 4632 src : S3(read); 4633 D0 : S0; // big decoder only 4634 ALU : S3; // any alu 4635 %} 4636 4637 // Long ALU reg-reg operation 4638 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4639 instruction_count(2); 4640 dst : S4(write); 4641 src : S3(read); 4642 D0 : S0(2); // big decoder only; twice 4643 ALU : S3(2); // both alus 4644 %} 4645 4646 // Integer ALU reg-mem operation 4647 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4648 single_instruction; 4649 dst : S5(write); 4650 mem : S3(read); 4651 D0 : S0; // big decoder only 4652 ALU : S4; // any alu 4653 MEM : S3; // any mem 4654 %} 4655 4656 // Long ALU reg-mem operation 4657 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4658 instruction_count(2); 4659 dst : S5(write); 4660 mem : S3(read); 4661 D0 : S0(2); // big decoder only; twice 4662 ALU : S4(2); // any 2 alus 4663 MEM : S3(2); // both mems 4664 %} 4665 4666 // Integer mem operation (prefetch) 4667 pipe_class ialu_mem(memory mem) 4668 %{ 4669 single_instruction; 4670 mem : S3(read); 4671 D0 : S0; // big decoder only 4672 MEM : S3; // any mem 4673 %} 4674 4675 // Integer Store to Memory 4676 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4677 single_instruction; 4678 mem : S3(read); 4679 src : S5(read); 4680 D0 : S0; // big decoder only 4681 ALU : S4; // any alu 4682 MEM : S3; 4683 %} 4684 4685 // Long Store to Memory 4686 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4687 instruction_count(2); 4688 mem : S3(read); 4689 src : S5(read); 4690 D0 : S0(2); // big decoder only; twice 4691 ALU : S4(2); // any 2 alus 4692 MEM : S3(2); // Both mems 4693 %} 4694 4695 // Integer Store to Memory 4696 pipe_class ialu_mem_imm(memory mem) %{ 4697 single_instruction; 4698 mem : S3(read); 4699 D0 : S0; // big decoder only 4700 ALU : S4; // any alu 4701 MEM : S3; 4702 %} 4703 4704 // Integer ALU0 reg-reg operation 4705 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4706 single_instruction; 4707 dst : S4(write); 4708 src : S3(read); 4709 D0 : S0; // Big decoder only 4710 ALU0 : S3; // only alu0 4711 %} 4712 4713 // Integer ALU0 reg-mem operation 4714 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4715 single_instruction; 4716 dst : S5(write); 4717 mem : S3(read); 4718 D0 : S0; // big decoder only 4719 ALU0 : S4; // ALU0 only 4720 MEM : S3; // any mem 4721 %} 4722 4723 // Integer ALU reg-reg operation 4724 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4725 single_instruction; 4726 cr : S4(write); 4727 src1 : S3(read); 4728 src2 : S3(read); 4729 DECODE : S0; // any decoder 4730 ALU : S3; // any alu 4731 %} 4732 4733 // Integer ALU reg-imm operation 4734 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4735 single_instruction; 4736 cr : S4(write); 4737 src1 : S3(read); 4738 DECODE : S0; // any decoder 4739 ALU : S3; // any alu 4740 %} 4741 4742 // Integer ALU reg-mem operation 4743 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4744 single_instruction; 4745 cr : S4(write); 4746 src1 : S3(read); 4747 src2 : S3(read); 4748 D0 : S0; // big decoder only 4749 ALU : S4; // any alu 4750 MEM : S3; 4751 %} 4752 4753 // Conditional move reg-reg 4754 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4755 instruction_count(4); 4756 y : S4(read); 4757 q : S3(read); 4758 p : S3(read); 4759 DECODE : S0(4); // any decoder 4760 %} 4761 4762 // Conditional move reg-reg 4763 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4764 single_instruction; 4765 dst : S4(write); 4766 src : S3(read); 4767 cr : S3(read); 4768 DECODE : S0; // any decoder 4769 %} 4770 4771 // Conditional move reg-mem 4772 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4773 single_instruction; 4774 dst : S4(write); 4775 src : S3(read); 4776 cr : S3(read); 4777 DECODE : S0; // any decoder 4778 MEM : S3; 4779 %} 4780 4781 // Conditional move reg-reg long 4782 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4783 single_instruction; 4784 dst : S4(write); 4785 src : S3(read); 4786 cr : S3(read); 4787 DECODE : S0(2); // any 2 decoders 4788 %} 4789 4790 // Conditional move double reg-reg 4791 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4792 single_instruction; 4793 dst : S4(write); 4794 src : S3(read); 4795 cr : S3(read); 4796 DECODE : S0; // any decoder 4797 %} 4798 4799 // Float reg-reg operation 4800 pipe_class fpu_reg(regDPR 
dst) %{ 4801 instruction_count(2); 4802 dst : S3(read); 4803 DECODE : S0(2); // any 2 decoders 4804 FPU : S3; 4805 %} 4806 4807 // Float reg-reg operation 4808 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4809 instruction_count(2); 4810 dst : S4(write); 4811 src : S3(read); 4812 DECODE : S0(2); // any 2 decoders 4813 FPU : S3; 4814 %} 4815 4816 // Float reg-reg operation 4817 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4818 instruction_count(3); 4819 dst : S4(write); 4820 src1 : S3(read); 4821 src2 : S3(read); 4822 DECODE : S0(3); // any 3 decoders 4823 FPU : S3(2); 4824 %} 4825 4826 // Float reg-reg operation 4827 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4828 instruction_count(4); 4829 dst : S4(write); 4830 src1 : S3(read); 4831 src2 : S3(read); 4832 src3 : S3(read); 4833 DECODE : S0(4); // any 3 decoders 4834 FPU : S3(2); 4835 %} 4836 4837 // Float reg-reg operation 4838 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 4839 instruction_count(4); 4840 dst : S4(write); 4841 src1 : S3(read); 4842 src2 : S3(read); 4843 src3 : S3(read); 4844 DECODE : S1(3); // any 3 decoders 4845 D0 : S0; // Big decoder only 4846 FPU : S3(2); 4847 MEM : S3; 4848 %} 4849 4850 // Float reg-mem operation 4851 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 4852 instruction_count(2); 4853 dst : S5(write); 4854 mem : S3(read); 4855 D0 : S0; // big decoder only 4856 DECODE : S1; // any decoder for FPU POP 4857 FPU : S4; 4858 MEM : S3; // any mem 4859 %} 4860 4861 // Float reg-mem operation 4862 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 4863 instruction_count(3); 4864 dst : S5(write); 4865 src1 : S3(read); 4866 mem : S3(read); 4867 D0 : S0; // big decoder only 4868 DECODE : S1(2); // any decoder for FPU POP 4869 FPU : S4; 4870 MEM : S3; // any mem 4871 %} 4872 4873 // Float mem-reg operation 4874 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 4875 
instruction_count(2);        // (continuation: the pipe_class header for this
                             //  FPU mem<-reg class is just above this chunk)
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;             // any decoder for FPU PUSH
    D0     : S1;             // big decoder only
    FPU    : S4;
    MEM    : S3;             // any mem
%}

pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);          // any decoder for FPU PUSH
    D0     : S1;             // big decoder only
    FPU    : S4;
    MEM    : S3;             // any mem
%}

pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;             // any decoder for FPU PUSH
    D0     : S0(2);          // big decoder only
    FPU    : S4;
    MEM    : S3(2);          // any mem
%}

pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);          // big decoder only
    MEM    : S3(2);          // any mem
%}

pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);          // big decoder only
    FPU    : S4;
    MEM    : S3(3);          // any mem
%}

pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;             // any decoder for FPU PUSH
    D0     : S0(2);          // big decoder only
    FPU    : S4;
    MEM    : S3(2);          // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;             // big decoder only for the load
    DECODE : S1;             // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;             // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;             // big decoder only for the load
    DECODE : S1(2);          // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;             // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR   : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr   : S1(read);
    BR   : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match       -- States which machine-independent subtree may be replaced
//                by this instruction.
// ins_cost    -- The estimated cost of this instruction is used by instruction
//                selection to identify a minimum cost tree of machine
//                instructions that matches a tree of machine-independent
//                instructions.
// format      -- A string providing the disassembly for this instruction.
//                The value of an instruction's operand may be inserted
//                by referring to it with a '$' prefix.
// opcode      -- Three instruction opcodes may be provided. These are referred
//                to within an encode class as $primary, $secondary, and $tertiary
//                respectively. The primary opcode is commonly used to
//                indicate the type of machine instruction, while secondary
//                and tertiary are often used for prefix options or addressing
//                modes.
// ins_encode  -- A list of encode classes with parameters. The encode class
//                name must have been defined in an 'enc_class' specification
//                in the encode section of the architecture description.

// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// They exist only so the matcher has something to select; the encoder must
// never be reached at code-emission time.
// Load Float
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}



// Load Double
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse a long: byte-swap each 32-bit half, then exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);   // logical shift: result is zero-extended
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);   // arithmetic shift: result is sign-extended
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when LZCNT is unavailable: BSR yields the index of the highest
// set bit (undefined result but ZF set when src == 0), so 31 - index gives
// the leading-zero count, with src == 0 patched to yield 32.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);             // src == 0: pretend bit index -1 => count 32
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1); // dst = 31 - bit_index
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    // LZCNT sets CF when its source is all zeros; only then must the low
    // word be counted as well.
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);      // bit index is relative to the full long
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);              // whole long is zero => count 64
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1); // dst = 63 - bit_index
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when TZCNT is unavailable: BSF finds the lowest set bit
// (ZF set when src == 0, in which case the result is patched to 32).
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    // TZCNT sets CF when its source is all zeros; only then must the high
    // word be counted as well.
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);      // whole long is zero => 32 + 32 below
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    // popcount(long) = popcount(lo) + popcount(hi)
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 mask bits matter: the load already zeroed bits 8..31.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    // A 0xFF mask folds into a single zero-extending byte load.
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 mask bits matter: the load already zeroed bits 16..31.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads; NOT atomic (guarded by the predicate above).
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via a 64-bit XMM move, spilled to the stack.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load via XMM, split into an int register pair.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);   // shift the high half down to bits 0..31
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 stack)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    // NOTE(review): same movdbl() call as loadD above although the format
    // says MOVLPD — presumably movdbl() selects the encoding based on
    // UseXmmLoadAndClearUpper; confirm against MacroAssembler::movdbl.
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 stack)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);               /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);               /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
"FSTP $dst" %} 6195 opcode(0xDD); /* DD /0, FLD m64real */ 6196 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6197 Pop_Reg_DPR(dst) ); 6198 ins_pipe( fpu_reg_mem ); 6199 %} 6200 6201 // Prefetch instructions for allocation. 6202 // Must be safe to execute with invalid address (cannot fault). 6203 6204 instruct prefetchAlloc0( memory mem ) %{ 6205 predicate(UseSSE==0 && AllocatePrefetchInstr!=3); 6206 match(PrefetchAllocation mem); 6207 ins_cost(0); 6208 size(0); 6209 format %{ "Prefetch allocation (non-SSE is empty encoding)" %} 6210 ins_encode(); 6211 ins_pipe(empty); 6212 %} 6213 6214 instruct prefetchAlloc( memory mem ) %{ 6215 predicate(AllocatePrefetchInstr==3); 6216 match( PrefetchAllocation mem ); 6217 ins_cost(100); 6218 6219 format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} 6220 ins_encode %{ 6221 __ prefetchw($mem$$Address); 6222 %} 6223 ins_pipe(ialu_mem); 6224 %} 6225 6226 instruct prefetchAllocNTA( memory mem ) %{ 6227 predicate(UseSSE>=1 && AllocatePrefetchInstr==0); 6228 match(PrefetchAllocation mem); 6229 ins_cost(100); 6230 6231 format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} 6232 ins_encode %{ 6233 __ prefetchnta($mem$$Address); 6234 %} 6235 ins_pipe(ialu_mem); 6236 %} 6237 6238 instruct prefetchAllocT0( memory mem ) %{ 6239 predicate(UseSSE>=1 && AllocatePrefetchInstr==1); 6240 match(PrefetchAllocation mem); 6241 ins_cost(100); 6242 6243 format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %} 6244 ins_encode %{ 6245 __ prefetcht0($mem$$Address); 6246 %} 6247 ins_pipe(ialu_mem); 6248 %} 6249 6250 instruct prefetchAllocT2( memory mem ) %{ 6251 predicate(UseSSE>=1 && AllocatePrefetchInstr==2); 6252 match(PrefetchAllocation mem); 6253 ins_cost(100); 6254 6255 format %{ "PREFETCHT2 $mem\t! 
Prefetch allocation into L2 cache for write" %} 6256 ins_encode %{ 6257 __ prefetcht2($mem$$Address); 6258 %} 6259 ins_pipe(ialu_mem); 6260 %} 6261 6262 //----------Store Instructions------------------------------------------------- 6263 6264 // Store Byte 6265 instruct storeB(memory mem, xRegI src) %{ 6266 match(Set mem (StoreB mem src)); 6267 6268 ins_cost(125); 6269 format %{ "MOV8 $mem,$src" %} 6270 opcode(0x88); 6271 ins_encode( OpcP, RegMem( src, mem ) ); 6272 ins_pipe( ialu_mem_reg ); 6273 %} 6274 6275 // Store Char/Short 6276 instruct storeC(memory mem, rRegI src) %{ 6277 match(Set mem (StoreC mem src)); 6278 6279 ins_cost(125); 6280 format %{ "MOV16 $mem,$src" %} 6281 opcode(0x89, 0x66); 6282 ins_encode( OpcS, OpcP, RegMem( src, mem ) ); 6283 ins_pipe( ialu_mem_reg ); 6284 %} 6285 6286 // Store Integer 6287 instruct storeI(memory mem, rRegI src) %{ 6288 match(Set mem (StoreI mem src)); 6289 6290 ins_cost(125); 6291 format %{ "MOV $mem,$src" %} 6292 opcode(0x89); 6293 ins_encode( OpcP, RegMem( src, mem ) ); 6294 ins_pipe( ialu_mem_reg ); 6295 %} 6296 6297 // Store Long 6298 instruct storeL(long_memory mem, eRegL src) %{ 6299 predicate(!((StoreLNode*)n)->require_atomic_access()); 6300 match(Set mem (StoreL mem src)); 6301 6302 ins_cost(200); 6303 format %{ "MOV $mem,$src.lo\n\t" 6304 "MOV $mem+4,$src.hi" %} 6305 opcode(0x89, 0x89); 6306 ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) ); 6307 ins_pipe( ialu_mem_long_reg ); 6308 %} 6309 6310 // Store Long to Integer 6311 instruct storeL2I(memory mem, eRegL src) %{ 6312 match(Set mem (StoreI mem (ConvL2I src))); 6313 6314 format %{ "MOV $mem,$src.lo\t# long -> int" %} 6315 ins_encode %{ 6316 __ movl($mem$$Address, $src$$Register); 6317 %} 6318 ins_pipe(ialu_mem_reg); 6319 %} 6320 6321 // Volatile Store Long. Must be atomic, so move it into 6322 // the FP TOS and then do a 64-bit FIST. 
// Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister,
$tmp2$$XMMRegister); 6369 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6370 %} 6371 ins_pipe( pipe_slow ); 6372 %} 6373 6374 // Store Pointer; for storing unknown oops and raw pointers 6375 instruct storeP(memory mem, anyRegP src) %{ 6376 match(Set mem (StoreP mem src)); 6377 6378 ins_cost(125); 6379 format %{ "MOV $mem,$src" %} 6380 opcode(0x89); 6381 ins_encode( OpcP, RegMem( src, mem ) ); 6382 ins_pipe( ialu_mem_reg ); 6383 %} 6384 6385 // Store Integer Immediate 6386 instruct storeImmI(memory mem, immI src) %{ 6387 match(Set mem (StoreI mem src)); 6388 6389 ins_cost(150); 6390 format %{ "MOV $mem,$src" %} 6391 opcode(0xC7); /* C7 /0 */ 6392 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6393 ins_pipe( ialu_mem_imm ); 6394 %} 6395 6396 // Store Short/Char Immediate 6397 instruct storeImmI16(memory mem, immI16 src) %{ 6398 predicate(UseStoreImmI16); 6399 match(Set mem (StoreC mem src)); 6400 6401 ins_cost(150); 6402 format %{ "MOV16 $mem,$src" %} 6403 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ 6404 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src )); 6405 ins_pipe( ialu_mem_imm ); 6406 %} 6407 6408 // Store Pointer Immediate; null pointers or constant oops that do not 6409 // need card-mark barriers. 
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// Store Float
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
match(Set dst (CastP2X src)); 6680 ins_cost(50); 6681 format %{ "MOV $dst, $src\t# CastP2X" %} 6682 ins_encode( enc_Copy( dst, src) ); 6683 ins_pipe( ialu_reg_reg ); 6684 %} 6685 6686 //----------Conditional Move--------------------------------------------------- 6687 // Conditional move 6688 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ 6689 predicate(!VM_Version::supports_cmov() ); 6690 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6691 ins_cost(200); 6692 format %{ "J$cop,us skip\t# signed cmove\n\t" 6693 "MOV $dst,$src\n" 6694 "skip:" %} 6695 ins_encode %{ 6696 Label Lskip; 6697 // Invert sense of branch from sense of CMOV 6698 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6699 __ movl($dst$$Register, $src$$Register); 6700 __ bind(Lskip); 6701 %} 6702 ins_pipe( pipe_cmov_reg ); 6703 %} 6704 6705 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ 6706 predicate(!VM_Version::supports_cmov() ); 6707 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6708 ins_cost(200); 6709 format %{ "J$cop,us skip\t# unsigned cmove\n\t" 6710 "MOV $dst,$src\n" 6711 "skip:" %} 6712 ins_encode %{ 6713 Label Lskip; 6714 // Invert sense of branch from sense of CMOV 6715 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6716 __ movl($dst$$Register, $src$$Register); 6717 __ bind(Lskip); 6718 %} 6719 ins_pipe( pipe_cmov_reg ); 6720 %} 6721 6722 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ 6723 predicate(VM_Version::supports_cmov() ); 6724 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6725 ins_cost(200); 6726 format %{ "CMOV$cop $dst,$src" %} 6727 opcode(0x0F,0x40); 6728 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6729 ins_pipe( pipe_cmov_reg ); 6730 %} 6731 6732 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ 6733 predicate(VM_Version::supports_cmov() ); 6734 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6735 ins_cost(200); 6736 
format %{ "CMOV$cop $dst,$src" %} 6737 opcode(0x0F,0x40); 6738 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6739 ins_pipe( pipe_cmov_reg ); 6740 %} 6741 6742 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ 6743 predicate(VM_Version::supports_cmov() ); 6744 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6745 ins_cost(200); 6746 expand %{ 6747 cmovI_regU(cop, cr, dst, src); 6748 %} 6749 %} 6750 6751 // Conditional move 6752 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ 6753 predicate(VM_Version::supports_cmov() ); 6754 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6755 ins_cost(250); 6756 format %{ "CMOV$cop $dst,$src" %} 6757 opcode(0x0F,0x40); 6758 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6759 ins_pipe( pipe_cmov_mem ); 6760 %} 6761 6762 // Conditional move 6763 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ 6764 predicate(VM_Version::supports_cmov() ); 6765 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6766 ins_cost(250); 6767 format %{ "CMOV$cop $dst,$src" %} 6768 opcode(0x0F,0x40); 6769 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6770 ins_pipe( pipe_cmov_mem ); 6771 %} 6772 6773 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ 6774 predicate(VM_Version::supports_cmov() ); 6775 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6776 ins_cost(250); 6777 expand %{ 6778 cmovI_memU(cop, cr, dst, src); 6779 %} 6780 %} 6781 6782 // Conditional move 6783 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6784 predicate(VM_Version::supports_cmov() ); 6785 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6786 ins_cost(200); 6787 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6788 opcode(0x0F,0x40); 6789 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6790 ins_pipe( pipe_cmov_reg ); 6791 %} 6792 6793 // Conditional move (non-P6 version) 6794 // Note: a CMoveP 
// is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6933 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6934 __ bind(skip); 6935 %} 6936 ins_pipe( pipe_slow ); 6937 %} 6938 6939 // unsigned version 6940 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 6941 predicate (UseSSE>=1); 6942 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6943 ins_cost(200); 6944 format %{ "Jn$cop skip\n\t" 6945 "MOVSS $dst,$src\t# float\n" 6946 "skip:" %} 6947 ins_encode %{ 6948 Label skip; 6949 // Invert sense of branch from sense of CMOV 6950 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6951 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6952 __ bind(skip); 6953 %} 6954 ins_pipe( pipe_slow ); 6955 %} 6956 6957 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 6958 predicate (UseSSE>=1); 6959 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6960 ins_cost(200); 6961 expand %{ 6962 fcmovF_regU(cop, cr, dst, src); 6963 %} 6964 %} 6965 6966 // unsigned version 6967 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 6968 predicate (UseSSE>=2); 6969 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6970 ins_cost(200); 6971 format %{ "Jn$cop skip\n\t" 6972 "MOVSD $dst,$src\t# float\n" 6973 "skip:" %} 6974 ins_encode %{ 6975 Label skip; 6976 // Invert sense of branch from sense of CMOV 6977 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6978 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6979 __ bind(skip); 6980 %} 6981 ins_pipe( pipe_slow ); 6982 %} 6983 6984 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 6985 predicate (UseSSE>=2); 6986 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6987 ins_cost(200); 6988 expand %{ 6989 fcmovD_regU(cop, cr, dst, src); 6990 %} 6991 %} 6992 6993 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ 6994 predicate(VM_Version::supports_cmov() ); 6995 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6996 ins_cost(200); 6997 format 
%{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Conditional move of a long under unsigned-compare flags: two CMOVcc
// instructions, one per 32-bit half of the long pair.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as cmovL_regU but for the carry-flag-only comparison operand;
// delegates to cmovL_regU via expand.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
// Register-register add; ADD sets the condition codes, hence KILL cr.
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add immediate to register. OpcSErm/Con8or32 pick the short (imm8,
// sign-extended) or long (imm32) encoding depending on the constant.
instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of constant +1 strength-reduced to the one-byte INC (0x40 + reg),
// only when UseIncDec allows the INC/DEC forms.
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand integer add via LEA: computes src0+src1 into a distinct
// dst without clobbering the flags (no eFlagsReg effect).
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer variant of the LEA add above.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of constant -1 strength-reduced to the one-byte DEC (0x48 + reg).
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer += int, register form.
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer += immediate.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Register += memory operand (folds the load into the ADD).
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Memory += register: matches the read-modify-write
// StoreI(dst, AddI(LoadI(dst), src)) shape as a single ADD [mem],reg.
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Memory += 1 as INC [mem] (FF /0).
instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Memory += -1 as DEC [mem] (FF /1).
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// The Cast* nodes below only constrain the type of their input for the
// optimizer; they pin the value in the same register and emit no code
// (empty encoding, zero size/cost).
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castLL( eRegL dst ) %{
  match(Set dst (CastLL dst));
  format %{ "#castLL of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// XMM float variant (SSE code paths).
instruct castFF( regF dst ) %{
  predicate(UseSSE >= 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castDD( regD dst ) %{
  predicate(UseSSE >= 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// x87 FPU-register variants, used when SSE is not available.
instruct castFF_PR( regFPR dst ) %{
  predicate(UseSSE < 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castDD_PR( regDPR dst ) %{
  predicate(UseSSE < 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  // 0x0F 0xB1 is the CMPXCHG r/m32,r32 encoding; lock_prefix makes it atomic.
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS via LOCK CMPXCHG8B; requires EDX:EAX (oldval) and ECX:EBX
// (newval) per the instruction's fixed register contract, hence the
// register-specific operand classes. res is set to 0/1 from ZF.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS; CMPXCHG implicitly compares against EAX (oldval).
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte CAS (CMPXCHGB).
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Short CAS (CMPXCHGW).
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange variants: like CAS but the result is the value
// found in memory, which CMPXCHG leaves in oldval's register(s).
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddB whose result is unused: a plain LOCK ADDB is cheaper than XADD.
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
7420 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{ 7421 match(Set newval (GetAndAddB mem newval)); 7422 effect(KILL cr); 7423 format %{ "XADDB [$mem],$newval" %} 7424 ins_encode %{ 7425 __ lock(); 7426 __ xaddb($mem$$Address, $newval$$Register); 7427 %} 7428 ins_pipe( pipe_cmpxchg ); 7429 %} 7430 7431 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7432 predicate(n->as_LoadStore()->result_not_used()); 7433 match(Set dummy (GetAndAddS mem add)); 7434 effect(KILL cr); 7435 format %{ "ADDS [$mem],$add" %} 7436 ins_encode %{ 7437 __ lock(); 7438 __ addw($mem$$Address, $add$$constant); 7439 %} 7440 ins_pipe( pipe_cmpxchg ); 7441 %} 7442 7443 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{ 7444 match(Set newval (GetAndAddS mem newval)); 7445 effect(KILL cr); 7446 format %{ "XADDS [$mem],$newval" %} 7447 ins_encode %{ 7448 __ lock(); 7449 __ xaddw($mem$$Address, $newval$$Register); 7450 %} 7451 ins_pipe( pipe_cmpxchg ); 7452 %} 7453 7454 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7455 predicate(n->as_LoadStore()->result_not_used()); 7456 match(Set dummy (GetAndAddI mem add)); 7457 effect(KILL cr); 7458 format %{ "ADDL [$mem],$add" %} 7459 ins_encode %{ 7460 __ lock(); 7461 __ addl($mem$$Address, $add$$constant); 7462 %} 7463 ins_pipe( pipe_cmpxchg ); 7464 %} 7465 7466 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7467 match(Set newval (GetAndAddI mem newval)); 7468 effect(KILL cr); 7469 format %{ "XADDL [$mem],$newval" %} 7470 ins_encode %{ 7471 __ lock(); 7472 __ xaddl($mem$$Address, $newval$$Register); 7473 %} 7474 ins_pipe( pipe_cmpxchg ); 7475 %} 7476 7477 // Important to match to xRegI: only 8-bit regs. 
7478 instruct xchgB( memory mem, xRegI newval) %{ 7479 match(Set newval (GetAndSetB mem newval)); 7480 format %{ "XCHGB $newval,[$mem]" %} 7481 ins_encode %{ 7482 __ xchgb($newval$$Register, $mem$$Address); 7483 %} 7484 ins_pipe( pipe_cmpxchg ); 7485 %} 7486 7487 instruct xchgS( memory mem, rRegI newval) %{ 7488 match(Set newval (GetAndSetS mem newval)); 7489 format %{ "XCHGW $newval,[$mem]" %} 7490 ins_encode %{ 7491 __ xchgw($newval$$Register, $mem$$Address); 7492 %} 7493 ins_pipe( pipe_cmpxchg ); 7494 %} 7495 7496 instruct xchgI( memory mem, rRegI newval) %{ 7497 match(Set newval (GetAndSetI mem newval)); 7498 format %{ "XCHGL $newval,[$mem]" %} 7499 ins_encode %{ 7500 __ xchgl($newval$$Register, $mem$$Address); 7501 %} 7502 ins_pipe( pipe_cmpxchg ); 7503 %} 7504 7505 instruct xchgP( memory mem, pRegP newval) %{ 7506 match(Set newval (GetAndSetP mem newval)); 7507 format %{ "XCHGL $newval,[$mem]" %} 7508 ins_encode %{ 7509 __ xchgl($newval$$Register, $mem$$Address); 7510 %} 7511 ins_pipe( pipe_cmpxchg ); 7512 %} 7513 7514 //----------Subtraction Instructions------------------------------------------- 7515 7516 // Integer Subtraction Instructions 7517 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7518 match(Set dst (SubI dst src)); 7519 effect(KILL cr); 7520 7521 size(2); 7522 format %{ "SUB $dst,$src" %} 7523 opcode(0x2B); 7524 ins_encode( OpcP, RegReg( dst, src) ); 7525 ins_pipe( ialu_reg_reg ); 7526 %} 7527 7528 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7529 match(Set dst (SubI dst src)); 7530 effect(KILL cr); 7531 7532 format %{ "SUB $dst,$src" %} 7533 opcode(0x81,0x05); /* Opcode 81 /5 */ 7534 // ins_encode( RegImm( dst, src) ); 7535 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7536 ins_pipe( ialu_reg ); 7537 %} 7538 7539 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7540 match(Set dst (SubI dst (LoadI src))); 7541 effect(KILL cr); 7542 7543 ins_cost(125); 7544 format %{ "SUB $dst,$src" %} 7545 
opcode(0x2B); 7546 ins_encode( OpcP, RegMem( dst, src) ); 7547 ins_pipe( ialu_reg_mem ); 7548 %} 7549 7550 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7551 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7552 effect(KILL cr); 7553 7554 ins_cost(150); 7555 format %{ "SUB $dst,$src" %} 7556 opcode(0x29); /* Opcode 29 /r */ 7557 ins_encode( OpcP, RegMem( src, dst ) ); 7558 ins_pipe( ialu_mem_reg ); 7559 %} 7560 7561 // Subtract from a pointer 7562 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{ 7563 match(Set dst (AddP dst (SubI zero src))); 7564 effect(KILL cr); 7565 7566 size(2); 7567 format %{ "SUB $dst,$src" %} 7568 opcode(0x2B); 7569 ins_encode( OpcP, RegReg( dst, src) ); 7570 ins_pipe( ialu_reg_reg ); 7571 %} 7572 7573 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{ 7574 match(Set dst (SubI zero dst)); 7575 effect(KILL cr); 7576 7577 size(2); 7578 format %{ "NEG $dst" %} 7579 opcode(0xF7,0x03); // Opcode F7 /3 7580 ins_encode( OpcP, RegOpc( dst ) ); 7581 ins_pipe( ialu_reg ); 7582 %} 7583 7584 //----------Multiplication/Division Instructions------------------------------- 7585 // Integer Multiplication Instructions 7586 // Multiply Register 7587 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7588 match(Set dst (MulI dst src)); 7589 effect(KILL cr); 7590 7591 size(3); 7592 ins_cost(300); 7593 format %{ "IMUL $dst,$src" %} 7594 opcode(0xAF, 0x0F); 7595 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7596 ins_pipe( ialu_reg_reg_alu0 ); 7597 %} 7598 7599 // Multiply 32-bit Immediate 7600 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7601 match(Set dst (MulI src imm)); 7602 effect(KILL cr); 7603 7604 ins_cost(300); 7605 format %{ "IMUL $dst,$src,$imm" %} 7606 opcode(0x69); /* 69 /r id */ 7607 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7608 ins_pipe( ialu_reg_reg_alu0 ); 7609 %} 7610 7611 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, 
eFlagsReg cr) %{ 7612 match(Set dst src); 7613 effect(KILL cr); 7614 7615 // Note that this is artificially increased to make it more expensive than loadConL 7616 ins_cost(250); 7617 format %{ "MOV EAX,$src\t// low word only" %} 7618 opcode(0xB8); 7619 ins_encode( LdImmL_Lo(dst, src) ); 7620 ins_pipe( ialu_reg_fat ); 7621 %} 7622 7623 // Multiply by 32-bit Immediate, taking the shifted high order results 7624 // (special case for shift by 32) 7625 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7626 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7627 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7628 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7629 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7630 effect(USE src1, KILL cr); 7631 7632 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7633 ins_cost(0*100 + 1*400 - 150); 7634 format %{ "IMUL EDX:EAX,$src1" %} 7635 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7636 ins_pipe( pipe_slow ); 7637 %} 7638 7639 // Multiply by 32-bit Immediate, taking the shifted high order results 7640 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7641 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7642 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7643 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7644 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7645 effect(USE src1, KILL cr); 7646 7647 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7648 ins_cost(1*100 + 1*400 - 150); 7649 format %{ "IMUL EDX:EAX,$src1\n\t" 7650 "SAR EDX,$cnt-32" %} 
7651 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7652 ins_pipe( pipe_slow ); 7653 %} 7654 7655 // Multiply Memory 32-bit Immediate 7656 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7657 match(Set dst (MulI (LoadI src) imm)); 7658 effect(KILL cr); 7659 7660 ins_cost(300); 7661 format %{ "IMUL $dst,$src,$imm" %} 7662 opcode(0x69); /* 69 /r id */ 7663 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 7664 ins_pipe( ialu_reg_mem_alu0 ); 7665 %} 7666 7667 // Multiply Memory 7668 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7669 match(Set dst (MulI dst (LoadI src))); 7670 effect(KILL cr); 7671 7672 ins_cost(350); 7673 format %{ "IMUL $dst,$src" %} 7674 opcode(0xAF, 0x0F); 7675 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 7676 ins_pipe( ialu_reg_mem_alu0 ); 7677 %} 7678 7679 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr) 7680 %{ 7681 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3))); 7682 effect(KILL cr, KILL src2); 7683 7684 expand %{ mulI_eReg(dst, src1, cr); 7685 mulI_eReg(src2, src3, cr); 7686 addI_eReg(dst, src2, cr); %} 7687 %} 7688 7689 // Multiply Register Int to Long 7690 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7691 // Basic Idea: long = (long)int * (long)int 7692 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7693 effect(DEF dst, USE src, USE src1, KILL flags); 7694 7695 ins_cost(300); 7696 format %{ "IMUL $dst,$src1" %} 7697 7698 ins_encode( long_int_multiply( dst, src1 ) ); 7699 ins_pipe( ialu_reg_reg_alu0 ); 7700 %} 7701 7702 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7703 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7704 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7705 effect(KILL flags); 7706 7707 ins_cost(300); 7708 format %{ "MUL $dst,$src1" %} 7709 7710 ins_encode( 
long_uint_multiply(dst, src1) ); 7711 ins_pipe( ialu_reg_reg_alu0 ); 7712 %} 7713 7714 // Multiply Register Long 7715 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7716 match(Set dst (MulL dst src)); 7717 effect(KILL cr, TEMP tmp); 7718 ins_cost(4*100+3*400); 7719 // Basic idea: lo(result) = lo(x_lo * y_lo) 7720 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7721 format %{ "MOV $tmp,$src.lo\n\t" 7722 "IMUL $tmp,EDX\n\t" 7723 "MOV EDX,$src.hi\n\t" 7724 "IMUL EDX,EAX\n\t" 7725 "ADD $tmp,EDX\n\t" 7726 "MUL EDX:EAX,$src.lo\n\t" 7727 "ADD EDX,$tmp" %} 7728 ins_encode( long_multiply( dst, src, tmp ) ); 7729 ins_pipe( pipe_slow ); 7730 %} 7731 7732 // Multiply Register Long where the left operand's high 32 bits are zero 7733 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7734 predicate(is_operand_hi32_zero(n->in(1))); 7735 match(Set dst (MulL dst src)); 7736 effect(KILL cr, TEMP tmp); 7737 ins_cost(2*100+2*400); 7738 // Basic idea: lo(result) = lo(x_lo * y_lo) 7739 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7740 format %{ "MOV $tmp,$src.hi\n\t" 7741 "IMUL $tmp,EAX\n\t" 7742 "MUL EDX:EAX,$src.lo\n\t" 7743 "ADD EDX,$tmp" %} 7744 ins_encode %{ 7745 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7746 __ imull($tmp$$Register, rax); 7747 __ mull($src$$Register); 7748 __ addl(rdx, $tmp$$Register); 7749 %} 7750 ins_pipe( pipe_slow ); 7751 %} 7752 7753 // Multiply Register Long where the right operand's high 32 bits are zero 7754 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7755 predicate(is_operand_hi32_zero(n->in(2))); 7756 match(Set dst (MulL dst src)); 7757 effect(KILL cr, TEMP tmp); 7758 ins_cost(2*100+2*400); 7759 // Basic idea: lo(result) = lo(x_lo * y_lo) 7760 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7761 format %{ "MOV $tmp,$src.lo\n\t" 7762 "IMUL 
$tmp,EDX\n\t" 7763 "MUL EDX:EAX,$src.lo\n\t" 7764 "ADD EDX,$tmp" %} 7765 ins_encode %{ 7766 __ movl($tmp$$Register, $src$$Register); 7767 __ imull($tmp$$Register, rdx); 7768 __ mull($src$$Register); 7769 __ addl(rdx, $tmp$$Register); 7770 %} 7771 ins_pipe( pipe_slow ); 7772 %} 7773 7774 // Multiply Register Long where the left and the right operands' high 32 bits are zero 7775 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7776 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7777 match(Set dst (MulL dst src)); 7778 effect(KILL cr); 7779 ins_cost(1*400); 7780 // Basic idea: lo(result) = lo(x_lo * y_lo) 7781 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7782 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7783 ins_encode %{ 7784 __ mull($src$$Register); 7785 %} 7786 ins_pipe( pipe_slow ); 7787 %} 7788 7789 // Multiply Register Long by small constant 7790 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7791 match(Set dst (MulL dst src)); 7792 effect(KILL cr, TEMP tmp); 7793 ins_cost(2*100+2*400); 7794 size(12); 7795 // Basic idea: lo(result) = lo(src * EAX) 7796 // hi(result) = hi(src * EAX) + lo(src * EDX) 7797 format %{ "IMUL $tmp,EDX,$src\n\t" 7798 "MOV EDX,$src\n\t" 7799 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7800 "ADD EDX,$tmp" %} 7801 ins_encode( long_multiply_con( dst, src, tmp ) ); 7802 ins_pipe( pipe_slow ); 7803 %} 7804 7805 // Integer DIV with Register 7806 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7807 match(Set rax (DivI rax div)); 7808 effect(KILL rdx, KILL cr); 7809 size(26); 7810 ins_cost(30*100+10*100); 7811 format %{ "CMP EAX,0x80000000\n\t" 7812 "JNE,s normal\n\t" 7813 "XOR EDX,EDX\n\t" 7814 "CMP ECX,-1\n\t" 7815 "JE,s done\n" 7816 "normal: CDQ\n\t" 7817 "IDIV $div\n\t" 7818 "done:" %} 7819 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7820 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7821 
ins_pipe( ialu_reg_reg_alu0 ); 7822 %} 7823 7824 // Divide Register Long 7825 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7826 match(Set dst (DivL src1 src2)); 7827 effect(CALL); 7828 ins_cost(10000); 7829 format %{ "PUSH $src1.hi\n\t" 7830 "PUSH $src1.lo\n\t" 7831 "PUSH $src2.hi\n\t" 7832 "PUSH $src2.lo\n\t" 7833 "CALL SharedRuntime::ldiv\n\t" 7834 "ADD ESP,16" %} 7835 ins_encode( long_div(src1,src2) ); 7836 ins_pipe( pipe_slow ); 7837 %} 7838 7839 // Integer DIVMOD with Register, both quotient and mod results 7840 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7841 match(DivModI rax div); 7842 effect(KILL cr); 7843 size(26); 7844 ins_cost(30*100+10*100); 7845 format %{ "CMP EAX,0x80000000\n\t" 7846 "JNE,s normal\n\t" 7847 "XOR EDX,EDX\n\t" 7848 "CMP ECX,-1\n\t" 7849 "JE,s done\n" 7850 "normal: CDQ\n\t" 7851 "IDIV $div\n\t" 7852 "done:" %} 7853 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7854 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7855 ins_pipe( pipe_slow ); 7856 %} 7857 7858 // Integer MOD with Register 7859 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7860 match(Set rdx (ModI rax div)); 7861 effect(KILL rax, KILL cr); 7862 7863 size(26); 7864 ins_cost(300); 7865 format %{ "CDQ\n\t" 7866 "IDIV $div" %} 7867 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7868 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7869 ins_pipe( ialu_reg_reg_alu0 ); 7870 %} 7871 7872 // Remainder Register Long 7873 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7874 match(Set dst (ModL src1 src2)); 7875 effect(CALL); 7876 ins_cost(10000); 7877 format %{ "PUSH $src1.hi\n\t" 7878 "PUSH $src1.lo\n\t" 7879 "PUSH $src2.hi\n\t" 7880 "PUSH $src2.lo\n\t" 7881 "CALL SharedRuntime::lrem\n\t" 7882 "ADD ESP,16" %} 7883 ins_encode( long_mod(src1,src2) ); 7884 ins_pipe( pipe_slow ); 7885 %} 7886 7887 // Divide Register Long (no special case since divisor != -1) 7888 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI 
tmp, rRegI tmp2, eFlagsReg cr ) %{ 7889 match(Set dst (DivL dst imm)); 7890 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7891 ins_cost(1000); 7892 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7893 "XOR $tmp2,$tmp2\n\t" 7894 "CMP $tmp,EDX\n\t" 7895 "JA,s fast\n\t" 7896 "MOV $tmp2,EAX\n\t" 7897 "MOV EAX,EDX\n\t" 7898 "MOV EDX,0\n\t" 7899 "JLE,s pos\n\t" 7900 "LNEG EAX : $tmp2\n\t" 7901 "DIV $tmp # unsigned division\n\t" 7902 "XCHG EAX,$tmp2\n\t" 7903 "DIV $tmp\n\t" 7904 "LNEG $tmp2 : EAX\n\t" 7905 "JMP,s done\n" 7906 "pos:\n\t" 7907 "DIV $tmp\n\t" 7908 "XCHG EAX,$tmp2\n" 7909 "fast:\n\t" 7910 "DIV $tmp\n" 7911 "done:\n\t" 7912 "MOV EDX,$tmp2\n\t" 7913 "NEG EDX:EAX # if $imm < 0" %} 7914 ins_encode %{ 7915 int con = (int)$imm$$constant; 7916 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7917 int pcon = (con > 0) ? con : -con; 7918 Label Lfast, Lpos, Ldone; 7919 7920 __ movl($tmp$$Register, pcon); 7921 __ xorl($tmp2$$Register,$tmp2$$Register); 7922 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7923 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7924 7925 __ movl($tmp2$$Register, $dst$$Register); // save 7926 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7927 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7928 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7929 7930 // Negative dividend. 
7931 // convert value to positive to use unsigned division 7932 __ lneg($dst$$Register, $tmp2$$Register); 7933 __ divl($tmp$$Register); 7934 __ xchgl($dst$$Register, $tmp2$$Register); 7935 __ divl($tmp$$Register); 7936 // revert result back to negative 7937 __ lneg($tmp2$$Register, $dst$$Register); 7938 __ jmpb(Ldone); 7939 7940 __ bind(Lpos); 7941 __ divl($tmp$$Register); // Use unsigned division 7942 __ xchgl($dst$$Register, $tmp2$$Register); 7943 // Fallthrow for final divide, tmp2 has 32 bit hi result 7944 7945 __ bind(Lfast); 7946 // fast path: src is positive 7947 __ divl($tmp$$Register); // Use unsigned division 7948 7949 __ bind(Ldone); 7950 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7951 if (con < 0) { 7952 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7953 } 7954 %} 7955 ins_pipe( pipe_slow ); 7956 %} 7957 7958 // Remainder Register Long (remainder fit into 32 bits) 7959 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7960 match(Set dst (ModL dst imm)); 7961 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7962 ins_cost(1000); 7963 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7964 "CMP $tmp,EDX\n\t" 7965 "JA,s fast\n\t" 7966 "MOV $tmp2,EAX\n\t" 7967 "MOV EAX,EDX\n\t" 7968 "MOV EDX,0\n\t" 7969 "JLE,s pos\n\t" 7970 "LNEG EAX : $tmp2\n\t" 7971 "DIV $tmp # unsigned division\n\t" 7972 "MOV EAX,$tmp2\n\t" 7973 "DIV $tmp\n\t" 7974 "NEG EDX\n\t" 7975 "JMP,s done\n" 7976 "pos:\n\t" 7977 "DIV $tmp\n\t" 7978 "MOV EAX,$tmp2\n" 7979 "fast:\n\t" 7980 "DIV $tmp\n" 7981 "done:\n\t" 7982 "MOV EAX,EDX\n\t" 7983 "SAR EDX,31\n\t" %} 7984 ins_encode %{ 7985 int con = (int)$imm$$constant; 7986 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7987 int pcon = (con > 0) ? 
con : -con; 7988 Label Lfast, Lpos, Ldone; 7989 7990 __ movl($tmp$$Register, pcon); 7991 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7992 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7993 7994 __ movl($tmp2$$Register, $dst$$Register); // save 7995 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7996 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7997 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7998 7999 // Negative dividend. 8000 // convert value to positive to use unsigned division 8001 __ lneg($dst$$Register, $tmp2$$Register); 8002 __ divl($tmp$$Register); 8003 __ movl($dst$$Register, $tmp2$$Register); 8004 __ divl($tmp$$Register); 8005 // revert remainder back to negative 8006 __ negl(HIGH_FROM_LOW($dst$$Register)); 8007 __ jmpb(Ldone); 8008 8009 __ bind(Lpos); 8010 __ divl($tmp$$Register); 8011 __ movl($dst$$Register, $tmp2$$Register); 8012 8013 __ bind(Lfast); 8014 // fast path: src is positive 8015 __ divl($tmp$$Register); 8016 8017 __ bind(Ldone); 8018 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 8019 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 8020 8021 %} 8022 ins_pipe( pipe_slow ); 8023 %} 8024 8025 // Integer Shift Instructions 8026 // Shift Left by one 8027 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8028 match(Set dst (LShiftI dst shift)); 8029 effect(KILL cr); 8030 8031 size(2); 8032 format %{ "SHL $dst,$shift" %} 8033 opcode(0xD1, 0x4); /* D1 /4 */ 8034 ins_encode( OpcP, RegOpc( dst ) ); 8035 ins_pipe( ialu_reg ); 8036 %} 8037 8038 // Shift Left by 8-bit immediate 8039 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8040 match(Set dst (LShiftI dst shift)); 8041 effect(KILL cr); 8042 8043 size(3); 8044 format %{ "SHL $dst,$shift" %} 8045 opcode(0xC1, 0x4); /* C1 /4 ib */ 8046 ins_encode( RegOpcImm( dst, shift) ); 8047 ins_pipe( ialu_reg ); 8048 %} 8049 8050 // Shift Left by variable 8051 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8052 match(Set dst (LShiftI dst shift)); 8053 effect(KILL cr); 8054 8055 size(2); 8056 format %{ "SHL $dst,$shift" %} 8057 opcode(0xD3, 0x4); /* D3 /4 */ 8058 ins_encode( OpcP, RegOpc( dst ) ); 8059 ins_pipe( ialu_reg_reg ); 8060 %} 8061 8062 // Arithmetic shift right by one 8063 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8064 match(Set dst (RShiftI dst shift)); 8065 effect(KILL cr); 8066 8067 size(2); 8068 format %{ "SAR $dst,$shift" %} 8069 opcode(0xD1, 0x7); /* D1 /7 */ 8070 ins_encode( OpcP, RegOpc( dst ) ); 8071 ins_pipe( ialu_reg ); 8072 %} 8073 8074 // Arithmetic shift right by one 8075 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{ 8076 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 8077 effect(KILL cr); 8078 format %{ "SAR $dst,$shift" %} 8079 opcode(0xD1, 0x7); /* D1 /7 */ 8080 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 8081 ins_pipe( ialu_mem_imm ); 8082 %} 8083 8084 // Arithmetic Shift Right by 8-bit immediate 8085 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8086 match(Set dst (RShiftI dst shift)); 8087 effect(KILL cr); 8088 8089 size(3); 8090 format %{ "SAR $dst,$shift" %} 8091 opcode(0xC1, 0x7); /* C1 /7 ib */ 8092 ins_encode( RegOpcImm( dst, shift ) ); 8093 ins_pipe( ialu_mem_imm ); 8094 %} 8095 8096 // Arithmetic Shift Right by 8-bit immediate 8097 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 8098 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 8099 effect(KILL cr); 8100 8101 format %{ "SAR $dst,$shift" %} 8102 opcode(0xC1, 0x7); /* C1 /7 ib */ 8103 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 8104 ins_pipe( ialu_mem_imm ); 8105 %} 8106 8107 // Arithmetic Shift Right by variable 8108 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8109 match(Set dst (RShiftI dst shift)); 8110 effect(KILL cr); 8111 8112 size(2); 8113 format %{ "SAR $dst,$shift" %} 8114 
  // Shift count is in CL (operand declared eCXRegI on the instruct header).
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
// Shift count is in CL (eCXRegI operand).
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst
(StoreI dst (AndI (LoadI dst) src))); 8226 effect(KILL cr); 8227 8228 ins_cost(150); 8229 format %{ "AND $dst,$src" %} 8230 opcode(0x21); /* Opcode 21 /r */ 8231 ins_encode( OpcP, RegMem( src, dst ) ); 8232 ins_pipe( ialu_mem_reg ); 8233 %} 8234 8235 // And Memory with Immediate 8236 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8237 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8238 effect(KILL cr); 8239 8240 ins_cost(125); 8241 format %{ "AND $dst,$src" %} 8242 opcode(0x81, 0x4); /* Opcode 81 /4 id */ 8243 // ins_encode( MemImm( dst, src) ); 8244 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8245 ins_pipe( ialu_mem_imm ); 8246 %} 8247 8248 // BMI1 instructions 8249 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ 8250 match(Set dst (AndI (XorI src1 minus_1) src2)); 8251 predicate(UseBMI1Instructions); 8252 effect(KILL cr); 8253 8254 format %{ "ANDNL $dst, $src1, $src2" %} 8255 8256 ins_encode %{ 8257 __ andnl($dst$$Register, $src1$$Register, $src2$$Register); 8258 %} 8259 ins_pipe(ialu_reg); 8260 %} 8261 8262 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ 8263 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); 8264 predicate(UseBMI1Instructions); 8265 effect(KILL cr); 8266 8267 ins_cost(125); 8268 format %{ "ANDNL $dst, $src1, $src2" %} 8269 8270 ins_encode %{ 8271 __ andnl($dst$$Register, $src1$$Register, $src2$$Address); 8272 %} 8273 ins_pipe(ialu_reg_mem); 8274 %} 8275 8276 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{ 8277 match(Set dst (AndI (SubI imm_zero src) src)); 8278 predicate(UseBMI1Instructions); 8279 effect(KILL cr); 8280 8281 format %{ "BLSIL $dst, $src" %} 8282 8283 ins_encode %{ 8284 __ blsil($dst$$Register, $src$$Register); 8285 %} 8286 ins_pipe(ialu_reg); 8287 %} 8288 8289 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{ 8290 
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8291 predicate(UseBMI1Instructions); 8292 effect(KILL cr); 8293 8294 ins_cost(125); 8295 format %{ "BLSIL $dst, $src" %} 8296 8297 ins_encode %{ 8298 __ blsil($dst$$Register, $src$$Address); 8299 %} 8300 ins_pipe(ialu_reg_mem); 8301 %} 8302 8303 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8304 %{ 8305 match(Set dst (XorI (AddI src minus_1) src)); 8306 predicate(UseBMI1Instructions); 8307 effect(KILL cr); 8308 8309 format %{ "BLSMSKL $dst, $src" %} 8310 8311 ins_encode %{ 8312 __ blsmskl($dst$$Register, $src$$Register); 8313 %} 8314 8315 ins_pipe(ialu_reg); 8316 %} 8317 8318 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8319 %{ 8320 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8321 predicate(UseBMI1Instructions); 8322 effect(KILL cr); 8323 8324 ins_cost(125); 8325 format %{ "BLSMSKL $dst, $src" %} 8326 8327 ins_encode %{ 8328 __ blsmskl($dst$$Register, $src$$Address); 8329 %} 8330 8331 ins_pipe(ialu_reg_mem); 8332 %} 8333 8334 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8335 %{ 8336 match(Set dst (AndI (AddI src minus_1) src) ); 8337 predicate(UseBMI1Instructions); 8338 effect(KILL cr); 8339 8340 format %{ "BLSRL $dst, $src" %} 8341 8342 ins_encode %{ 8343 __ blsrl($dst$$Register, $src$$Register); 8344 %} 8345 8346 ins_pipe(ialu_reg); 8347 %} 8348 8349 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8350 %{ 8351 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8352 predicate(UseBMI1Instructions); 8353 effect(KILL cr); 8354 8355 ins_cost(125); 8356 format %{ "BLSRL $dst, $src" %} 8357 8358 ins_encode %{ 8359 __ blsrl($dst$$Register, $src$$Address); 8360 %} 8361 8362 ins_pipe(ialu_reg_mem); 8363 %} 8364 8365 // Or Instructions 8366 // Or Register with Register 8367 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8368 match(Set dst 
(OrI dst src)); 8369 effect(KILL cr); 8370 8371 size(2); 8372 format %{ "OR $dst,$src" %} 8373 opcode(0x0B); 8374 ins_encode( OpcP, RegReg( dst, src) ); 8375 ins_pipe( ialu_reg_reg ); 8376 %} 8377 8378 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8379 match(Set dst (OrI dst (CastP2X src))); 8380 effect(KILL cr); 8381 8382 size(2); 8383 format %{ "OR $dst,$src" %} 8384 opcode(0x0B); 8385 ins_encode( OpcP, RegReg( dst, src) ); 8386 ins_pipe( ialu_reg_reg ); 8387 %} 8388 8389 8390 // Or Register with Immediate 8391 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8392 match(Set dst (OrI dst src)); 8393 effect(KILL cr); 8394 8395 format %{ "OR $dst,$src" %} 8396 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8397 // ins_encode( RegImm( dst, src) ); 8398 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8399 ins_pipe( ialu_reg ); 8400 %} 8401 8402 // Or Register with Memory 8403 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8404 match(Set dst (OrI dst (LoadI src))); 8405 effect(KILL cr); 8406 8407 ins_cost(125); 8408 format %{ "OR $dst,$src" %} 8409 opcode(0x0B); 8410 ins_encode( OpcP, RegMem( dst, src) ); 8411 ins_pipe( ialu_reg_mem ); 8412 %} 8413 8414 // Or Memory with Register 8415 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8416 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8417 effect(KILL cr); 8418 8419 ins_cost(150); 8420 format %{ "OR $dst,$src" %} 8421 opcode(0x09); /* Opcode 09 /r */ 8422 ins_encode( OpcP, RegMem( src, dst ) ); 8423 ins_pipe( ialu_mem_reg ); 8424 %} 8425 8426 // Or Memory with Immediate 8427 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8428 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8429 effect(KILL cr); 8430 8431 ins_cost(125); 8432 format %{ "OR $dst,$src" %} 8433 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8434 // ins_encode( MemImm( dst, src) ); 8435 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8436 ins_pipe( ialu_mem_imm ); 
8437 %} 8438 8439 // ROL/ROR 8440 // ROL expand 8441 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8442 effect(USE_DEF dst, USE shift, KILL cr); 8443 8444 format %{ "ROL $dst, $shift" %} 8445 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8446 ins_encode( OpcP, RegOpc( dst )); 8447 ins_pipe( ialu_reg ); 8448 %} 8449 8450 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8451 effect(USE_DEF dst, USE shift, KILL cr); 8452 8453 format %{ "ROL $dst, $shift" %} 8454 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8455 ins_encode( RegOpcImm(dst, shift) ); 8456 ins_pipe(ialu_reg); 8457 %} 8458 8459 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8460 effect(USE_DEF dst, USE shift, KILL cr); 8461 8462 format %{ "ROL $dst, $shift" %} 8463 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8464 ins_encode(OpcP, RegOpc(dst)); 8465 ins_pipe( ialu_reg_reg ); 8466 %} 8467 // end of ROL expand 8468 8469 // ROL 32bit by one once 8470 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8471 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8472 8473 expand %{ 8474 rolI_eReg_imm1(dst, lshift, cr); 8475 %} 8476 %} 8477 8478 // ROL 32bit var by imm8 once 8479 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8480 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8481 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8482 8483 expand %{ 8484 rolI_eReg_imm8(dst, lshift, cr); 8485 %} 8486 %} 8487 8488 // ROL 32bit var by var once 8489 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8490 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8491 8492 expand %{ 8493 rolI_eReg_CL(dst, shift, cr); 8494 %} 8495 %} 8496 8497 // ROL 32bit var by var once 8498 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8499 match(Set dst ( OrI (LShiftI dst shift) 
(URShiftI dst (SubI c32 shift)))); 8500 8501 expand %{ 8502 rolI_eReg_CL(dst, shift, cr); 8503 %} 8504 %} 8505 8506 // ROR expand 8507 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8508 effect(USE_DEF dst, USE shift, KILL cr); 8509 8510 format %{ "ROR $dst, $shift" %} 8511 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8512 ins_encode( OpcP, RegOpc( dst ) ); 8513 ins_pipe( ialu_reg ); 8514 %} 8515 8516 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8517 effect (USE_DEF dst, USE shift, KILL cr); 8518 8519 format %{ "ROR $dst, $shift" %} 8520 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8521 ins_encode( RegOpcImm(dst, shift) ); 8522 ins_pipe( ialu_reg ); 8523 %} 8524 8525 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8526 effect(USE_DEF dst, USE shift, KILL cr); 8527 8528 format %{ "ROR $dst, $shift" %} 8529 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8530 ins_encode(OpcP, RegOpc(dst)); 8531 ins_pipe( ialu_reg_reg ); 8532 %} 8533 // end of ROR expand 8534 8535 // ROR right once 8536 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8537 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8538 8539 expand %{ 8540 rorI_eReg_imm1(dst, rshift, cr); 8541 %} 8542 %} 8543 8544 // ROR 32bit by immI8 once 8545 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8546 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8547 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8548 8549 expand %{ 8550 rorI_eReg_imm8(dst, rshift, cr); 8551 %} 8552 %} 8553 8554 // ROR 32bit var by var once 8555 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8556 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8557 8558 expand %{ 8559 rorI_eReg_CL(dst, shift, cr); 8560 %} 8561 %} 8562 8563 // ROR 32bit var by var once 8564 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, 
immI_32 c32, eFlagsReg cr) %{ 8565 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8566 8567 expand %{ 8568 rorI_eReg_CL(dst, shift, cr); 8569 %} 8570 %} 8571 8572 // Xor Instructions 8573 // Xor Register with Register 8574 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8575 match(Set dst (XorI dst src)); 8576 effect(KILL cr); 8577 8578 size(2); 8579 format %{ "XOR $dst,$src" %} 8580 opcode(0x33); 8581 ins_encode( OpcP, RegReg( dst, src) ); 8582 ins_pipe( ialu_reg_reg ); 8583 %} 8584 8585 // Xor Register with Immediate -1 8586 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8587 match(Set dst (XorI dst imm)); 8588 8589 size(2); 8590 format %{ "NOT $dst" %} 8591 ins_encode %{ 8592 __ notl($dst$$Register); 8593 %} 8594 ins_pipe( ialu_reg ); 8595 %} 8596 8597 // Xor Register with Immediate 8598 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8599 match(Set dst (XorI dst src)); 8600 effect(KILL cr); 8601 8602 format %{ "XOR $dst,$src" %} 8603 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8604 // ins_encode( RegImm( dst, src) ); 8605 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8606 ins_pipe( ialu_reg ); 8607 %} 8608 8609 // Xor Register with Memory 8610 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8611 match(Set dst (XorI dst (LoadI src))); 8612 effect(KILL cr); 8613 8614 ins_cost(125); 8615 format %{ "XOR $dst,$src" %} 8616 opcode(0x33); 8617 ins_encode( OpcP, RegMem(dst, src) ); 8618 ins_pipe( ialu_reg_mem ); 8619 %} 8620 8621 // Xor Memory with Register 8622 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8623 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8624 effect(KILL cr); 8625 8626 ins_cost(150); 8627 format %{ "XOR $dst,$src" %} 8628 opcode(0x31); /* Opcode 31 /r */ 8629 ins_encode( OpcP, RegMem( src, dst ) ); 8630 ins_pipe( ialu_mem_reg ); 8631 %} 8632 8633 // Xor Memory with Immediate 8634 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8635 match(Set 
dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// With dst == src on entry (as arranged by convI2B), NEG sets CF iff the
// value is non-zero; ADC dst,src then yields -src + src + CF, i.e. 0 or 1.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer flavor of ci2b: via the convP2B expand, dst becomes 0 for a null
// src and 1 otherwise.
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// dst = (p < q) ? -1 : 0. SETlt produces 0/1 in the low byte of the
// zeroed dst; NEG turns the 1 into -1.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // (Removed an unused 'Label done' local: this encoding is straight-line
    // code — nothing branches to or binds a label.)
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  // Arithmetic shift right by 31 replicates the sign bit: -1 if dst < 0, else 0.
  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
8784 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8785 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8786 */ 8787 //----------Overflow Math Instructions----------------------------------------- 8788 8789 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8790 %{ 8791 match(Set cr (OverflowAddI op1 op2)); 8792 effect(DEF cr, USE_KILL op1, USE op2); 8793 8794 format %{ "ADD $op1, $op2\t# overflow check int" %} 8795 8796 ins_encode %{ 8797 __ addl($op1$$Register, $op2$$Register); 8798 %} 8799 ins_pipe(ialu_reg_reg); 8800 %} 8801 8802 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) 8803 %{ 8804 match(Set cr (OverflowAddI op1 op2)); 8805 effect(DEF cr, USE_KILL op1, USE op2); 8806 8807 format %{ "ADD $op1, $op2\t# overflow check int" %} 8808 8809 ins_encode %{ 8810 __ addl($op1$$Register, $op2$$constant); 8811 %} 8812 ins_pipe(ialu_reg_reg); 8813 %} 8814 8815 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) 8816 %{ 8817 match(Set cr (OverflowSubI op1 op2)); 8818 8819 format %{ "CMP $op1, $op2\t# overflow check int" %} 8820 ins_encode %{ 8821 __ cmpl($op1$$Register, $op2$$Register); 8822 %} 8823 ins_pipe(ialu_reg_reg); 8824 %} 8825 8826 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) 8827 %{ 8828 match(Set cr (OverflowSubI op1 op2)); 8829 8830 format %{ "CMP $op1, $op2\t# overflow check int" %} 8831 ins_encode %{ 8832 __ cmpl($op1$$Register, $op2$$constant); 8833 %} 8834 ins_pipe(ialu_reg_reg); 8835 %} 8836 8837 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2) 8838 %{ 8839 match(Set cr (OverflowSubI zero op2)); 8840 effect(DEF cr, USE_KILL op2); 8841 8842 format %{ "NEG $op2\t# overflow check int" %} 8843 ins_encode %{ 8844 __ negl($op2$$Register); 8845 %} 8846 ins_pipe(ialu_reg_reg); 8847 %} 8848 8849 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8850 %{ 8851 match(Set cr (OverflowMulI op1 op2)); 8852 
effect(DEF cr, USE_KILL op1, USE op2); 8853 8854 format %{ "IMUL $op1, $op2\t# overflow check int" %} 8855 ins_encode %{ 8856 __ imull($op1$$Register, $op2$$Register); 8857 %} 8858 ins_pipe(ialu_reg_reg_alu0); 8859 %} 8860 8861 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) 8862 %{ 8863 match(Set cr (OverflowMulI op1 op2)); 8864 effect(DEF cr, TEMP tmp, USE op1, USE op2); 8865 8866 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} 8867 ins_encode %{ 8868 __ imull($tmp$$Register, $op1$$Register, $op2$$constant); 8869 %} 8870 ins_pipe(ialu_reg_reg_alu0); 8871 %} 8872 8873 // Integer Absolute Instructions 8874 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr) 8875 %{ 8876 match(Set dst (AbsI src)); 8877 effect(TEMP dst, TEMP tmp, KILL cr); 8878 format %{ "movl $tmp, $src\n\t" 8879 "sarl $tmp, 31\n\t" 8880 "movl $dst, $src\n\t" 8881 "xorl $dst, $tmp\n\t" 8882 "subl $dst, $tmp\n" 8883 %} 8884 ins_encode %{ 8885 __ movl($tmp$$Register, $src$$Register); 8886 __ sarl($tmp$$Register, 31); 8887 __ movl($dst$$Register, $src$$Register); 8888 __ xorl($dst$$Register, $tmp$$Register); 8889 __ subl($dst$$Register, $tmp$$Register); 8890 %} 8891 8892 ins_pipe(ialu_reg_reg); 8893 %} 8894 8895 //----------Long Instructions------------------------------------------------ 8896 // Add Long Register with Register 8897 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8898 match(Set dst (AddL dst src)); 8899 effect(KILL cr); 8900 ins_cost(200); 8901 format %{ "ADD $dst.lo,$src.lo\n\t" 8902 "ADC $dst.hi,$src.hi" %} 8903 opcode(0x03, 0x13); 8904 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8905 ins_pipe( ialu_reg_reg_long ); 8906 %} 8907 8908 // Add Long Register with Immediate 8909 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8910 match(Set dst (AddL dst src)); 8911 effect(KILL cr); 8912 format %{ "ADD $dst.lo,$src.lo\n\t" 8913 "ADC $dst.hi,$src.hi" %} 8914 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 
*/ 8915 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8916 ins_pipe( ialu_reg_long ); 8917 %} 8918 8919 // Add Long Register with Memory 8920 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8921 match(Set dst (AddL dst (LoadL mem))); 8922 effect(KILL cr); 8923 ins_cost(125); 8924 format %{ "ADD $dst.lo,$mem\n\t" 8925 "ADC $dst.hi,$mem+4" %} 8926 opcode(0x03, 0x13); 8927 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8928 ins_pipe( ialu_reg_long_mem ); 8929 %} 8930 8931 // Subtract Long Register with Register. 8932 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8933 match(Set dst (SubL dst src)); 8934 effect(KILL cr); 8935 ins_cost(200); 8936 format %{ "SUB $dst.lo,$src.lo\n\t" 8937 "SBB $dst.hi,$src.hi" %} 8938 opcode(0x2B, 0x1B); 8939 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8940 ins_pipe( ialu_reg_reg_long ); 8941 %} 8942 8943 // Subtract Long Register with Immediate 8944 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8945 match(Set dst (SubL dst src)); 8946 effect(KILL cr); 8947 format %{ "SUB $dst.lo,$src.lo\n\t" 8948 "SBB $dst.hi,$src.hi" %} 8949 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8950 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8951 ins_pipe( ialu_reg_long ); 8952 %} 8953 8954 // Subtract Long Register with Memory 8955 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8956 match(Set dst (SubL dst (LoadL mem))); 8957 effect(KILL cr); 8958 ins_cost(125); 8959 format %{ "SUB $dst.lo,$mem\n\t" 8960 "SBB $dst.hi,$mem+4" %} 8961 opcode(0x2B, 0x1B); 8962 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8963 ins_pipe( ialu_reg_long_mem ); 8964 %} 8965 8966 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8967 match(Set dst (SubL zero dst)); 8968 effect(KILL cr); 8969 ins_cost(300); 8970 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8971 ins_encode( 
neg_long(dst) ); 8972 ins_pipe( ialu_reg_reg_long ); 8973 %} 8974 8975 // And Long Register with Register 8976 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8977 match(Set dst (AndL dst src)); 8978 effect(KILL cr); 8979 format %{ "AND $dst.lo,$src.lo\n\t" 8980 "AND $dst.hi,$src.hi" %} 8981 opcode(0x23,0x23); 8982 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8983 ins_pipe( ialu_reg_reg_long ); 8984 %} 8985 8986 // And Long Register with Immediate 8987 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8988 match(Set dst (AndL dst src)); 8989 effect(KILL cr); 8990 format %{ "AND $dst.lo,$src.lo\n\t" 8991 "AND $dst.hi,$src.hi" %} 8992 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8993 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8994 ins_pipe( ialu_reg_long ); 8995 %} 8996 8997 // And Long Register with Memory 8998 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8999 match(Set dst (AndL dst (LoadL mem))); 9000 effect(KILL cr); 9001 ins_cost(125); 9002 format %{ "AND $dst.lo,$mem\n\t" 9003 "AND $dst.hi,$mem+4" %} 9004 opcode(0x23, 0x23); 9005 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9006 ins_pipe( ialu_reg_long_mem ); 9007 %} 9008 9009 // BMI1 instructions 9010 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 9011 match(Set dst (AndL (XorL src1 minus_1) src2)); 9012 predicate(UseBMI1Instructions); 9013 effect(KILL cr, TEMP dst); 9014 9015 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 9016 "ANDNL $dst.hi, $src1.hi, $src2.hi" 9017 %} 9018 9019 ins_encode %{ 9020 Register Rdst = $dst$$Register; 9021 Register Rsrc1 = $src1$$Register; 9022 Register Rsrc2 = $src2$$Register; 9023 __ andnl(Rdst, Rsrc1, Rsrc2); 9024 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 9025 %} 9026 ins_pipe(ialu_reg_reg_long); 9027 %} 9028 9029 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, 
immL_M1 minus_1, eFlagsReg cr) %{ 9030 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 9031 predicate(UseBMI1Instructions); 9032 effect(KILL cr, TEMP dst); 9033 9034 ins_cost(125); 9035 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 9036 "ANDNL $dst.hi, $src1.hi, $src2+4" 9037 %} 9038 9039 ins_encode %{ 9040 Register Rdst = $dst$$Register; 9041 Register Rsrc1 = $src1$$Register; 9042 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 9043 9044 __ andnl(Rdst, Rsrc1, $src2$$Address); 9045 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 9046 %} 9047 ins_pipe(ialu_reg_mem); 9048 %} 9049 9050 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 9051 match(Set dst (AndL (SubL imm_zero src) src)); 9052 predicate(UseBMI1Instructions); 9053 effect(KILL cr, TEMP dst); 9054 9055 format %{ "MOVL $dst.hi, 0\n\t" 9056 "BLSIL $dst.lo, $src.lo\n\t" 9057 "JNZ done\n\t" 9058 "BLSIL $dst.hi, $src.hi\n" 9059 "done:" 9060 %} 9061 9062 ins_encode %{ 9063 Label done; 9064 Register Rdst = $dst$$Register; 9065 Register Rsrc = $src$$Register; 9066 __ movl(HIGH_FROM_LOW(Rdst), 0); 9067 __ blsil(Rdst, Rsrc); 9068 __ jccb(Assembler::notZero, done); 9069 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9070 __ bind(done); 9071 %} 9072 ins_pipe(ialu_reg); 9073 %} 9074 9075 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 9076 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 9077 predicate(UseBMI1Instructions); 9078 effect(KILL cr, TEMP dst); 9079 9080 ins_cost(125); 9081 format %{ "MOVL $dst.hi, 0\n\t" 9082 "BLSIL $dst.lo, $src\n\t" 9083 "JNZ done\n\t" 9084 "BLSIL $dst.hi, $src+4\n" 9085 "done:" 9086 %} 9087 9088 ins_encode %{ 9089 Label done; 9090 Register Rdst = $dst$$Register; 9091 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9092 9093 __ movl(HIGH_FROM_LOW(Rdst), 0); 9094 
__ blsil(Rdst, $src$$Address); 9095 __ jccb(Assembler::notZero, done); 9096 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 9097 __ bind(done); 9098 %} 9099 ins_pipe(ialu_reg_mem); 9100 %} 9101 9102 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9103 %{ 9104 match(Set dst (XorL (AddL src minus_1) src)); 9105 predicate(UseBMI1Instructions); 9106 effect(KILL cr, TEMP dst); 9107 9108 format %{ "MOVL $dst.hi, 0\n\t" 9109 "BLSMSKL $dst.lo, $src.lo\n\t" 9110 "JNC done\n\t" 9111 "BLSMSKL $dst.hi, $src.hi\n" 9112 "done:" 9113 %} 9114 9115 ins_encode %{ 9116 Label done; 9117 Register Rdst = $dst$$Register; 9118 Register Rsrc = $src$$Register; 9119 __ movl(HIGH_FROM_LOW(Rdst), 0); 9120 __ blsmskl(Rdst, Rsrc); 9121 __ jccb(Assembler::carryClear, done); 9122 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9123 __ bind(done); 9124 %} 9125 9126 ins_pipe(ialu_reg); 9127 %} 9128 9129 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9130 %{ 9131 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 9132 predicate(UseBMI1Instructions); 9133 effect(KILL cr, TEMP dst); 9134 9135 ins_cost(125); 9136 format %{ "MOVL $dst.hi, 0\n\t" 9137 "BLSMSKL $dst.lo, $src\n\t" 9138 "JNC done\n\t" 9139 "BLSMSKL $dst.hi, $src+4\n" 9140 "done:" 9141 %} 9142 9143 ins_encode %{ 9144 Label done; 9145 Register Rdst = $dst$$Register; 9146 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9147 9148 __ movl(HIGH_FROM_LOW(Rdst), 0); 9149 __ blsmskl(Rdst, $src$$Address); 9150 __ jccb(Assembler::carryClear, done); 9151 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 9152 __ bind(done); 9153 %} 9154 9155 ins_pipe(ialu_reg_mem); 9156 %} 9157 9158 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9159 %{ 9160 match(Set dst (AndL (AddL src minus_1) src) ); 9161 predicate(UseBMI1Instructions); 9162 effect(KILL cr, TEMP dst); 9163 9164 format %{ "MOVL $dst.hi, $src.hi\n\t" 
9165 "BLSRL $dst.lo, $src.lo\n\t" 9166 "JNC done\n\t" 9167 "BLSRL $dst.hi, $src.hi\n" 9168 "done:" 9169 %} 9170 9171 ins_encode %{ 9172 Label done; 9173 Register Rdst = $dst$$Register; 9174 Register Rsrc = $src$$Register; 9175 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9176 __ blsrl(Rdst, Rsrc); 9177 __ jccb(Assembler::carryClear, done); 9178 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9179 __ bind(done); 9180 %} 9181 9182 ins_pipe(ialu_reg); 9183 %} 9184 9185 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9186 %{ 9187 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9188 predicate(UseBMI1Instructions); 9189 effect(KILL cr, TEMP dst); 9190 9191 ins_cost(125); 9192 format %{ "MOVL $dst.hi, $src+4\n\t" 9193 "BLSRL $dst.lo, $src\n\t" 9194 "JNC done\n\t" 9195 "BLSRL $dst.hi, $src+4\n" 9196 "done:" 9197 %} 9198 9199 ins_encode %{ 9200 Label done; 9201 Register Rdst = $dst$$Register; 9202 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9203 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9204 __ blsrl(Rdst, $src$$Address); 9205 __ jccb(Assembler::carryClear, done); 9206 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9207 __ bind(done); 9208 %} 9209 9210 ins_pipe(ialu_reg_mem); 9211 %} 9212 9213 // Or Long Register with Register 9214 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9215 match(Set dst (OrL dst src)); 9216 effect(KILL cr); 9217 format %{ "OR $dst.lo,$src.lo\n\t" 9218 "OR $dst.hi,$src.hi" %} 9219 opcode(0x0B,0x0B); 9220 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9221 ins_pipe( ialu_reg_reg_long ); 9222 %} 9223 9224 // Or Long Register with Immediate 9225 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9226 match(Set dst (OrL dst src)); 9227 effect(KILL cr); 9228 format %{ "OR $dst.lo,$src.lo\n\t" 9229 "OR $dst.hi,$src.hi" %} 9230 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9231 ins_encode( Long_OpcSErm_Lo( dst, src ), 
Long_OpcSErm_Hi( dst, src ) ); 9232 ins_pipe( ialu_reg_long ); 9233 %} 9234 9235 // Or Long Register with Memory 9236 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9237 match(Set dst (OrL dst (LoadL mem))); 9238 effect(KILL cr); 9239 ins_cost(125); 9240 format %{ "OR $dst.lo,$mem\n\t" 9241 "OR $dst.hi,$mem+4" %} 9242 opcode(0x0B,0x0B); 9243 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9244 ins_pipe( ialu_reg_long_mem ); 9245 %} 9246 9247 // Xor Long Register with Register 9248 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9249 match(Set dst (XorL dst src)); 9250 effect(KILL cr); 9251 format %{ "XOR $dst.lo,$src.lo\n\t" 9252 "XOR $dst.hi,$src.hi" %} 9253 opcode(0x33,0x33); 9254 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9255 ins_pipe( ialu_reg_reg_long ); 9256 %} 9257 9258 // Xor Long Register with Immediate -1 9259 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9260 match(Set dst (XorL dst imm)); 9261 format %{ "NOT $dst.lo\n\t" 9262 "NOT $dst.hi" %} 9263 ins_encode %{ 9264 __ notl($dst$$Register); 9265 __ notl(HIGH_FROM_LOW($dst$$Register)); 9266 %} 9267 ins_pipe( ialu_reg_long ); 9268 %} 9269 9270 // Xor Long Register with Immediate 9271 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9272 match(Set dst (XorL dst src)); 9273 effect(KILL cr); 9274 format %{ "XOR $dst.lo,$src.lo\n\t" 9275 "XOR $dst.hi,$src.hi" %} 9276 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9277 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9278 ins_pipe( ialu_reg_long ); 9279 %} 9280 9281 // Xor Long Register with Memory 9282 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9283 match(Set dst (XorL dst (LoadL mem))); 9284 effect(KILL cr); 9285 ins_cost(125); 9286 format %{ "XOR $dst.lo,$mem\n\t" 9287 "XOR $dst.hi,$mem+4" %} 9288 opcode(0x33,0x33); 9289 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9290 ins_pipe( ialu_reg_long_mem ); 
9291 %} 9292 9293 // Shift Left Long by 1 9294 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9295 predicate(UseNewLongLShift); 9296 match(Set dst (LShiftL dst cnt)); 9297 effect(KILL cr); 9298 ins_cost(100); 9299 format %{ "ADD $dst.lo,$dst.lo\n\t" 9300 "ADC $dst.hi,$dst.hi" %} 9301 ins_encode %{ 9302 __ addl($dst$$Register,$dst$$Register); 9303 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9304 %} 9305 ins_pipe( ialu_reg_long ); 9306 %} 9307 9308 // Shift Left Long by 2 9309 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9310 predicate(UseNewLongLShift); 9311 match(Set dst (LShiftL dst cnt)); 9312 effect(KILL cr); 9313 ins_cost(100); 9314 format %{ "ADD $dst.lo,$dst.lo\n\t" 9315 "ADC $dst.hi,$dst.hi\n\t" 9316 "ADD $dst.lo,$dst.lo\n\t" 9317 "ADC $dst.hi,$dst.hi" %} 9318 ins_encode %{ 9319 __ addl($dst$$Register,$dst$$Register); 9320 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9321 __ addl($dst$$Register,$dst$$Register); 9322 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9323 %} 9324 ins_pipe( ialu_reg_long ); 9325 %} 9326 9327 // Shift Left Long by 3 9328 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9329 predicate(UseNewLongLShift); 9330 match(Set dst (LShiftL dst cnt)); 9331 effect(KILL cr); 9332 ins_cost(100); 9333 format %{ "ADD $dst.lo,$dst.lo\n\t" 9334 "ADC $dst.hi,$dst.hi\n\t" 9335 "ADD $dst.lo,$dst.lo\n\t" 9336 "ADC $dst.hi,$dst.hi\n\t" 9337 "ADD $dst.lo,$dst.lo\n\t" 9338 "ADC $dst.hi,$dst.hi" %} 9339 ins_encode %{ 9340 __ addl($dst$$Register,$dst$$Register); 9341 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9342 __ addl($dst$$Register,$dst$$Register); 9343 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9344 __ addl($dst$$Register,$dst$$Register); 9345 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9346 %} 9347 ins_pipe( ialu_reg_long ); 9348 %} 9349 9350 // Shift Left 
Long by 1-31 9351 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9352 match(Set dst (LShiftL dst cnt)); 9353 effect(KILL cr); 9354 ins_cost(200); 9355 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9356 "SHL $dst.lo,$cnt" %} 9357 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9358 ins_encode( move_long_small_shift(dst,cnt) ); 9359 ins_pipe( ialu_reg_long ); 9360 %} 9361 9362 // Shift Left Long by 32-63 9363 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9364 match(Set dst (LShiftL dst cnt)); 9365 effect(KILL cr); 9366 ins_cost(300); 9367 format %{ "MOV $dst.hi,$dst.lo\n" 9368 "\tSHL $dst.hi,$cnt-32\n" 9369 "\tXOR $dst.lo,$dst.lo" %} 9370 opcode(0xC1, 0x4); /* C1 /4 ib */ 9371 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9372 ins_pipe( ialu_reg_long ); 9373 %} 9374 9375 // Shift Left Long by variable 9376 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9377 match(Set dst (LShiftL dst shift)); 9378 effect(KILL cr); 9379 ins_cost(500+200); 9380 size(17); 9381 format %{ "TEST $shift,32\n\t" 9382 "JEQ,s small\n\t" 9383 "MOV $dst.hi,$dst.lo\n\t" 9384 "XOR $dst.lo,$dst.lo\n" 9385 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9386 "SHL $dst.lo,$shift" %} 9387 ins_encode( shift_left_long( dst, shift ) ); 9388 ins_pipe( pipe_slow ); 9389 %} 9390 9391 // Shift Right Long by 1-31 9392 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9393 match(Set dst (URShiftL dst cnt)); 9394 effect(KILL cr); 9395 ins_cost(200); 9396 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9397 "SHR $dst.hi,$cnt" %} 9398 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9399 ins_encode( move_long_small_shift(dst,cnt) ); 9400 ins_pipe( ialu_reg_long ); 9401 %} 9402 9403 // Shift Right Long by 32-63 9404 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9405 match(Set dst (URShiftL dst cnt)); 9406 effect(KILL cr); 9407 ins_cost(300); 9408 format %{ "MOV $dst.lo,$dst.hi\n" 9409 "\tSHR $dst.lo,$cnt-32\n" 9410 "\tXOR 
$dst.hi,$dst.hi" %} 9411 opcode(0xC1, 0x5); /* C1 /5 ib */ 9412 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9413 ins_pipe( ialu_reg_long ); 9414 %} 9415 9416 // Shift Right Long by variable 9417 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9418 match(Set dst (URShiftL dst shift)); 9419 effect(KILL cr); 9420 ins_cost(600); 9421 size(17); 9422 format %{ "TEST $shift,32\n\t" 9423 "JEQ,s small\n\t" 9424 "MOV $dst.lo,$dst.hi\n\t" 9425 "XOR $dst.hi,$dst.hi\n" 9426 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9427 "SHR $dst.hi,$shift" %} 9428 ins_encode( shift_right_long( dst, shift ) ); 9429 ins_pipe( pipe_slow ); 9430 %} 9431 9432 // Shift Right Long by 1-31 9433 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9434 match(Set dst (RShiftL dst cnt)); 9435 effect(KILL cr); 9436 ins_cost(200); 9437 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9438 "SAR $dst.hi,$cnt" %} 9439 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9440 ins_encode( move_long_small_shift(dst,cnt) ); 9441 ins_pipe( ialu_reg_long ); 9442 %} 9443 9444 // Shift Right Long by 32-63 9445 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9446 match(Set dst (RShiftL dst cnt)); 9447 effect(KILL cr); 9448 ins_cost(300); 9449 format %{ "MOV $dst.lo,$dst.hi\n" 9450 "\tSAR $dst.lo,$cnt-32\n" 9451 "\tSAR $dst.hi,31" %} 9452 opcode(0xC1, 0x7); /* C1 /7 ib */ 9453 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9454 ins_pipe( ialu_reg_long ); 9455 %} 9456 9457 // Shift Right arithmetic Long by variable 9458 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9459 match(Set dst (RShiftL dst shift)); 9460 effect(KILL cr); 9461 ins_cost(600); 9462 size(18); 9463 format %{ "TEST $shift,32\n\t" 9464 "JEQ,s small\n\t" 9465 "MOV $dst.lo,$dst.hi\n\t" 9466 "SAR $dst.hi,31\n" 9467 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9468 "SAR $dst.hi,$shift" %} 9469 ins_encode( shift_right_arith_long( dst, shift ) ); 9470 ins_pipe( pipe_slow ); 9471 %} 9472 9473 
//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
// Uses FUCOMIP (P6+ only, hence the supports_cmov() predicate) and then
// patches the NaN/unordered case: on parity (NaN) it forces CF via SAHF so
// unordered compares as "below".  EAX is killed because the fixup writes AH.
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same compare for an eFlagsRegUCF user: no NaN fixup needed, so no EAX kill.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
// Pre-P6 fallback: FCOMp + FNSTSW/SAHF; the TEST of bit 0x400 (C2, the
// "unordered" bit) forces the unordered case to compare as less-than.
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL
cr, KILL rax); 9535 ins_cost(280); 9536 format %{ "FTSTD $dst,$src1" %} 9537 opcode(0xE4, 0xD9); 9538 ins_encode( Push_Reg_DPR(src1), 9539 OpcS, OpcP, PopFPU, 9540 CmpF_Result(dst)); 9541 ins_pipe( pipe_slow ); 9542 %} 9543 9544 // Compare into -1,0,1 9545 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9546 predicate(UseSSE<=1); 9547 match(Set dst (CmpD3 src1 src2)); 9548 effect(KILL cr, KILL rax); 9549 ins_cost(300); 9550 format %{ "FCMPD $dst,$src1,$src2" %} 9551 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9552 ins_encode( Push_Reg_DPR(src1), 9553 OpcP, RegOpc(src2), 9554 CmpF_Result(dst)); 9555 ins_pipe( pipe_slow ); 9556 %} 9557 9558 // float compare and set condition codes in EFLAGS by XMM regs 9559 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9560 predicate(UseSSE>=2); 9561 match(Set cr (CmpD src1 src2)); 9562 ins_cost(145); 9563 format %{ "UCOMISD $src1,$src2\n\t" 9564 "JNP,s exit\n\t" 9565 "PUSHF\t# saw NaN, set CF\n\t" 9566 "AND [rsp], #0xffffff2b\n\t" 9567 "POPF\n" 9568 "exit:" %} 9569 ins_encode %{ 9570 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9571 emit_cmpfp_fixup(_masm); 9572 %} 9573 ins_pipe( pipe_slow ); 9574 %} 9575 9576 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9577 predicate(UseSSE>=2); 9578 match(Set cr (CmpD src1 src2)); 9579 ins_cost(100); 9580 format %{ "UCOMISD $src1,$src2" %} 9581 ins_encode %{ 9582 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9583 %} 9584 ins_pipe( pipe_slow ); 9585 %} 9586 9587 // float compare and set condition codes in EFLAGS by XMM regs 9588 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9589 predicate(UseSSE>=2); 9590 match(Set cr (CmpD src1 (LoadD src2))); 9591 ins_cost(145); 9592 format %{ "UCOMISD $src1,$src2\n\t" 9593 "JNP,s exit\n\t" 9594 "PUSHF\t# saw NaN, set CF\n\t" 9595 "AND [rsp], #0xffffff2b\n\t" 9596 "POPF\n" 9597 "exit:" %} 9598 ins_encode %{ 9599 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9600 
emit_cmpfp_fixup(_masm); 9601 %} 9602 ins_pipe( pipe_slow ); 9603 %} 9604 9605 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9606 predicate(UseSSE>=2); 9607 match(Set cr (CmpD src1 (LoadD src2))); 9608 ins_cost(100); 9609 format %{ "UCOMISD $src1,$src2" %} 9610 ins_encode %{ 9611 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9612 %} 9613 ins_pipe( pipe_slow ); 9614 %} 9615 9616 // Compare into -1,0,1 in XMM 9617 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9618 predicate(UseSSE>=2); 9619 match(Set dst (CmpD3 src1 src2)); 9620 effect(KILL cr); 9621 ins_cost(255); 9622 format %{ "UCOMISD $src1, $src2\n\t" 9623 "MOV $dst, #-1\n\t" 9624 "JP,s done\n\t" 9625 "JB,s done\n\t" 9626 "SETNE $dst\n\t" 9627 "MOVZB $dst, $dst\n" 9628 "done:" %} 9629 ins_encode %{ 9630 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9631 emit_cmpfp3(_masm, $dst$$Register); 9632 %} 9633 ins_pipe( pipe_slow ); 9634 %} 9635 9636 // Compare into -1,0,1 in XMM and memory 9637 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9638 predicate(UseSSE>=2); 9639 match(Set dst (CmpD3 src1 (LoadD src2))); 9640 effect(KILL cr); 9641 ins_cost(275); 9642 format %{ "UCOMISD $src1, $src2\n\t" 9643 "MOV $dst, #-1\n\t" 9644 "JP,s done\n\t" 9645 "JB,s done\n\t" 9646 "SETNE $dst\n\t" 9647 "MOVZB $dst, $dst\n" 9648 "done:" %} 9649 ins_encode %{ 9650 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9651 emit_cmpfp3(_masm, $dst$$Register); 9652 %} 9653 ins_pipe( pipe_slow ); 9654 %} 9655 9656 9657 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9658 predicate (UseSSE <=1); 9659 match(Set dst (SubD dst src)); 9660 9661 format %{ "FLD $src\n\t" 9662 "DSUBp $dst,ST" %} 9663 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9664 ins_cost(150); 9665 ins_encode( Push_Reg_DPR(src), 9666 OpcP, RegOpc(dst) ); 9667 ins_pipe( fpu_reg_reg ); 9668 %} 9669 9670 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9671 predicate (UseSSE <=1); 9672 
match(Set dst (RoundDouble (SubD src1 src2))); 9673 ins_cost(250); 9674 9675 format %{ "FLD $src2\n\t" 9676 "DSUB ST,$src1\n\t" 9677 "FSTP_D $dst\t# D-round" %} 9678 opcode(0xD8, 0x5); 9679 ins_encode( Push_Reg_DPR(src2), 9680 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9681 ins_pipe( fpu_mem_reg_reg ); 9682 %} 9683 9684 9685 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9686 predicate (UseSSE <=1); 9687 match(Set dst (SubD dst (LoadD src))); 9688 ins_cost(150); 9689 9690 format %{ "FLD $src\n\t" 9691 "DSUBp $dst,ST" %} 9692 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9693 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9694 OpcP, RegOpc(dst) ); 9695 ins_pipe( fpu_reg_mem ); 9696 %} 9697 9698 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9699 predicate (UseSSE<=1); 9700 match(Set dst (AbsD src)); 9701 ins_cost(100); 9702 format %{ "FABS" %} 9703 opcode(0xE1, 0xD9); 9704 ins_encode( OpcS, OpcP ); 9705 ins_pipe( fpu_reg_reg ); 9706 %} 9707 9708 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9709 predicate(UseSSE<=1); 9710 match(Set dst (NegD src)); 9711 ins_cost(100); 9712 format %{ "FCHS" %} 9713 opcode(0xE0, 0xD9); 9714 ins_encode( OpcS, OpcP ); 9715 ins_pipe( fpu_reg_reg ); 9716 %} 9717 9718 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9719 predicate(UseSSE<=1); 9720 match(Set dst (AddD dst src)); 9721 format %{ "FLD $src\n\t" 9722 "DADD $dst,ST" %} 9723 size(4); 9724 ins_cost(150); 9725 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9726 ins_encode( Push_Reg_DPR(src), 9727 OpcP, RegOpc(dst) ); 9728 ins_pipe( fpu_reg_reg ); 9729 %} 9730 9731 9732 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9733 predicate(UseSSE<=1); 9734 match(Set dst (RoundDouble (AddD src1 src2))); 9735 ins_cost(250); 9736 9737 format %{ "FLD $src2\n\t" 9738 "DADD ST,$src1\n\t" 9739 "FSTP_D $dst\t# D-round" %} 9740 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9741 ins_encode( Push_Reg_DPR(src2), 9742 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9743 ins_pipe( 
fpu_mem_reg_reg ); 9744 %} 9745 9746 9747 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9748 predicate(UseSSE<=1); 9749 match(Set dst (AddD dst (LoadD src))); 9750 ins_cost(150); 9751 9752 format %{ "FLD $src\n\t" 9753 "DADDp $dst,ST" %} 9754 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9755 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9756 OpcP, RegOpc(dst) ); 9757 ins_pipe( fpu_reg_mem ); 9758 %} 9759 9760 // add-to-memory 9761 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9762 predicate(UseSSE<=1); 9763 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9764 ins_cost(150); 9765 9766 format %{ "FLD_D $dst\n\t" 9767 "DADD ST,$src\n\t" 9768 "FST_D $dst" %} 9769 opcode(0xDD, 0x0); 9770 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9771 Opcode(0xD8), RegOpc(src), 9772 set_instruction_start, 9773 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9774 ins_pipe( fpu_reg_mem ); 9775 %} 9776 9777 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9778 predicate(UseSSE<=1); 9779 match(Set dst (AddD dst con)); 9780 ins_cost(125); 9781 format %{ "FLD1\n\t" 9782 "DADDp $dst,ST" %} 9783 ins_encode %{ 9784 __ fld1(); 9785 __ faddp($dst$$reg); 9786 %} 9787 ins_pipe(fpu_reg); 9788 %} 9789 9790 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9791 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9792 match(Set dst (AddD dst con)); 9793 ins_cost(200); 9794 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9795 "DADDp $dst,ST" %} 9796 ins_encode %{ 9797 __ fld_d($constantaddress($con)); 9798 __ faddp($dst$$reg); 9799 %} 9800 ins_pipe(fpu_reg_mem); 9801 %} 9802 9803 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9804 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9805 match(Set dst (RoundDouble (AddD src con))); 9806 ins_cost(200); 9807 format %{ "FLD_D [$constantaddress]\t# load from 
constant table: double=$con\n\t" 9808 "DADD ST,$src\n\t" 9809 "FSTP_D $dst\t# D-round" %} 9810 ins_encode %{ 9811 __ fld_d($constantaddress($con)); 9812 __ fadd($src$$reg); 9813 __ fstp_d(Address(rsp, $dst$$disp)); 9814 %} 9815 ins_pipe(fpu_mem_reg_con); 9816 %} 9817 9818 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9819 predicate(UseSSE<=1); 9820 match(Set dst (MulD dst src)); 9821 format %{ "FLD $src\n\t" 9822 "DMULp $dst,ST" %} 9823 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9824 ins_cost(150); 9825 ins_encode( Push_Reg_DPR(src), 9826 OpcP, RegOpc(dst) ); 9827 ins_pipe( fpu_reg_reg ); 9828 %} 9829 9830 // Strict FP instruction biases argument before multiply then 9831 // biases result to avoid double rounding of subnormals. 9832 // 9833 // scale arg1 by multiplying arg1 by 2^(-15360) 9834 // load arg2 9835 // multiply scaled arg1 by arg2 9836 // rescale product by 2^(15360) 9837 // 9838 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9839 predicate( UseSSE<=1 && Compile::current()->has_method() ); 9840 match(Set dst (MulD dst src)); 9841 ins_cost(1); // Select this instruction for all FP double multiplies 9842 9843 format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" 9844 "DMULp $dst,ST\n\t" 9845 "FLD $src\n\t" 9846 "DMULp $dst,ST\n\t" 9847 "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" 9848 "DMULp $dst,ST\n\t" %} 9849 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9850 ins_encode( strictfp_bias1(dst), 9851 Push_Reg_DPR(src), 9852 OpcP, RegOpc(dst), 9853 strictfp_bias2(dst) ); 9854 ins_pipe( fpu_reg_reg ); 9855 %} 9856 9857 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9858 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9859 match(Set dst (MulD dst con)); 9860 ins_cost(200); 9861 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9862 "DMULp $dst,ST" %} 9863 ins_encode %{ 9864 __ fld_d($constantaddress($con)); 9865 __ fmulp($dst$$reg); 9866 %} 9867 
ins_pipe(fpu_reg_mem); 9868 %} 9869 9870 9871 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9872 predicate( UseSSE<=1 ); 9873 match(Set dst (MulD dst (LoadD src))); 9874 ins_cost(200); 9875 format %{ "FLD_D $src\n\t" 9876 "DMULp $dst,ST" %} 9877 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9878 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9879 OpcP, RegOpc(dst) ); 9880 ins_pipe( fpu_reg_mem ); 9881 %} 9882 9883 // 9884 // Cisc-alternate to reg-reg multiply 9885 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9886 predicate( UseSSE<=1 ); 9887 match(Set dst (MulD src (LoadD mem))); 9888 ins_cost(250); 9889 format %{ "FLD_D $mem\n\t" 9890 "DMUL ST,$src\n\t" 9891 "FSTP_D $dst" %} 9892 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9893 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9894 OpcReg_FPR(src), 9895 Pop_Reg_DPR(dst) ); 9896 ins_pipe( fpu_reg_reg_mem ); 9897 %} 9898 9899 9900 // MACRO3 -- addDPR a mulDPR 9901 // This instruction is a '2-address' instruction in that the result goes 9902 // back to src2. This eliminates a move from the macro; possibly the 9903 // register allocator will have to add it back (and maybe not). 
// Fused multiply-add for x87 doubles: FLD src0; multiply by src1 on the FPU
// stack; FADDP into src2.  Note the '2-address' shape described above: the
// result lands in src2.
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
// Computes (src0*src1) - src2 into src2 via FSUBRP (reverse subtract,
// 0xDE 0xE0+i), since the product is on top of the FPU stack.
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


// x87 double divide, dst /= src (non-strictfp path; see the strictfp
// variant below which biases operands to avoid double rounding).
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Fix: the block previously carried TWO predicate() clauses —
  // "predicate (UseSSE<=1);" followed by the stronger clause below — so the
  // weaker one was dead text.  Keep the single combined predicate, matching
  // the sibling strictfp_mulDPR_reg.
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all FP double divides
               // (was "01"; plain 1 matches strictfp_mulDPR_reg's cost)

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// x87 double remainder, dst = dst fmod src.  Delegates to the emitModDPR()
// helper, which loops on FPREM; that helper uses EAX and EFLAGS, hence the
// kills.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 double remainder: bounce both operands through the stack onto the
// x87 unit (FPREM has no XMM equivalent), loop until C2 clears, then move
// the result back to an XMM register and pop the leftover x87 stack entry.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

instruct
atanDPR_reg(regDPR dst, regDPR src) %{ 10015 predicate (UseSSE<=1); 10016 match(Set dst(AtanD dst src)); 10017 format %{ "DATA $dst,$src" %} 10018 opcode(0xD9, 0xF3); 10019 ins_encode( Push_Reg_DPR(src), 10020 OpcP, OpcS, RegOpc(dst) ); 10021 ins_pipe( pipe_slow ); 10022 %} 10023 10024 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 10025 predicate (UseSSE>=2); 10026 match(Set dst(AtanD dst src)); 10027 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 10028 format %{ "DATA $dst,$src" %} 10029 opcode(0xD9, 0xF3); 10030 ins_encode( Push_SrcD(src), 10031 OpcP, OpcS, Push_ResultD(dst) ); 10032 ins_pipe( pipe_slow ); 10033 %} 10034 10035 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 10036 predicate (UseSSE<=1); 10037 match(Set dst (SqrtD src)); 10038 format %{ "DSQRT $dst,$src" %} 10039 opcode(0xFA, 0xD9); 10040 ins_encode( Push_Reg_DPR(src), 10041 OpcS, OpcP, Pop_Reg_DPR(dst) ); 10042 ins_pipe( pipe_slow ); 10043 %} 10044 10045 //-------------Float Instructions------------------------------- 10046 // Float Math 10047 10048 // Code for float compare: 10049 // fcompp(); 10050 // fwait(); fnstsw_ax(); 10051 // sahf(); 10052 // movl(dst, unordered_result); 10053 // jcc(Assembler::parity, exit); 10054 // movl(dst, less_result); 10055 // jcc(Assembler::below, exit); 10056 // movl(dst, equal_result); 10057 // jcc(Assembler::equal, exit); 10058 // movl(dst, greater_result); 10059 // exit: 10060 10061 // P6 version of float compare, sets condition codes in EFLAGS 10062 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10063 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10064 match(Set cr (CmpF src1 src2)); 10065 effect(KILL rax); 10066 ins_cost(150); 10067 format %{ "FLD $src1\n\t" 10068 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10069 "JNP exit\n\t" 10070 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10071 "SAHF\n" 10072 "exit:\tNOP // avoid branch to branch" %} 10073 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 
10074 ins_encode( Push_Reg_DPR(src1), 10075 OpcP, RegOpc(src2), 10076 cmpF_P6_fixup ); 10077 ins_pipe( pipe_slow ); 10078 %} 10079 10080 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10081 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10082 match(Set cr (CmpF src1 src2)); 10083 ins_cost(100); 10084 format %{ "FLD $src1\n\t" 10085 "FUCOMIP ST,$src2 // P6 instruction" %} 10086 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10087 ins_encode( Push_Reg_DPR(src1), 10088 OpcP, RegOpc(src2)); 10089 ins_pipe( pipe_slow ); 10090 %} 10091 10092 10093 // Compare & branch 10094 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10095 predicate(UseSSE == 0); 10096 match(Set cr (CmpF src1 src2)); 10097 effect(KILL rax); 10098 ins_cost(200); 10099 format %{ "FLD $src1\n\t" 10100 "FCOMp $src2\n\t" 10101 "FNSTSW AX\n\t" 10102 "TEST AX,0x400\n\t" 10103 "JZ,s flags\n\t" 10104 "MOV AH,1\t# unordered treat as LT\n" 10105 "flags:\tSAHF" %} 10106 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10107 ins_encode( Push_Reg_DPR(src1), 10108 OpcP, RegOpc(src2), 10109 fpu_flags); 10110 ins_pipe( pipe_slow ); 10111 %} 10112 10113 // Compare vs zero into -1,0,1 10114 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10115 predicate(UseSSE == 0); 10116 match(Set dst (CmpF3 src1 zero)); 10117 effect(KILL cr, KILL rax); 10118 ins_cost(280); 10119 format %{ "FTSTF $dst,$src1" %} 10120 opcode(0xE4, 0xD9); 10121 ins_encode( Push_Reg_DPR(src1), 10122 OpcS, OpcP, PopFPU, 10123 CmpF_Result(dst)); 10124 ins_pipe( pipe_slow ); 10125 %} 10126 10127 // Compare into -1,0,1 10128 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10129 predicate(UseSSE == 0); 10130 match(Set dst (CmpF3 src1 src2)); 10131 effect(KILL cr, KILL rax); 10132 ins_cost(300); 10133 format %{ "FCMPF $dst,$src1,$src2" %} 10134 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10135 ins_encode( Push_Reg_DPR(src1), 10136 OpcP, 
RegOpc(src2), 10137 CmpF_Result(dst)); 10138 ins_pipe( pipe_slow ); 10139 %} 10140 10141 // float compare and set condition codes in EFLAGS by XMM regs 10142 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10143 predicate(UseSSE>=1); 10144 match(Set cr (CmpF src1 src2)); 10145 ins_cost(145); 10146 format %{ "UCOMISS $src1,$src2\n\t" 10147 "JNP,s exit\n\t" 10148 "PUSHF\t# saw NaN, set CF\n\t" 10149 "AND [rsp], #0xffffff2b\n\t" 10150 "POPF\n" 10151 "exit:" %} 10152 ins_encode %{ 10153 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10154 emit_cmpfp_fixup(_masm); 10155 %} 10156 ins_pipe( pipe_slow ); 10157 %} 10158 10159 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10160 predicate(UseSSE>=1); 10161 match(Set cr (CmpF src1 src2)); 10162 ins_cost(100); 10163 format %{ "UCOMISS $src1,$src2" %} 10164 ins_encode %{ 10165 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10166 %} 10167 ins_pipe( pipe_slow ); 10168 %} 10169 10170 // float compare and set condition codes in EFLAGS by XMM regs 10171 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10172 predicate(UseSSE>=1); 10173 match(Set cr (CmpF src1 (LoadF src2))); 10174 ins_cost(165); 10175 format %{ "UCOMISS $src1,$src2\n\t" 10176 "JNP,s exit\n\t" 10177 "PUSHF\t# saw NaN, set CF\n\t" 10178 "AND [rsp], #0xffffff2b\n\t" 10179 "POPF\n" 10180 "exit:" %} 10181 ins_encode %{ 10182 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10183 emit_cmpfp_fixup(_masm); 10184 %} 10185 ins_pipe( pipe_slow ); 10186 %} 10187 10188 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10189 predicate(UseSSE>=1); 10190 match(Set cr (CmpF src1 (LoadF src2))); 10191 ins_cost(100); 10192 format %{ "UCOMISS $src1,$src2" %} 10193 ins_encode %{ 10194 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10195 %} 10196 ins_pipe( pipe_slow ); 10197 %} 10198 10199 // Compare into -1,0,1 in XMM 10200 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10201 predicate(UseSSE>=1); 10202 
match(Set dst (CmpF3 src1 src2)); 10203 effect(KILL cr); 10204 ins_cost(255); 10205 format %{ "UCOMISS $src1, $src2\n\t" 10206 "MOV $dst, #-1\n\t" 10207 "JP,s done\n\t" 10208 "JB,s done\n\t" 10209 "SETNE $dst\n\t" 10210 "MOVZB $dst, $dst\n" 10211 "done:" %} 10212 ins_encode %{ 10213 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10214 emit_cmpfp3(_masm, $dst$$Register); 10215 %} 10216 ins_pipe( pipe_slow ); 10217 %} 10218 10219 // Compare into -1,0,1 in XMM and memory 10220 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10221 predicate(UseSSE>=1); 10222 match(Set dst (CmpF3 src1 (LoadF src2))); 10223 effect(KILL cr); 10224 ins_cost(275); 10225 format %{ "UCOMISS $src1, $src2\n\t" 10226 "MOV $dst, #-1\n\t" 10227 "JP,s done\n\t" 10228 "JB,s done\n\t" 10229 "SETNE $dst\n\t" 10230 "MOVZB $dst, $dst\n" 10231 "done:" %} 10232 ins_encode %{ 10233 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10234 emit_cmpfp3(_masm, $dst$$Register); 10235 %} 10236 ins_pipe( pipe_slow ); 10237 %} 10238 10239 // Spill to obtain 24-bit precision 10240 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10241 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10242 match(Set dst (SubF src1 src2)); 10243 10244 format %{ "FSUB $dst,$src1 - $src2" %} 10245 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10246 ins_encode( Push_Reg_FPR(src1), 10247 OpcReg_FPR(src2), 10248 Pop_Mem_FPR(dst) ); 10249 ins_pipe( fpu_mem_reg_reg ); 10250 %} 10251 // 10252 // This instruction does not round to 24-bits 10253 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10254 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10255 match(Set dst (SubF dst src)); 10256 10257 format %{ "FSUB $dst,$src" %} 10258 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10259 ins_encode( Push_Reg_FPR(src), 10260 OpcP, RegOpc(dst) ); 10261 ins_pipe( fpu_reg_reg ); 10262 %} 10263 10264 // Spill to obtain 24-bit precision 10265 instruct 
addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  // Only when strictfp-style 24-bit rounding is required and x87 is in use.
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  // Spill the sum through a stack slot so the store rounds to single precision.
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Absolute value on the x87 top-of-stack register (dst/src pinned to FPR1).
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Sign change on the x87 top-of-stack register (dst/src pinned to FPR1).
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits 10328 instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10329 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10330 match(Set dst (AddF dst (LoadF src))); 10331 10332 format %{ "FADD $dst,$src" %} 10333 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10334 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10335 OpcP, RegOpc(dst) ); 10336 ins_pipe( fpu_reg_mem ); 10337 %} 10338 10339 // // Following two instructions for _222_mpegaudio 10340 // Spill to obtain 24-bit precision 10341 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10342 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10343 match(Set dst (AddF src1 src2)); 10344 10345 format %{ "FADD $dst,$src1,$src2" %} 10346 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10347 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10348 OpcReg_FPR(src2), 10349 Pop_Mem_FPR(dst) ); 10350 ins_pipe( fpu_mem_reg_mem ); 10351 %} 10352 10353 // Cisc-spill variant 10354 // Spill to obtain 24-bit precision 10355 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10356 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10357 match(Set dst (AddF src1 (LoadF src2))); 10358 10359 format %{ "FADD $dst,$src1,$src2 cisc" %} 10360 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10361 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10362 set_instruction_start, 10363 OpcP, RMopc_Mem(secondary,src1), 10364 Pop_Mem_FPR(dst) ); 10365 ins_pipe( fpu_mem_mem_mem ); 10366 %} 10367 10368 // Spill to obtain 24-bit precision 10369 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10370 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10371 match(Set dst (AddF src1 src2)); 10372 10373 format %{ "FADD $dst,$src1,$src2" %} 10374 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10375 ins_encode( Opcode(tertiary), 
RMopc_Mem(0x00,src2), 10376 set_instruction_start, 10377 OpcP, RMopc_Mem(secondary,src1), 10378 Pop_Mem_FPR(dst) ); 10379 ins_pipe( fpu_mem_mem_mem ); 10380 %} 10381 10382 10383 // Spill to obtain 24-bit precision 10384 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10385 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10386 match(Set dst (AddF src con)); 10387 format %{ "FLD $src\n\t" 10388 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10389 "FSTP_S $dst" %} 10390 ins_encode %{ 10391 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10392 __ fadd_s($constantaddress($con)); 10393 __ fstp_s(Address(rsp, $dst$$disp)); 10394 %} 10395 ins_pipe(fpu_mem_reg_con); 10396 %} 10397 // 10398 // This instruction does not round to 24-bits 10399 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10400 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10401 match(Set dst (AddF src con)); 10402 format %{ "FLD $src\n\t" 10403 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10404 "FSTP $dst" %} 10405 ins_encode %{ 10406 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10407 __ fadd_s($constantaddress($con)); 10408 __ fstp_d($dst$$reg); 10409 %} 10410 ins_pipe(fpu_reg_reg_con); 10411 %} 10412 10413 // Spill to obtain 24-bit precision 10414 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10415 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10416 match(Set dst (MulF src1 src2)); 10417 10418 format %{ "FLD $src1\n\t" 10419 "FMUL $src2\n\t" 10420 "FSTP_S $dst" %} 10421 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10422 ins_encode( Push_Reg_FPR(src1), 10423 OpcReg_FPR(src2), 10424 Pop_Mem_FPR(dst) ); 10425 ins_pipe( fpu_mem_reg_reg ); 10426 %} 10427 // 10428 // This instruction does not round to 24-bits 10429 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10430 predicate(UseSSE==0 && 
!Compile::current()->select_24_bit_instr()); 10431 match(Set dst (MulF src1 src2)); 10432 10433 format %{ "FLD $src1\n\t" 10434 "FMUL $src2\n\t" 10435 "FSTP_S $dst" %} 10436 opcode(0xD8, 0x1); /* D8 C8+i */ 10437 ins_encode( Push_Reg_FPR(src2), 10438 OpcReg_FPR(src1), 10439 Pop_Reg_FPR(dst) ); 10440 ins_pipe( fpu_reg_reg_reg ); 10441 %} 10442 10443 10444 // Spill to obtain 24-bit precision 10445 // Cisc-alternate to reg-reg multiply 10446 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10447 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10448 match(Set dst (MulF src1 (LoadF src2))); 10449 10450 format %{ "FLD_S $src2\n\t" 10451 "FMUL $src1\n\t" 10452 "FSTP_S $dst" %} 10453 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10454 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10455 OpcReg_FPR(src1), 10456 Pop_Mem_FPR(dst) ); 10457 ins_pipe( fpu_mem_reg_mem ); 10458 %} 10459 // 10460 // This instruction does not round to 24-bits 10461 // Cisc-alternate to reg-reg multiply 10462 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10463 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10464 match(Set dst (MulF src1 (LoadF src2))); 10465 10466 format %{ "FMUL $dst,$src1,$src2" %} 10467 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10468 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10469 OpcReg_FPR(src1), 10470 Pop_Reg_FPR(dst) ); 10471 ins_pipe( fpu_reg_reg_mem ); 10472 %} 10473 10474 // Spill to obtain 24-bit precision 10475 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10476 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10477 match(Set dst (MulF src1 src2)); 10478 10479 format %{ "FMUL $dst,$src1,$src2" %} 10480 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10481 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10482 set_instruction_start, 10483 OpcP, RMopc_Mem(secondary,src1), 10484 Pop_Mem_FPR(dst) 
); 10485 ins_pipe( fpu_mem_mem_mem ); 10486 %} 10487 10488 // Spill to obtain 24-bit precision 10489 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10490 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10491 match(Set dst (MulF src con)); 10492 10493 format %{ "FLD $src\n\t" 10494 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10495 "FSTP_S $dst" %} 10496 ins_encode %{ 10497 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10498 __ fmul_s($constantaddress($con)); 10499 __ fstp_s(Address(rsp, $dst$$disp)); 10500 %} 10501 ins_pipe(fpu_mem_reg_con); 10502 %} 10503 // 10504 // This instruction does not round to 24-bits 10505 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10506 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10507 match(Set dst (MulF src con)); 10508 10509 format %{ "FLD $src\n\t" 10510 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10511 "FSTP $dst" %} 10512 ins_encode %{ 10513 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10514 __ fmul_s($constantaddress($con)); 10515 __ fstp_d($dst$$reg); 10516 %} 10517 ins_pipe(fpu_reg_reg_con); 10518 %} 10519 10520 10521 // 10522 // MACRO1 -- subsume unshared load into mulFPR 10523 // This instruction does not round to 24-bits 10524 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10525 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10526 match(Set dst (MulF (LoadF mem1) src)); 10527 10528 format %{ "FLD $mem1 ===MACRO1===\n\t" 10529 "FMUL ST,$src\n\t" 10530 "FSTP $dst" %} 10531 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10532 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10533 OpcReg_FPR(src), 10534 Pop_Reg_FPR(dst) ); 10535 ins_pipe( fpu_reg_reg_mem ); 10536 %} 10537 // 10538 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10539 // This instruction does not round to 24-bits 10540 instruct addFPR_mulFPR_reg_load1(regFPR dst, 
memory mem1, regFPR src1, regFPR src2) %{ 10541 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10542 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10543 ins_cost(95); 10544 10545 format %{ "FLD $mem1 ===MACRO2===\n\t" 10546 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10547 "FADD ST,$src2\n\t" 10548 "FSTP $dst" %} 10549 opcode(0xD9); /* LoadF D9 /0 */ 10550 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10551 FMul_ST_reg(src1), 10552 FAdd_ST_reg(src2), 10553 Pop_Reg_FPR(dst) ); 10554 ins_pipe( fpu_reg_mem_reg_reg ); 10555 %} 10556 10557 // MACRO3 -- addFPR a mulFPR 10558 // This instruction does not round to 24-bits. It is a '2-address' 10559 // instruction in that the result goes back to src2. This eliminates 10560 // a move from the macro; possibly the register allocator will have 10561 // to add it back (and maybe not). 10562 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ 10563 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10564 match(Set src2 (AddF (MulF src0 src1) src2)); 10565 10566 format %{ "FLD $src0 ===MACRO3===\n\t" 10567 "FMUL ST,$src1\n\t" 10568 "FADDP $src2,ST" %} 10569 opcode(0xD9); /* LoadF D9 /0 */ 10570 ins_encode( Push_Reg_FPR(src0), 10571 FMul_ST_reg(src1), 10572 FAddP_reg_ST(src2) ); 10573 ins_pipe( fpu_reg_reg_reg ); 10574 %} 10575 10576 // MACRO4 -- divFPR subFPR 10577 // This instruction does not round to 24-bits 10578 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ 10579 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10580 match(Set dst (DivF (SubF src2 src1) src3)); 10581 10582 format %{ "FLD $src2 ===MACRO4===\n\t" 10583 "FSUB ST,$src1\n\t" 10584 "FDIV ST,$src3\n\t" 10585 "FSTP $dst" %} 10586 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10587 ins_encode( Push_Reg_FPR(src2), 10588 subFPR_divFPR_encode(src1,src3), 10589 Pop_Reg_FPR(dst) ); 10590 ins_pipe( fpu_reg_reg_reg_reg ); 10591 %} 10592 10593 // Spill to obtain 24-bit 
precision 10594 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10595 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10596 match(Set dst (DivF src1 src2)); 10597 10598 format %{ "FDIV $dst,$src1,$src2" %} 10599 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 10600 ins_encode( Push_Reg_FPR(src1), 10601 OpcReg_FPR(src2), 10602 Pop_Mem_FPR(dst) ); 10603 ins_pipe( fpu_mem_reg_reg ); 10604 %} 10605 // 10606 // This instruction does not round to 24-bits 10607 instruct divFPR_reg(regFPR dst, regFPR src) %{ 10608 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10609 match(Set dst (DivF dst src)); 10610 10611 format %{ "FDIV $dst,$src" %} 10612 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10613 ins_encode( Push_Reg_FPR(src), 10614 OpcP, RegOpc(dst) ); 10615 ins_pipe( fpu_reg_reg ); 10616 %} 10617 10618 10619 // Spill to obtain 24-bit precision 10620 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10621 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10622 match(Set dst (ModF src1 src2)); 10623 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10624 10625 format %{ "FMOD $dst,$src1,$src2" %} 10626 ins_encode( Push_Reg_Mod_DPR(src1, src2), 10627 emitModDPR(), 10628 Push_Result_Mod_DPR(src2), 10629 Pop_Mem_FPR(dst)); 10630 ins_pipe( pipe_slow ); 10631 %} 10632 // 10633 // This instruction does not round to 24-bits 10634 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 10635 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10636 match(Set dst (ModF dst src)); 10637 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10638 10639 format %{ "FMOD $dst,$src" %} 10640 ins_encode(Push_Reg_Mod_DPR(dst, src), 10641 emitModDPR(), 10642 Push_Result_Mod_DPR(src), 10643 Pop_Reg_FPR(dst)); 10644 ins_pipe( pipe_slow ); 10645 %} 10646 10647 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg 
cr) %{ 10648 predicate(UseSSE>=1); 10649 match(Set dst (ModF src0 src1)); 10650 effect(KILL rax, KILL cr); 10651 format %{ "SUB ESP,4\t # FMOD\n" 10652 "\tMOVSS [ESP+0],$src1\n" 10653 "\tFLD_S [ESP+0]\n" 10654 "\tMOVSS [ESP+0],$src0\n" 10655 "\tFLD_S [ESP+0]\n" 10656 "loop:\tFPREM\n" 10657 "\tFWAIT\n" 10658 "\tFNSTSW AX\n" 10659 "\tSAHF\n" 10660 "\tJP loop\n" 10661 "\tFSTP_S [ESP+0]\n" 10662 "\tMOVSS $dst,[ESP+0]\n" 10663 "\tADD ESP,4\n" 10664 "\tFSTP ST0\t # Restore FPU Stack" 10665 %} 10666 ins_cost(250); 10667 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 10668 ins_pipe( pipe_slow ); 10669 %} 10670 10671 10672 //----------Arithmetic Conversion Instructions--------------------------------- 10673 // The conversions operations are all Alpha sorted. Please keep it that way! 10674 10675 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ 10676 predicate(UseSSE==0); 10677 match(Set dst (RoundFloat src)); 10678 ins_cost(125); 10679 format %{ "FST_S $dst,$src\t# F-round" %} 10680 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 10681 ins_pipe( fpu_mem_reg ); 10682 %} 10683 10684 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ 10685 predicate(UseSSE<=1); 10686 match(Set dst (RoundDouble src)); 10687 ins_cost(125); 10688 format %{ "FST_D $dst,$src\t# D-round" %} 10689 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 10690 ins_pipe( fpu_mem_reg ); 10691 %} 10692 10693 // Force rounding to 24-bit precision and 6-bit exponent 10694 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ 10695 predicate(UseSSE==0); 10696 match(Set dst (ConvD2F src)); 10697 format %{ "FST_S $dst,$src\t# F-round" %} 10698 expand %{ 10699 roundFloat_mem_reg(dst,src); 10700 %} 10701 %} 10702 10703 // Force rounding to 24-bit precision and 6-bit exponent 10704 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ 10705 predicate(UseSSE==1); 10706 match(Set dst (ConvD2F src)); 10707 effect( KILL cr ); 10708 format %{ "SUB ESP,4\n\t" 10709 
"FST_S [ESP],$src\t# F-round\n\t" 10710 "MOVSS $dst,[ESP]\n\t" 10711 "ADD ESP,4" %} 10712 ins_encode %{ 10713 __ subptr(rsp, 4); 10714 if ($src$$reg != FPR1L_enc) { 10715 __ fld_s($src$$reg-1); 10716 __ fstp_s(Address(rsp, 0)); 10717 } else { 10718 __ fst_s(Address(rsp, 0)); 10719 } 10720 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 10721 __ addptr(rsp, 4); 10722 %} 10723 ins_pipe( pipe_slow ); 10724 %} 10725 10726 // Force rounding double precision to single precision 10727 instruct convD2F_reg(regF dst, regD src) %{ 10728 predicate(UseSSE>=2); 10729 match(Set dst (ConvD2F src)); 10730 format %{ "CVTSD2SS $dst,$src\t# F-round" %} 10731 ins_encode %{ 10732 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); 10733 %} 10734 ins_pipe( pipe_slow ); 10735 %} 10736 10737 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ 10738 predicate(UseSSE==0); 10739 match(Set dst (ConvF2D src)); 10740 format %{ "FST_S $dst,$src\t# D-round" %} 10741 ins_encode( Pop_Reg_Reg_DPR(dst, src)); 10742 ins_pipe( fpu_reg_reg ); 10743 %} 10744 10745 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ 10746 predicate(UseSSE==1); 10747 match(Set dst (ConvF2D src)); 10748 format %{ "FST_D $dst,$src\t# D-round" %} 10749 expand %{ 10750 roundDouble_mem_reg(dst,src); 10751 %} 10752 %} 10753 10754 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ 10755 predicate(UseSSE==1); 10756 match(Set dst (ConvF2D src)); 10757 effect( KILL cr ); 10758 format %{ "SUB ESP,4\n\t" 10759 "MOVSS [ESP] $src\n\t" 10760 "FLD_S [ESP]\n\t" 10761 "ADD ESP,4\n\t" 10762 "FSTP $dst\t# D-round" %} 10763 ins_encode %{ 10764 __ subptr(rsp, 4); 10765 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10766 __ fld_s(Address(rsp, 0)); 10767 __ addptr(rsp, 4); 10768 __ fstp_d($dst$$reg); 10769 %} 10770 ins_pipe( pipe_slow ); 10771 %} 10772 10773 instruct convF2D_reg(regD dst, regF src) %{ 10774 predicate(UseSSE>=2); 10775 match(Set dst (ConvF2D src)); 10776 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 10777 
ins_encode %{ 10778 __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); 10779 %} 10780 ins_pipe( pipe_slow ); 10781 %} 10782 10783 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10784 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ 10785 predicate(UseSSE<=1); 10786 match(Set dst (ConvD2I src)); 10787 effect( KILL tmp, KILL cr ); 10788 format %{ "FLD $src\t# Convert double to int \n\t" 10789 "FLDCW trunc mode\n\t" 10790 "SUB ESP,4\n\t" 10791 "FISTp [ESP + #0]\n\t" 10792 "FLDCW std/24-bit mode\n\t" 10793 "POP EAX\n\t" 10794 "CMP EAX,0x80000000\n\t" 10795 "JNE,s fast\n\t" 10796 "FLD_D $src\n\t" 10797 "CALL d2i_wrapper\n" 10798 "fast:" %} 10799 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); 10800 ins_pipe( pipe_slow ); 10801 %} 10802 10803 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10804 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 10805 predicate(UseSSE>=2); 10806 match(Set dst (ConvD2I src)); 10807 effect( KILL tmp, KILL cr ); 10808 format %{ "CVTTSD2SI $dst, $src\n\t" 10809 "CMP $dst,0x80000000\n\t" 10810 "JNE,s fast\n\t" 10811 "SUB ESP, 8\n\t" 10812 "MOVSD [ESP], $src\n\t" 10813 "FLD_D [ESP]\n\t" 10814 "ADD ESP, 8\n\t" 10815 "CALL d2i_wrapper\n" 10816 "fast:" %} 10817 ins_encode %{ 10818 Label fast; 10819 __ cvttsd2sil($dst$$Register, $src$$XMMRegister); 10820 __ cmpl($dst$$Register, 0x80000000); 10821 __ jccb(Assembler::notEqual, fast); 10822 __ subptr(rsp, 8); 10823 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10824 __ fld_d(Address(rsp, 0)); 10825 __ addptr(rsp, 8); 10826 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper()))); 10827 __ bind(fast); 10828 %} 10829 ins_pipe( pipe_slow ); 10830 %} 10831 10832 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ 10833 predicate(UseSSE<=1); 10834 match(Set dst (ConvD2L src)); 10835 effect( KILL cr ); 10836 format %{ "FLD 
$src\t# Convert double to long\n\t" 10837 "FLDCW trunc mode\n\t" 10838 "SUB ESP,8\n\t" 10839 "FISTp [ESP + #0]\n\t" 10840 "FLDCW std/24-bit mode\n\t" 10841 "POP EAX\n\t" 10842 "POP EDX\n\t" 10843 "CMP EDX,0x80000000\n\t" 10844 "JNE,s fast\n\t" 10845 "TEST EAX,EAX\n\t" 10846 "JNE,s fast\n\t" 10847 "FLD $src\n\t" 10848 "CALL d2l_wrapper\n" 10849 "fast:" %} 10850 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); 10851 ins_pipe( pipe_slow ); 10852 %} 10853 10854 // XMM lacks a float/double->long conversion, so use the old FPU stack. 10855 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 10856 predicate (UseSSE>=2); 10857 match(Set dst (ConvD2L src)); 10858 effect( KILL cr ); 10859 format %{ "SUB ESP,8\t# Convert double to long\n\t" 10860 "MOVSD [ESP],$src\n\t" 10861 "FLD_D [ESP]\n\t" 10862 "FLDCW trunc mode\n\t" 10863 "FISTp [ESP + #0]\n\t" 10864 "FLDCW std/24-bit mode\n\t" 10865 "POP EAX\n\t" 10866 "POP EDX\n\t" 10867 "CMP EDX,0x80000000\n\t" 10868 "JNE,s fast\n\t" 10869 "TEST EAX,EAX\n\t" 10870 "JNE,s fast\n\t" 10871 "SUB ESP,8\n\t" 10872 "MOVSD [ESP],$src\n\t" 10873 "FLD_D [ESP]\n\t" 10874 "ADD ESP,8\n\t" 10875 "CALL d2l_wrapper\n" 10876 "fast:" %} 10877 ins_encode %{ 10878 Label fast; 10879 __ subptr(rsp, 8); 10880 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10881 __ fld_d(Address(rsp, 0)); 10882 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); 10883 __ fistp_d(Address(rsp, 0)); 10884 // Restore the rounding mode, mask the exception 10885 if (Compile::current()->in_24_bit_fp_mode()) { 10886 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 10887 } else { 10888 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 10889 } 10890 // Load the converted long, adjust CPU stack 10891 __ pop(rax); 10892 __ pop(rdx); 10893 __ cmpl(rdx, 0x80000000); 10894 __ jccb(Assembler::notEqual, fast); 10895 __ testl(rax, rax); 10896 __ jccb(Assembler::notEqual, fast); 10897 __ subptr(rsp, 8); 
10898 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10899 __ fld_d(Address(rsp, 0)); 10900 __ addptr(rsp, 8); 10901 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); 10902 __ bind(fast); 10903 %} 10904 ins_pipe( pipe_slow ); 10905 %} 10906 10907 // Convert a double to an int. Java semantics require we do complex 10908 // manglations in the corner cases. So we set the rounding mode to 10909 // 'zero', store the darned double down as an int, and reset the 10910 // rounding mode to 'nearest'. The hardware stores a flag value down 10911 // if we would overflow or converted a NAN; we check for this and 10912 // and go the slow path if needed. 10913 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ 10914 predicate(UseSSE==0); 10915 match(Set dst (ConvF2I src)); 10916 effect( KILL tmp, KILL cr ); 10917 format %{ "FLD $src\t# Convert float to int \n\t" 10918 "FLDCW trunc mode\n\t" 10919 "SUB ESP,4\n\t" 10920 "FISTp [ESP + #0]\n\t" 10921 "FLDCW std/24-bit mode\n\t" 10922 "POP EAX\n\t" 10923 "CMP EAX,0x80000000\n\t" 10924 "JNE,s fast\n\t" 10925 "FLD $src\n\t" 10926 "CALL d2i_wrapper\n" 10927 "fast:" %} 10928 // DPR2I_encoding works for FPR2I 10929 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); 10930 ins_pipe( pipe_slow ); 10931 %} 10932 10933 // Convert a float in xmm to an int reg. 
// Convert a float in an XMM register to an int register.
// CVTTSS2SI yields the sentinel 0x80000000 when it cannot convert
// (overflow/NaN); on that value we fall back to the stub to get exact
// Java semantics.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: push the operand onto the x87 stack (via memory, since
    // XMM cannot be loaded into the FPU directly) and call the wrapper.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 float -> long conversion: truncating store through memory, with a
// wrapper call when the stored value is the 0x8000000000000000 sentinel.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
10986 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 10987 predicate (UseSSE>=1); 10988 match(Set dst (ConvF2L src)); 10989 effect( KILL cr ); 10990 format %{ "SUB ESP,8\t# Convert float to long\n\t" 10991 "MOVSS [ESP],$src\n\t" 10992 "FLD_S [ESP]\n\t" 10993 "FLDCW trunc mode\n\t" 10994 "FISTp [ESP + #0]\n\t" 10995 "FLDCW std/24-bit mode\n\t" 10996 "POP EAX\n\t" 10997 "POP EDX\n\t" 10998 "CMP EDX,0x80000000\n\t" 10999 "JNE,s fast\n\t" 11000 "TEST EAX,EAX\n\t" 11001 "JNE,s fast\n\t" 11002 "SUB ESP,4\t# Convert float to long\n\t" 11003 "MOVSS [ESP],$src\n\t" 11004 "FLD_S [ESP]\n\t" 11005 "ADD ESP,4\n\t" 11006 "CALL d2l_wrapper\n" 11007 "fast:" %} 11008 ins_encode %{ 11009 Label fast; 11010 __ subptr(rsp, 8); 11011 __ movflt(Address(rsp, 0), $src$$XMMRegister); 11012 __ fld_s(Address(rsp, 0)); 11013 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); 11014 __ fistp_d(Address(rsp, 0)); 11015 // Restore the rounding mode, mask the exception 11016 if (Compile::current()->in_24_bit_fp_mode()) { 11017 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 11018 } else { 11019 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 11020 } 11021 // Load the converted long, adjust CPU stack 11022 __ pop(rax); 11023 __ pop(rdx); 11024 __ cmpl(rdx, 0x80000000); 11025 __ jccb(Assembler::notEqual, fast); 11026 __ testl(rax, rax); 11027 __ jccb(Assembler::notEqual, fast); 11028 __ subptr(rsp, 4); 11029 __ movflt(Address(rsp, 0), $src$$XMMRegister); 11030 __ fld_s(Address(rsp, 0)); 11031 __ addptr(rsp, 4); 11032 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); 11033 __ bind(fast); 11034 %} 11035 ins_pipe( pipe_slow ); 11036 %} 11037 11038 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ 11039 predicate( UseSSE<=1 ); 11040 match(Set dst (ConvI2D src)); 11041 format %{ "FILD $src\n\t" 11042 "FSTP $dst" %} 11043 opcode(0xDB, 0x0); /* DB /0 */ 11044 
ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// int -> double via CVTSI2SD (GPR source); used when UseXmmI2D is off.
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// int -> double with the int loaded straight from memory.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// int -> double staying in the XMM domain (MOVD + CVTDQ2PD);
// selected when UseXmmI2D is on.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// x87 int -> double with a memory operand (FILD directly from memory).
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// int -> float when the input is known to be (x & 255): the value fits in
// 24 bits of mantissa, so no explicit rounding step is needed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  // FSTP_S to a stack slot performs the single-precision rounding.
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI
mem))); 11147 format %{ "FILD $mem\n\t" 11148 "FSTP $dst" %} 11149 opcode(0xDB); /* DB /0 */ 11150 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11151 Pop_Reg_FPR(dst)); 11152 ins_pipe( fpu_reg_mem ); 11153 %} 11154 11155 // Convert an int to a float in xmm; no rounding step needed. 11156 instruct convI2F_reg(regF dst, rRegI src) %{ 11157 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); 11158 match(Set dst (ConvI2F src)); 11159 format %{ "CVTSI2SS $dst, $src" %} 11160 ins_encode %{ 11161 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); 11162 %} 11163 ins_pipe( pipe_slow ); 11164 %} 11165 11166 instruct convXI2F_reg(regF dst, rRegI src) 11167 %{ 11168 predicate( UseSSE>=2 && UseXmmI2F ); 11169 match(Set dst (ConvI2F src)); 11170 11171 format %{ "MOVD $dst,$src\n\t" 11172 "CVTDQ2PS $dst,$dst\t# i2f" %} 11173 ins_encode %{ 11174 __ movdl($dst$$XMMRegister, $src$$Register); 11175 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); 11176 %} 11177 ins_pipe(pipe_slow); // XXX 11178 %} 11179 11180 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{ 11181 match(Set dst (ConvI2L src)); 11182 effect(KILL cr); 11183 ins_cost(375); 11184 format %{ "MOV $dst.lo,$src\n\t" 11185 "MOV $dst.hi,$src\n\t" 11186 "SAR $dst.hi,31" %} 11187 ins_encode(convert_int_long(dst,src)); 11188 ins_pipe( ialu_reg_reg_long ); 11189 %} 11190 11191 // Zero-extend convert int to long 11192 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{ 11193 match(Set dst (AndL (ConvI2L src) mask) ); 11194 effect( KILL flags ); 11195 ins_cost(250); 11196 format %{ "MOV $dst.lo,$src\n\t" 11197 "XOR $dst.hi,$dst.hi" %} 11198 opcode(0x33); // XOR 11199 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11200 ins_pipe( ialu_reg_reg_long ); 11201 %} 11202 11203 // Zero-extend long 11204 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{ 11205 match(Set dst (AndL src mask) ); 11206 effect( KILL flags ); 11207 ins_cost(250); 11208 format %{ "MOV 
$dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Convert long to double, x87 path; result delivered through a stack slot.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to double into an XMM register (UseSSE>=2): FILD/FSTP
// through the stack, then MOVSD into the destination.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to float into an XMM register (UseSSE>=1).
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Convert long to float, x87 path.
// FIX: restrict this rule to UseSSE==0. It previously had no predicate,
// so it also matched ConvL2F when UseSSE>=1 and overlapped convL2F_reg
// above; the two rules must partition on UseSSE.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE==0);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Truncate long to int: just copy the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Reinterpret float bits as int: plain 32-bit load from the stack slot.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// x87: store the float register to the stack slot (bit-preserving).
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// SSE: store the XMM float to the stack slot (bit-preserving).
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2: move XMM bits directly to a GPR with MOVD — cheapest form.
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret int bits as float: plain 32-bit store to the stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# 
MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// x87: load the stack slot as a raw 32-bit float into an FPU register.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// SSE: load the stack slot into an XMM register (bit-preserving).
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// SSE2: move GPR bits directly into XMM with MOVD — cheapest form.
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret double bits as long: two 32-bit loads into the register pair.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// x87: store the double register to the long stack slot (bit-preserving).
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// SSE2: store the XMM double to the long stack slot.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 register-to-register: MOVD the low word, PSHUFLW to bring the high
// word down in a temp, then MOVD it into the high half of the pair.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret long bits as double: two 32-bit stores to the stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// x87: load the long stack slot as a raw 64-bit double.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// SSE2: full 64-bit load, clearing the upper half of the XMM register.
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant when partial-register loads are preferred (upper half kept).
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 register-to-register: MOVD each half into XMM regs, then interleave
// with PUNPCKLDQ. dst is TEMP because it is written before src is dead.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
// Small ClearArray non-AVX512.
// Small ClearArray, non-AVX512 path. cnt is in doublewords; dispatches to
// MacroAssembler::clear_mem with is_large=false and no mask register.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 non-constant length.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray non-AVX512.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray AVX512.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 constant length.
// Note: uses the 5-argument clear_mem overload (constant count, no
// is_large flag) — different from the register-count rules above.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Latin1/Latin1 string compare, non-AVX512 path.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Latin1/Latin1 string compare, AVX512 path (uses a mask register).
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && 
((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// UTF-16/UTF-16 string compare, non-AVX512 path.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// UTF-16/UTF-16 string compare, AVX512 path.
instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Latin1/UTF-16 string compare, non-AVX512 path.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Latin1/UTF-16 string compare, AVX512 path.
instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// UTF-16/Latin1 string compare, non-AVX512 path. Note the register roles are
// reversed relative to the LU rules and the call swaps str1/str2 and
// cnt1/cnt2 so the shared LU implementation can be reused.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// UTF-16/Latin1 string compare, AVX512 path (operands swapped as above).
instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}

// fast string equals, AVX512 path.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Latin1 substring search with a non-constant needle length (-1 passed).
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// UTF-16 substring search with a non-constant needle length (-1 passed).
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Mixed UTF-16 haystack / Latin1 needle substring search, non-constant length.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 12013 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 12014 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 12015 12016 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 12017 ins_encode %{ 12018 __ string_indexof($str1$$Register, $str2$$Register, 12019 $cnt1$$Register, $cnt2$$Register, 12020 (-1), $result$$Register, 12021 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 12022 %} 12023 ins_pipe( pipe_slow ); 12024 %} 12025 12026 instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 12027 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 12028 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); 12029 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 12030 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 12031 format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 12032 ins_encode %{ 12033 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 12034 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 12035 %} 12036 ins_pipe( pipe_slow ); 12037 %} 12038 12039 instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 12040 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 12041 predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); 12042 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 12043 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 12044 format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 12045 ins_encode %{ 12046 __ 
stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 12047 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 12048 %} 12049 ins_pipe( pipe_slow ); 12050 %} 12051 12052 12053 // fast array equals 12054 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12055 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 12056 %{ 12057 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 12058 match(Set result (AryEq ary1 ary2)); 12059 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12060 //ins_cost(300); 12061 12062 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12063 ins_encode %{ 12064 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12065 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12066 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg); 12067 %} 12068 ins_pipe( pipe_slow ); 12069 %} 12070 12071 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12072 regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 12073 %{ 12074 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 12075 match(Set result (AryEq ary1 ary2)); 12076 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12077 //ins_cost(300); 12078 12079 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12080 ins_encode %{ 12081 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12082 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12083 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister); 12084 %} 12085 ins_pipe( pipe_slow ); 12086 %} 12087 12088 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12089 regD tmp1, regD tmp2, eCXRegI 
tmp3, eBXRegI tmp4, eFlagsReg cr) 12090 %{ 12091 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 12092 match(Set result (AryEq ary1 ary2)); 12093 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12094 //ins_cost(300); 12095 12096 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12097 ins_encode %{ 12098 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12099 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12100 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg); 12101 %} 12102 ins_pipe( pipe_slow ); 12103 %} 12104 12105 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12106 regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 12107 %{ 12108 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 12109 match(Set result (AryEq ary1 ary2)); 12110 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12111 //ins_cost(300); 12112 12113 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12114 ins_encode %{ 12115 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12116 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12117 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister); 12118 %} 12119 ins_pipe( pipe_slow ); 12120 %} 12121 12122 instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result, 12123 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) 12124 %{ 12125 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); 12126 match(Set result (CountPositives ary1 len)); 12127 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 12128 12129 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 12130 ins_encode %{ 
12131 __ count_positives($ary1$$Register, $len$$Register, 12132 $result$$Register, $tmp3$$Register, 12133 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg); 12134 %} 12135 ins_pipe( pipe_slow ); 12136 %} 12137 12138 instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result, 12139 regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr) 12140 %{ 12141 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); 12142 match(Set result (CountPositives ary1 len)); 12143 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 12144 12145 format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 12146 ins_encode %{ 12147 __ count_positives($ary1$$Register, $len$$Register, 12148 $result$$Register, $tmp3$$Register, 12149 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 12150 %} 12151 ins_pipe( pipe_slow ); 12152 %} 12153 12154 12155 // fast char[] to byte[] compression 12156 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, 12157 regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12158 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); 12159 match(Set result (StrCompressedCopy src (Binary dst len))); 12160 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12161 12162 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 12163 ins_encode %{ 12164 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, 12165 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12166 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, 12167 knoreg, knoreg); 12168 %} 12169 ins_pipe( pipe_slow ); 12170 %} 12171 12172 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, 12173 regD tmp3, regD tmp4, kReg ktmp1, kReg 
ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12174 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); 12175 match(Set result (StrCompressedCopy src (Binary dst len))); 12176 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12177 12178 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 12179 ins_encode %{ 12180 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, 12181 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12182 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, 12183 $ktmp1$$KRegister, $ktmp2$$KRegister); 12184 %} 12185 ins_pipe( pipe_slow ); 12186 %} 12187 12188 // fast byte[] to char[] inflation 12189 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 12190 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ 12191 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); 12192 match(Set dummy (StrInflatedCopy src (Binary dst len))); 12193 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 12194 12195 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 12196 ins_encode %{ 12197 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, 12198 $tmp1$$XMMRegister, $tmp2$$Register, knoreg); 12199 %} 12200 ins_pipe( pipe_slow ); 12201 %} 12202 12203 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 12204 regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{ 12205 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); 12206 match(Set dummy (StrInflatedCopy src (Binary dst len))); 12207 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 12208 12209 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 12210 ins_encode %{ 12211 __ byte_array_inflate($src$$Register, $dst$$Register, 
$len$$Register, 12212 $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister); 12213 %} 12214 ins_pipe( pipe_slow ); 12215 %} 12216 12217 // encode char[] to byte[] in ISO_8859_1 12218 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 12219 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 12220 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12221 predicate(!((EncodeISOArrayNode*)n)->is_ascii()); 12222 match(Set result (EncodeISOArray src (Binary dst len))); 12223 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12224 12225 format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 12226 ins_encode %{ 12227 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 12228 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12229 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false); 12230 %} 12231 ins_pipe( pipe_slow ); 12232 %} 12233 12234 // encode char[] to byte[] in ASCII 12235 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len, 12236 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 12237 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12238 predicate(((EncodeISOArrayNode*)n)->is_ascii()); 12239 match(Set result (EncodeISOArray src (Binary dst len))); 12240 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12241 12242 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 12243 ins_encode %{ 12244 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 12245 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12246 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true); 12247 %} 12248 ins_pipe( pipe_slow ); 12249 %} 12250 12251 //----------Control Flow Instructions------------------------------------------ 12252 // Signed compare Instructions 12253 
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  // NOTE(review): 0xCC looks like a placeholder opcode; the real code is
  // emitted by the min_enc encoding class -- confirm against its definition.
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  // NOTE(review): 0xCC placeholder; real encoding comes from max_enc.
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 is computed but never used below -- dead local.
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Bounded mask operand used in following pattern is needed for
// post-loop multiversioning.
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl, TEMP ktmp);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask($ktmp$$KRegister);
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Bounded mask operand used in following pattern is needed for
// post-loop multiversioning.
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl, TEMP ktmp);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask($ktmp$$KRegister);
  %}
  ins_pipe( pipe_jcc );
%}

// Bounded mask operand used in following pattern is needed for
// post-loop multiversioning.
// Masked (post-loop multiversioning) loop-end branch for eFlagsRegUCF flags.
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl, TEMP ktmp);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask($ktmp$$KRegister);
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Unsigned conditional branch on eFlagsRegUCF flags (cheaper cost 200).
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// EQ/NE branch that must also consult the parity flag (cmpOpUCF2):
// for NE, branch if PF or NE; for EQ, skip the EQ branch when PF is set.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done); // PF set => operands unordered/not equal: fall past
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Flags-only variant: the check's result is compared against zero, so the
// result register need not be materialized (opcode 0x0 suppresses the XOR).
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2); // short JMP: EB + rel8 = 2 bytes
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2); // short Jcc: 7x + rel8 = 2 bytes
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short loop-end branch for eFlagsRegUCF flags.
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short unsigned conditional branch for eFlagsRegUCF flags.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short form of the parity-aware EQ/NE branch (see jmpConUCF2 above for the
// long form): both paths emit two 2-byte short branches, hence size(4).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12973 12974 // ============================================================================ 12975 // Long Compare 12976 // 12977 // Currently we hold longs in 2 registers. Comparing such values efficiently 12978 // is tricky. The flavor of compare used depends on whether we are testing 12979 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit. 12980 // The GE test is the negated LT test. The LE test can be had by commuting 12981 // the operands (yielding a GE test) and then negating; negate again for the 12982 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the 12983 // NE test is negated from that. 12984 12985 // Due to a shortcoming in the ADLC, it mixes up expressions like: 12986 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the 12987 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections 12988 // are collapsed internally in the ADLC's dfa-gen code. The match for 12989 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the 12990 // foo match ends up with the wrong leaf. One fix is to not match both 12991 // reg-reg and reg-zero forms of long-compare. This is unfortunate because 12992 // both forms beat the trinary form of long-compare and both are very useful 12993 // on Intel which has so few registers. 12994 12995 // Manifest a CmpL result in an integer register. Very painful. 12996 // This is the test to avoid. 
12997 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{ 12998 match(Set dst (CmpL3 src1 src2)); 12999 effect( KILL flags ); 13000 ins_cost(1000); 13001 format %{ "XOR $dst,$dst\n\t" 13002 "CMP $src1.hi,$src2.hi\n\t" 13003 "JLT,s m_one\n\t" 13004 "JGT,s p_one\n\t" 13005 "CMP $src1.lo,$src2.lo\n\t" 13006 "JB,s m_one\n\t" 13007 "JEQ,s done\n" 13008 "p_one:\tINC $dst\n\t" 13009 "JMP,s done\n" 13010 "m_one:\tDEC $dst\n" 13011 "done:" %} 13012 ins_encode %{ 13013 Label p_one, m_one, done; 13014 __ xorptr($dst$$Register, $dst$$Register); 13015 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register)); 13016 __ jccb(Assembler::less, m_one); 13017 __ jccb(Assembler::greater, p_one); 13018 __ cmpl($src1$$Register, $src2$$Register); 13019 __ jccb(Assembler::below, m_one); 13020 __ jccb(Assembler::equal, done); 13021 __ bind(p_one); 13022 __ incrementl($dst$$Register); 13023 __ jmpb(done); 13024 __ bind(m_one); 13025 __ decrementl($dst$$Register); 13026 __ bind(done); 13027 %} 13028 ins_pipe( pipe_slow ); 13029 %} 13030 13031 //====== 13032 // Manifest a CmpL result in the normal flags. Only good for LT or GE 13033 // compares. Can be used for LE or GT compares by reversing arguments. 13034 // NOT GOOD FOR EQ/NE tests. 13035 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ 13036 match( Set flags (CmpL src zero )); 13037 ins_cost(100); 13038 format %{ "TEST $src.hi,$src.hi" %} 13039 opcode(0x85); 13040 ins_encode( OpcP, RegReg_Hi2( src, src ) ); 13041 ins_pipe( ialu_cr_reg_reg ); 13042 %} 13043 13044 // Manifest a CmpL result in the normal flags. Only good for LT or GE 13045 // compares. Can be used for LE or GT compares by reversing arguments. 13046 // NOT GOOD FOR EQ/NE tests. 
// CMP low halves then SBB the high halves into tmp: the borrow chain makes
// the final flags reflect the full 64-bit signed LT/GE relation.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Two CMOVs, one per 32-bit half of the destination pair.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant of cmovLL_reg_LTGE.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags wrappers: expand to the signed-operand rules above.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags wrappers for the int CMOVs.
instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): '&&' binds tighter than '||' here, so the UseSSE guard in
// these float/double predicates applies only to the BoolTest::lt arm; the
// BoolTest::ge arm is unguarded. The regDPR/regD (and regFPR/regF) operand
// classes appear to keep the rules from overlapping in practice — confirm
// intent before adding parentheses.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// OR the two halves into tmp: ZF is set iff the whole 64-bit value is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compare low halves; only if they are equal does the high-half compare run,
// so the resulting ZF answers 64-bit EQ/NE.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
13283 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{ 13284 match(Set flags (CmpUL src zero)); 13285 effect(TEMP tmp); 13286 ins_cost(200); 13287 format %{ "MOV $tmp,$src.lo\n\t" 13288 "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %} 13289 ins_encode(long_cmp_flags0(src, tmp)); 13290 ins_pipe(ialu_reg_reg_long); 13291 %} 13292 13293 // Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares. 13294 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{ 13295 match(Set flags (CmpUL src1 src2)); 13296 ins_cost(200+300); 13297 format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" 13298 "JNE,s skip\n\t" 13299 "CMP $src1.hi,$src2.hi\n\t" 13300 "skip:\t" %} 13301 ins_encode(long_cmp_flags1(src1, src2)); 13302 ins_pipe(ialu_cr_reg_reg); 13303 %} 13304 13305 // Unsigned long compare reg == zero/reg OR reg != zero/reg 13306 // Just a wrapper for a normal branch, plus the predicate test. 13307 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{ 13308 match(If cmp flags); 13309 effect(USE labl); 13310 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); 13311 expand %{ 13312 jmpCon(cmp, flags, labl); // JEQ or JNE... 13313 %} 13314 %} 13315 13316 // Compare 2 longs and CMOVE longs. 
13317 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{ 13318 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 13319 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13320 ins_cost(400); 13321 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13322 "CMOV$cmp $dst.hi,$src.hi" %} 13323 opcode(0x0F,0x40); 13324 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 13325 ins_pipe( pipe_cmov_reg_long ); 13326 %} 13327 13328 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{ 13329 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 13330 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13331 ins_cost(500); 13332 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13333 "CMOV$cmp $dst.hi,$src.hi" %} 13334 opcode(0x0F,0x40); 13335 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 13336 ins_pipe( pipe_cmov_reg_long ); 13337 %} 13338 13339 // Compare 2 longs and CMOVE ints. 
// Compare 2 longs and CMOVE ints, restricted to EQ/NE tests on the
// manifested long-compare flags.  All cmov* forms below require CMOVcc
// hardware, hence VM_Version::supports_cmov() in the predicates.  The
// _kids[0]->_kids[0] navigation digs the Bool node out of the
// (Binary cmp flags) subtree of the CMove.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above but the conditionally-moved value comes from memory.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned flavors: for EQ/NE the signed and unsigned condition codes are
// identical, so these simply expand into the signed encodings above.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1).
// FIX: parenthesize the eq/ne disjunction so the UseSSE guard covers both
// tests.  The old "UseSSE<=1 && eq || ne" parsed as "(UseSSE<=1 && eq) || ne",
// letting the ne case match at any UseSSE level; only the operand-level
// regDPR/regD predicates kept selection correct.  Tightening matches the
// parenthesized style used by the integer/pointer forms above.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, UseSSE>=2).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, UseSSE==0).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM form, UseSSE>=1).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );   // scratch for computing 0 - src
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
// Unsigned LE/GT long cmovs: LE/GT on an unsigned manifested compare uses
// the same condition codes as the signed form here, so expand into the
// signed encodings above.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);   // CMOVcc: 0F 40+cc /r; enc_cmov folds the condition into the opcode
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above but the conditionally-moved value comes from memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned int cmovs: expand into the signed encodings above.
instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 form, UseSSE<=1).
// FIX: parenthesize the le/gt disjunction so the UseSSE guard covers both
// tests.  The old "UseSSE<=1 && le || gt" parsed as "(UseSSE<=1 && le) || gt",
// letting the gt case match at any UseSSE level; only the operand-level
// regDPR/regD predicates kept selection correct.  Tightening matches the
// parenthesized style used by the integer/pointer forms above.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, UseSSE>=2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, UseSSE==0).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM form, UseSSE>=1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is pre-loaded with a placeholder inline-cache oop (-1) which the
  // runtime patches at the first real dispatch.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              // debug-only check that the x87 stack is empty after the call
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that neither touches nor preserves x87 FPU state, so no
// float-stack bookkeeping is emitted around it.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(300);
  // method_ptr is pinned to EBX by its operand type; the callee expects the
  // Method* there.
  format %{ "JMP $jump_target \t# EBX holds method" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // The popped return address is discarded into EDX; the exception oop
  // travels in EAX (pinned by the eAXRegP operand).
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// RTM (hardware transactional) fast-lock path; selected only when the
// compilation uses RTM.  Register pinning (EBX box, EAX tmp, EDX scr) is
// dictated by MacroAssembler::fast_lock.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast-lock path: extra RTM arguments are passed as noreg/NULL/false.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast a long's low bits into an AVX-512 opmask register.
// NOTE(review): the instruct name says LT32 but the predicate is "<= 32" and
// the format string says LE32 — naming is inconsistent; the <= 32 predicate
// is what pairs correctly with the GT32 variant below.  Confirm intended name.
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Mask lengths above 32 need the wide (two-step) sequence and a spare
// opmask register on 32-bit x86.
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Same as above but the source is a 32-bit integer register.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    // The relocation/size contract depends on the exact TEST EAX,[reg]
    // encoding (opcode 0x85); verify it was emitted.
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceeded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a load that reads back the value just stored to the same
// memory slot is redundant; keep only the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.