//
// Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok, so here's the trick: FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example.
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI)
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// One-time adjustment of allocatable register masks, called at VM startup.
void reg_mask_init() {
  if (Matcher::has_predicated_vectors()) {
    // Post-loop multi-versioning expects the mask to be present in the K1
    // register. Until that is fixed, the RA should not allocate K1; removing
    // it here prevents any accidental corruption of the value held in K1.
    if (PostLoopMultiversioning) {
      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
    }
  }
}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Round 'adr' down to a 128-bit boundary within the (over-sized) backing
// array and store the two 64-bit halves there, returning the aligned slot.
// Used to build 16-byte-aligned mask operands for SSE instructions.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted just before a call for FPU-mode restore (fldcw)
// and AVX state clearing (vzeroupper); the ret_addr_offset() methods below
// must account for these extra bytes.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size in bytes of the FFree_Float_Stack_All sequence; -1 until it has been
// emitted once (asserted below before first use).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

int MachCallNativeNode::ret_addr_offset() {
  // Native calls are not expected through this node on 32-bit x86.
  ShouldNotCallThis();
  return -1;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5; // skip MOV instruction
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M (or SIB) byte: f1 = mod, f2 = reg/opcode, f3 = r/m.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition code OR-ed into its low nibble.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Embedded oop values (other than NULL and the non-oop sentinel) must be
  // well-formed oops; catch broken pointers before they reach generated code.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits the ModR/M + SIB + displacement bytes for an [ESP+disp] operand,
// choosing the short (8-bit) displacement form when it fits.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Encode a general [base + index*scale + displace] memory operand:
// picks ModR/M mode (0/1/2), SIB byte, and 8- vs 32-bit displacement,
// with special handling for absolute addresses (base == -1) and the
// EBP/ESP encoding irregularities of x86.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Emit a register-to-register MOV (opcode 0x8B); a self-move emits nothing.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// After comiss/ucomiss, rewrite the flags for NaN operands so that a NaN
// compares as 'less than' (see bit diagram below).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst':
// -1 for less-than or unordered (NaN), 0 for equal, 1 for greater-than.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the method prolog for -XX:+PrintOptoAssembly; mirrors the
// code emitted by MachPrologNode::emit below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the method epilog for -XX:+PrintOptoAssembly; mirrors the
// code emitted by MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  // Pop the frame: 0x81 /0 (32-bit immediate) vs 0x83 /0 (8-bit immediate)
  // form of ADD ESP, depending on frame size.
  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Real emission: register this poll site in the safepoint-poll table.
      code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//=============================================================================

// Register classes used by the spill-copy helpers below.
enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
// Classify an OptoReg as bad/stack/int/x87-float/opmask/xmm.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  if (r->is_KRegister()) return rc_kreg;
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or format, or size) a load/store between a register and an [ESP+offset]
// stack slot. Exactly one of three modes runs: emit when 'cbuf' is non-NULL,
// format when '!do_size', otherwise size-only. Returns accumulated byte size.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// Spill/fill between an XMM register and [ESP + offset].  Adjacent
// reg_lo/reg_hi halves indicate a 64-bit (double) move via movdbl, otherwise a
// 32-bit (float) move via movflt.  Returns the running encoding size,
// accounting for the EVEX prefix and AVX-512 compressed disp8 when UseAVX > 2.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // With AVX-512, a non-zero offset may still encode as a single (compressed)
  // displacement byte; ask the assembler rather than using the plain disp8 test.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy; adjacent hi/lo halves select a 64-bit (double)
// move, otherwise a 32-bit (float) move.  Returns the running encoding size.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Copy a 32-bit GPR into an XMM register (MOVD).  The second-half arguments
// are unused here; callers assert there is no 64-bit GPR->XMM move.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// Copy the low 32 bits of an XMM register into a GPR (MOVD); mirror image of
// impl_movgpr2x_helper above.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register MOV (opcode 0x8B + ModRM): always 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 register to [ESP + offset].  If the source is not already on
// top of the FP stack, an FLD pushes it first and an FSTP (store-and-pop)
// restores the stack; otherwise a plain FST leaves the stack untouched.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // The register-number constants are reused as the ModRM /digit field:
  // EBX_num encodes as 3 (FSTP, store & pop) and EDX_num as 2 (FST, no pop).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st);

// Vector stack-slot to stack-slot copy.  Op_VecS/Op_VecD go through 32-bit
// push/pop pairs; larger vectors bounce through xmm0, which is preserved in
// scratch space just below ESP.
static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    switch (ireg) {
    case Op_VecS: // 32 bits: one push/pop pair
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD: // 64 bits: two push/pop pairs, 4 bytes at a time
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX: // 128 bits: save xmm0 below ESP, use it as a bounce buffer
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY: // 256 bits
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ: // 512 bits
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      // NOTE(review): prints "popq" while the emitted instruction above is
      // popl (this is 32-bit code) — format-only mismatch, confirm intended.
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): prints "vmovdqu" while the emitted instruction above is
      // evmovdquq — format-only mismatch, confirm intended.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}

// Central spill-copy worker shared by format() (cbuf == NULL), emit()
// (cbuf != NULL) and sizing (do_size): dispatches on the register classes of
// the first — and, for 64-bit values, second — halves of source and
// destination, and returns the accumulated encoding size from the helpers.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // Vector spill copies (but not vectmask) are fully delegated to the shared
  // x86.ad helpers; their size is reported as 0 here.
  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high half first so it is not clobbered by the low-half copy.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8     (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8     (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8     (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                         st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP is two 2-byte instructions; a lone FST is one.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );          // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // FLD (3 bytes + disp) followed by the 2-byte FSTP ST(i).
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Store the x87 value into the freshly reserved 8-byte slot at [ESP+0] ...
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.  Opmask moves are emitted directly and
  // report size 0 (like the vector cases above).
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(src_first);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(dst_first);
    __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
// Debug printing: run implementation() in print mode (cbuf == NULL).
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

// Code emission: run implementation() with a real code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

// Sizing is delegated to the generic MachNode path rather than to
// implementation()'s do_size mode.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Materialize the address of the lock's stack slot: LEA reg,[ESP+offset],
// with an 8- or 32-bit displacement depending on the offset.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// Sizes match emit() exactly: opcode+ModRM+SIB plus disp32 (7) or disp8 (4).
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: inline-cache check (expected klass in EAX, receiver
// in ECX) followed by padding NOPs so the verified entry point is patchable.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Fixed size, cross-checked by the assert in emit() above.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

// Vector calling convention not supported.
1403 const bool Matcher::supports_vector_calling_convention() { 1404 return false; 1405 } 1406 1407 OptoRegPair Matcher::vector_return_value(uint ideal_reg) { 1408 Unimplemented(); 1409 return OptoRegPair(0, 0); 1410 } 1411 1412 // Is this branch offset short enough that a short branch can be used? 1413 // 1414 // NOTE: If the platform does not provide any short branch variants, then 1415 // this method should return false for offset 0. 1416 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1417 // The passed offset is relative to address of the branch. 1418 // On 86 a branch displacement is calculated relative to address 1419 // of a next instruction. 1420 offset -= br_size; 1421 1422 // the short version of jmpConUCF2 contains multiple branches, 1423 // making the reach slightly less 1424 if (rule == jmpConUCF2_rule) 1425 return (-126 <= offset && offset <= 125); 1426 return (-128 <= offset && offset <= 127); 1427 } 1428 1429 // Return whether or not this register is ever used as an argument. This 1430 // function is used on startup to build the trampoline stubs in generateOptoStub. 1431 // Registers not mentioned will be killed by the VM call in the trampoline, and 1432 // arguments in those registers not be available to the callee. 1433 bool Matcher::can_be_java_arg( int reg ) { 1434 if( reg == ECX_num || reg == EDX_num ) return true; 1435 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1436 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1437 return false; 1438 } 1439 1440 bool Matcher::is_spillable_arg( int reg ) { 1441 return can_be_java_arg(reg); 1442 } 1443 1444 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1445 // Use hardware integer DIV instruction when 1446 // it is faster than a code which use multiply. 1447 // Only when constant divisor fits into 32 bit 1448 // (min_jint is excluded to get only correct 1449 // positive 32 bit values from negative). 
1450 return VM_Version::has_fast_idiv() && 1451 (divisor == (int)divisor && divisor != min_jint); 1452 } 1453 1454 // Register for DIVI projection of divmodI 1455 RegMask Matcher::divI_proj_mask() { 1456 return EAX_REG_mask(); 1457 } 1458 1459 // Register for MODI projection of divmodI 1460 RegMask Matcher::modI_proj_mask() { 1461 return EDX_REG_mask(); 1462 } 1463 1464 // Register for DIVL projection of divmodL 1465 RegMask Matcher::divL_proj_mask() { 1466 ShouldNotReachHere(); 1467 return RegMask(); 1468 } 1469 1470 // Register for MODL projection of divmodL 1471 RegMask Matcher::modL_proj_mask() { 1472 ShouldNotReachHere(); 1473 return RegMask(); 1474 } 1475 1476 const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1477 return NO_REG_mask(); 1478 } 1479 1480 // Returns true if the high 32 bits of the value is known to be zero. 1481 bool is_operand_hi32_zero(Node* n) { 1482 int opc = n->Opcode(); 1483 if (opc == Op_AndL) { 1484 Node* o2 = n->in(2); 1485 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1486 return true; 1487 } 1488 } 1489 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1490 return true; 1491 } 1492 return false; 1493 } 1494 1495 %} 1496 1497 //----------ENCODING BLOCK----------------------------------------------------- 1498 // This block specifies the encoding classes used by the compiler to output 1499 // byte streams. Encoding classes generate functions which are called by 1500 // Machine Instruction Nodes in order to generate the bit encoding of the 1501 // instruction. Operands specify their base encoding interface with the 1502 // interface keyword. There are currently supported four interfaces, 1503 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1504 // operand to generate a function which returns its register number when 1505 // queried. CONST_INTER causes an operand to generate a function which 1506 // returns the value of the constant when queried. 
// MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (0x66).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // ModRM byte for a register-register form.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Explicit opcode followed by a register-register ModRM byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,imm32 with a zero immediate.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                          special case
    //
    // input : rax,: dividend                       min_int
    //         reg:  divisor                        -1
    //
    // output: rax,: quotient  (= rax, idiv reg)    min_int
    //         rdx:  remainder (= rax, irem reg)    0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp    rax,80000000h
    //  0F 85 0B 00 00 00    jne    normal_case
    //  33 D2                xor    rdx,edx
    //  83 F9 FF             cmp    rcx,0FFh
    //  0F 84 03 00 00 00    je     done
    // normal_case:
    //  99                   cdq
    //  F7 F9                idiv   rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);  // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);  // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF);  // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);  // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                      // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{  // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  // As OpcSE, plus the ModRM byte carrying the secondary opcode digit.
  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02); }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low 32 bits of a long immediate op against the low register of the pair.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // High 32 bits of a long immediate op against the high register of the pair.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // Secondary opcode with the register folded into its low 3 bits.
  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // BSWAP of a long: byte-swap both halves, then exchange them.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  // ModRM byte with the secondary opcode as the /digit field.
  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  // CMOVcc: primary opcode byte, then secondary + condition code.
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // x87 FCMOVcc: condition and stack register folded into a 0xDAxx opcode.
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  // Slow-path subtype check; $primary selects whether EDI is zeroed on hit.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // Record (first use) or re-check (later uses) the encoding size so every
    // expansion of this enc_class has the same length.
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                   runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
1762 __ ffree(0); 1763 } else if (rt == T_FLOAT) { 1764 __ lea(rsp, Address(rsp, -4)); 1765 __ fstp_s(Address(rsp, 0)); 1766 __ movflt(xmm0, Address(rsp, 0)); 1767 __ lea(rsp, Address(rsp, 4)); 1768 } else if (rt == T_DOUBLE) { 1769 __ lea(rsp, Address(rsp, -8)); 1770 __ fstp_d(Address(rsp, 0)); 1771 __ movdbl(xmm0, Address(rsp, 0)); 1772 __ lea(rsp, Address(rsp, 8)); 1773 } 1774 } 1775 %} 1776 1777 enc_class pre_call_resets %{ 1778 // If method sets FPU control word restore it here 1779 debug_only(int off0 = cbuf.insts_size()); 1780 if (ra_->C->in_24_bit_fp_mode()) { 1781 MacroAssembler _masm(&cbuf); 1782 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 1783 } 1784 // Clear upper bits of YMM registers when current compiled code uses 1785 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1786 MacroAssembler _masm(&cbuf); 1787 __ vzeroupper(); 1788 debug_only(int off1 = cbuf.insts_size()); 1789 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1790 %} 1791 1792 enc_class post_call_FPU %{ 1793 // If method sets FPU control word do it here also 1794 if (Compile::current()->in_24_bit_fp_mode()) { 1795 MacroAssembler masm(&cbuf); 1796 masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 1797 } 1798 %} 1799 1800 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1801 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1802 // who we intended to call. 1803 cbuf.set_insts_mark(); 1804 $$$emit8$primary; 1805 1806 if (!_method) { 1807 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1808 runtime_call_Relocation::spec(), 1809 RELOC_IMM32); 1810 } else { 1811 int method_index = resolved_method_index(cbuf); 1812 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1813 : static_call_Relocation::spec(method_index); 1814 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1815 rspec, RELOC_DISP32); 1816 // Emit stubs for static call. 1817 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1818 if (stub == NULL) { 1819 ciEnv::current()->record_failure("CodeCache is full"); 1820 return; 1821 } 1822 } 1823 %} 1824 1825 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1826 MacroAssembler _masm(&cbuf); 1827 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1828 %} 1829 1830 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1831 int disp = in_bytes(Method::from_compiled_offset()); 1832 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1833 1834 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1835 cbuf.set_insts_mark(); 1836 $$$emit8$primary; 1837 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1838 emit_d8(cbuf, disp); // Displacement 1839 1840 %} 1841 1842 // Following encoding is no longer used, but may be restored if calling 1843 // convention changes significantly. 
// Became: Xor_Reg(EBP), Java_To_Runtime( labl )
//
// enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
//   // int ic_reg     = Matcher::inline_cache_reg();
//   // int ic_encode  = Matcher::_regEncode[ic_reg];
//   // int imo_reg    = Matcher::interpreter_method_reg();
//   // int imo_encode = Matcher::_regEncode[imo_reg];
//
//   // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
//   // // so we load it immediately before the call
//   // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
//   // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
//
//   // xor rbp,ebp
//   emit_opcode(cbuf, 0x33);
//   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
//
//   // CALL to interpreter.
//   cbuf.set_insts_mark();
//   $$$emit8$primary;
//   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
//                  runtime_call_Relocation::spec(), RELOC_IMM32 );
// %}

enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  $$$emit8$shift$$constant;
%}

enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(cbuf, 0xB8 + $dst$$reg);
  $$$emit32$src$$constant;
%}

enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(cbuf, $primary + $dst$$reg);
  $$$emit32$src$$constant;
%}

// Load the low 32 bits of a long immediate; a zero half becomes XOR dst,dst.
enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  int dst_enc = $dst$$reg;
  int src_con = $src$$constant & 0x0FFFFFFFFL;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(cbuf, $primary + dst_enc);
    emit_d32(cbuf, src_con);
  }
%}

// Load the high 32 bits of a long immediate; a zero half becomes XOR dst,dst.
enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  int dst_enc = $dst$$reg + 2;
  int src_con = ((julong)($src$$constant)) >> 32;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(cbuf, $primary + dst_enc);
    emit_d32(cbuf, src_con);
  }
%}


// Encode a reg-reg copy.  If it is useless, then empty encoding.
enc_class enc_Copy( rRegI dst, rRegI src ) %{
  encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}

enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
  encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}

enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$secondary;
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
%}

enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
%}

enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
%}

enc_class Con32 (immI src) %{    // Con32(storeImmI)
  // Output immediate
  $$$emit32$src$$constant;
%}

enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int    jf_as_bits = jint_cast( jf );
  emit_d32(cbuf, jf_as_bits);
%}

enc_class Con32F_as_bits(immF src) %{      // storeX_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int    jf_as_bits = jint_cast( jf );
  emit_d32(cbuf, jf_as_bits);
%}

enc_class Con16 (immI src) %{    // Con16(storeImmI)
  // Output immediate
  $$$emit16$src$$constant;
%}

enc_class Con_d32(immI src) %{
  emit_d32(cbuf,$src$$constant);
%}

enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
  // Output immediate memory reference
  emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
  emit_d32(cbuf, 0x00);
%}

enc_class lock_prefix( ) %{
  emit_opcode(cbuf,0xF0);         // [Lock]
%}

// Cmp-xchg long value.
// Note: we need to swap rbx, and rcx before and after the
//       cmpxchg8 instruction because the instruction uses
//       rcx as the high order word of the new value to store but
//       our register encoding uses rbx,.
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
  // [Lock]
  emit_opcode(cbuf,0xF0);
  // CMPXCHG8 [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xC7);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
%}

enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // CMPXCHG [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // CMPXCHGB [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB0);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // 16-bit mode
  emit_opcode(cbuf, 0x66);

  // CMPXCHGW [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Materialize ZF!=0 as a 0/1 boolean in 'res' without disturbing flags
// ordering: MOV imm does not touch flags, so it can precede the JNE.
enc_class enc_flags_ne_to_boolean( iRegI res ) %{
  int res_encoding = $res$$reg;

  // MOV  res,0
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 0 );
  // JNE,s  fail
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 5 );
  // MOV  res,1
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 1 );
  // fail:
%}

enc_class set_instruction_start( ) %{
  cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
%}

enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = $ereg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc();
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + 4;      // Offset is 4 further in memory
  assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
%}

// Long shift by 1..31: SHLD/SHRD moves bits between the halves
// ($tertiary 0xA4 selects the left-shift direction), then the chosen half
// is shifted by the same count.
enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
  int r1, r2;
  if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,$tertiary);
  emit_rm(cbuf, 0x3, r1, r2);
  emit_d8(cbuf,$cnt$$constant);
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, r1);
  emit_d8(cbuf,$cnt$$constant);
%}

// Arithmetic right shift of a long by 32..63: move hi into lo, shift lo
// by cnt-32, and fill hi with the sign (SAR hi,31).
enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
  emit_opcode( cbuf, 0x8B );    // Move
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  if( $cnt$$constant > 32 ) {   // Shift, if not by zero
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
  emit_d8(cbuf,31);
%}

// Logical long shift by 32..63: move one half into the other, shift it by
// cnt-32, and clear the vacated half ($secondary selects the direction).
enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
  int r1, r2;
  if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

  emit_opcode( cbuf, 0x8B );    // Move r1,r2
  emit_rm(cbuf, 0x3, r1, r2);
  if( $cnt$$constant > 32 ) {   // Shift, if not by zero
    emit_opcode(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_opcode(cbuf,0x33);       // XOR r2,r2
  emit_rm(cbuf, 0x3, r2, r2);
%}

// Clone of RegMem but accepts an extra parameter to access each
// half of a double in memory; it never needs relocation info.
enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
  emit_opcode(cbuf,$opcode$$constant);
  int reg_encoding = $rm_reg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + $disp_for_half$$constant;
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
//
// Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
// and it never needs relocation information.
// Frequently used to move data between FPU's Stack Top and memory.
enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
%}

enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
  int reg_encoding = $dst$$reg;
  int base  = $src0$$reg;         // 0xFFFFFFFF indicates no base
  int index = 0x04;               // 0x04 indicates no index
  int scale = 0x00;               // 0x00 indicates no scale
  int displace = $src1$$constant; // 0x00 indicates no displacement
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst < src around move
  emit_opcode(cbuf,0x7C);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst > src around move
  emit_opcode(cbuf,0x7F);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class enc_FPR_store(memory mem, regDPR src) %{
  // If src is FPR1, we can just FST to store it.
  // Else we need to FLD it to FPR1, then FSTP to store/pop it.
  int reg_encoding = 0x2; // Just store
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  if( $src$$reg != FPR1L_enc ) {
    reg_encoding = 0x3;           // Store & pop
    emit_opcode( cbuf, 0xD9 );    // FLD (i.e., push it)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
  }
  cbuf.set_insts_mark();          // Mark start of opcode for reloc info in mem operand
  emit_opcode(cbuf,$primary);
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class neg_reg(rRegI dst) %{
  // NEG $dst
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
%}

enc_class setLT_reg(eCXRegI dst) %{
  // SETLT $dst
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0x9C);
  emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
%}

// Conditional add: p += (p < q) ? y : 0, computed branch-free via
// SUB/SBB mask trick.
enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
  int tmpReg = $tmp$$reg;

  // SUB $p,$q
  emit_opcode(cbuf,0x2B);
  emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
  // SBB $tmp,$tmp
  emit_opcode(cbuf,0x1B);
  emit_rm(cbuf, 0x3, tmpReg, tmpReg);
  // AND $tmp,$y
  emit_opcode(cbuf,0x23);
  emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
  // ADD $p,$tmp
  emit_opcode(cbuf,0x03);
  emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
%}

// Variable long left shift: handle counts >= 32 by moving lo into hi and
// clearing lo, then SHLD/SHL by the (mod-32) count.
enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.hi,$dst.lo
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // CLR $dst.lo
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
  // small:
  // SHLD $dst.hi,$dst.lo,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xA5);
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  // SHL $dst.lo,$shift
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
%}

// Variable long logical right shift: counts >= 32 move hi into lo and clear
// hi, then SHRD/SHR by the (mod-32) count.
enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // CLR $dst.hi
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
  // small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SHR $dst.hi,$shift
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
%}

// Variable long arithmetic right shift: counts >= 32 move hi into lo and
// sign-fill hi (SAR hi,31), then SHRD/SAR by the (mod-32) count.
enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x05);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // SAR $dst.hi,31
  emit_opcode(cbuf, 0xC1);
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
  emit_d8(cbuf, 0x1F );
  // small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SAR $dst.hi,$shift
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
%}


// ----------------- Encodings for floating point unit -----------------
// May leave result in FPU-TOS or FPU reg depending on opcodes
enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $src$$reg );
%}

// Pop argument in FPR0 with FSTP ST(0)
enc_class PopFPU() %{
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, 0xD8 );
%}

// !!!!! equivalent to Pop_Reg_F
enc_class Pop_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
%}

// Multiply by a subnormal-bias constant to flush strictfp intermediates.
enc_class strictfp_bias1( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

enc_class strictfp_bias2( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Special case for moving an integer register to a stack slot.
enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
%}

// Special case for moving a register to a stack slot.
enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  // Opcode already emitted
  emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
  emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
  emit_d32(cbuf, $dst$$disp);                  // Displacement
%}

// Push the integer in stackSlot 'src' onto FP-stack
enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
  store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
%}

// Push FPU's TOS float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
  store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
%}

// Same as Pop_Mem_F except for opcode
// Push FPU's TOS double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
  store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
%}

enc_class Pop_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_FPR( regFPR dst ) %{
  emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
%}

// Push FPU's float to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
  int pop = 0x02;                      // plain store when src is already TOS
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;                        // store & pop the extra FLD
  }
  store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
%}

// Push FPU's double to a stack-slot, and pop FPU-stack
enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
  int pop = 0x02;                      // plain store when src is already TOS
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0x03;                        // store & pop the extra FLD
  }
  store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
%}
// Push FPU's double to a FPU-stack-slot, and pop FPU-stack
enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
  int pop = 0xD0 - 1;                  // -1 since we skip FLD
  if ($src$$reg != FPR1L_enc) {
    emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
    pop = 0xD8;
  }
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
%}


enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
  // load dst in FPR0
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // swap src with FPR1:
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
%}

// Move two XMM doubles onto the x87 stack (src1 below src0) via a stack temp.
enc_class Push_ModD_encoding(regD src0, regD src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_d(Address(rsp, 0));
  __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

// Move two XMM floats onto the x87 stack (src1 below src0) via a stack temp.
enc_class Push_ModF_encoding(regF src0, regF src1) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 4);
  __ movflt(Address(rsp, 0), $src1$$XMMRegister);
  __ fld_s(Address(rsp, 0));
  __ movflt(Address(rsp, 0), $src0$$XMMRegister);
  __ fld_s(Address(rsp, 0));
%}

// Pop the x87 TOS double into an XMM register and release the stack temp.
enc_class Push_ResultD(regD dst) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_d(Address(rsp, 0));
  __ movdbl($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, 8);
%}

// Pop the x87 TOS float into an XMM register; d8 is the temp size to release.
enc_class Push_ResultF(regF dst, immI d8) %{
  MacroAssembler _masm(&cbuf);
  __ fstp_s(Address(rsp, 0));
  __ movflt($dst$$XMMRegister, Address(rsp, 0));
  __ addptr(rsp, $d8$$constant);
%}

// Move one XMM double onto the x87 stack via a stack temp.
enc_class Push_SrcD(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

enc_class push_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ subptr(rsp, 8);
%}

enc_class pop_stack_temp_qword() %{
  MacroAssembler _masm(&cbuf);
  __ addptr(rsp, 8);
%}

// Load an XMM double to the top of the x87 stack through the existing
// stack temp (caller must have reserved it).
enc_class push_xmm_to_fpr1(regD src) %{
  MacroAssembler _masm(&cbuf);
  __ movdbl(Address(rsp, 0), $src$$XMMRegister);
  __ fld_d(Address(rsp, 0));
%}

enc_class Push_Result_Mod_DPR( regDPR src) %{
  if ($src$$reg != FPR1L_enc) {
    // fincstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF7);
    // FXCH FPR1 with src
    emit_opcode(cbuf, 0xD9);
    emit_d8(cbuf, 0xC8-1+$src$$reg );
    // fdecstp
    emit_opcode (cbuf, 0xD9);
    emit_opcode (cbuf, 0xF6);
  }
  // // following asm replaced with Pop_Reg_F or Pop_Mem_F
  // // FSTP   FPR$dst$$reg
  // emit_opcode( cbuf, 0xDD );
  // emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class fnstsw_sahf_skip_parity() %{
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jnp  ::skip
  emit_opcode( cbuf, 0x7B );
  emit_opcode( cbuf, 0x05 );
%}

enc_class emitModDPR() %{
  // fprem must be iterative
  // :: loop
  // fprem
  emit_opcode( cbuf, 0xD9 );
  emit_opcode( cbuf, 0xF8 );
  // wait
  emit_opcode( cbuf, 0x9b );
  // fnstsw ax
  emit_opcode( cbuf, 0xDF );
  emit_opcode( cbuf, 0xE0 );
  // sahf
  emit_opcode( cbuf, 0x9E );
  // jp  ::loop
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0x8A );
  emit_opcode( cbuf, 0xF4 );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
  emit_opcode( cbuf, 0xFF );
%}

enc_class fpu_flags() %{
  // fnstsw_ax
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // test ax,0x0400
  emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
  emit_opcode( cbuf, 0xA9 );
  emit_d16   ( cbuf, 0x0400 );
  // // // This sequence works, but stalls for 12-16 cycles on PPro
  // // test rax,0x0400
  // emit_opcode( cbuf, 0xA9 );
  // emit_d32   ( cbuf, 0x00000400 );
  //
  // jz exit (no unordered comparison)
  emit_opcode( cbuf, 0x74 );
  emit_d8    ( cbuf, 0x02 );
  // mov ah,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // sahf
  emit_opcode( cbuf, 0x9E);
%}

enc_class cmpF_P6_fixup() %{
  // Fixup the integer flags in case comparison involved a NaN
  //
  // JNP exit (no unordered comparison, P-flag is set by NaN)
  emit_opcode( cbuf, 0x7B );
  emit_d8    ( cbuf, 0x03 );
  // MOV AH,1 - treat as LT case (set carry flag)
  emit_opcode( cbuf, 0xB4 );
  emit_d8    ( cbuf, 0x01 );
  // SAHF
  emit_opcode( cbuf, 0x9E);
  // NOP     // target for branch to avoid branch to branch
  emit_opcode( cbuf, 0x90);
%}

// fnstsw_ax();
// sahf();
// movl(dst, nan_result);
// jcc(Assembler::parity, exit);
// movl(dst, less_result);
// jcc(Assembler::below, exit);
// movl(dst, equal_result);
// jcc(Assembler::equal, exit);
// movl(dst, greater_result);

// less_result     =  1;
// greater_result  = -1;
// equal_result    = 0;
// nan_result      = -1;

enc_class CmpF_Result(rRegI dst) %{
  // fnstsw_ax();
  emit_opcode( cbuf, 0xDF);
  emit_opcode( cbuf, 0xE0);
  // sahf
  emit_opcode( cbuf, 0x9E);
  // movl(dst, nan_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::parity, exit);
  emit_opcode( cbuf, 0x7A );
  emit_d8  ( cbuf, 0x13 );
  // movl(dst, less_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, -1 );
  // jcc(Assembler::below, exit);
  emit_opcode( cbuf, 0x72 );
  emit_d8  ( cbuf, 0x0C );
  // movl(dst, equal_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 0 );
  // jcc(Assembler::equal, exit);
  emit_opcode( cbuf, 0x74 );
  emit_d8  ( cbuf, 0x05 );
  // movl(dst, greater_result);
  emit_opcode( cbuf, 0xB8 + $dst$$reg);
  emit_d32( cbuf, 1 );
%}


// Compare the longs and set flags
// BROKEN!  Do Not use as-is
enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
  // CMP $src1.hi,$src2.hi
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  // JNE,s  done
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 2 );
  // CMP $src1.lo,$src2.lo
  emit_opcode( cbuf, 0x3B );
  emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
  // done:
%}

// Sign-extend an int into a long: copy into both halves, SAR hi,31.
enc_class convert_int_long( regL dst, rRegI src ) %{
  // mov $dst.lo,$src
  int dst_encoding = $dst$$reg;
  int src_encoding = $src$$reg;
  encode_Copy( cbuf, dst_encoding , src_encoding );
  // mov $dst.hi,$src
  encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
  // sar $dst.hi,31
  emit_opcode( cbuf, 0xC1 );
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
  emit_d8(cbuf, 0x1F );
%}

enc_class convert_long_double( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
  // pop stack
  emit_opcode(cbuf, 0x83); // add  SP, #8
  emit_rm(cbuf, 0x3, 0x00, ESP_enc);
  emit_d8(cbuf, 0x8);
%}

enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
  // IMUL   EDX:EAX,$src1
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
  // SAR    EDX,$cnt-32
  int shift_count = ((int)$cnt$$constant) - 32;
  if (shift_count > 0) {
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, $dst$$reg );
    emit_d8(cbuf, shift_count);
  }
%}

// this version doesn't have add sp, 8
enc_class convert_long_double2( eRegL src ) %{
  // push $src.hi
  emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
  // push $src.lo
  emit_opcode(cbuf, 0x50+$src$$reg );
  // fild 64-bits at [SP]
  emit_opcode(cbuf,0xdf);
  emit_d8(cbuf, 0x6C);
  emit_d8(cbuf, 0x24);
  emit_d8(cbuf, 0x00);
%}

enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic idea: long = (long)int * (long)int
  // IMUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x5, $src$$reg);
%}

enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  // MUL EDX:EAX, src
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg);
%}

enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  // MOV    $tmp,$src.lo
  encode_Copy( cbuf, $tmp$$reg, $src$$reg );
  // IMUL   $tmp,EDX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MOV    EDX,$src.hi
  encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
  // IMUL   EDX,EAX
  emit_opcode( cbuf, 0x0F );
  emit_opcode( cbuf, 0xAF );
  emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // ADD    $tmp,EDX
  emit_opcode( cbuf, 0x03 );
  emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
  // MUL   EDX:EAX,$src.lo
  emit_opcode( cbuf, 0xF7 );
  emit_rm( cbuf, 0x3, 0x4, $src$$reg );
  // ADD    EDX,ESI
  emit_opcode(
cbuf, 0x03 ); 2750 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2751 %} 2752 2753 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2754 // Basic idea: lo(result) = lo(src * y_lo) 2755 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2756 // IMUL $tmp,EDX,$src 2757 emit_opcode( cbuf, 0x6B ); 2758 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2759 emit_d8( cbuf, (int)$src$$constant ); 2760 // MOV EDX,$src 2761 emit_opcode(cbuf, 0xB8 + EDX_enc); 2762 emit_d32( cbuf, (int)$src$$constant ); 2763 // MUL EDX:EAX,EDX 2764 emit_opcode( cbuf, 0xF7 ); 2765 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2766 // ADD EDX,ESI 2767 emit_opcode( cbuf, 0x03 ); 2768 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2769 %} 2770 2771 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2772 // PUSH src1.hi 2773 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2774 // PUSH src1.lo 2775 emit_opcode(cbuf, 0x50+$src1$$reg ); 2776 // PUSH src2.hi 2777 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2778 // PUSH src2.lo 2779 emit_opcode(cbuf, 0x50+$src2$$reg ); 2780 // CALL directly to the runtime 2781 cbuf.set_insts_mark(); 2782 emit_opcode(cbuf,0xE8); // Call into runtime 2783 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2784 // Restore stack 2785 emit_opcode(cbuf, 0x83); // add SP, #framesize 2786 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2787 emit_d8(cbuf, 4*4); 2788 %} 2789 2790 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2791 // PUSH src1.hi 2792 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2793 // PUSH src1.lo 2794 emit_opcode(cbuf, 0x50+$src1$$reg ); 2795 // PUSH src2.hi 2796 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2797 // PUSH src2.lo 2798 emit_opcode(cbuf, 0x50+$src2$$reg ); 2799 // CALL directly to the runtime 2800 cbuf.set_insts_mark(); 2801 emit_opcode(cbuf,0xE8); // Call into runtime 2802 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2803 // Restore stack 2804 emit_opcode(cbuf, 0x83); // add SP, #framesize 2805 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2806 emit_d8(cbuf, 4*4); 2807 %} 2808 2809 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2810 // MOV $tmp,$src.lo 2811 emit_opcode(cbuf, 0x8B); 2812 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2813 // OR $tmp,$src.hi 2814 emit_opcode(cbuf, 0x0B); 2815 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2816 %} 2817 2818 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2819 // CMP $src1.lo,$src2.lo 2820 emit_opcode( cbuf, 0x3B ); 2821 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2822 // JNE,s skip 2823 emit_cc(cbuf, 0x70, 0x5); 2824 emit_d8(cbuf,2); 2825 // CMP $src1.hi,$src2.hi 2826 emit_opcode( cbuf, 0x3B ); 2827 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2828 %} 2829 2830 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2831 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2832 emit_opcode( cbuf, 0x3B ); 2833 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2834 // MOV $tmp,$src1.hi 2835 emit_opcode( cbuf, 0x8B ); 2836 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2837 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2838 emit_opcode( cbuf, 0x1B ); 2839 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2840 %} 2841 2842 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2843 // XOR $tmp,$tmp 2844 emit_opcode(cbuf,0x33); // XOR 2845 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2846 // CMP $tmp,$src.lo 2847 emit_opcode( cbuf, 0x3B ); 2848 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2849 // SBB $tmp,$src.hi 2850 emit_opcode( cbuf, 0x1B ); 2851 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2852 %} 2853 2854 // Sniff, sniff... 
  // smells like Gnu Superoptimizer
  // Two's-complement negate of a long register pair without a branch:
  // NEG hi; NEG lo; SBB hi,0 propagates the borrow from the low half.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);    // POP EDX
  %}

  // Tail-jump to the rethrow stub (never returns).
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    // 0x80000000 is the hardware's "invalid" marker for out-of-range/NaN;
    // only that value takes the slow runtime call.
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Convert a double to a long; same trunc-mode scheme as DPR2I_encoding,
  // with a 64-bit FISTP and a two-word result in EDX:EAX.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted long; adjust CPU stack.
    // Slow path only when the result is exactly 0x8000000000000000,
    // i.e. EDX == 0x80000000 && EAX == 0.
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x5A);       // POP EDX
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL   ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long: a 64-bit FILD from memory followed by
  // a 64-bit FISTP into the destination stack slot is atomic on x86.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);            // FILD64 [mem]
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );  // FISTP64 [dst slot]
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );  // FILD64 [src slot]
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);            // FISTP64 [mem]
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        |  locks |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by      +--------+
//     CALLEE      | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  // Indexed by ideal register number; lo/hi give the register pair.
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values (Java compiled-code convention).
  // NOTE(review): float uses UseSSE>=1 here but UseSSE>=2 in c_return_value
  // above — presumably because SSE1 already handles float while doubles need
  // SSE2; confirm against the calling-convention code before "fixing".
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib
          ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);   // Required alignment attribute (must be a power of 2)
                               // specifies the alignment that some part of the instruction (not
                               // necessarily the start) requires.  If > 1, a compute_padding()
                               // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Signed 8-bit immediate
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned 8-bit immediate
operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: fits in 32 bits when sign-extended
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Vector-mask (AVX-512 opmask) register operands
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K1()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K1));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K2()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K2));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K3()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K3));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K4()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K4));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K5()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K5));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K6()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K6));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K7()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K7));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register (EAX/EBX/ECX/EDX only)
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register, not EAX
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register, not EAX or EDX
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register, not ECX
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Same register class as eRegP; kept as a distinct name (shared with the
// 64-bit AD file's naming).
operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register, not EAX
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register, not EAX or EBX
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
match(reg); 3890 format %{ "EDI" %} 3891 interface(REG_INTER); 3892 %} 3893 3894 operand eRegL() %{ 3895 constraint(ALLOC_IN_RC(long_reg)); 3896 match(RegL); 3897 match(eADXRegL); 3898 3899 format %{ %} 3900 interface(REG_INTER); 3901 %} 3902 3903 operand eADXRegL( eRegL reg ) %{ 3904 constraint(ALLOC_IN_RC(eadx_reg)); 3905 match(reg); 3906 3907 format %{ "EDX:EAX" %} 3908 interface(REG_INTER); 3909 %} 3910 3911 operand eBCXRegL( eRegL reg ) %{ 3912 constraint(ALLOC_IN_RC(ebcx_reg)); 3913 match(reg); 3914 3915 format %{ "EBX:ECX" %} 3916 interface(REG_INTER); 3917 %} 3918 3919 // Special case for integer high multiply 3920 operand eADXRegL_low_only() %{ 3921 constraint(ALLOC_IN_RC(eadx_reg)); 3922 match(RegL); 3923 3924 format %{ "EAX" %} 3925 interface(REG_INTER); 3926 %} 3927 3928 // Flags register, used as output of compare instructions 3929 operand rFlagsReg() %{ 3930 constraint(ALLOC_IN_RC(int_flags)); 3931 match(RegFlags); 3932 3933 format %{ "EFLAGS" %} 3934 interface(REG_INTER); 3935 %} 3936 3937 // Flags register, used as output of compare instructions 3938 operand eFlagsReg() %{ 3939 constraint(ALLOC_IN_RC(int_flags)); 3940 match(RegFlags); 3941 3942 format %{ "EFLAGS" %} 3943 interface(REG_INTER); 3944 %} 3945 3946 // Flags register, used as output of FLOATING POINT compare instructions 3947 operand eFlagsRegU() %{ 3948 constraint(ALLOC_IN_RC(int_flags)); 3949 match(RegFlags); 3950 3951 format %{ "EFLAGS_U" %} 3952 interface(REG_INTER); 3953 %} 3954 3955 operand eFlagsRegUCF() %{ 3956 constraint(ALLOC_IN_RC(int_flags)); 3957 match(RegFlags); 3958 predicate(false); 3959 3960 format %{ "EFLAGS_U_CF" %} 3961 interface(REG_INTER); 3962 %} 3963 3964 // Condition Code Register used by long compare 3965 operand flagsReg_long_LTGE() %{ 3966 constraint(ALLOC_IN_RC(int_flags)); 3967 match(RegFlags); 3968 format %{ "FLAGS_LTGE" %} 3969 interface(REG_INTER); 3970 %} 3971 operand flagsReg_long_EQNE() %{ 3972 constraint(ALLOC_IN_RC(int_flags)); 3973 match(RegFlags); 
3974 format %{ "FLAGS_EQNE" %} 3975 interface(REG_INTER); 3976 %} 3977 operand flagsReg_long_LEGT() %{ 3978 constraint(ALLOC_IN_RC(int_flags)); 3979 match(RegFlags); 3980 format %{ "FLAGS_LEGT" %} 3981 interface(REG_INTER); 3982 %} 3983 3984 // Condition Code Register used by unsigned long compare 3985 operand flagsReg_ulong_LTGE() %{ 3986 constraint(ALLOC_IN_RC(int_flags)); 3987 match(RegFlags); 3988 format %{ "FLAGS_U_LTGE" %} 3989 interface(REG_INTER); 3990 %} 3991 operand flagsReg_ulong_EQNE() %{ 3992 constraint(ALLOC_IN_RC(int_flags)); 3993 match(RegFlags); 3994 format %{ "FLAGS_U_EQNE" %} 3995 interface(REG_INTER); 3996 %} 3997 operand flagsReg_ulong_LEGT() %{ 3998 constraint(ALLOC_IN_RC(int_flags)); 3999 match(RegFlags); 4000 format %{ "FLAGS_U_LEGT" %} 4001 interface(REG_INTER); 4002 %} 4003 4004 // Float register operands 4005 operand regDPR() %{ 4006 predicate( UseSSE < 2 ); 4007 constraint(ALLOC_IN_RC(fp_dbl_reg)); 4008 match(RegD); 4009 match(regDPR1); 4010 match(regDPR2); 4011 format %{ %} 4012 interface(REG_INTER); 4013 %} 4014 4015 operand regDPR1(regDPR reg) %{ 4016 predicate( UseSSE < 2 ); 4017 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4018 match(reg); 4019 format %{ "FPR1" %} 4020 interface(REG_INTER); 4021 %} 4022 4023 operand regDPR2(regDPR reg) %{ 4024 predicate( UseSSE < 2 ); 4025 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4026 match(reg); 4027 format %{ "FPR2" %} 4028 interface(REG_INTER); 4029 %} 4030 4031 operand regnotDPR1(regDPR reg) %{ 4032 predicate( UseSSE < 2 ); 4033 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4034 match(reg); 4035 format %{ %} 4036 interface(REG_INTER); 4037 %} 4038 4039 // Float register operands 4040 operand regFPR() %{ 4041 predicate( UseSSE < 2 ); 4042 constraint(ALLOC_IN_RC(fp_flt_reg)); 4043 match(RegF); 4044 match(regFPR1); 4045 format %{ %} 4046 interface(REG_INTER); 4047 %} 4048 4049 // Float register operands 4050 operand regFPR1(regFPR reg) %{ 4051 predicate( UseSSE < 2 ); 4052 constraint(ALLOC_IN_RC(fp_flt_reg0)); 
4053 match(reg); 4054 format %{ "FPR1" %} 4055 interface(REG_INTER); 4056 %} 4057 4058 // XMM Float register operands 4059 operand regF() %{ 4060 predicate( UseSSE>=1 ); 4061 constraint(ALLOC_IN_RC(float_reg_legacy)); 4062 match(RegF); 4063 format %{ %} 4064 interface(REG_INTER); 4065 %} 4066 4067 operand legRegF() %{ 4068 predicate( UseSSE>=1 ); 4069 constraint(ALLOC_IN_RC(float_reg_legacy)); 4070 match(RegF); 4071 format %{ %} 4072 interface(REG_INTER); 4073 %} 4074 4075 // Float register operands 4076 operand vlRegF() %{ 4077 constraint(ALLOC_IN_RC(float_reg_vl)); 4078 match(RegF); 4079 4080 format %{ %} 4081 interface(REG_INTER); 4082 %} 4083 4084 // XMM Double register operands 4085 operand regD() %{ 4086 predicate( UseSSE>=2 ); 4087 constraint(ALLOC_IN_RC(double_reg_legacy)); 4088 match(RegD); 4089 format %{ %} 4090 interface(REG_INTER); 4091 %} 4092 4093 // Double register operands 4094 operand legRegD() %{ 4095 predicate( UseSSE>=2 ); 4096 constraint(ALLOC_IN_RC(double_reg_legacy)); 4097 match(RegD); 4098 format %{ %} 4099 interface(REG_INTER); 4100 %} 4101 4102 operand vlRegD() %{ 4103 constraint(ALLOC_IN_RC(double_reg_vl)); 4104 match(RegD); 4105 4106 format %{ %} 4107 interface(REG_INTER); 4108 %} 4109 4110 //----------Memory Operands---------------------------------------------------- 4111 // Direct Memory Operand 4112 operand direct(immP addr) %{ 4113 match(addr); 4114 4115 format %{ "[$addr]" %} 4116 interface(MEMORY_INTER) %{ 4117 base(0xFFFFFFFF); 4118 index(0x4); 4119 scale(0x0); 4120 disp($addr); 4121 %} 4122 %} 4123 4124 // Indirect Memory Operand 4125 operand indirect(eRegP reg) %{ 4126 constraint(ALLOC_IN_RC(int_reg)); 4127 match(reg); 4128 4129 format %{ "[$reg]" %} 4130 interface(MEMORY_INTER) %{ 4131 base($reg); 4132 index(0x4); 4133 scale(0x0); 4134 disp(0x0); 4135 %} 4136 %} 4137 4138 // Indirect Memory Plus Short Offset Operand 4139 operand indOffset8(eRegP reg, immI8 off) %{ 4140 match(AddP reg off); 4141 4142 format %{ "[$reg + $off]" 
%} 4143 interface(MEMORY_INTER) %{ 4144 base($reg); 4145 index(0x4); 4146 scale(0x0); 4147 disp($off); 4148 %} 4149 %} 4150 4151 // Indirect Memory Plus Long Offset Operand 4152 operand indOffset32(eRegP reg, immI off) %{ 4153 match(AddP reg off); 4154 4155 format %{ "[$reg + $off]" %} 4156 interface(MEMORY_INTER) %{ 4157 base($reg); 4158 index(0x4); 4159 scale(0x0); 4160 disp($off); 4161 %} 4162 %} 4163 4164 // Indirect Memory Plus Long Offset Operand 4165 operand indOffset32X(rRegI reg, immP off) %{ 4166 match(AddP off reg); 4167 4168 format %{ "[$reg + $off]" %} 4169 interface(MEMORY_INTER) %{ 4170 base($reg); 4171 index(0x4); 4172 scale(0x0); 4173 disp($off); 4174 %} 4175 %} 4176 4177 // Indirect Memory Plus Index Register Plus Offset Operand 4178 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4179 match(AddP (AddP reg ireg) off); 4180 4181 op_cost(10); 4182 format %{"[$reg + $off + $ireg]" %} 4183 interface(MEMORY_INTER) %{ 4184 base($reg); 4185 index($ireg); 4186 scale(0x0); 4187 disp($off); 4188 %} 4189 %} 4190 4191 // Indirect Memory Plus Index Register Plus Offset Operand 4192 operand indIndex(eRegP reg, rRegI ireg) %{ 4193 match(AddP reg ireg); 4194 4195 op_cost(10); 4196 format %{"[$reg + $ireg]" %} 4197 interface(MEMORY_INTER) %{ 4198 base($reg); 4199 index($ireg); 4200 scale(0x0); 4201 disp(0x0); 4202 %} 4203 %} 4204 4205 // // ------------------------------------------------------------------------- 4206 // // 486 architecture doesn't support "scale * index + offset" with out a base 4207 // // ------------------------------------------------------------------------- 4208 // // Scaled Memory Operands 4209 // // Indirect Memory Times Scale Plus Offset Operand 4210 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4211 // match(AddP off (LShiftI ireg scale)); 4212 // 4213 // op_cost(10); 4214 // format %{"[$off + $ireg << $scale]" %} 4215 // interface(MEMORY_INTER) %{ 4216 // base(0x4); 4217 // index($ireg); 4218 // 
scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
// NOTE(review): immI2 presumably restricts the shift count to the 2-bit
// SIB scale field (0-3) — confirm against the immI2 operand definition.
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.
4257 4258 // load-long support 4259 operand load_long_RegP() %{ 4260 constraint(ALLOC_IN_RC(esi_reg)); 4261 match(RegP); 4262 match(eSIRegP); 4263 op_cost(100); 4264 format %{ %} 4265 interface(REG_INTER); 4266 %} 4267 4268 // Indirect Memory Operand Long 4269 operand load_long_indirect(load_long_RegP reg) %{ 4270 constraint(ALLOC_IN_RC(esi_reg)); 4271 match(reg); 4272 4273 format %{ "[$reg]" %} 4274 interface(MEMORY_INTER) %{ 4275 base($reg); 4276 index(0x4); 4277 scale(0x0); 4278 disp(0x0); 4279 %} 4280 %} 4281 4282 // Indirect Memory Plus Long Offset Operand 4283 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4284 match(AddP reg off); 4285 4286 format %{ "[$reg + $off]" %} 4287 interface(MEMORY_INTER) %{ 4288 base($reg); 4289 index(0x4); 4290 scale(0x0); 4291 disp($off); 4292 %} 4293 %} 4294 4295 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4296 4297 4298 //----------Special Memory Operands-------------------------------------------- 4299 // Stack Slot Operand - This operand is used for loading and storing temporary 4300 // values on the stack where a match requires a value to 4301 // flow through memory. 
4302 operand stackSlotP(sRegP reg) %{ 4303 constraint(ALLOC_IN_RC(stack_slots)); 4304 // No match rule because this operand is only generated in matching 4305 format %{ "[$reg]" %} 4306 interface(MEMORY_INTER) %{ 4307 base(0x4); // ESP 4308 index(0x4); // No Index 4309 scale(0x0); // No Scale 4310 disp($reg); // Stack Offset 4311 %} 4312 %} 4313 4314 operand stackSlotI(sRegI reg) %{ 4315 constraint(ALLOC_IN_RC(stack_slots)); 4316 // No match rule because this operand is only generated in matching 4317 format %{ "[$reg]" %} 4318 interface(MEMORY_INTER) %{ 4319 base(0x4); // ESP 4320 index(0x4); // No Index 4321 scale(0x0); // No Scale 4322 disp($reg); // Stack Offset 4323 %} 4324 %} 4325 4326 operand stackSlotF(sRegF reg) %{ 4327 constraint(ALLOC_IN_RC(stack_slots)); 4328 // No match rule because this operand is only generated in matching 4329 format %{ "[$reg]" %} 4330 interface(MEMORY_INTER) %{ 4331 base(0x4); // ESP 4332 index(0x4); // No Index 4333 scale(0x0); // No Scale 4334 disp($reg); // Stack Offset 4335 %} 4336 %} 4337 4338 operand stackSlotD(sRegD reg) %{ 4339 constraint(ALLOC_IN_RC(stack_slots)); 4340 // No match rule because this operand is only generated in matching 4341 format %{ "[$reg]" %} 4342 interface(MEMORY_INTER) %{ 4343 base(0x4); // ESP 4344 index(0x4); // No Index 4345 scale(0x0); // No Scale 4346 disp($reg); // Stack Offset 4347 %} 4348 %} 4349 4350 operand stackSlotL(sRegL reg) %{ 4351 constraint(ALLOC_IN_RC(stack_slots)); 4352 // No match rule because this operand is only generated in matching 4353 format %{ "[$reg]" %} 4354 interface(MEMORY_INTER) %{ 4355 base(0x4); // ESP 4356 index(0x4); // No Index 4357 scale(0x0); // No Scale 4358 disp($reg); // Stack Offset 4359 %} 4360 %} 4361 4362 //----------Conditional Branch Operands---------------------------------------- 4363 // Comparison Op - This is the operation of the comparison, and is limited to 4364 // the following set of codes: 4365 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) 
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed compare).  The hex values are the x86
// condition-code encodings, paired with the Jcc/SETcc mnemonic suffix
// emitted in the disassembly ("e", "ne", "l", ...).
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
4396 operand cmpOpU() %{ 4397 match(Bool); 4398 4399 format %{ "" %} 4400 interface(COND_INTER) %{ 4401 equal(0x4, "e"); 4402 not_equal(0x5, "ne"); 4403 less(0x2, "b"); 4404 greater_equal(0x3, "nb"); 4405 less_equal(0x6, "be"); 4406 greater(0x7, "nbe"); 4407 overflow(0x0, "o"); 4408 no_overflow(0x1, "no"); 4409 %} 4410 %} 4411 4412 // Floating comparisons that don't require any fixup for the unordered case 4413 operand cmpOpUCF() %{ 4414 match(Bool); 4415 predicate(n->as_Bool()->_test._test == BoolTest::lt || 4416 n->as_Bool()->_test._test == BoolTest::ge || 4417 n->as_Bool()->_test._test == BoolTest::le || 4418 n->as_Bool()->_test._test == BoolTest::gt); 4419 format %{ "" %} 4420 interface(COND_INTER) %{ 4421 equal(0x4, "e"); 4422 not_equal(0x5, "ne"); 4423 less(0x2, "b"); 4424 greater_equal(0x3, "nb"); 4425 less_equal(0x6, "be"); 4426 greater(0x7, "nbe"); 4427 overflow(0x0, "o"); 4428 no_overflow(0x1, "no"); 4429 %} 4430 %} 4431 4432 4433 // Floating comparisons that can be fixed up with extra conditional jumps 4434 operand cmpOpUCF2() %{ 4435 match(Bool); 4436 predicate(n->as_Bool()->_test._test == BoolTest::ne || 4437 n->as_Bool()->_test._test == BoolTest::eq); 4438 format %{ "" %} 4439 interface(COND_INTER) %{ 4440 equal(0x4, "e"); 4441 not_equal(0x5, "ne"); 4442 less(0x2, "b"); 4443 greater_equal(0x3, "nb"); 4444 less_equal(0x6, "be"); 4445 greater(0x7, "nbe"); 4446 overflow(0x0, "o"); 4447 no_overflow(0x1, "no"); 4448 %} 4449 %} 4450 4451 // Comparison Code for FP conditional move 4452 operand cmpOp_fcmov() %{ 4453 match(Bool); 4454 4455 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 4456 n->as_Bool()->_test._test != BoolTest::no_overflow); 4457 format %{ "" %} 4458 interface(COND_INTER) %{ 4459 equal (0x0C8); 4460 not_equal (0x1C8); 4461 less (0x0C0); 4462 greater_equal(0x1C0); 4463 less_equal (0x0D0); 4464 greater (0x1D0); 4465 overflow(0x0, "o"); // not really supported by the instruction 4466 no_overflow(0x1, "no"); // not really supported 
by the instruction 4467 %} 4468 %} 4469 4470 // Comparison Code used in long compares 4471 operand cmpOp_commute() %{ 4472 match(Bool); 4473 4474 format %{ "" %} 4475 interface(COND_INTER) %{ 4476 equal(0x4, "e"); 4477 not_equal(0x5, "ne"); 4478 less(0xF, "g"); 4479 greater_equal(0xE, "le"); 4480 less_equal(0xD, "ge"); 4481 greater(0xC, "l"); 4482 overflow(0x0, "o"); 4483 no_overflow(0x1, "no"); 4484 %} 4485 %} 4486 4487 // Comparison Code used in unsigned long compares 4488 operand cmpOpU_commute() %{ 4489 match(Bool); 4490 4491 format %{ "" %} 4492 interface(COND_INTER) %{ 4493 equal(0x4, "e"); 4494 not_equal(0x5, "ne"); 4495 less(0x7, "nbe"); 4496 greater_equal(0x6, "be"); 4497 less_equal(0x3, "nb"); 4498 greater(0x2, "b"); 4499 overflow(0x0, "o"); 4500 no_overflow(0x1, "no"); 4501 %} 4502 %} 4503 4504 //----------OPERAND CLASSES---------------------------------------------------- 4505 // Operand Classes are groups of operands that are used as to simplify 4506 // instruction definitions by not requiring the AD writer to specify separate 4507 // instructions for every form of operand when the instruction accepts 4508 // multiple operand types with the same basic encoding and format. The classic 4509 // case of this is memory operands. 4510 4511 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 4512 indIndex, indIndexScale, indIndexScaleOffset); 4513 4514 // Long memory operations are encoded in 2 instructions and a +4 offset. 4515 // This means some kind of offset is always required and you cannot use 4516 // an oop as the offset (done when working on static globals). 4517 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, 4518 indIndex, indIndexScale, indIndexScaleOffset); 4519 4520 4521 //----------PIPELINE----------------------------------------------------------- 4522 // Rules which define the behavior of the target architectures pipeline. 
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
// Or: _mem if it requires the big decoder and a memory unit.
4566 4567 // Integer ALU reg operation 4568 pipe_class ialu_reg(rRegI dst) %{ 4569 single_instruction; 4570 dst : S4(write); 4571 dst : S3(read); 4572 DECODE : S0; // any decoder 4573 ALU : S3; // any alu 4574 %} 4575 4576 // Long ALU reg operation 4577 pipe_class ialu_reg_long(eRegL dst) %{ 4578 instruction_count(2); 4579 dst : S4(write); 4580 dst : S3(read); 4581 DECODE : S0(2); // any 2 decoders 4582 ALU : S3(2); // both alus 4583 %} 4584 4585 // Integer ALU reg operation using big decoder 4586 pipe_class ialu_reg_fat(rRegI dst) %{ 4587 single_instruction; 4588 dst : S4(write); 4589 dst : S3(read); 4590 D0 : S0; // big decoder only 4591 ALU : S3; // any alu 4592 %} 4593 4594 // Long ALU reg operation using big decoder 4595 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4596 instruction_count(2); 4597 dst : S4(write); 4598 dst : S3(read); 4599 D0 : S0(2); // big decoder only; twice 4600 ALU : S3(2); // any 2 alus 4601 %} 4602 4603 // Integer ALU reg-reg operation 4604 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4605 single_instruction; 4606 dst : S4(write); 4607 src : S3(read); 4608 DECODE : S0; // any decoder 4609 ALU : S3; // any alu 4610 %} 4611 4612 // Long ALU reg-reg operation 4613 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4614 instruction_count(2); 4615 dst : S4(write); 4616 src : S3(read); 4617 DECODE : S0(2); // any 2 decoders 4618 ALU : S3(2); // both alus 4619 %} 4620 4621 // Integer ALU reg-reg operation 4622 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4623 single_instruction; 4624 dst : S4(write); 4625 src : S3(read); 4626 D0 : S0; // big decoder only 4627 ALU : S3; // any alu 4628 %} 4629 4630 // Long ALU reg-reg operation 4631 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4632 instruction_count(2); 4633 dst : S4(write); 4634 src : S3(read); 4635 D0 : S0(2); // big decoder only; twice 4636 ALU : S3(2); // both alus 4637 %} 4638 4639 // Integer ALU reg-mem operation 4640 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4641 single_instruction; 4642 dst : S5(write); 4643 mem : S3(read); 4644 D0 : S0; // big decoder only 4645 ALU : S4; // any alu 4646 MEM : S3; // any mem 4647 %} 4648 4649 // Long ALU reg-mem operation 4650 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4651 instruction_count(2); 4652 dst : S5(write); 4653 mem : S3(read); 4654 D0 : S0(2); // big decoder only; twice 4655 ALU : S4(2); // any 2 alus 4656 MEM : S3(2); // both mems 4657 %} 4658 4659 // Integer mem operation (prefetch) 4660 pipe_class ialu_mem(memory mem) 4661 %{ 4662 single_instruction; 4663 mem : S3(read); 4664 D0 : S0; // big decoder only 4665 MEM : S3; // any mem 4666 %} 4667 4668 // Integer Store to Memory 4669 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4670 single_instruction; 4671 mem : S3(read); 4672 src : S5(read); 4673 D0 : S0; // big decoder only 4674 ALU : S4; // any alu 4675 MEM : S3; 4676 %} 4677 4678 // Long Store to Memory 4679 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4680 instruction_count(2); 4681 mem : S3(read); 4682 src : S5(read); 4683 D0 : S0(2); // big decoder only; twice 4684 ALU : S4(2); // any 2 alus 4685 MEM : S3(2); // Both mems 4686 %} 4687 4688 // Integer Store to Memory 4689 pipe_class ialu_mem_imm(memory mem) %{ 4690 single_instruction; 4691 mem : S3(read); 4692 D0 : S0; // big decoder only 4693 ALU : S4; // any alu 4694 MEM : S3; 4695 %} 4696 4697 // Integer ALU0 reg-reg operation 4698 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4699 single_instruction; 4700 dst : S4(write); 4701 src : S3(read); 4702 D0 : S0; // Big decoder only 4703 ALU0 : S3; // only alu0 4704 %} 4705 4706 // Integer ALU0 reg-mem operation 4707 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4708 single_instruction; 4709 dst : S5(write); 4710 mem : S3(read); 4711 D0 : S0; // big decoder only 4712 ALU0 : S4; // ALU0 only 4713 MEM : S3; // any mem 4714 %} 4715 4716 // Integer ALU reg-reg operation 4717 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4718 single_instruction; 4719 cr : S4(write); 4720 src1 : S3(read); 4721 src2 : S3(read); 4722 DECODE : S0; // any decoder 4723 ALU : S3; // any alu 4724 %} 4725 4726 // Integer ALU reg-imm operation 4727 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4728 single_instruction; 4729 cr : S4(write); 4730 src1 : S3(read); 4731 DECODE : S0; // any decoder 4732 ALU : S3; // any alu 4733 %} 4734 4735 // Integer ALU reg-mem operation 4736 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4737 single_instruction; 4738 cr : S4(write); 4739 src1 : S3(read); 4740 src2 : S3(read); 4741 D0 : S0; // big decoder only 4742 ALU : S4; // any alu 4743 MEM : S3; 4744 %} 4745 4746 // Conditional move reg-reg 4747 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4748 instruction_count(4); 4749 y : S4(read); 4750 q : S3(read); 4751 p : S3(read); 4752 DECODE : S0(4); // any decoder 4753 %} 4754 4755 // Conditional move reg-reg 4756 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4757 single_instruction; 4758 dst : S4(write); 4759 src : S3(read); 4760 cr : S3(read); 4761 DECODE : S0; // any decoder 4762 %} 4763 4764 // Conditional move reg-mem 4765 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4766 single_instruction; 4767 dst : S4(write); 4768 src : S3(read); 4769 cr : S3(read); 4770 DECODE : S0; // any decoder 4771 MEM : S3; 4772 %} 4773 4774 // Conditional move reg-reg long 4775 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4776 single_instruction; 4777 dst : S4(write); 4778 src : S3(read); 4779 cr : S3(read); 4780 DECODE : S0(2); // any 2 decoders 4781 %} 4782 4783 // Conditional move double reg-reg 4784 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4785 single_instruction; 4786 dst : S4(write); 4787 src : S3(read); 4788 cr : S3(read); 4789 DECODE : S0; // any decoder 4790 %} 4791 4792 // Float reg-reg operation 4793 pipe_class fpu_reg(regDPR 
dst) %{
  instruction_count(2);
  dst : S3(read);
  DECODE : S0(2); // any 2 decoders
  FPU : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
  instruction_count(2);
  dst : S4(write);
  src : S3(read);
  DECODE : S0(2); // any 2 decoders
  FPU : S3;
%}

// Float reg-reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
  instruction_count(3);
  dst : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  DECODE : S0(3); // any 3 decoders
  FPU : S3(2);
%}

// Float reg-reg-reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  src3 : S3(read);
  DECODE : S0(4); // any 4 decoders
  FPU : S3(2);
%}

// Float reg-mem-reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst : S4(write);
  src1 : S3(read);
  src2 : S3(read);
  src3 : S3(read);
  DECODE : S1(3); // any 3 decoders
  D0 : S0; // Big decoder only
  FPU : S3(2);
  MEM : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
  instruction_count(2);
  dst : S5(write);
  mem : S3(read);
  D0 : S0; // big decoder only
  DECODE : S1; // any decoder for FPU POP
  FPU : S4;
  MEM : S3; // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
  instruction_count(3);
  dst : S5(write);
  src1 : S3(read);
  mem : S3(read);
  D0 : S0; // big decoder only
  DECODE : S1(2); // any decoder for FPU POP
  FPU : S4;
  MEM : S3; // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2); 4869 src : S5(read); 4870 mem : S3(read); 4871 DECODE : S0; // any decoder for FPU PUSH 4872 D0 : S1; // big decoder only 4873 FPU : S4; 4874 MEM : S3; // any mem 4875 %} 4876 4877 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4878 instruction_count(3); 4879 src1 : S3(read); 4880 src2 : S3(read); 4881 mem : S3(read); 4882 DECODE : S0(2); // any decoder for FPU PUSH 4883 D0 : S1; // big decoder only 4884 FPU : S4; 4885 MEM : S3; // any mem 4886 %} 4887 4888 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4889 instruction_count(3); 4890 src1 : S3(read); 4891 src2 : S3(read); 4892 mem : S4(read); 4893 DECODE : S0; // any decoder for FPU PUSH 4894 D0 : S0(2); // big decoder only 4895 FPU : S4; 4896 MEM : S3(2); // any mem 4897 %} 4898 4899 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4900 instruction_count(2); 4901 src1 : S3(read); 4902 dst : S4(read); 4903 D0 : S0(2); // big decoder only 4904 MEM : S3(2); // any mem 4905 %} 4906 4907 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4908 instruction_count(3); 4909 src1 : S3(read); 4910 src2 : S3(read); 4911 dst : S4(read); 4912 D0 : S0(3); // big decoder only 4913 FPU : S4; 4914 MEM : S3(3); // any mem 4915 %} 4916 4917 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4918 instruction_count(3); 4919 src1 : S4(read); 4920 mem : S4(read); 4921 DECODE : S0; // any decoder for FPU PUSH 4922 D0 : S0(2); // big decoder only 4923 FPU : S4; 4924 MEM : S3(2); // any mem 4925 %} 4926 4927 // Float load constant 4928 pipe_class fpu_reg_con(regDPR dst) %{ 4929 instruction_count(2); 4930 dst : S5(write); 4931 D0 : S0; // big decoder only for the load 4932 DECODE : S1; // any decoder for FPU POP 4933 FPU : S4; 4934 MEM : S3; // any mem 4935 %} 4936 4937 // Float load constant 4938 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 4939 instruction_count(3); 4940 dst : S5(write); 4941 src : S3(read); 4942 D0 : S0; // big decoder only for 
the load 4943 DECODE : S1(2); // any decoder for FPU POP 4944 FPU : S4; 4945 MEM : S3; // any mem 4946 %} 4947 4948 // UnConditional branch 4949 pipe_class pipe_jmp( label labl ) %{ 4950 single_instruction; 4951 BR : S3; 4952 %} 4953 4954 // Conditional branch 4955 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 4956 single_instruction; 4957 cr : S1(read); 4958 BR : S3; 4959 %} 4960 4961 // Allocation idiom 4962 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 4963 instruction_count(1); force_serialization; 4964 fixed_latency(6); 4965 heap_ptr : S3(read); 4966 DECODE : S0(3); 4967 D0 : S2; 4968 MEM : S3; 4969 ALU : S3(2); 4970 dst : S5(write); 4971 BR : S5; 4972 %} 4973 4974 // Generic big/slow expanded idiom 4975 pipe_class pipe_slow( ) %{ 4976 instruction_count(10); multiple_bundles; force_serialization; 4977 fixed_latency(100); 4978 D0 : S0(2); 4979 MEM : S3(2); 4980 %} 4981 4982 // The real do-nothing guy 4983 pipe_class empty( ) %{ 4984 instruction_count(0); 4985 %} 4986 4987 // Define the class for the Nop node 4988 define %{ 4989 MachNop = empty; 4990 %} 4991 4992 %} 4993 4994 //----------INSTRUCTIONS------------------------------------------------------- 4995 // 4996 // match -- States which machine-independent subtree may be replaced 4997 // by this instruction. 4998 // ins_cost -- The estimated cost of this instruction is used by instruction 4999 // selection to identify a minimum cost tree of machine 5000 // instructions that matches a tree of machine-independent 5001 // instructions. 5002 // format -- A string providing the disassembly for this instruction. 5003 // The value of an instruction's operand may be inserted 5004 // by referring to it with a '$' prefix. 5005 // opcode -- Three instruction opcodes may be provided. These are referred 5006 // to within an encode class as $primary, $secondary, and $tertiary 5007 // respectively. 
The primary opcode is commonly used to 5008 // indicate the type of machine instruction, while secondary 5009 // and tertiary are often used for prefix options or addressing 5010 // modes. 5011 // ins_encode -- A list of encode classes with parameters. The encode class 5012 // name must have been defined in an 'enc_class' specification 5013 // in the encode section of the architecture description. 5014 5015 //----------BSWAP-Instruction-------------------------------------------------- 5016 instruct bytes_reverse_int(rRegI dst) %{ 5017 match(Set dst (ReverseBytesI dst)); 5018 5019 format %{ "BSWAP $dst" %} 5020 opcode(0x0F, 0xC8); 5021 ins_encode( OpcP, OpcSReg(dst) ); 5022 ins_pipe( ialu_reg ); 5023 %} 5024 5025 instruct bytes_reverse_long(eRegL dst) %{ 5026 match(Set dst (ReverseBytesL dst)); 5027 5028 format %{ "BSWAP $dst.lo\n\t" 5029 "BSWAP $dst.hi\n\t" 5030 "XCHG $dst.lo $dst.hi" %} 5031 5032 ins_cost(125); 5033 ins_encode( bswap_long_bytes(dst) ); 5034 ins_pipe( ialu_reg_reg); 5035 %} 5036 5037 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5038 match(Set dst (ReverseBytesUS dst)); 5039 effect(KILL cr); 5040 5041 format %{ "BSWAP $dst\n\t" 5042 "SHR $dst,16\n\t" %} 5043 ins_encode %{ 5044 __ bswapl($dst$$Register); 5045 __ shrl($dst$$Register, 16); 5046 %} 5047 ins_pipe( ialu_reg ); 5048 %} 5049 5050 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5051 match(Set dst (ReverseBytesS dst)); 5052 effect(KILL cr); 5053 5054 format %{ "BSWAP $dst\n\t" 5055 "SAR $dst,16\n\t" %} 5056 ins_encode %{ 5057 __ bswapl($dst$$Register); 5058 __ sarl($dst$$Register, 16); 5059 %} 5060 ins_pipe( ialu_reg ); 5061 %} 5062 5063 5064 //---------- Zeros Count Instructions ------------------------------------------ 5065 5066 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5067 predicate(UseCountLeadingZerosInstruction); 5068 match(Set dst (CountLeadingZerosI src)); 5069 effect(KILL cr); 5070 5071 format %{ "LZCNT $dst, $src\t# count 
leading zeros (int)" %} 5072 ins_encode %{ 5073 __ lzcntl($dst$$Register, $src$$Register); 5074 %} 5075 ins_pipe(ialu_reg); 5076 %} 5077 5078 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5079 predicate(!UseCountLeadingZerosInstruction); 5080 match(Set dst (CountLeadingZerosI src)); 5081 effect(KILL cr); 5082 5083 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5084 "JNZ skip\n\t" 5085 "MOV $dst, -1\n" 5086 "skip:\n\t" 5087 "NEG $dst\n\t" 5088 "ADD $dst, 31" %} 5089 ins_encode %{ 5090 Register Rdst = $dst$$Register; 5091 Register Rsrc = $src$$Register; 5092 Label skip; 5093 __ bsrl(Rdst, Rsrc); 5094 __ jccb(Assembler::notZero, skip); 5095 __ movl(Rdst, -1); 5096 __ bind(skip); 5097 __ negl(Rdst); 5098 __ addl(Rdst, BitsPerInt - 1); 5099 %} 5100 ins_pipe(ialu_reg); 5101 %} 5102 5103 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5104 predicate(UseCountLeadingZerosInstruction); 5105 match(Set dst (CountLeadingZerosL src)); 5106 effect(TEMP dst, KILL cr); 5107 5108 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5109 "JNC done\n\t" 5110 "LZCNT $dst, $src.lo\n\t" 5111 "ADD $dst, 32\n" 5112 "done:" %} 5113 ins_encode %{ 5114 Register Rdst = $dst$$Register; 5115 Register Rsrc = $src$$Register; 5116 Label done; 5117 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5118 __ jccb(Assembler::carryClear, done); 5119 __ lzcntl(Rdst, Rsrc); 5120 __ addl(Rdst, BitsPerInt); 5121 __ bind(done); 5122 %} 5123 ins_pipe(ialu_reg); 5124 %} 5125 5126 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5127 predicate(!UseCountLeadingZerosInstruction); 5128 match(Set dst (CountLeadingZerosL src)); 5129 effect(TEMP dst, KILL cr); 5130 5131 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5132 "JZ msw_is_zero\n\t" 5133 "ADD $dst, 32\n\t" 5134 "JMP not_zero\n" 5135 "msw_is_zero:\n\t" 5136 "BSR $dst, $src.lo\n\t" 5137 "JNZ not_zero\n\t" 5138 "MOV $dst, -1\n" 5139 "not_zero:\n\t" 5140 "NEG 
$dst\n\t" 5141 "ADD $dst, 63\n" %} 5142 ins_encode %{ 5143 Register Rdst = $dst$$Register; 5144 Register Rsrc = $src$$Register; 5145 Label msw_is_zero; 5146 Label not_zero; 5147 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5148 __ jccb(Assembler::zero, msw_is_zero); 5149 __ addl(Rdst, BitsPerInt); 5150 __ jmpb(not_zero); 5151 __ bind(msw_is_zero); 5152 __ bsrl(Rdst, Rsrc); 5153 __ jccb(Assembler::notZero, not_zero); 5154 __ movl(Rdst, -1); 5155 __ bind(not_zero); 5156 __ negl(Rdst); 5157 __ addl(Rdst, BitsPerLong - 1); 5158 %} 5159 ins_pipe(ialu_reg); 5160 %} 5161 5162 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5163 predicate(UseCountTrailingZerosInstruction); 5164 match(Set dst (CountTrailingZerosI src)); 5165 effect(KILL cr); 5166 5167 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5168 ins_encode %{ 5169 __ tzcntl($dst$$Register, $src$$Register); 5170 %} 5171 ins_pipe(ialu_reg); 5172 %} 5173 5174 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5175 predicate(!UseCountTrailingZerosInstruction); 5176 match(Set dst (CountTrailingZerosI src)); 5177 effect(KILL cr); 5178 5179 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5180 "JNZ done\n\t" 5181 "MOV $dst, 32\n" 5182 "done:" %} 5183 ins_encode %{ 5184 Register Rdst = $dst$$Register; 5185 Label done; 5186 __ bsfl(Rdst, $src$$Register); 5187 __ jccb(Assembler::notZero, done); 5188 __ movl(Rdst, BitsPerInt); 5189 __ bind(done); 5190 %} 5191 ins_pipe(ialu_reg); 5192 %} 5193 5194 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5195 predicate(UseCountTrailingZerosInstruction); 5196 match(Set dst (CountTrailingZerosL src)); 5197 effect(TEMP dst, KILL cr); 5198 5199 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5200 "JNC done\n\t" 5201 "TZCNT $dst, $src.hi\n\t" 5202 "ADD $dst, 32\n" 5203 "done:" %} 5204 ins_encode %{ 5205 Register Rdst = $dst$$Register; 5206 Register Rsrc = $src$$Register; 5207 Label done; 5208 __ 
tzcntl(Rdst, Rsrc); 5209 __ jccb(Assembler::carryClear, done); 5210 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5211 __ addl(Rdst, BitsPerInt); 5212 __ bind(done); 5213 %} 5214 ins_pipe(ialu_reg); 5215 %} 5216 5217 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5218 predicate(!UseCountTrailingZerosInstruction); 5219 match(Set dst (CountTrailingZerosL src)); 5220 effect(TEMP dst, KILL cr); 5221 5222 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5223 "JNZ done\n\t" 5224 "BSF $dst, $src.hi\n\t" 5225 "JNZ msw_not_zero\n\t" 5226 "MOV $dst, 32\n" 5227 "msw_not_zero:\n\t" 5228 "ADD $dst, 32\n" 5229 "done:" %} 5230 ins_encode %{ 5231 Register Rdst = $dst$$Register; 5232 Register Rsrc = $src$$Register; 5233 Label msw_not_zero; 5234 Label done; 5235 __ bsfl(Rdst, Rsrc); 5236 __ jccb(Assembler::notZero, done); 5237 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5238 __ jccb(Assembler::notZero, msw_not_zero); 5239 __ movl(Rdst, BitsPerInt); 5240 __ bind(msw_not_zero); 5241 __ addl(Rdst, BitsPerInt); 5242 __ bind(done); 5243 %} 5244 ins_pipe(ialu_reg); 5245 %} 5246 5247 5248 //---------- Population Count Instructions ------------------------------------- 5249 5250 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5251 predicate(UsePopCountInstruction); 5252 match(Set dst (PopCountI src)); 5253 effect(KILL cr); 5254 5255 format %{ "POPCNT $dst, $src" %} 5256 ins_encode %{ 5257 __ popcntl($dst$$Register, $src$$Register); 5258 %} 5259 ins_pipe(ialu_reg); 5260 %} 5261 5262 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5263 predicate(UsePopCountInstruction); 5264 match(Set dst (PopCountI (LoadI mem))); 5265 effect(KILL cr); 5266 5267 format %{ "POPCNT $dst, $mem" %} 5268 ins_encode %{ 5269 __ popcntl($dst$$Register, $mem$$Address); 5270 %} 5271 ins_pipe(ialu_reg); 5272 %} 5273 5274 // Note: Long.bitCount(long) returns an int. 
// Popcount of a long: count each 32-bit half separately and sum.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Build addresses of the two halves by hand: the low word at $mem, the
    // high word at $mem + 4.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));  // high word is zero
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 mask bits matter: the load already zeroed bits 8..31.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    // A 0xFF mask reduces the ushort load to a zero-extended byte load.
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 mask bits matter: the load already zeroed bits 16..31.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);  // fill high word with the sign
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// Atomic 64-bit load via the x87 unit when SSE2 is unavailable.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic 64-bit load via an XMM register, result left in a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic 64-bit load via an XMM register, result split into a GPR pair.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);         // shift the high word down
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Float
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double (x87 stack, pre-SSE2)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 stack, no SSE)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address -- one LEA form per addressing-mode operand.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero -- XOR of a register with itself is smaller/cheaper
// than MOV imm 0, but clobbers the flags.
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load a long constant: one MOV imm per 32-bit half.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
// Load a float constant from the constant table onto the x87 stack.
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
// FLDZ pushes +0.0 directly; no constant-table access needed.
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
// FLD1 pushes +1.0 directly; no constant-table access needed.
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// SSE float constant load from the constant table.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
6029 instruct loadConDPR(regDPR dst, immDPR con) %{ 6030 match(Set dst con); 6031 ins_cost(125); 6032 6033 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 6034 "FSTP $dst" %} 6035 ins_encode %{ 6036 __ fld_d($constantaddress($con)); 6037 __ fstp_d($dst$$reg); 6038 %} 6039 ins_pipe(fpu_reg_con); 6040 %} 6041 6042 // The instruction usage is guarded by predicate in operand immDPR0(). 6043 instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 6044 match(Set dst con); 6045 ins_cost(125); 6046 6047 format %{ "FLDZ ST\n\t" 6048 "FSTP $dst" %} 6049 ins_encode %{ 6050 __ fldz(); 6051 __ fstp_d($dst$$reg); 6052 %} 6053 ins_pipe(fpu_reg_con); 6054 %} 6055 6056 // The instruction usage is guarded by predicate in operand immDPR1(). 6057 instruct loadConDPR1(regDPR dst, immDPR1 con) %{ 6058 match(Set dst con); 6059 ins_cost(125); 6060 6061 format %{ "FLD1 ST\n\t" 6062 "FSTP $dst" %} 6063 ins_encode %{ 6064 __ fld1(); 6065 __ fstp_d($dst$$reg); 6066 %} 6067 ins_pipe(fpu_reg_con); 6068 %} 6069 6070 // The instruction usage is guarded by predicate in operand immD(). 6071 instruct loadConD(regD dst, immD con) %{ 6072 match(Set dst con); 6073 ins_cost(125); 6074 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 6075 ins_encode %{ 6076 __ movdbl($dst$$XMMRegister, $constantaddress($con)); 6077 %} 6078 ins_pipe(pipe_slow); 6079 %} 6080 6081 // The instruction usage is guarded by predicate in operand immD0(). 
6082 instruct loadConD0(regD dst, immD0 src) %{ 6083 match(Set dst src); 6084 ins_cost(100); 6085 format %{ "XORPD $dst,$dst\t# double 0.0" %} 6086 ins_encode %{ 6087 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); 6088 %} 6089 ins_pipe( pipe_slow ); 6090 %} 6091 6092 // Load Stack Slot 6093 instruct loadSSI(rRegI dst, stackSlotI src) %{ 6094 match(Set dst src); 6095 ins_cost(125); 6096 6097 format %{ "MOV $dst,$src" %} 6098 opcode(0x8B); 6099 ins_encode( OpcP, RegMem(dst,src)); 6100 ins_pipe( ialu_reg_mem ); 6101 %} 6102 6103 instruct loadSSL(eRegL dst, stackSlotL src) %{ 6104 match(Set dst src); 6105 6106 ins_cost(200); 6107 format %{ "MOV $dst,$src.lo\n\t" 6108 "MOV $dst+4,$src.hi" %} 6109 opcode(0x8B, 0x8B); 6110 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) ); 6111 ins_pipe( ialu_mem_long_reg ); 6112 %} 6113 6114 // Load Stack Slot 6115 instruct loadSSP(eRegP dst, stackSlotP src) %{ 6116 match(Set dst src); 6117 ins_cost(125); 6118 6119 format %{ "MOV $dst,$src" %} 6120 opcode(0x8B); 6121 ins_encode( OpcP, RegMem(dst,src)); 6122 ins_pipe( ialu_reg_mem ); 6123 %} 6124 6125 // Load Stack Slot 6126 instruct loadSSF(regFPR dst, stackSlotF src) %{ 6127 match(Set dst src); 6128 ins_cost(125); 6129 6130 format %{ "FLD_S $src\n\t" 6131 "FSTP $dst" %} 6132 opcode(0xD9); /* D9 /0, FLD m32real */ 6133 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6134 Pop_Reg_FPR(dst) ); 6135 ins_pipe( fpu_reg_mem ); 6136 %} 6137 6138 // Load Stack Slot 6139 instruct loadSSD(regDPR dst, stackSlotD src) %{ 6140 match(Set dst src); 6141 ins_cost(125); 6142 6143 format %{ "FLD_D $src\n\t" 6144 "FSTP $dst" %} 6145 opcode(0xDD); /* DD /0, FLD m64real */ 6146 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6147 Pop_Reg_DPR(dst) ); 6148 ins_pipe( fpu_reg_mem ); 6149 %} 6150 6151 // Prefetch instructions for allocation. 6152 // Must be safe to execute with invalid address (cannot fault). 
// No prefetch hardware available without SSE: emit nothing.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// AllocatePrefetchInstr==3 selects PREFETCHW (prefetch with intent to write).
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short (0x66 operand-size prefix + 32-bit MOV opcode)
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long: two 32-bit stores; only legal when the store does not
// require atomic access (see the volatile variants below).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer (truncating store: only the low half is written)
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: bounce the 64-bit value through an XMM temp so the
// store is a single atomic 64-bit MOVSD. The leading CMP probes the
// target address for the implicit null check.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant taking the source in a GPR pair: assemble the two 32-bit
// halves into one XMM register (MOVD + MOVD + PUNPCKLDQ), then store it
// with a single atomic 64-bit MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87 path; source must already be on the FP stack top)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Double (register-to-register move between regD and vlRegD classes)
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double (reverse direction of MoveD2VL)
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (register-to-register move between regF and vlRegF classes)
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float (reverse direction of MoveF2VL)
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store Float (x87 path)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot (two 32-bit stores, low then high half)
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// Acquire barrier: no instruction is needed on x86 (loads are not
// reordered with other loads), so this emits nothing.
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release barrier: also empty on x86 (stores are not reordered with
// other stores).
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full (StoreLoad) barrier: the only fence x86 actually needs an
// instruction for; emitted as a locked add to the top of stack.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elide the StoreLoad barrier when the matcher proves a prior store
// already orders it (Matcher::post_store_load_barrier).
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P is a no-op here: source and destination are constrained to the
// same register (EAX), so no code is emitted.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move, emulated with a short branch for CPUs without CMOV
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// Real CMOVcc on CPUs that support it
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move (x87 FCMOV, double; destination is the FP stack top)
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move (x87 FCMOV, float)
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// So for a signed compare, emulate with a conditional branch around an
// FP-stack copy instead of FCMOV.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2: branch around an XMM register move instead.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: one CMOV per 32-bit register half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add 1: single-byte INC when UseIncDec allows it.
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: avoids clobbering either source register
// and does not modify the flags.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add -1: single-byte DEC when UseIncDec allows it.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write: add a register directly into memory.
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// Cast nodes are compile-time type assertions only; they emit no code.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castLL( eRegL dst ) %{
  match(Set dst (CastLL dst));
  format %{ "#castLL of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castFF( regF dst ) %{
  predicate(UseSSE >= 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castDD( regD dst ) %{
  predicate(UseSSE >= 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty
encoding*/ ); 7206 ins_cost(0); 7207 ins_pipe( empty ); 7208 %} 7209 7210 instruct castFF_PR( regFPR dst ) %{ 7211 predicate(UseSSE < 1); 7212 match(Set dst (CastFF dst)); 7213 format %{ "#castFF of $dst" %} 7214 ins_encode( /*empty encoding*/ ); 7215 ins_cost(0); 7216 ins_pipe( empty ); 7217 %} 7218 7219 instruct castDD_PR( regDPR dst ) %{ 7220 predicate(UseSSE < 2); 7221 match(Set dst (CastDD dst)); 7222 format %{ "#castDD of $dst" %} 7223 ins_encode( /*empty encoding*/ ); 7224 ins_cost(0); 7225 ins_pipe( empty ); 7226 %} 7227 7228 // Load-locked - same as a regular pointer load when used with compare-swap 7229 instruct loadPLocked(eRegP dst, memory mem) %{ 7230 match(Set dst (LoadPLocked mem)); 7231 7232 ins_cost(125); 7233 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7234 opcode(0x8B); 7235 ins_encode( OpcP, RegMem(dst,mem)); 7236 ins_pipe( ialu_reg_mem ); 7237 %} 7238 7239 // Conditional-store of the updated heap-top. 7240 // Used during allocation of the shared heap. 7241 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7242 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7243 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7244 // EAX is killed if there is contention, but then it's also unused. 7245 // In the common case of no contention, EAX holds the new oop address. 7246 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7247 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7248 ins_pipe( pipe_cmpxchg ); 7249 %} 7250 7251 // Conditional-store of an int value. 7252 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS via CMPXCHG8B; requires CX8 support, result is a boolean in $res.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange variants return the witnessed value in $oldval
// instead of a boolean; flags are clobbered but not consumed.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAdd with unused result: emit a plain locked ADD instead of XADD.
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract immediate from register.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract memory operand from register.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Subtract register from memory (read-modify-write).
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negate register (0 - dst).
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low half of EDX:EAX only.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only applies when the long constant operand fits in a 32-bit int.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only applies when the long constant operand fits in a 32-bit int.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// dst + src1*... composite: expands into two multiplies and an add.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  // hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  // The explicit min_jint / -1 check avoids the #DE overflow trap of IDIV.
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (DivL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (ModL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
// convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrough for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  // NOTE(review): pipe class is ialu_mem_imm although this is a register
  // form — confirm this is intentional.
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Convert int to short (i2s): fold the SHL-16 / SAR-16 pair emitted for
// the bytecode into a single sign-extending 16->32 bit move.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);   // 0F BF /r (MOVSX r32,r/m16) encodes in 3 bytes
  format %{ "MOVSX  $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
// The shift count is taken implicitly from CL (D3 /5), hence the
// eCXRegI operand constraint on 'shift'.
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);   // shifts clobber the condition codes

  size(2);
  format %{ "SHR    $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND    $dst,$src" %}
  opcode(0x23);   // AND r32, r/m32
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
// No fixed size(): Con8or32 selects the short sign-extended imm8
// encoding when the constant fits in 8 bits, the imm32 form otherwise.
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND    $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);   // load-op form costs more than reg-reg
  format %{ "AND    $dst,$src" %}
  opcode(0x23);   // AND r32, r/m32
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst
(StoreI dst (AndI (LoadI dst) src))); 8214 effect(KILL cr); 8215 8216 ins_cost(150); 8217 format %{ "AND $dst,$src" %} 8218 opcode(0x21); /* Opcode 21 /r */ 8219 ins_encode( OpcP, RegMem( src, dst ) ); 8220 ins_pipe( ialu_mem_reg ); 8221 %} 8222 8223 // And Memory with Immediate 8224 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8225 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8226 effect(KILL cr); 8227 8228 ins_cost(125); 8229 format %{ "AND $dst,$src" %} 8230 opcode(0x81, 0x4); /* Opcode 81 /4 id */ 8231 // ins_encode( MemImm( dst, src) ); 8232 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8233 ins_pipe( ialu_mem_imm ); 8234 %} 8235 8236 // BMI1 instructions 8237 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ 8238 match(Set dst (AndI (XorI src1 minus_1) src2)); 8239 predicate(UseBMI1Instructions); 8240 effect(KILL cr); 8241 8242 format %{ "ANDNL $dst, $src1, $src2" %} 8243 8244 ins_encode %{ 8245 __ andnl($dst$$Register, $src1$$Register, $src2$$Register); 8246 %} 8247 ins_pipe(ialu_reg); 8248 %} 8249 8250 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ 8251 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); 8252 predicate(UseBMI1Instructions); 8253 effect(KILL cr); 8254 8255 ins_cost(125); 8256 format %{ "ANDNL $dst, $src1, $src2" %} 8257 8258 ins_encode %{ 8259 __ andnl($dst$$Register, $src1$$Register, $src2$$Address); 8260 %} 8261 ins_pipe(ialu_reg_mem); 8262 %} 8263 8264 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{ 8265 match(Set dst (AndI (SubI imm_zero src) src)); 8266 predicate(UseBMI1Instructions); 8267 effect(KILL cr); 8268 8269 format %{ "BLSIL $dst, $src" %} 8270 8271 ins_encode %{ 8272 __ blsil($dst$$Register, $src$$Register); 8273 %} 8274 ins_pipe(ialu_reg); 8275 %} 8276 8277 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{ 8278 
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8279 predicate(UseBMI1Instructions); 8280 effect(KILL cr); 8281 8282 ins_cost(125); 8283 format %{ "BLSIL $dst, $src" %} 8284 8285 ins_encode %{ 8286 __ blsil($dst$$Register, $src$$Address); 8287 %} 8288 ins_pipe(ialu_reg_mem); 8289 %} 8290 8291 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8292 %{ 8293 match(Set dst (XorI (AddI src minus_1) src)); 8294 predicate(UseBMI1Instructions); 8295 effect(KILL cr); 8296 8297 format %{ "BLSMSKL $dst, $src" %} 8298 8299 ins_encode %{ 8300 __ blsmskl($dst$$Register, $src$$Register); 8301 %} 8302 8303 ins_pipe(ialu_reg); 8304 %} 8305 8306 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8307 %{ 8308 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8309 predicate(UseBMI1Instructions); 8310 effect(KILL cr); 8311 8312 ins_cost(125); 8313 format %{ "BLSMSKL $dst, $src" %} 8314 8315 ins_encode %{ 8316 __ blsmskl($dst$$Register, $src$$Address); 8317 %} 8318 8319 ins_pipe(ialu_reg_mem); 8320 %} 8321 8322 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8323 %{ 8324 match(Set dst (AndI (AddI src minus_1) src) ); 8325 predicate(UseBMI1Instructions); 8326 effect(KILL cr); 8327 8328 format %{ "BLSRL $dst, $src" %} 8329 8330 ins_encode %{ 8331 __ blsrl($dst$$Register, $src$$Register); 8332 %} 8333 8334 ins_pipe(ialu_reg); 8335 %} 8336 8337 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8338 %{ 8339 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8340 predicate(UseBMI1Instructions); 8341 effect(KILL cr); 8342 8343 ins_cost(125); 8344 format %{ "BLSRL $dst, $src" %} 8345 8346 ins_encode %{ 8347 __ blsrl($dst$$Register, $src$$Address); 8348 %} 8349 8350 ins_pipe(ialu_reg_mem); 8351 %} 8352 8353 // Or Instructions 8354 // Or Register with Register 8355 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8356 match(Set dst 
(OrI dst src)); 8357 effect(KILL cr); 8358 8359 size(2); 8360 format %{ "OR $dst,$src" %} 8361 opcode(0x0B); 8362 ins_encode( OpcP, RegReg( dst, src) ); 8363 ins_pipe( ialu_reg_reg ); 8364 %} 8365 8366 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8367 match(Set dst (OrI dst (CastP2X src))); 8368 effect(KILL cr); 8369 8370 size(2); 8371 format %{ "OR $dst,$src" %} 8372 opcode(0x0B); 8373 ins_encode( OpcP, RegReg( dst, src) ); 8374 ins_pipe( ialu_reg_reg ); 8375 %} 8376 8377 8378 // Or Register with Immediate 8379 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8380 match(Set dst (OrI dst src)); 8381 effect(KILL cr); 8382 8383 format %{ "OR $dst,$src" %} 8384 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8385 // ins_encode( RegImm( dst, src) ); 8386 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8387 ins_pipe( ialu_reg ); 8388 %} 8389 8390 // Or Register with Memory 8391 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8392 match(Set dst (OrI dst (LoadI src))); 8393 effect(KILL cr); 8394 8395 ins_cost(125); 8396 format %{ "OR $dst,$src" %} 8397 opcode(0x0B); 8398 ins_encode( OpcP, RegMem( dst, src) ); 8399 ins_pipe( ialu_reg_mem ); 8400 %} 8401 8402 // Or Memory with Register 8403 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8404 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8405 effect(KILL cr); 8406 8407 ins_cost(150); 8408 format %{ "OR $dst,$src" %} 8409 opcode(0x09); /* Opcode 09 /r */ 8410 ins_encode( OpcP, RegMem( src, dst ) ); 8411 ins_pipe( ialu_mem_reg ); 8412 %} 8413 8414 // Or Memory with Immediate 8415 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8416 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8417 effect(KILL cr); 8418 8419 ins_cost(125); 8420 format %{ "OR $dst,$src" %} 8421 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8422 // ins_encode( MemImm( dst, src) ); 8423 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8424 ins_pipe( ialu_mem_imm ); 
%}

// ROL/ROR
// ROL expand
// Rotate-left building blocks: these carry no match rule of their own;
// they are instantiated only by the expand %{ %} clauses of the rotate
// match rules below.
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
  opcode(0xD1, 0x0);   /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0);   /*Opcode /C1  /0  */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// Rotate by the count in CL (D3 /0); dst must not be ECX (ncxRegI)
// since ECX carries the count.
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
  opcode(0xD3, 0x0);    /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
// (x << 1) | (x >>> -1): the -1 right-shift count is masked to 31.
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate: only a rotate when the two shift counts sum to 0 mod 32.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
// (x << s) | (x >>> (0 - s))
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
// (x << s) | (x >>> (32 - s))
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift)
(URShiftI dst (SubI c32 shift)))); 8488 8489 expand %{ 8490 rolI_eReg_CL(dst, shift, cr); 8491 %} 8492 %} 8493 8494 // ROR expand 8495 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8496 effect(USE_DEF dst, USE shift, KILL cr); 8497 8498 format %{ "ROR $dst, $shift" %} 8499 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8500 ins_encode( OpcP, RegOpc( dst ) ); 8501 ins_pipe( ialu_reg ); 8502 %} 8503 8504 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8505 effect (USE_DEF dst, USE shift, KILL cr); 8506 8507 format %{ "ROR $dst, $shift" %} 8508 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8509 ins_encode( RegOpcImm(dst, shift) ); 8510 ins_pipe( ialu_reg ); 8511 %} 8512 8513 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8514 effect(USE_DEF dst, USE shift, KILL cr); 8515 8516 format %{ "ROR $dst, $shift" %} 8517 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8518 ins_encode(OpcP, RegOpc(dst)); 8519 ins_pipe( ialu_reg_reg ); 8520 %} 8521 // end of ROR expand 8522 8523 // ROR right once 8524 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8525 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8526 8527 expand %{ 8528 rorI_eReg_imm1(dst, rshift, cr); 8529 %} 8530 %} 8531 8532 // ROR 32bit by immI8 once 8533 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8534 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8535 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8536 8537 expand %{ 8538 rorI_eReg_imm8(dst, rshift, cr); 8539 %} 8540 %} 8541 8542 // ROR 32bit var by var once 8543 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8544 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8545 8546 expand %{ 8547 rorI_eReg_CL(dst, shift, cr); 8548 %} 8549 %} 8550 8551 // ROR 32bit var by var once 8552 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, 
immI_32 c32, eFlagsReg cr) %{ 8553 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8554 8555 expand %{ 8556 rorI_eReg_CL(dst, shift, cr); 8557 %} 8558 %} 8559 8560 // Xor Instructions 8561 // Xor Register with Register 8562 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8563 match(Set dst (XorI dst src)); 8564 effect(KILL cr); 8565 8566 size(2); 8567 format %{ "XOR $dst,$src" %} 8568 opcode(0x33); 8569 ins_encode( OpcP, RegReg( dst, src) ); 8570 ins_pipe( ialu_reg_reg ); 8571 %} 8572 8573 // Xor Register with Immediate -1 8574 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8575 match(Set dst (XorI dst imm)); 8576 8577 size(2); 8578 format %{ "NOT $dst" %} 8579 ins_encode %{ 8580 __ notl($dst$$Register); 8581 %} 8582 ins_pipe( ialu_reg ); 8583 %} 8584 8585 // Xor Register with Immediate 8586 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8587 match(Set dst (XorI dst src)); 8588 effect(KILL cr); 8589 8590 format %{ "XOR $dst,$src" %} 8591 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8592 // ins_encode( RegImm( dst, src) ); 8593 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8594 ins_pipe( ialu_reg ); 8595 %} 8596 8597 // Xor Register with Memory 8598 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8599 match(Set dst (XorI dst (LoadI src))); 8600 effect(KILL cr); 8601 8602 ins_cost(125); 8603 format %{ "XOR $dst,$src" %} 8604 opcode(0x33); 8605 ins_encode( OpcP, RegMem(dst, src) ); 8606 ins_pipe( ialu_reg_mem ); 8607 %} 8608 8609 // Xor Memory with Register 8610 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8611 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8612 effect(KILL cr); 8613 8614 ins_cost(150); 8615 format %{ "XOR $dst,$src" %} 8616 opcode(0x31); /* Opcode 31 /r */ 8617 ins_encode( OpcP, RegMem( src, dst ) ); 8618 ins_pipe( ialu_mem_reg ); 8619 %} 8620 8621 // Xor Memory with Immediate 8622 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8623 match(Set 
dst (StoreI dst (XorI (LoadI dst) src))); 8624 effect(KILL cr); 8625 8626 ins_cost(125); 8627 format %{ "XOR $dst,$src" %} 8628 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8629 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8630 ins_pipe( ialu_mem_imm ); 8631 %} 8632 8633 //----------Convert Int to Boolean--------------------------------------------- 8634 8635 instruct movI_nocopy(rRegI dst, rRegI src) %{ 8636 effect( DEF dst, USE src ); 8637 format %{ "MOV $dst,$src" %} 8638 ins_encode( enc_Copy( dst, src) ); 8639 ins_pipe( ialu_reg_reg ); 8640 %} 8641 8642 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8643 effect( USE_DEF dst, USE src, KILL cr ); 8644 8645 size(4); 8646 format %{ "NEG $dst\n\t" 8647 "ADC $dst,$src" %} 8648 ins_encode( neg_reg(dst), 8649 OpcRegReg(0x13,dst,src) ); 8650 ins_pipe( ialu_reg_reg_long ); 8651 %} 8652 8653 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8654 match(Set dst (Conv2B src)); 8655 8656 expand %{ 8657 movI_nocopy(dst,src); 8658 ci2b(dst,src,cr); 8659 %} 8660 %} 8661 8662 instruct movP_nocopy(rRegI dst, eRegP src) %{ 8663 effect( DEF dst, USE src ); 8664 format %{ "MOV $dst,$src" %} 8665 ins_encode( enc_Copy( dst, src) ); 8666 ins_pipe( ialu_reg_reg ); 8667 %} 8668 8669 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8670 effect( USE_DEF dst, USE src, KILL cr ); 8671 format %{ "NEG $dst\n\t" 8672 "ADC $dst,$src" %} 8673 ins_encode( neg_reg(dst), 8674 OpcRegReg(0x13,dst,src) ); 8675 ins_pipe( ialu_reg_reg_long ); 8676 %} 8677 8678 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8679 match(Set dst (Conv2B src)); 8680 8681 expand %{ 8682 movP_nocopy(dst,src); 8683 cp2b(dst,src,cr); 8684 %} 8685 %} 8686 8687 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8688 match(Set dst (CmpLTMask p q)); 8689 effect(KILL cr); 8690 ins_cost(400); 8691 8692 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8693 format %{ "XOR $dst,$dst\n\t" 8694 
"CMP $p,$q\n\t" 8695 "SETlt $dst\n\t" 8696 "NEG $dst" %} 8697 ins_encode %{ 8698 Register Rp = $p$$Register; 8699 Register Rq = $q$$Register; 8700 Register Rd = $dst$$Register; 8701 Label done; 8702 __ xorl(Rd, Rd); 8703 __ cmpl(Rp, Rq); 8704 __ setb(Assembler::less, Rd); 8705 __ negl(Rd); 8706 %} 8707 8708 ins_pipe(pipe_slow); 8709 %} 8710 8711 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{ 8712 match(Set dst (CmpLTMask dst zero)); 8713 effect(DEF dst, KILL cr); 8714 ins_cost(100); 8715 8716 format %{ "SAR $dst,31\t# cmpLTMask0" %} 8717 ins_encode %{ 8718 __ sarl($dst$$Register, 31); 8719 %} 8720 ins_pipe(ialu_reg); 8721 %} 8722 8723 /* better to save a register than avoid a branch */ 8724 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8725 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 8726 effect(KILL cr); 8727 ins_cost(400); 8728 format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" 8729 "JGE done\n\t" 8730 "ADD $p,$y\n" 8731 "done: " %} 8732 ins_encode %{ 8733 Register Rp = $p$$Register; 8734 Register Rq = $q$$Register; 8735 Register Ry = $y$$Register; 8736 Label done; 8737 __ subl(Rp, Rq); 8738 __ jccb(Assembler::greaterEqual, done); 8739 __ addl(Rp, Ry); 8740 __ bind(done); 8741 %} 8742 8743 ins_pipe(pipe_cmplt); 8744 %} 8745 8746 /* better to save a register than avoid a branch */ 8747 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8748 match(Set y (AndI (CmpLTMask p q) y)); 8749 effect(KILL cr); 8750 8751 ins_cost(300); 8752 8753 format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" 8754 "JLT done\n\t" 8755 "XORL $y, $y\n" 8756 "done: " %} 8757 ins_encode %{ 8758 Register Rp = $p$$Register; 8759 Register Rq = $q$$Register; 8760 Register Ry = $y$$Register; 8761 Label done; 8762 __ cmpl(Rp, Rq); 8763 __ jccb(Assembler::less, done); 8764 __ xorl(Ry, Ry); 8765 __ bind(done); 8766 %} 8767 8768 ins_pipe(pipe_cmplt); 8769 %} 8770 8771 /* If I enable this, I encourage spilling in the inner loop of compress. 
8772 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8773 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8774 */ 8775 //----------Overflow Math Instructions----------------------------------------- 8776 8777 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8778 %{ 8779 match(Set cr (OverflowAddI op1 op2)); 8780 effect(DEF cr, USE_KILL op1, USE op2); 8781 8782 format %{ "ADD $op1, $op2\t# overflow check int" %} 8783 8784 ins_encode %{ 8785 __ addl($op1$$Register, $op2$$Register); 8786 %} 8787 ins_pipe(ialu_reg_reg); 8788 %} 8789 8790 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) 8791 %{ 8792 match(Set cr (OverflowAddI op1 op2)); 8793 effect(DEF cr, USE_KILL op1, USE op2); 8794 8795 format %{ "ADD $op1, $op2\t# overflow check int" %} 8796 8797 ins_encode %{ 8798 __ addl($op1$$Register, $op2$$constant); 8799 %} 8800 ins_pipe(ialu_reg_reg); 8801 %} 8802 8803 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) 8804 %{ 8805 match(Set cr (OverflowSubI op1 op2)); 8806 8807 format %{ "CMP $op1, $op2\t# overflow check int" %} 8808 ins_encode %{ 8809 __ cmpl($op1$$Register, $op2$$Register); 8810 %} 8811 ins_pipe(ialu_reg_reg); 8812 %} 8813 8814 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) 8815 %{ 8816 match(Set cr (OverflowSubI op1 op2)); 8817 8818 format %{ "CMP $op1, $op2\t# overflow check int" %} 8819 ins_encode %{ 8820 __ cmpl($op1$$Register, $op2$$constant); 8821 %} 8822 ins_pipe(ialu_reg_reg); 8823 %} 8824 8825 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2) 8826 %{ 8827 match(Set cr (OverflowSubI zero op2)); 8828 effect(DEF cr, USE_KILL op2); 8829 8830 format %{ "NEG $op2\t# overflow check int" %} 8831 ins_encode %{ 8832 __ negl($op2$$Register); 8833 %} 8834 ins_pipe(ialu_reg_reg); 8835 %} 8836 8837 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8838 %{ 8839 match(Set cr (OverflowMulI op1 op2)); 8840 
effect(DEF cr, USE_KILL op1, USE op2); 8841 8842 format %{ "IMUL $op1, $op2\t# overflow check int" %} 8843 ins_encode %{ 8844 __ imull($op1$$Register, $op2$$Register); 8845 %} 8846 ins_pipe(ialu_reg_reg_alu0); 8847 %} 8848 8849 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) 8850 %{ 8851 match(Set cr (OverflowMulI op1 op2)); 8852 effect(DEF cr, TEMP tmp, USE op1, USE op2); 8853 8854 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} 8855 ins_encode %{ 8856 __ imull($tmp$$Register, $op1$$Register, $op2$$constant); 8857 %} 8858 ins_pipe(ialu_reg_reg_alu0); 8859 %} 8860 8861 // Integer Absolute Instructions 8862 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr) 8863 %{ 8864 match(Set dst (AbsI src)); 8865 effect(TEMP dst, TEMP tmp, KILL cr); 8866 format %{ "movl $tmp, $src\n\t" 8867 "sarl $tmp, 31\n\t" 8868 "movl $dst, $src\n\t" 8869 "xorl $dst, $tmp\n\t" 8870 "subl $dst, $tmp\n" 8871 %} 8872 ins_encode %{ 8873 __ movl($tmp$$Register, $src$$Register); 8874 __ sarl($tmp$$Register, 31); 8875 __ movl($dst$$Register, $src$$Register); 8876 __ xorl($dst$$Register, $tmp$$Register); 8877 __ subl($dst$$Register, $tmp$$Register); 8878 %} 8879 8880 ins_pipe(ialu_reg_reg); 8881 %} 8882 8883 //----------Long Instructions------------------------------------------------ 8884 // Add Long Register with Register 8885 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8886 match(Set dst (AddL dst src)); 8887 effect(KILL cr); 8888 ins_cost(200); 8889 format %{ "ADD $dst.lo,$src.lo\n\t" 8890 "ADC $dst.hi,$src.hi" %} 8891 opcode(0x03, 0x13); 8892 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8893 ins_pipe( ialu_reg_reg_long ); 8894 %} 8895 8896 // Add Long Register with Immediate 8897 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8898 match(Set dst (AddL dst src)); 8899 effect(KILL cr); 8900 format %{ "ADD $dst.lo,$src.lo\n\t" 8901 "ADC $dst.hi,$src.hi" %} 8902 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 
*/ 8903 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8904 ins_pipe( ialu_reg_long ); 8905 %} 8906 8907 // Add Long Register with Memory 8908 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8909 match(Set dst (AddL dst (LoadL mem))); 8910 effect(KILL cr); 8911 ins_cost(125); 8912 format %{ "ADD $dst.lo,$mem\n\t" 8913 "ADC $dst.hi,$mem+4" %} 8914 opcode(0x03, 0x13); 8915 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8916 ins_pipe( ialu_reg_long_mem ); 8917 %} 8918 8919 // Subtract Long Register with Register. 8920 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8921 match(Set dst (SubL dst src)); 8922 effect(KILL cr); 8923 ins_cost(200); 8924 format %{ "SUB $dst.lo,$src.lo\n\t" 8925 "SBB $dst.hi,$src.hi" %} 8926 opcode(0x2B, 0x1B); 8927 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8928 ins_pipe( ialu_reg_reg_long ); 8929 %} 8930 8931 // Subtract Long Register with Immediate 8932 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8933 match(Set dst (SubL dst src)); 8934 effect(KILL cr); 8935 format %{ "SUB $dst.lo,$src.lo\n\t" 8936 "SBB $dst.hi,$src.hi" %} 8937 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8938 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8939 ins_pipe( ialu_reg_long ); 8940 %} 8941 8942 // Subtract Long Register with Memory 8943 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8944 match(Set dst (SubL dst (LoadL mem))); 8945 effect(KILL cr); 8946 ins_cost(125); 8947 format %{ "SUB $dst.lo,$mem\n\t" 8948 "SBB $dst.hi,$mem+4" %} 8949 opcode(0x2B, 0x1B); 8950 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8951 ins_pipe( ialu_reg_long_mem ); 8952 %} 8953 8954 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8955 match(Set dst (SubL zero dst)); 8956 effect(KILL cr); 8957 ins_cost(300); 8958 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8959 ins_encode( 
neg_long(dst) ); 8960 ins_pipe( ialu_reg_reg_long ); 8961 %} 8962 8963 // And Long Register with Register 8964 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8965 match(Set dst (AndL dst src)); 8966 effect(KILL cr); 8967 format %{ "AND $dst.lo,$src.lo\n\t" 8968 "AND $dst.hi,$src.hi" %} 8969 opcode(0x23,0x23); 8970 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8971 ins_pipe( ialu_reg_reg_long ); 8972 %} 8973 8974 // And Long Register with Immediate 8975 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8976 match(Set dst (AndL dst src)); 8977 effect(KILL cr); 8978 format %{ "AND $dst.lo,$src.lo\n\t" 8979 "AND $dst.hi,$src.hi" %} 8980 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8981 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8982 ins_pipe( ialu_reg_long ); 8983 %} 8984 8985 // And Long Register with Memory 8986 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8987 match(Set dst (AndL dst (LoadL mem))); 8988 effect(KILL cr); 8989 ins_cost(125); 8990 format %{ "AND $dst.lo,$mem\n\t" 8991 "AND $dst.hi,$mem+4" %} 8992 opcode(0x23, 0x23); 8993 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8994 ins_pipe( ialu_reg_long_mem ); 8995 %} 8996 8997 // BMI1 instructions 8998 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8999 match(Set dst (AndL (XorL src1 minus_1) src2)); 9000 predicate(UseBMI1Instructions); 9001 effect(KILL cr, TEMP dst); 9002 9003 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 9004 "ANDNL $dst.hi, $src1.hi, $src2.hi" 9005 %} 9006 9007 ins_encode %{ 9008 Register Rdst = $dst$$Register; 9009 Register Rsrc1 = $src1$$Register; 9010 Register Rsrc2 = $src2$$Register; 9011 __ andnl(Rdst, Rsrc1, Rsrc2); 9012 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 9013 %} 9014 ins_pipe(ialu_reg_reg_long); 9015 %} 9016 9017 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, 
immL_M1 minus_1, eFlagsReg cr) %{ 9018 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 9019 predicate(UseBMI1Instructions); 9020 effect(KILL cr, TEMP dst); 9021 9022 ins_cost(125); 9023 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 9024 "ANDNL $dst.hi, $src1.hi, $src2+4" 9025 %} 9026 9027 ins_encode %{ 9028 Register Rdst = $dst$$Register; 9029 Register Rsrc1 = $src1$$Register; 9030 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 9031 9032 __ andnl(Rdst, Rsrc1, $src2$$Address); 9033 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 9034 %} 9035 ins_pipe(ialu_reg_mem); 9036 %} 9037 9038 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 9039 match(Set dst (AndL (SubL imm_zero src) src)); 9040 predicate(UseBMI1Instructions); 9041 effect(KILL cr, TEMP dst); 9042 9043 format %{ "MOVL $dst.hi, 0\n\t" 9044 "BLSIL $dst.lo, $src.lo\n\t" 9045 "JNZ done\n\t" 9046 "BLSIL $dst.hi, $src.hi\n" 9047 "done:" 9048 %} 9049 9050 ins_encode %{ 9051 Label done; 9052 Register Rdst = $dst$$Register; 9053 Register Rsrc = $src$$Register; 9054 __ movl(HIGH_FROM_LOW(Rdst), 0); 9055 __ blsil(Rdst, Rsrc); 9056 __ jccb(Assembler::notZero, done); 9057 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9058 __ bind(done); 9059 %} 9060 ins_pipe(ialu_reg); 9061 %} 9062 9063 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 9064 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 9065 predicate(UseBMI1Instructions); 9066 effect(KILL cr, TEMP dst); 9067 9068 ins_cost(125); 9069 format %{ "MOVL $dst.hi, 0\n\t" 9070 "BLSIL $dst.lo, $src\n\t" 9071 "JNZ done\n\t" 9072 "BLSIL $dst.hi, $src+4\n" 9073 "done:" 9074 %} 9075 9076 ins_encode %{ 9077 Label done; 9078 Register Rdst = $dst$$Register; 9079 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9080 9081 __ movl(HIGH_FROM_LOW(Rdst), 0); 9082 
__ blsil(Rdst, $src$$Address); 9083 __ jccb(Assembler::notZero, done); 9084 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 9085 __ bind(done); 9086 %} 9087 ins_pipe(ialu_reg_mem); 9088 %} 9089 9090 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9091 %{ 9092 match(Set dst (XorL (AddL src minus_1) src)); 9093 predicate(UseBMI1Instructions); 9094 effect(KILL cr, TEMP dst); 9095 9096 format %{ "MOVL $dst.hi, 0\n\t" 9097 "BLSMSKL $dst.lo, $src.lo\n\t" 9098 "JNC done\n\t" 9099 "BLSMSKL $dst.hi, $src.hi\n" 9100 "done:" 9101 %} 9102 9103 ins_encode %{ 9104 Label done; 9105 Register Rdst = $dst$$Register; 9106 Register Rsrc = $src$$Register; 9107 __ movl(HIGH_FROM_LOW(Rdst), 0); 9108 __ blsmskl(Rdst, Rsrc); 9109 __ jccb(Assembler::carryClear, done); 9110 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9111 __ bind(done); 9112 %} 9113 9114 ins_pipe(ialu_reg); 9115 %} 9116 9117 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9118 %{ 9119 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 9120 predicate(UseBMI1Instructions); 9121 effect(KILL cr, TEMP dst); 9122 9123 ins_cost(125); 9124 format %{ "MOVL $dst.hi, 0\n\t" 9125 "BLSMSKL $dst.lo, $src\n\t" 9126 "JNC done\n\t" 9127 "BLSMSKL $dst.hi, $src+4\n" 9128 "done:" 9129 %} 9130 9131 ins_encode %{ 9132 Label done; 9133 Register Rdst = $dst$$Register; 9134 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9135 9136 __ movl(HIGH_FROM_LOW(Rdst), 0); 9137 __ blsmskl(Rdst, $src$$Address); 9138 __ jccb(Assembler::carryClear, done); 9139 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 9140 __ bind(done); 9141 %} 9142 9143 ins_pipe(ialu_reg_mem); 9144 %} 9145 9146 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9147 %{ 9148 match(Set dst (AndL (AddL src minus_1) src) ); 9149 predicate(UseBMI1Instructions); 9150 effect(KILL cr, TEMP dst); 9151 9152 format %{ "MOVL $dst.hi, $src.hi\n\t" 
9153 "BLSRL $dst.lo, $src.lo\n\t" 9154 "JNC done\n\t" 9155 "BLSRL $dst.hi, $src.hi\n" 9156 "done:" 9157 %} 9158 9159 ins_encode %{ 9160 Label done; 9161 Register Rdst = $dst$$Register; 9162 Register Rsrc = $src$$Register; 9163 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9164 __ blsrl(Rdst, Rsrc); 9165 __ jccb(Assembler::carryClear, done); 9166 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9167 __ bind(done); 9168 %} 9169 9170 ins_pipe(ialu_reg); 9171 %} 9172 9173 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9174 %{ 9175 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9176 predicate(UseBMI1Instructions); 9177 effect(KILL cr, TEMP dst); 9178 9179 ins_cost(125); 9180 format %{ "MOVL $dst.hi, $src+4\n\t" 9181 "BLSRL $dst.lo, $src\n\t" 9182 "JNC done\n\t" 9183 "BLSRL $dst.hi, $src+4\n" 9184 "done:" 9185 %} 9186 9187 ins_encode %{ 9188 Label done; 9189 Register Rdst = $dst$$Register; 9190 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9191 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9192 __ blsrl(Rdst, $src$$Address); 9193 __ jccb(Assembler::carryClear, done); 9194 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9195 __ bind(done); 9196 %} 9197 9198 ins_pipe(ialu_reg_mem); 9199 %} 9200 9201 // Or Long Register with Register 9202 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9203 match(Set dst (OrL dst src)); 9204 effect(KILL cr); 9205 format %{ "OR $dst.lo,$src.lo\n\t" 9206 "OR $dst.hi,$src.hi" %} 9207 opcode(0x0B,0x0B); 9208 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9209 ins_pipe( ialu_reg_reg_long ); 9210 %} 9211 9212 // Or Long Register with Immediate 9213 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9214 match(Set dst (OrL dst src)); 9215 effect(KILL cr); 9216 format %{ "OR $dst.lo,$src.lo\n\t" 9217 "OR $dst.hi,$src.hi" %} 9218 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9219 ins_encode( Long_OpcSErm_Lo( dst, src ), 
Long_OpcSErm_Hi( dst, src ) ); 9220 ins_pipe( ialu_reg_long ); 9221 %} 9222 9223 // Or Long Register with Memory 9224 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9225 match(Set dst (OrL dst (LoadL mem))); 9226 effect(KILL cr); 9227 ins_cost(125); 9228 format %{ "OR $dst.lo,$mem\n\t" 9229 "OR $dst.hi,$mem+4" %} 9230 opcode(0x0B,0x0B); 9231 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9232 ins_pipe( ialu_reg_long_mem ); 9233 %} 9234 9235 // Xor Long Register with Register 9236 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9237 match(Set dst (XorL dst src)); 9238 effect(KILL cr); 9239 format %{ "XOR $dst.lo,$src.lo\n\t" 9240 "XOR $dst.hi,$src.hi" %} 9241 opcode(0x33,0x33); 9242 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9243 ins_pipe( ialu_reg_reg_long ); 9244 %} 9245 9246 // Xor Long Register with Immediate -1 9247 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9248 match(Set dst (XorL dst imm)); 9249 format %{ "NOT $dst.lo\n\t" 9250 "NOT $dst.hi" %} 9251 ins_encode %{ 9252 __ notl($dst$$Register); 9253 __ notl(HIGH_FROM_LOW($dst$$Register)); 9254 %} 9255 ins_pipe( ialu_reg_long ); 9256 %} 9257 9258 // Xor Long Register with Immediate 9259 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9260 match(Set dst (XorL dst src)); 9261 effect(KILL cr); 9262 format %{ "XOR $dst.lo,$src.lo\n\t" 9263 "XOR $dst.hi,$src.hi" %} 9264 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9265 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9266 ins_pipe( ialu_reg_long ); 9267 %} 9268 9269 // Xor Long Register with Memory 9270 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9271 match(Set dst (XorL dst (LoadL mem))); 9272 effect(KILL cr); 9273 ins_cost(125); 9274 format %{ "XOR $dst.lo,$mem\n\t" 9275 "XOR $dst.hi,$mem+4" %} 9276 opcode(0x33,0x33); 9277 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9278 ins_pipe( ialu_reg_long_mem ); 
9279 %} 9280 9281 // Shift Left Long by 1 9282 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9283 predicate(UseNewLongLShift); 9284 match(Set dst (LShiftL dst cnt)); 9285 effect(KILL cr); 9286 ins_cost(100); 9287 format %{ "ADD $dst.lo,$dst.lo\n\t" 9288 "ADC $dst.hi,$dst.hi" %} 9289 ins_encode %{ 9290 __ addl($dst$$Register,$dst$$Register); 9291 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9292 %} 9293 ins_pipe( ialu_reg_long ); 9294 %} 9295 9296 // Shift Left Long by 2 9297 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9298 predicate(UseNewLongLShift); 9299 match(Set dst (LShiftL dst cnt)); 9300 effect(KILL cr); 9301 ins_cost(100); 9302 format %{ "ADD $dst.lo,$dst.lo\n\t" 9303 "ADC $dst.hi,$dst.hi\n\t" 9304 "ADD $dst.lo,$dst.lo\n\t" 9305 "ADC $dst.hi,$dst.hi" %} 9306 ins_encode %{ 9307 __ addl($dst$$Register,$dst$$Register); 9308 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9309 __ addl($dst$$Register,$dst$$Register); 9310 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9311 %} 9312 ins_pipe( ialu_reg_long ); 9313 %} 9314 9315 // Shift Left Long by 3 9316 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9317 predicate(UseNewLongLShift); 9318 match(Set dst (LShiftL dst cnt)); 9319 effect(KILL cr); 9320 ins_cost(100); 9321 format %{ "ADD $dst.lo,$dst.lo\n\t" 9322 "ADC $dst.hi,$dst.hi\n\t" 9323 "ADD $dst.lo,$dst.lo\n\t" 9324 "ADC $dst.hi,$dst.hi\n\t" 9325 "ADD $dst.lo,$dst.lo\n\t" 9326 "ADC $dst.hi,$dst.hi" %} 9327 ins_encode %{ 9328 __ addl($dst$$Register,$dst$$Register); 9329 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9330 __ addl($dst$$Register,$dst$$Register); 9331 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9332 __ addl($dst$$Register,$dst$$Register); 9333 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9334 %} 9335 ins_pipe( ialu_reg_long ); 9336 %} 9337 9338 // Shift Left 
Long by 1-31 9339 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9340 match(Set dst (LShiftL dst cnt)); 9341 effect(KILL cr); 9342 ins_cost(200); 9343 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9344 "SHL $dst.lo,$cnt" %} 9345 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9346 ins_encode( move_long_small_shift(dst,cnt) ); 9347 ins_pipe( ialu_reg_long ); 9348 %} 9349 9350 // Shift Left Long by 32-63 9351 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9352 match(Set dst (LShiftL dst cnt)); 9353 effect(KILL cr); 9354 ins_cost(300); 9355 format %{ "MOV $dst.hi,$dst.lo\n" 9356 "\tSHL $dst.hi,$cnt-32\n" 9357 "\tXOR $dst.lo,$dst.lo" %} 9358 opcode(0xC1, 0x4); /* C1 /4 ib */ 9359 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9360 ins_pipe( ialu_reg_long ); 9361 %} 9362 9363 // Shift Left Long by variable 9364 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9365 match(Set dst (LShiftL dst shift)); 9366 effect(KILL cr); 9367 ins_cost(500+200); 9368 size(17); 9369 format %{ "TEST $shift,32\n\t" 9370 "JEQ,s small\n\t" 9371 "MOV $dst.hi,$dst.lo\n\t" 9372 "XOR $dst.lo,$dst.lo\n" 9373 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9374 "SHL $dst.lo,$shift" %} 9375 ins_encode( shift_left_long( dst, shift ) ); 9376 ins_pipe( pipe_slow ); 9377 %} 9378 9379 // Shift Right Long by 1-31 9380 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9381 match(Set dst (URShiftL dst cnt)); 9382 effect(KILL cr); 9383 ins_cost(200); 9384 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9385 "SHR $dst.hi,$cnt" %} 9386 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9387 ins_encode( move_long_small_shift(dst,cnt) ); 9388 ins_pipe( ialu_reg_long ); 9389 %} 9390 9391 // Shift Right Long by 32-63 9392 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9393 match(Set dst (URShiftL dst cnt)); 9394 effect(KILL cr); 9395 ins_cost(300); 9396 format %{ "MOV $dst.lo,$dst.hi\n" 9397 "\tSHR $dst.lo,$cnt-32\n" 9398 "\tXOR 
$dst.hi,$dst.hi" %} 9399 opcode(0xC1, 0x5); /* C1 /5 ib */ 9400 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9401 ins_pipe( ialu_reg_long ); 9402 %} 9403 9404 // Shift Right Long by variable 9405 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9406 match(Set dst (URShiftL dst shift)); 9407 effect(KILL cr); 9408 ins_cost(600); 9409 size(17); 9410 format %{ "TEST $shift,32\n\t" 9411 "JEQ,s small\n\t" 9412 "MOV $dst.lo,$dst.hi\n\t" 9413 "XOR $dst.hi,$dst.hi\n" 9414 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9415 "SHR $dst.hi,$shift" %} 9416 ins_encode( shift_right_long( dst, shift ) ); 9417 ins_pipe( pipe_slow ); 9418 %} 9419 9420 // Shift Right Long by 1-31 9421 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9422 match(Set dst (RShiftL dst cnt)); 9423 effect(KILL cr); 9424 ins_cost(200); 9425 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9426 "SAR $dst.hi,$cnt" %} 9427 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9428 ins_encode( move_long_small_shift(dst,cnt) ); 9429 ins_pipe( ialu_reg_long ); 9430 %} 9431 9432 // Shift Right Long by 32-63 9433 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9434 match(Set dst (RShiftL dst cnt)); 9435 effect(KILL cr); 9436 ins_cost(300); 9437 format %{ "MOV $dst.lo,$dst.hi\n" 9438 "\tSAR $dst.lo,$cnt-32\n" 9439 "\tSAR $dst.hi,31" %} 9440 opcode(0xC1, 0x7); /* C1 /7 ib */ 9441 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9442 ins_pipe( ialu_reg_long ); 9443 %} 9444 9445 // Shift Right arithmetic Long by variable 9446 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9447 match(Set dst (RShiftL dst shift)); 9448 effect(KILL cr); 9449 ins_cost(600); 9450 size(18); 9451 format %{ "TEST $shift,32\n\t" 9452 "JEQ,s small\n\t" 9453 "MOV $dst.lo,$dst.hi\n\t" 9454 "SAR $dst.hi,31\n" 9455 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9456 "SAR $dst.hi,$shift" %} 9457 ins_encode( shift_right_arith_long( dst, shift ) ); 9458 ins_pipe( pipe_slow ); 9459 %} 9460 9461 
9462 //----------Double Instructions------------------------------------------------ 9463 // Double Math 9464 9465 // Compare & branch 9466 9467 // P6 version of float compare, sets condition codes in EFLAGS 9468 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9469 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9470 match(Set cr (CmpD src1 src2)); 9471 effect(KILL rax); 9472 ins_cost(150); 9473 format %{ "FLD $src1\n\t" 9474 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9475 "JNP exit\n\t" 9476 "MOV ah,1 // saw a NaN, set CF\n\t" 9477 "SAHF\n" 9478 "exit:\tNOP // avoid branch to branch" %} 9479 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9480 ins_encode( Push_Reg_DPR(src1), 9481 OpcP, RegOpc(src2), 9482 cmpF_P6_fixup ); 9483 ins_pipe( pipe_slow ); 9484 %} 9485 9486 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9487 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9488 match(Set cr (CmpD src1 src2)); 9489 ins_cost(150); 9490 format %{ "FLD $src1\n\t" 9491 "FUCOMIP ST,$src2 // P6 instruction" %} 9492 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9493 ins_encode( Push_Reg_DPR(src1), 9494 OpcP, RegOpc(src2)); 9495 ins_pipe( pipe_slow ); 9496 %} 9497 9498 // Compare & branch 9499 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9500 predicate(UseSSE<=1); 9501 match(Set cr (CmpD src1 src2)); 9502 effect(KILL rax); 9503 ins_cost(200); 9504 format %{ "FLD $src1\n\t" 9505 "FCOMp $src2\n\t" 9506 "FNSTSW AX\n\t" 9507 "TEST AX,0x400\n\t" 9508 "JZ,s flags\n\t" 9509 "MOV AH,1\t# unordered treat as LT\n" 9510 "flags:\tSAHF" %} 9511 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9512 ins_encode( Push_Reg_DPR(src1), 9513 OpcP, RegOpc(src2), 9514 fpu_flags); 9515 ins_pipe( pipe_slow ); 9516 %} 9517 9518 // Compare vs zero into -1,0,1 9519 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9520 predicate(UseSSE<=1); 9521 match(Set dst (CmpD3 src1 zero)); 9522 effect(KILL 
cr, KILL rax); 9523 ins_cost(280); 9524 format %{ "FTSTD $dst,$src1" %} 9525 opcode(0xE4, 0xD9); 9526 ins_encode( Push_Reg_DPR(src1), 9527 OpcS, OpcP, PopFPU, 9528 CmpF_Result(dst)); 9529 ins_pipe( pipe_slow ); 9530 %} 9531 9532 // Compare into -1,0,1 9533 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9534 predicate(UseSSE<=1); 9535 match(Set dst (CmpD3 src1 src2)); 9536 effect(KILL cr, KILL rax); 9537 ins_cost(300); 9538 format %{ "FCMPD $dst,$src1,$src2" %} 9539 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9540 ins_encode( Push_Reg_DPR(src1), 9541 OpcP, RegOpc(src2), 9542 CmpF_Result(dst)); 9543 ins_pipe( pipe_slow ); 9544 %} 9545 9546 // float compare and set condition codes in EFLAGS by XMM regs 9547 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9548 predicate(UseSSE>=2); 9549 match(Set cr (CmpD src1 src2)); 9550 ins_cost(145); 9551 format %{ "UCOMISD $src1,$src2\n\t" 9552 "JNP,s exit\n\t" 9553 "PUSHF\t# saw NaN, set CF\n\t" 9554 "AND [rsp], #0xffffff2b\n\t" 9555 "POPF\n" 9556 "exit:" %} 9557 ins_encode %{ 9558 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9559 emit_cmpfp_fixup(_masm); 9560 %} 9561 ins_pipe( pipe_slow ); 9562 %} 9563 9564 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9565 predicate(UseSSE>=2); 9566 match(Set cr (CmpD src1 src2)); 9567 ins_cost(100); 9568 format %{ "UCOMISD $src1,$src2" %} 9569 ins_encode %{ 9570 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9571 %} 9572 ins_pipe( pipe_slow ); 9573 %} 9574 9575 // float compare and set condition codes in EFLAGS by XMM regs 9576 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9577 predicate(UseSSE>=2); 9578 match(Set cr (CmpD src1 (LoadD src2))); 9579 ins_cost(145); 9580 format %{ "UCOMISD $src1,$src2\n\t" 9581 "JNP,s exit\n\t" 9582 "PUSHF\t# saw NaN, set CF\n\t" 9583 "AND [rsp], #0xffffff2b\n\t" 9584 "POPF\n" 9585 "exit:" %} 9586 ins_encode %{ 9587 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9588 
emit_cmpfp_fixup(_masm); 9589 %} 9590 ins_pipe( pipe_slow ); 9591 %} 9592 9593 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9594 predicate(UseSSE>=2); 9595 match(Set cr (CmpD src1 (LoadD src2))); 9596 ins_cost(100); 9597 format %{ "UCOMISD $src1,$src2" %} 9598 ins_encode %{ 9599 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9600 %} 9601 ins_pipe( pipe_slow ); 9602 %} 9603 9604 // Compare into -1,0,1 in XMM 9605 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9606 predicate(UseSSE>=2); 9607 match(Set dst (CmpD3 src1 src2)); 9608 effect(KILL cr); 9609 ins_cost(255); 9610 format %{ "UCOMISD $src1, $src2\n\t" 9611 "MOV $dst, #-1\n\t" 9612 "JP,s done\n\t" 9613 "JB,s done\n\t" 9614 "SETNE $dst\n\t" 9615 "MOVZB $dst, $dst\n" 9616 "done:" %} 9617 ins_encode %{ 9618 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9619 emit_cmpfp3(_masm, $dst$$Register); 9620 %} 9621 ins_pipe( pipe_slow ); 9622 %} 9623 9624 // Compare into -1,0,1 in XMM and memory 9625 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9626 predicate(UseSSE>=2); 9627 match(Set dst (CmpD3 src1 (LoadD src2))); 9628 effect(KILL cr); 9629 ins_cost(275); 9630 format %{ "UCOMISD $src1, $src2\n\t" 9631 "MOV $dst, #-1\n\t" 9632 "JP,s done\n\t" 9633 "JB,s done\n\t" 9634 "SETNE $dst\n\t" 9635 "MOVZB $dst, $dst\n" 9636 "done:" %} 9637 ins_encode %{ 9638 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9639 emit_cmpfp3(_masm, $dst$$Register); 9640 %} 9641 ins_pipe( pipe_slow ); 9642 %} 9643 9644 9645 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9646 predicate (UseSSE <=1); 9647 match(Set dst (SubD dst src)); 9648 9649 format %{ "FLD $src\n\t" 9650 "DSUBp $dst,ST" %} 9651 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9652 ins_cost(150); 9653 ins_encode( Push_Reg_DPR(src), 9654 OpcP, RegOpc(dst) ); 9655 ins_pipe( fpu_reg_reg ); 9656 %} 9657 9658 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9659 predicate (UseSSE <=1); 9660 
match(Set dst (RoundDouble (SubD src1 src2))); 9661 ins_cost(250); 9662 9663 format %{ "FLD $src2\n\t" 9664 "DSUB ST,$src1\n\t" 9665 "FSTP_D $dst\t# D-round" %} 9666 opcode(0xD8, 0x5); 9667 ins_encode( Push_Reg_DPR(src2), 9668 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9669 ins_pipe( fpu_mem_reg_reg ); 9670 %} 9671 9672 9673 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9674 predicate (UseSSE <=1); 9675 match(Set dst (SubD dst (LoadD src))); 9676 ins_cost(150); 9677 9678 format %{ "FLD $src\n\t" 9679 "DSUBp $dst,ST" %} 9680 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9681 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9682 OpcP, RegOpc(dst) ); 9683 ins_pipe( fpu_reg_mem ); 9684 %} 9685 9686 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9687 predicate (UseSSE<=1); 9688 match(Set dst (AbsD src)); 9689 ins_cost(100); 9690 format %{ "FABS" %} 9691 opcode(0xE1, 0xD9); 9692 ins_encode( OpcS, OpcP ); 9693 ins_pipe( fpu_reg_reg ); 9694 %} 9695 9696 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9697 predicate(UseSSE<=1); 9698 match(Set dst (NegD src)); 9699 ins_cost(100); 9700 format %{ "FCHS" %} 9701 opcode(0xE0, 0xD9); 9702 ins_encode( OpcS, OpcP ); 9703 ins_pipe( fpu_reg_reg ); 9704 %} 9705 9706 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9707 predicate(UseSSE<=1); 9708 match(Set dst (AddD dst src)); 9709 format %{ "FLD $src\n\t" 9710 "DADD $dst,ST" %} 9711 size(4); 9712 ins_cost(150); 9713 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9714 ins_encode( Push_Reg_DPR(src), 9715 OpcP, RegOpc(dst) ); 9716 ins_pipe( fpu_reg_reg ); 9717 %} 9718 9719 9720 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9721 predicate(UseSSE<=1); 9722 match(Set dst (RoundDouble (AddD src1 src2))); 9723 ins_cost(250); 9724 9725 format %{ "FLD $src2\n\t" 9726 "DADD ST,$src1\n\t" 9727 "FSTP_D $dst\t# D-round" %} 9728 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9729 ins_encode( Push_Reg_DPR(src2), 9730 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9731 ins_pipe( 
fpu_mem_reg_reg ); 9732 %} 9733 9734 9735 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9736 predicate(UseSSE<=1); 9737 match(Set dst (AddD dst (LoadD src))); 9738 ins_cost(150); 9739 9740 format %{ "FLD $src\n\t" 9741 "DADDp $dst,ST" %} 9742 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9743 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9744 OpcP, RegOpc(dst) ); 9745 ins_pipe( fpu_reg_mem ); 9746 %} 9747 9748 // add-to-memory 9749 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9750 predicate(UseSSE<=1); 9751 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9752 ins_cost(150); 9753 9754 format %{ "FLD_D $dst\n\t" 9755 "DADD ST,$src\n\t" 9756 "FST_D $dst" %} 9757 opcode(0xDD, 0x0); 9758 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9759 Opcode(0xD8), RegOpc(src), 9760 set_instruction_start, 9761 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9762 ins_pipe( fpu_reg_mem ); 9763 %} 9764 9765 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9766 predicate(UseSSE<=1); 9767 match(Set dst (AddD dst con)); 9768 ins_cost(125); 9769 format %{ "FLD1\n\t" 9770 "DADDp $dst,ST" %} 9771 ins_encode %{ 9772 __ fld1(); 9773 __ faddp($dst$$reg); 9774 %} 9775 ins_pipe(fpu_reg); 9776 %} 9777 9778 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9779 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9780 match(Set dst (AddD dst con)); 9781 ins_cost(200); 9782 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9783 "DADDp $dst,ST" %} 9784 ins_encode %{ 9785 __ fld_d($constantaddress($con)); 9786 __ faddp($dst$$reg); 9787 %} 9788 ins_pipe(fpu_reg_mem); 9789 %} 9790 9791 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9792 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9793 match(Set dst (RoundDouble (AddD src con))); 9794 ins_cost(200); 9795 format %{ "FLD_D [$constantaddress]\t# load from 
constant table: double=$con\n\t" 9796 "DADD ST,$src\n\t" 9797 "FSTP_D $dst\t# D-round" %} 9798 ins_encode %{ 9799 __ fld_d($constantaddress($con)); 9800 __ fadd($src$$reg); 9801 __ fstp_d(Address(rsp, $dst$$disp)); 9802 %} 9803 ins_pipe(fpu_mem_reg_con); 9804 %} 9805 9806 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9807 predicate(UseSSE<=1); 9808 match(Set dst (MulD dst src)); 9809 format %{ "FLD $src\n\t" 9810 "DMULp $dst,ST" %} 9811 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9812 ins_cost(150); 9813 ins_encode( Push_Reg_DPR(src), 9814 OpcP, RegOpc(dst) ); 9815 ins_pipe( fpu_reg_reg ); 9816 %} 9817 9818 // Strict FP instruction biases argument before multiply then 9819 // biases result to avoid double rounding of subnormals. 9820 // 9821 // scale arg1 by multiplying arg1 by 2^(-15360) 9822 // load arg2 9823 // multiply scaled arg1 by arg2 9824 // rescale product by 2^(15360) 9825 // 9826 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9827 predicate( UseSSE<=1 && Compile::current()->has_method() ); 9828 match(Set dst (MulD dst src)); 9829 ins_cost(1); // Select this instruction for all FP double multiplies 9830 9831 format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" 9832 "DMULp $dst,ST\n\t" 9833 "FLD $src\n\t" 9834 "DMULp $dst,ST\n\t" 9835 "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" 9836 "DMULp $dst,ST\n\t" %} 9837 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9838 ins_encode( strictfp_bias1(dst), 9839 Push_Reg_DPR(src), 9840 OpcP, RegOpc(dst), 9841 strictfp_bias2(dst) ); 9842 ins_pipe( fpu_reg_reg ); 9843 %} 9844 9845 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9846 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9847 match(Set dst (MulD dst con)); 9848 ins_cost(200); 9849 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9850 "DMULp $dst,ST" %} 9851 ins_encode %{ 9852 __ fld_d($constantaddress($con)); 9853 __ fmulp($dst$$reg); 9854 %} 9855 
ins_pipe(fpu_reg_mem); 9856 %} 9857 9858 9859 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9860 predicate( UseSSE<=1 ); 9861 match(Set dst (MulD dst (LoadD src))); 9862 ins_cost(200); 9863 format %{ "FLD_D $src\n\t" 9864 "DMULp $dst,ST" %} 9865 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9866 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9867 OpcP, RegOpc(dst) ); 9868 ins_pipe( fpu_reg_mem ); 9869 %} 9870 9871 // 9872 // Cisc-alternate to reg-reg multiply 9873 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9874 predicate( UseSSE<=1 ); 9875 match(Set dst (MulD src (LoadD mem))); 9876 ins_cost(250); 9877 format %{ "FLD_D $mem\n\t" 9878 "DMUL ST,$src\n\t" 9879 "FSTP_D $dst" %} 9880 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9881 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9882 OpcReg_FPR(src), 9883 Pop_Reg_DPR(dst) ); 9884 ins_pipe( fpu_reg_reg_mem ); 9885 %} 9886 9887 9888 // MACRO3 -- addDPR a mulDPR 9889 // This instruction is a '2-address' instruction in that the result goes 9890 // back to src2. This eliminates a move from the macro; possibly the 9891 // register allocator will have to add it back (and maybe not). 
9892 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9893 predicate( UseSSE<=1 ); 9894 match(Set src2 (AddD (MulD src0 src1) src2)); 9895 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9896 "DMUL ST,$src1\n\t" 9897 "DADDp $src2,ST" %} 9898 ins_cost(250); 9899 opcode(0xDD); /* LoadD DD /0 */ 9900 ins_encode( Push_Reg_FPR(src0), 9901 FMul_ST_reg(src1), 9902 FAddP_reg_ST(src2) ); 9903 ins_pipe( fpu_reg_reg_reg ); 9904 %} 9905 9906 9907 // MACRO3 -- subDPR a mulDPR 9908 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9909 predicate( UseSSE<=1 ); 9910 match(Set src2 (SubD (MulD src0 src1) src2)); 9911 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9912 "DMUL ST,$src1\n\t" 9913 "DSUBRp $src2,ST" %} 9914 ins_cost(250); 9915 ins_encode( Push_Reg_FPR(src0), 9916 FMul_ST_reg(src1), 9917 Opcode(0xDE), Opc_plus(0xE0,src2)); 9918 ins_pipe( fpu_reg_reg_reg ); 9919 %} 9920 9921 9922 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9923 predicate( UseSSE<=1 ); 9924 match(Set dst (DivD dst src)); 9925 9926 format %{ "FLD $src\n\t" 9927 "FDIVp $dst,ST" %} 9928 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9929 ins_cost(150); 9930 ins_encode( Push_Reg_DPR(src), 9931 OpcP, RegOpc(dst) ); 9932 ins_pipe( fpu_reg_reg ); 9933 %} 9934 9935 // Strict FP instruction biases argument before division then 9936 // biases result, to avoid double rounding of subnormals. 
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // An instruct carries exactly one predicate clause. The UseSSE<=1 test is
  // folded into the combined clause below (matching strictfp_mulDPR_reg);
  // has_method() selects this strict-FP form only when compiling Java code.
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(01); // Select this instruction for all FP double divides

  format %{ "FLD    StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "FDIVp  $dst,ST\n\t"
            "FLD    StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB    ESP,8\t # DMOD\n"
          "\tMOVSD  [ESP+0],$src1\n"
          "\tFLD_D  [ESP+0]\n"
          "\tMOVSD  [ESP+0],$src0\n"
          "\tFLD_D  [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD  $dst,[ESP+0]\n"
          "\tADD    ESP,8\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

instruct atanDPR_reg(regDPR dst,
regDPR src) %{ 10003 predicate (UseSSE<=1); 10004 match(Set dst(AtanD dst src)); 10005 format %{ "DATA $dst,$src" %} 10006 opcode(0xD9, 0xF3); 10007 ins_encode( Push_Reg_DPR(src), 10008 OpcP, OpcS, RegOpc(dst) ); 10009 ins_pipe( pipe_slow ); 10010 %} 10011 10012 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 10013 predicate (UseSSE>=2); 10014 match(Set dst(AtanD dst src)); 10015 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 10016 format %{ "DATA $dst,$src" %} 10017 opcode(0xD9, 0xF3); 10018 ins_encode( Push_SrcD(src), 10019 OpcP, OpcS, Push_ResultD(dst) ); 10020 ins_pipe( pipe_slow ); 10021 %} 10022 10023 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 10024 predicate (UseSSE<=1); 10025 match(Set dst (SqrtD src)); 10026 format %{ "DSQRT $dst,$src" %} 10027 opcode(0xFA, 0xD9); 10028 ins_encode( Push_Reg_DPR(src), 10029 OpcS, OpcP, Pop_Reg_DPR(dst) ); 10030 ins_pipe( pipe_slow ); 10031 %} 10032 10033 //-------------Float Instructions------------------------------- 10034 // Float Math 10035 10036 // Code for float compare: 10037 // fcompp(); 10038 // fwait(); fnstsw_ax(); 10039 // sahf(); 10040 // movl(dst, unordered_result); 10041 // jcc(Assembler::parity, exit); 10042 // movl(dst, less_result); 10043 // jcc(Assembler::below, exit); 10044 // movl(dst, equal_result); 10045 // jcc(Assembler::equal, exit); 10046 // movl(dst, greater_result); 10047 // exit: 10048 10049 // P6 version of float compare, sets condition codes in EFLAGS 10050 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10051 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10052 match(Set cr (CmpF src1 src2)); 10053 effect(KILL rax); 10054 ins_cost(150); 10055 format %{ "FLD $src1\n\t" 10056 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10057 "JNP exit\n\t" 10058 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10059 "SAHF\n" 10060 "exit:\tNOP // avoid branch to branch" %} 10061 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10062 ins_encode( 
Push_Reg_DPR(src1), 10063 OpcP, RegOpc(src2), 10064 cmpF_P6_fixup ); 10065 ins_pipe( pipe_slow ); 10066 %} 10067 10068 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10069 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10070 match(Set cr (CmpF src1 src2)); 10071 ins_cost(100); 10072 format %{ "FLD $src1\n\t" 10073 "FUCOMIP ST,$src2 // P6 instruction" %} 10074 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10075 ins_encode( Push_Reg_DPR(src1), 10076 OpcP, RegOpc(src2)); 10077 ins_pipe( pipe_slow ); 10078 %} 10079 10080 10081 // Compare & branch 10082 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10083 predicate(UseSSE == 0); 10084 match(Set cr (CmpF src1 src2)); 10085 effect(KILL rax); 10086 ins_cost(200); 10087 format %{ "FLD $src1\n\t" 10088 "FCOMp $src2\n\t" 10089 "FNSTSW AX\n\t" 10090 "TEST AX,0x400\n\t" 10091 "JZ,s flags\n\t" 10092 "MOV AH,1\t# unordered treat as LT\n" 10093 "flags:\tSAHF" %} 10094 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10095 ins_encode( Push_Reg_DPR(src1), 10096 OpcP, RegOpc(src2), 10097 fpu_flags); 10098 ins_pipe( pipe_slow ); 10099 %} 10100 10101 // Compare vs zero into -1,0,1 10102 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10103 predicate(UseSSE == 0); 10104 match(Set dst (CmpF3 src1 zero)); 10105 effect(KILL cr, KILL rax); 10106 ins_cost(280); 10107 format %{ "FTSTF $dst,$src1" %} 10108 opcode(0xE4, 0xD9); 10109 ins_encode( Push_Reg_DPR(src1), 10110 OpcS, OpcP, PopFPU, 10111 CmpF_Result(dst)); 10112 ins_pipe( pipe_slow ); 10113 %} 10114 10115 // Compare into -1,0,1 10116 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10117 predicate(UseSSE == 0); 10118 match(Set dst (CmpF3 src1 src2)); 10119 effect(KILL cr, KILL rax); 10120 ins_cost(300); 10121 format %{ "FCMPF $dst,$src1,$src2" %} 10122 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10123 ins_encode( Push_Reg_DPR(src1), 10124 OpcP, RegOpc(src2), 10125 
CmpF_Result(dst)); 10126 ins_pipe( pipe_slow ); 10127 %} 10128 10129 // float compare and set condition codes in EFLAGS by XMM regs 10130 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10131 predicate(UseSSE>=1); 10132 match(Set cr (CmpF src1 src2)); 10133 ins_cost(145); 10134 format %{ "UCOMISS $src1,$src2\n\t" 10135 "JNP,s exit\n\t" 10136 "PUSHF\t# saw NaN, set CF\n\t" 10137 "AND [rsp], #0xffffff2b\n\t" 10138 "POPF\n" 10139 "exit:" %} 10140 ins_encode %{ 10141 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10142 emit_cmpfp_fixup(_masm); 10143 %} 10144 ins_pipe( pipe_slow ); 10145 %} 10146 10147 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10148 predicate(UseSSE>=1); 10149 match(Set cr (CmpF src1 src2)); 10150 ins_cost(100); 10151 format %{ "UCOMISS $src1,$src2" %} 10152 ins_encode %{ 10153 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10154 %} 10155 ins_pipe( pipe_slow ); 10156 %} 10157 10158 // float compare and set condition codes in EFLAGS by XMM regs 10159 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10160 predicate(UseSSE>=1); 10161 match(Set cr (CmpF src1 (LoadF src2))); 10162 ins_cost(165); 10163 format %{ "UCOMISS $src1,$src2\n\t" 10164 "JNP,s exit\n\t" 10165 "PUSHF\t# saw NaN, set CF\n\t" 10166 "AND [rsp], #0xffffff2b\n\t" 10167 "POPF\n" 10168 "exit:" %} 10169 ins_encode %{ 10170 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10171 emit_cmpfp_fixup(_masm); 10172 %} 10173 ins_pipe( pipe_slow ); 10174 %} 10175 10176 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10177 predicate(UseSSE>=1); 10178 match(Set cr (CmpF src1 (LoadF src2))); 10179 ins_cost(100); 10180 format %{ "UCOMISS $src1,$src2" %} 10181 ins_encode %{ 10182 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10183 %} 10184 ins_pipe( pipe_slow ); 10185 %} 10186 10187 // Compare into -1,0,1 in XMM 10188 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10189 predicate(UseSSE>=1); 10190 match(Set dst (CmpF3 
src1 src2)); 10191 effect(KILL cr); 10192 ins_cost(255); 10193 format %{ "UCOMISS $src1, $src2\n\t" 10194 "MOV $dst, #-1\n\t" 10195 "JP,s done\n\t" 10196 "JB,s done\n\t" 10197 "SETNE $dst\n\t" 10198 "MOVZB $dst, $dst\n" 10199 "done:" %} 10200 ins_encode %{ 10201 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10202 emit_cmpfp3(_masm, $dst$$Register); 10203 %} 10204 ins_pipe( pipe_slow ); 10205 %} 10206 10207 // Compare into -1,0,1 in XMM and memory 10208 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10209 predicate(UseSSE>=1); 10210 match(Set dst (CmpF3 src1 (LoadF src2))); 10211 effect(KILL cr); 10212 ins_cost(275); 10213 format %{ "UCOMISS $src1, $src2\n\t" 10214 "MOV $dst, #-1\n\t" 10215 "JP,s done\n\t" 10216 "JB,s done\n\t" 10217 "SETNE $dst\n\t" 10218 "MOVZB $dst, $dst\n" 10219 "done:" %} 10220 ins_encode %{ 10221 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10222 emit_cmpfp3(_masm, $dst$$Register); 10223 %} 10224 ins_pipe( pipe_slow ); 10225 %} 10226 10227 // Spill to obtain 24-bit precision 10228 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10229 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10230 match(Set dst (SubF src1 src2)); 10231 10232 format %{ "FSUB $dst,$src1 - $src2" %} 10233 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10234 ins_encode( Push_Reg_FPR(src1), 10235 OpcReg_FPR(src2), 10236 Pop_Mem_FPR(dst) ); 10237 ins_pipe( fpu_mem_reg_reg ); 10238 %} 10239 // 10240 // This instruction does not round to 24-bits 10241 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10242 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10243 match(Set dst (SubF dst src)); 10244 10245 format %{ "FSUB $dst,$src" %} 10246 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10247 ins_encode( Push_Reg_FPR(src), 10248 OpcP, RegOpc(dst) ); 10249 ins_pipe( fpu_reg_reg ); 10250 %} 10251 10252 // Spill to obtain 24-bit precision 10253 instruct addFPR24_reg(stackSlotF dst, 
regFPR src1, regFPR src2) %{ 10254 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10255 match(Set dst (AddF src1 src2)); 10256 10257 format %{ "FADD $dst,$src1,$src2" %} 10258 opcode(0xD8, 0x0); /* D8 C0+i */ 10259 ins_encode( Push_Reg_FPR(src2), 10260 OpcReg_FPR(src1), 10261 Pop_Mem_FPR(dst) ); 10262 ins_pipe( fpu_mem_reg_reg ); 10263 %} 10264 // 10265 // This instruction does not round to 24-bits 10266 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10267 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10268 match(Set dst (AddF dst src)); 10269 10270 format %{ "FLD $src\n\t" 10271 "FADDp $dst,ST" %} 10272 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10273 ins_encode( Push_Reg_FPR(src), 10274 OpcP, RegOpc(dst) ); 10275 ins_pipe( fpu_reg_reg ); 10276 %} 10277 10278 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10279 predicate(UseSSE==0); 10280 match(Set dst (AbsF src)); 10281 ins_cost(100); 10282 format %{ "FABS" %} 10283 opcode(0xE1, 0xD9); 10284 ins_encode( OpcS, OpcP ); 10285 ins_pipe( fpu_reg_reg ); 10286 %} 10287 10288 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10289 predicate(UseSSE==0); 10290 match(Set dst (NegF src)); 10291 ins_cost(100); 10292 format %{ "FCHS" %} 10293 opcode(0xE0, 0xD9); 10294 ins_encode( OpcS, OpcP ); 10295 ins_pipe( fpu_reg_reg ); 10296 %} 10297 10298 // Cisc-alternate to addFPR_reg 10299 // Spill to obtain 24-bit precision 10300 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10301 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10302 match(Set dst (AddF src1 (LoadF src2))); 10303 10304 format %{ "FLD $src2\n\t" 10305 "FADD ST,$src1\n\t" 10306 "FSTP_S $dst" %} 10307 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10308 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10309 OpcReg_FPR(src1), 10310 Pop_Mem_FPR(dst) ); 10311 ins_pipe( fpu_mem_reg_mem ); 10312 %} 10313 // 10314 // Cisc-alternate to addFPR_reg 10315 // This instruction does not 
round to 24-bits 10316 instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10317 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10318 match(Set dst (AddF dst (LoadF src))); 10319 10320 format %{ "FADD $dst,$src" %} 10321 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10322 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10323 OpcP, RegOpc(dst) ); 10324 ins_pipe( fpu_reg_mem ); 10325 %} 10326 10327 // // Following two instructions for _222_mpegaudio 10328 // Spill to obtain 24-bit precision 10329 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10330 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10331 match(Set dst (AddF src1 src2)); 10332 10333 format %{ "FADD $dst,$src1,$src2" %} 10334 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10335 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10336 OpcReg_FPR(src2), 10337 Pop_Mem_FPR(dst) ); 10338 ins_pipe( fpu_mem_reg_mem ); 10339 %} 10340 10341 // Cisc-spill variant 10342 // Spill to obtain 24-bit precision 10343 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10344 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10345 match(Set dst (AddF src1 (LoadF src2))); 10346 10347 format %{ "FADD $dst,$src1,$src2 cisc" %} 10348 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10349 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10350 set_instruction_start, 10351 OpcP, RMopc_Mem(secondary,src1), 10352 Pop_Mem_FPR(dst) ); 10353 ins_pipe( fpu_mem_mem_mem ); 10354 %} 10355 10356 // Spill to obtain 24-bit precision 10357 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10358 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10359 match(Set dst (AddF src1 src2)); 10360 10361 format %{ "FADD $dst,$src1,$src2" %} 10362 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10363 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10364 
set_instruction_start, 10365 OpcP, RMopc_Mem(secondary,src1), 10366 Pop_Mem_FPR(dst) ); 10367 ins_pipe( fpu_mem_mem_mem ); 10368 %} 10369 10370 10371 // Spill to obtain 24-bit precision 10372 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10373 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10374 match(Set dst (AddF src con)); 10375 format %{ "FLD $src\n\t" 10376 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10377 "FSTP_S $dst" %} 10378 ins_encode %{ 10379 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10380 __ fadd_s($constantaddress($con)); 10381 __ fstp_s(Address(rsp, $dst$$disp)); 10382 %} 10383 ins_pipe(fpu_mem_reg_con); 10384 %} 10385 // 10386 // This instruction does not round to 24-bits 10387 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10388 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10389 match(Set dst (AddF src con)); 10390 format %{ "FLD $src\n\t" 10391 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10392 "FSTP $dst" %} 10393 ins_encode %{ 10394 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10395 __ fadd_s($constantaddress($con)); 10396 __ fstp_d($dst$$reg); 10397 %} 10398 ins_pipe(fpu_reg_reg_con); 10399 %} 10400 10401 // Spill to obtain 24-bit precision 10402 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10403 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10404 match(Set dst (MulF src1 src2)); 10405 10406 format %{ "FLD $src1\n\t" 10407 "FMUL $src2\n\t" 10408 "FSTP_S $dst" %} 10409 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10410 ins_encode( Push_Reg_FPR(src1), 10411 OpcReg_FPR(src2), 10412 Pop_Mem_FPR(dst) ); 10413 ins_pipe( fpu_mem_reg_reg ); 10414 %} 10415 // 10416 // This instruction does not round to 24-bits 10417 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10418 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10419 match(Set dst 
(MulF src1 src2)); 10420 10421 format %{ "FLD $src1\n\t" 10422 "FMUL $src2\n\t" 10423 "FSTP_S $dst" %} 10424 opcode(0xD8, 0x1); /* D8 C8+i */ 10425 ins_encode( Push_Reg_FPR(src2), 10426 OpcReg_FPR(src1), 10427 Pop_Reg_FPR(dst) ); 10428 ins_pipe( fpu_reg_reg_reg ); 10429 %} 10430 10431 10432 // Spill to obtain 24-bit precision 10433 // Cisc-alternate to reg-reg multiply 10434 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10435 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10436 match(Set dst (MulF src1 (LoadF src2))); 10437 10438 format %{ "FLD_S $src2\n\t" 10439 "FMUL $src1\n\t" 10440 "FSTP_S $dst" %} 10441 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10442 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10443 OpcReg_FPR(src1), 10444 Pop_Mem_FPR(dst) ); 10445 ins_pipe( fpu_mem_reg_mem ); 10446 %} 10447 // 10448 // This instruction does not round to 24-bits 10449 // Cisc-alternate to reg-reg multiply 10450 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10451 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10452 match(Set dst (MulF src1 (LoadF src2))); 10453 10454 format %{ "FMUL $dst,$src1,$src2" %} 10455 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10456 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10457 OpcReg_FPR(src1), 10458 Pop_Reg_FPR(dst) ); 10459 ins_pipe( fpu_reg_reg_mem ); 10460 %} 10461 10462 // Spill to obtain 24-bit precision 10463 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10464 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10465 match(Set dst (MulF src1 src2)); 10466 10467 format %{ "FMUL $dst,$src1,$src2" %} 10468 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10469 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10470 set_instruction_start, 10471 OpcP, RMopc_Mem(secondary,src1), 10472 Pop_Mem_FPR(dst) ); 10473 ins_pipe( fpu_mem_mem_mem ); 10474 %} 10475 10476 // 
Spill to obtain 24-bit precision 10477 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10478 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10479 match(Set dst (MulF src con)); 10480 10481 format %{ "FLD $src\n\t" 10482 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10483 "FSTP_S $dst" %} 10484 ins_encode %{ 10485 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10486 __ fmul_s($constantaddress($con)); 10487 __ fstp_s(Address(rsp, $dst$$disp)); 10488 %} 10489 ins_pipe(fpu_mem_reg_con); 10490 %} 10491 // 10492 // This instruction does not round to 24-bits 10493 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10494 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10495 match(Set dst (MulF src con)); 10496 10497 format %{ "FLD $src\n\t" 10498 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10499 "FSTP $dst" %} 10500 ins_encode %{ 10501 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10502 __ fmul_s($constantaddress($con)); 10503 __ fstp_d($dst$$reg); 10504 %} 10505 ins_pipe(fpu_reg_reg_con); 10506 %} 10507 10508 10509 // 10510 // MACRO1 -- subsume unshared load into mulFPR 10511 // This instruction does not round to 24-bits 10512 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10513 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10514 match(Set dst (MulF (LoadF mem1) src)); 10515 10516 format %{ "FLD $mem1 ===MACRO1===\n\t" 10517 "FMUL ST,$src\n\t" 10518 "FSTP $dst" %} 10519 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10520 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10521 OpcReg_FPR(src), 10522 Pop_Reg_FPR(dst) ); 10523 ins_pipe( fpu_reg_reg_mem ); 10524 %} 10525 // 10526 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10527 // This instruction does not round to 24-bits 10528 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10529 
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10530 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10531 ins_cost(95); 10532 10533 format %{ "FLD $mem1 ===MACRO2===\n\t" 10534 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10535 "FADD ST,$src2\n\t" 10536 "FSTP $dst" %} 10537 opcode(0xD9); /* LoadF D9 /0 */ 10538 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10539 FMul_ST_reg(src1), 10540 FAdd_ST_reg(src2), 10541 Pop_Reg_FPR(dst) ); 10542 ins_pipe( fpu_reg_mem_reg_reg ); 10543 %} 10544 10545 // MACRO3 -- addFPR a mulFPR 10546 // This instruction does not round to 24-bits. It is a '2-address' 10547 // instruction in that the result goes back to src2. This eliminates 10548 // a move from the macro; possibly the register allocator will have 10549 // to add it back (and maybe not). 10550 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ 10551 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10552 match(Set src2 (AddF (MulF src0 src1) src2)); 10553 10554 format %{ "FLD $src0 ===MACRO3===\n\t" 10555 "FMUL ST,$src1\n\t" 10556 "FADDP $src2,ST" %} 10557 opcode(0xD9); /* LoadF D9 /0 */ 10558 ins_encode( Push_Reg_FPR(src0), 10559 FMul_ST_reg(src1), 10560 FAddP_reg_ST(src2) ); 10561 ins_pipe( fpu_reg_reg_reg ); 10562 %} 10563 10564 // MACRO4 -- divFPR subFPR 10565 // This instruction does not round to 24-bits 10566 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ 10567 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10568 match(Set dst (DivF (SubF src2 src1) src3)); 10569 10570 format %{ "FLD $src2 ===MACRO4===\n\t" 10571 "FSUB ST,$src1\n\t" 10572 "FDIV ST,$src3\n\t" 10573 "FSTP $dst" %} 10574 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10575 ins_encode( Push_Reg_FPR(src2), 10576 subFPR_divFPR_encode(src1,src3), 10577 Pop_Reg_FPR(dst) ); 10578 ins_pipe( fpu_reg_reg_reg_reg ); 10579 %} 10580 10581 // Spill to obtain 24-bit precision 10582 instruct divFPR24_reg(stackSlotF 
dst, regFPR src1, regFPR src2) %{ 10583 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10584 match(Set dst (DivF src1 src2)); 10585 10586 format %{ "FDIV $dst,$src1,$src2" %} 10587 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 10588 ins_encode( Push_Reg_FPR(src1), 10589 OpcReg_FPR(src2), 10590 Pop_Mem_FPR(dst) ); 10591 ins_pipe( fpu_mem_reg_reg ); 10592 %} 10593 // 10594 // This instruction does not round to 24-bits 10595 instruct divFPR_reg(regFPR dst, regFPR src) %{ 10596 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10597 match(Set dst (DivF dst src)); 10598 10599 format %{ "FDIV $dst,$src" %} 10600 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10601 ins_encode( Push_Reg_FPR(src), 10602 OpcP, RegOpc(dst) ); 10603 ins_pipe( fpu_reg_reg ); 10604 %} 10605 10606 10607 // Spill to obtain 24-bit precision 10608 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10609 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10610 match(Set dst (ModF src1 src2)); 10611 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10612 10613 format %{ "FMOD $dst,$src1,$src2" %} 10614 ins_encode( Push_Reg_Mod_DPR(src1, src2), 10615 emitModDPR(), 10616 Push_Result_Mod_DPR(src2), 10617 Pop_Mem_FPR(dst)); 10618 ins_pipe( pipe_slow ); 10619 %} 10620 // 10621 // This instruction does not round to 24-bits 10622 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 10623 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10624 match(Set dst (ModF dst src)); 10625 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10626 10627 format %{ "FMOD $dst,$src" %} 10628 ins_encode(Push_Reg_Mod_DPR(dst, src), 10629 emitModDPR(), 10630 Push_Result_Mod_DPR(src), 10631 Pop_Reg_FPR(dst)); 10632 ins_pipe( pipe_slow ); 10633 %} 10634 10635 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ 10636 predicate(UseSSE>=1); 10637 
match(Set dst (ModF src0 src1)); 10638 effect(KILL rax, KILL cr); 10639 format %{ "SUB ESP,4\t # FMOD\n" 10640 "\tMOVSS [ESP+0],$src1\n" 10641 "\tFLD_S [ESP+0]\n" 10642 "\tMOVSS [ESP+0],$src0\n" 10643 "\tFLD_S [ESP+0]\n" 10644 "loop:\tFPREM\n" 10645 "\tFWAIT\n" 10646 "\tFNSTSW AX\n" 10647 "\tSAHF\n" 10648 "\tJP loop\n" 10649 "\tFSTP_S [ESP+0]\n" 10650 "\tMOVSS $dst,[ESP+0]\n" 10651 "\tADD ESP,4\n" 10652 "\tFSTP ST0\t # Restore FPU Stack" 10653 %} 10654 ins_cost(250); 10655 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 10656 ins_pipe( pipe_slow ); 10657 %} 10658 10659 10660 //----------Arithmetic Conversion Instructions--------------------------------- 10661 // The conversions operations are all Alpha sorted. Please keep it that way! 10662 10663 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ 10664 predicate(UseSSE==0); 10665 match(Set dst (RoundFloat src)); 10666 ins_cost(125); 10667 format %{ "FST_S $dst,$src\t# F-round" %} 10668 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 10669 ins_pipe( fpu_mem_reg ); 10670 %} 10671 10672 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ 10673 predicate(UseSSE<=1); 10674 match(Set dst (RoundDouble src)); 10675 ins_cost(125); 10676 format %{ "FST_D $dst,$src\t# D-round" %} 10677 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 10678 ins_pipe( fpu_mem_reg ); 10679 %} 10680 10681 // Force rounding to 24-bit precision and 6-bit exponent 10682 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ 10683 predicate(UseSSE==0); 10684 match(Set dst (ConvD2F src)); 10685 format %{ "FST_S $dst,$src\t# F-round" %} 10686 expand %{ 10687 roundFloat_mem_reg(dst,src); 10688 %} 10689 %} 10690 10691 // Force rounding to 24-bit precision and 6-bit exponent 10692 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ 10693 predicate(UseSSE==1); 10694 match(Set dst (ConvD2F src)); 10695 effect( KILL cr ); 10696 format %{ "SUB ESP,4\n\t" 10697 "FST_S [ESP],$src\t# F-round\n\t" 10698 "MOVSS 
$dst,[ESP]\n\t" 10699 "ADD ESP,4" %} 10700 ins_encode %{ 10701 __ subptr(rsp, 4); 10702 if ($src$$reg != FPR1L_enc) { 10703 __ fld_s($src$$reg-1); 10704 __ fstp_s(Address(rsp, 0)); 10705 } else { 10706 __ fst_s(Address(rsp, 0)); 10707 } 10708 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 10709 __ addptr(rsp, 4); 10710 %} 10711 ins_pipe( pipe_slow ); 10712 %} 10713 10714 // Force rounding double precision to single precision 10715 instruct convD2F_reg(regF dst, regD src) %{ 10716 predicate(UseSSE>=2); 10717 match(Set dst (ConvD2F src)); 10718 format %{ "CVTSD2SS $dst,$src\t# F-round" %} 10719 ins_encode %{ 10720 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); 10721 %} 10722 ins_pipe( pipe_slow ); 10723 %} 10724 10725 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ 10726 predicate(UseSSE==0); 10727 match(Set dst (ConvF2D src)); 10728 format %{ "FST_S $dst,$src\t# D-round" %} 10729 ins_encode( Pop_Reg_Reg_DPR(dst, src)); 10730 ins_pipe( fpu_reg_reg ); 10731 %} 10732 10733 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ 10734 predicate(UseSSE==1); 10735 match(Set dst (ConvF2D src)); 10736 format %{ "FST_D $dst,$src\t# D-round" %} 10737 expand %{ 10738 roundDouble_mem_reg(dst,src); 10739 %} 10740 %} 10741 10742 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ 10743 predicate(UseSSE==1); 10744 match(Set dst (ConvF2D src)); 10745 effect( KILL cr ); 10746 format %{ "SUB ESP,4\n\t" 10747 "MOVSS [ESP] $src\n\t" 10748 "FLD_S [ESP]\n\t" 10749 "ADD ESP,4\n\t" 10750 "FSTP $dst\t# D-round" %} 10751 ins_encode %{ 10752 __ subptr(rsp, 4); 10753 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10754 __ fld_s(Address(rsp, 0)); 10755 __ addptr(rsp, 4); 10756 __ fstp_d($dst$$reg); 10757 %} 10758 ins_pipe( pipe_slow ); 10759 %} 10760 10761 instruct convF2D_reg(regD dst, regF src) %{ 10762 predicate(UseSSE>=2); 10763 match(Set dst (ConvF2D src)); 10764 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 10765 ins_encode %{ 10766 __ cvtss2sd ($dst$$XMMRegister, 
$src$$XMMRegister); 10767 %} 10768 ins_pipe( pipe_slow ); 10769 %} 10770 10771 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10772 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ 10773 predicate(UseSSE<=1); 10774 match(Set dst (ConvD2I src)); 10775 effect( KILL tmp, KILL cr ); 10776 format %{ "FLD $src\t# Convert double to int \n\t" 10777 "FLDCW trunc mode\n\t" 10778 "SUB ESP,4\n\t" 10779 "FISTp [ESP + #0]\n\t" 10780 "FLDCW std/24-bit mode\n\t" 10781 "POP EAX\n\t" 10782 "CMP EAX,0x80000000\n\t" 10783 "JNE,s fast\n\t" 10784 "FLD_D $src\n\t" 10785 "CALL d2i_wrapper\n" 10786 "fast:" %} 10787 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); 10788 ins_pipe( pipe_slow ); 10789 %} 10790 10791 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10792 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 10793 predicate(UseSSE>=2); 10794 match(Set dst (ConvD2I src)); 10795 effect( KILL tmp, KILL cr ); 10796 format %{ "CVTTSD2SI $dst, $src\n\t" 10797 "CMP $dst,0x80000000\n\t" 10798 "JNE,s fast\n\t" 10799 "SUB ESP, 8\n\t" 10800 "MOVSD [ESP], $src\n\t" 10801 "FLD_D [ESP]\n\t" 10802 "ADD ESP, 8\n\t" 10803 "CALL d2i_wrapper\n" 10804 "fast:" %} 10805 ins_encode %{ 10806 Label fast; 10807 __ cvttsd2sil($dst$$Register, $src$$XMMRegister); 10808 __ cmpl($dst$$Register, 0x80000000); 10809 __ jccb(Assembler::notEqual, fast); 10810 __ subptr(rsp, 8); 10811 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10812 __ fld_d(Address(rsp, 0)); 10813 __ addptr(rsp, 8); 10814 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper()))); 10815 __ bind(fast); 10816 %} 10817 ins_pipe( pipe_slow ); 10818 %} 10819 10820 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ 10821 predicate(UseSSE<=1); 10822 match(Set dst (ConvD2L src)); 10823 effect( KILL cr ); 10824 format %{ "FLD $src\t# Convert double to long\n\t" 10825 "FLDCW 
trunc mode\n\t" 10826 "SUB ESP,8\n\t" 10827 "FISTp [ESP + #0]\n\t" 10828 "FLDCW std/24-bit mode\n\t" 10829 "POP EAX\n\t" 10830 "POP EDX\n\t" 10831 "CMP EDX,0x80000000\n\t" 10832 "JNE,s fast\n\t" 10833 "TEST EAX,EAX\n\t" 10834 "JNE,s fast\n\t" 10835 "FLD $src\n\t" 10836 "CALL d2l_wrapper\n" 10837 "fast:" %} 10838 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); 10839 ins_pipe( pipe_slow ); 10840 %} 10841 10842 // XMM lacks a float/double->long conversion, so use the old FPU stack. 10843 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 10844 predicate (UseSSE>=2); 10845 match(Set dst (ConvD2L src)); 10846 effect( KILL cr ); 10847 format %{ "SUB ESP,8\t# Convert double to long\n\t" 10848 "MOVSD [ESP],$src\n\t" 10849 "FLD_D [ESP]\n\t" 10850 "FLDCW trunc mode\n\t" 10851 "FISTp [ESP + #0]\n\t" 10852 "FLDCW std/24-bit mode\n\t" 10853 "POP EAX\n\t" 10854 "POP EDX\n\t" 10855 "CMP EDX,0x80000000\n\t" 10856 "JNE,s fast\n\t" 10857 "TEST EAX,EAX\n\t" 10858 "JNE,s fast\n\t" 10859 "SUB ESP,8\n\t" 10860 "MOVSD [ESP],$src\n\t" 10861 "FLD_D [ESP]\n\t" 10862 "ADD ESP,8\n\t" 10863 "CALL d2l_wrapper\n" 10864 "fast:" %} 10865 ins_encode %{ 10866 Label fast; 10867 __ subptr(rsp, 8); 10868 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10869 __ fld_d(Address(rsp, 0)); 10870 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); 10871 __ fistp_d(Address(rsp, 0)); 10872 // Restore the rounding mode, mask the exception 10873 if (Compile::current()->in_24_bit_fp_mode()) { 10874 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 10875 } else { 10876 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 10877 } 10878 // Load the converted long, adjust CPU stack 10879 __ pop(rax); 10880 __ pop(rdx); 10881 __ cmpl(rdx, 0x80000000); 10882 __ jccb(Assembler::notEqual, fast); 10883 __ testl(rax, rax); 10884 __ jccb(Assembler::notEqual, fast); 10885 __ subptr(rsp, 8); 10886 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 
10887 __ fld_d(Address(rsp, 0)); 10888 __ addptr(rsp, 8); 10889 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); 10890 __ bind(fast); 10891 %} 10892 ins_pipe( pipe_slow ); 10893 %} 10894 10895 // Convert a double to an int. Java semantics require we do complex 10896 // manglations in the corner cases. So we set the rounding mode to 10897 // 'zero', store the darned double down as an int, and reset the 10898 // rounding mode to 'nearest'. The hardware stores a flag value down 10899 // if we would overflow or converted a NAN; we check for this and 10900 // and go the slow path if needed. 10901 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ 10902 predicate(UseSSE==0); 10903 match(Set dst (ConvF2I src)); 10904 effect( KILL tmp, KILL cr ); 10905 format %{ "FLD $src\t# Convert float to int \n\t" 10906 "FLDCW trunc mode\n\t" 10907 "SUB ESP,4\n\t" 10908 "FISTp [ESP + #0]\n\t" 10909 "FLDCW std/24-bit mode\n\t" 10910 "POP EAX\n\t" 10911 "CMP EAX,0x80000000\n\t" 10912 "JNE,s fast\n\t" 10913 "FLD $src\n\t" 10914 "CALL d2i_wrapper\n" 10915 "fast:" %} 10916 // DPR2I_encoding works for FPR2I 10917 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); 10918 ins_pipe( pipe_slow ); 10919 %} 10920 10921 // Convert a float in xmm to an int reg. 
// Convert a float in an XMM register to an int (Java F2I semantics).
// CVTTSS2SI does the truncating convert; on NaN or out-of-range input the
// hardware produces the "integer indefinite" value 0x80000000, so we compare
// the result against that sentinel and fall into a slow path that re-spills
// the float onto the x87 stack and calls the d2i_wrapper stub to apply Java
// corner-case semantics.  Result is fixed in EAX (dst is eAXRegI); tmp (EDX)
// and the flags are clobbered — presumably the wrapper stub uses them; the
// KILL effects cover that.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is the x86 "invalid" result: only NaN/overflow inputs need fixup.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: bounce the float through the stack onto the FPU stack,
    // then let the runtime stub compute the Java-conformant answer.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert an x87 (FPR) float to a long using the legacy FPU stack (UseSSE==0).
// Pushes the source onto the FPU stack, then reuses the shared double->long
// encoding: switch FPCW to truncation mode, FISTP the value to the stack,
// restore the rounding mode, and pop the result into EDX:EAX (dst is eADXRegL).
// The sentinel check against 0x80000000:00000000 routes NaN/overflow cases to
// the d2l_wrapper stub for Java semantics.
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// Convert an XMM float to a long.  SSE has no scalar-single->64-bit-int
// convert on 32-bit x86, so the value is spilled to memory, loaded onto the
// x87 stack, and FISTP'd in truncation mode.  Result lands in EDX:EAX
// (dst is eADXRegL).  If the stored result equals the 0x80000000:00000000
// "indefinite" sentinel, the input may have been NaN/out-of-range, so the
// float is re-spilled and the d2l_wrapper stub is called to produce the
// Java-conformant result.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // Spill the XMM single to the stack and load it onto the x87 stack.
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // Force round-toward-zero for the store, per Java truncation semantics.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // EDX:EAX == 0x80000000:00000000 means the convert may have been invalid.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: hand the original float to the runtime stub on the FPU stack.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert an int on the stack to an x87 double register (UseSSE<=1).
// FILD loads-and-converts the 32-bit integer; Pop_Reg_DPR stores to dst.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Int-in-GPR to double-in-XMM via CVTSI2SD (scalar convert path).
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Cisc variant: fold the int load into the convert (memory operand form).
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Alternative i2d when UseXmmI2D is set: move the int into the XMM register
// and use the packed CVTDQ2PD convert instead of CVTSI2SD.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// x87 i2d with the int load folded in (FILD from memory).  Only valid when
// not in 24-bit mode — presumably because no rounding store is emitted here;
// the 24-bit variants elsewhere spill through a stack slot.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// x87 int->float conversion for a value already masked to 8 bits: the
// predicate checks that the ConvI2F input is (AndI x 255), so the result
// fits well inside float's 24-bit mantissa and needs no rounding step.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
// (FSTP_S to a 32-bit stack slot rounds the extended-precision result).
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out.
// Memory-operand variant: folds the LoadI into FILD.
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
// Used for UseSSE==1 always, and for UseSSE>=2 when UseXmmI2F is off.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// int->float via MOVD + packed CVTDQ2PS, selected by UseXmmI2F.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy src into both halves of the long pair,
// then arithmetic-shift the high half by 31 to replicate the sign bit.
// SAR sets flags, hence KILL cr.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long: matches (AndL (ConvI2L src) 0xFFFFFFFF)
// so the sign-extension above can be replaced by clearing the high half.
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long: (AndL src 0xFFFFFFFF) -> copy low half, zero high half.
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// long->double on x87: push both halves, FILD a 64-bit int off the stack,
// pop the stack slots, then round/store to the destination stack slot.
// Stack pushes/ADD ESP clobber flags (KILL cr).
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// long->double with an SSE2 destination: go through x87 FILD/FSTP on the
// stack, then MOVSD the rounded result into the XMM register.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// long->float with an SSE destination: x87 FILD then FSTP_S (rounds to
// single), result picked up with MOVSS.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// long->float, pure x87 path (no predicate: fallback when the SSE forms
// above don't apply); FSTP_S performs the F-round into the stack slot.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// long->int is just the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// ---- Raw bit moves (MoveF2I / MoveI2F / MoveD2L / MoveL2D) ----
// These transfer bit patterns between int/float and long/double without
// any value conversion.  The ins_cost values bias the allocator toward
// the cheapest available form (reg-reg SSE < stack-based).

instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// x87 float register -> int stack slot (FST_S stores the raw 32 bits).
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Cheapest form: direct XMM -> GPR bit move, SSE2 only.
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// int stack slot -> x87 float register (FLD_S loads the raw 32 bits).
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);  /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Cheapest form: direct GPR -> XMM bit move, SSE2 only.
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// double stack slot -> long register pair: two 32-bit loads ($src and
// $src+4 for the high half).
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// x87 double register -> long stack slot (FST_D stores the raw 64 bits).
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// XMM double -> long register pair without touching memory: MOVD the low
// 32 bits, PSHUFLW (imm 0x4E) to rotate the high dword into position,
// then MOVD that into the high half.  Needs a scratch XMM (TEMP tmp).
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// long register pair -> double stack slot: two 32-bit stores.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// long stack slot -> x87 double register (FLD_D loads the raw 64 bits).
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);  /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// long stack slot -> XMM double; MOVSD zeroes the upper half of the XMM
// register, preferred when UseXmmLoadAndClearUpper is on.
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Same move with MOVLPD (partial register write) when clearing the upper
// half is not desired.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// long register pair -> XMM double without memory: MOVD each half into an
// XMM register and interleave with PUNPCKLDQ.  dst is TEMP because it is
// written before src is fully consumed.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
// Small ClearArray non-AVX512.
// Clears cnt doublewords starting at base for a small (non-large)
// ClearArray node when AVX512 is not in use.  cnt and base are consumed
// (USE_KILL); EAX is killed because it holds the zero value for REP STOS.
// The $$template format mirrors the runtime dispatch inside
// MacroAssembler::clear_mem (UseFastStosb / UseXMMForObjInit / plain
// REP STOS paths).
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // large=false, no AVX512 mask register (knoreg).
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 non-constant length.
// Same as rep_stos but supplies an AVX512 mask register (ktmp) and a
// legacy-encodable XMM temp; selected when UseAVX > 2.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // large=false, with AVX512 mask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray non-AVX512.
// is_large() variant: skips the short-length fast path and goes straight
// to the bulk-clearing code (clear_mem called with large=true).
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // large=true, no AVX512 mask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray AVX512.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // large=true, with AVX512 mask register.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 constant length.
// cnt is an immediate here ($cnt$$constant), so the length-dispatch can
// be resolved at compile time; requires AVX512VL+BW.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
              ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// ---- String compare intrinsics ----
// One instruct per operand-width encoding (LL/UU/LU/UL), each with a
// plain and an _evex (AVX512VL+BW, extra kReg temp) variant.  Fixed
// registers (EDI/ECX/ESI/EDX/EAX) match what MacroAssembler::
// string_compare expects; all inputs are consumed (USE_KILL).

// byte[] vs byte[] (Latin1/Latin1).
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// char[] vs char[] (UTF-16/UTF-16).
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Latin1 vs UTF-16.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// UTF-16 vs Latin1.  Note the register bindings and call arguments are
// swapped (str2/cnt2 passed first) relative to the LU case.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
// Delegates to arrays_equals with is_array_equ=false; the trailing
// boolean selects byte (false /* char */) element comparison.
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}

instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size.
// Latin1 variant; a constant needle of >= 16 bytes takes the
// string_indexofC8 path that avoids loading it through the stack.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// UTF-16 variant; the C8 threshold is 8 elements (16 bytes).
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// Constant-length needle, StrIntrinsicNode::UL encoding (see predicate).
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length indexOf, StrIntrinsicNode::LL encoding; the (-1) count tells
// the assembler stub that the needle length is only known at runtime.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length indexOf, StrIntrinsicNode::UU encoding.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length indexOf, StrIntrinsicNode::UL encoding.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Search a single char in a UTF-16 string (StrIntrinsicNode::U).
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Search a single char in a Latin1 string (StrIntrinsicNode::L).
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// fast array equals
// Non-AVX512 byte[] variant: passes knoreg since no opmask register is available.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 (avx512vlbw) byte[] variant: same stub, but with an opmask temp.
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Non-AVX512 char[] variant (StrIntrinsicNode::UU).
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 char[] variant.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Test a byte[] for negative (non-ASCII) bytes; non-AVX512/BMI2 path.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 + BMI2 variant with two opmask temporaries.
instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}


// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 + BMI2 compression variant.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 + BMI2 inflation variant.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    // Last argument selects ISO-8859-1 (false) vs ASCII (true) in the stub.
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Compare register with immediate; Con8or32 picks the sign-extended
// 8-bit form when the immediate fits.
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP    $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (shorter than CMP reg,0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fuse (src & con) == 0 into a single TEST reg,imm.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST   $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fuse (src & mem) == 0 into a single TEST reg,mem.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST   $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu  $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST   $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN    $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX    $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    // 64-bit arithmetic is done in EAX:EDX (hence the fixed eAX/eDX operands)
    // so the intermediate ($limit - $init + $stride - 1) cannot overflow int.
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP    $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);  // long Jcc encoding: 2-byte opcode + 4-byte rel32 displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Loop-back branch of a counted loop.  Only used when the loop does not set a
// vector mask; the _and_restoreMask variants below handle the masked case.
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Unsigned-compare flavor of the counted-loop back-branch.
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Counted-loop back-branch on carry-free unsigned flags (eFlagsRegUCF);
// cheaper ins_cost than jmpLoopEndU so this form is preferred when it applies.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl, TEMP ktmp);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask($ktmp$$KRegister);  // restore vector mask clobbered inside the loop
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl, TEMP ktmp);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask($ktmp$$KRegister);
  %}
  ins_pipe( pipe_jcc );
%}

// Bounded mask operand used in the following pattern is needed for
// post-loop multiversioning.
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl, TEMP ktmp);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask($ktmp$$KRegister);  // restore vector mask clobbered inside the loop
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Unsigned conditional jump on carry-free flags (eFlagsRegUCF); cheaper
// ins_cost than jmpConU so this form is preferred when it applies.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Two-jump conditional: also tests the parity flag.
// NOTE(review): presumably this handles the unordered (parity) result of a
// floating-point compare -- for NE, parity must also take the branch; for EQ,
// parity must skip it.  Confirm against the cmpOpUCF2 operand definition.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
            if ($cop$$cmpcode == Assembler::notEqual) {
              $$emit$$"JP,u $labl\n\t"
              $$emit$$"J$cop,u $labl"
            } else {
              $$emit$$"JP,u done\n\t"
              $$emit$$"J$cop,u $labl\n\t"
              $$emit$$"done:"
            }
          %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // NE: branch if unordered (parity) OR not-equal.
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // EQ: unordered must NOT take the branch -- short-circuit past it.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
// array for an instance of the superklass.  Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()).  Return
// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Same scan as above, but the caller only consumes the flags (compare against
// NULL), so the result register need not be zeroed on a hit.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset.  These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching.  Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);  // short jump: 1-byte opcode + rel8 displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short loop-back branch on carry-free unsigned flags.
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short unsigned conditional jump on carry-free flags.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short variant of jmpConUCF2: both the parity jump and the conditional jump
// are emitted as 2-byte short branches, hence size(4).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
            if ($cop$$cmpcode == Assembler::notEqual) {
              $$emit$$"JP,u,s $labl\n\t"
              $$emit$$"J$cop,u,s $labl"
            } else {
              $$emit$$"JP,u,s done\n\t"
              $$emit$$"J$cop,u,s $labl\n\t"
              $$emit$$"done:"
            }
          %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // NE: branch if unordered (parity) OR not-equal.
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // EQ: unordered must NOT take the branch -- short-circuit past it.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12961 12962 // ============================================================================ 12963 // Long Compare 12964 // 12965 // Currently we hold longs in 2 registers. Comparing such values efficiently 12966 // is tricky. The flavor of compare used depends on whether we are testing 12967 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit. 12968 // The GE test is the negated LT test. The LE test can be had by commuting 12969 // the operands (yielding a GE test) and then negating; negate again for the 12970 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the 12971 // NE test is negated from that. 12972 12973 // Due to a shortcoming in the ADLC, it mixes up expressions like: 12974 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the 12975 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections 12976 // are collapsed internally in the ADLC's dfa-gen code. The match for 12977 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the 12978 // foo match ends up with the wrong leaf. One fix is to not match both 12979 // reg-reg and reg-zero forms of long-compare. This is unfortunate because 12980 // both forms beat the trinary form of long-compare and both are very useful 12981 // on Intel which has so few registers. 12982 12983 // Manifest a CmpL result in an integer register. Very painful. 12984 // This is the test to avoid. 
// Produce the three-way result of CmpL3 (-1, 0, +1) in $dst:
// signed compare of the high words decides first; on equal high words an
// unsigned compare of the low words breaks the tie.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);                               // dst = 0 (the "equal" answer)
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register)); // signed compare of high words
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);                               // high words equal: unsigned compare of low words
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);   // dst = +1
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);   // dst = -1
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  // The sign of a long against zero lives entirely in the high word, so a
  // single TEST of the high word sets the flags LT/GE needs.
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);  // CMOVcc base opcode
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as cmovLL_reg_LTGE but with the source long loaded from memory.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);  // CMOVcc base opcode
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as cmovII_reg_LTGE but with the source int loaded from memory.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE pointers.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 form).
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  // NOTE(review): '&&' binds tighter than '||', so the UseSSE<=1 guard covers
  // only the BoolTest::lt disjunct, unlike the fully parenthesized integer
  // cmov predicates above.  Presumably masked by the UseSSE gating on the
  // regDPR operands themselves -- confirm before relying on it.
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form).
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  // NOTE(review): same operator-precedence caveat as cmovDDPR_reg_LTGE above.
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form).
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  // NOTE(review): same operator-precedence caveat as cmovDDPR_reg_LTGE above.
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form).
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  // NOTE(review): same operator-precedence caveat as cmovDDPR_reg_LTGE above.
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  // OR-ing the halves together yields zero iff the whole long is zero.
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  // OR-ing the halves together yields zero iff the whole long is zero.
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);  // CMOVcc base opcode
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as cmovLL_reg_EQNE but with the source long loaded from memory.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as cmovII_reg_EQNE but with the source int loaded from memory.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE pointers.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);  // CMOVcc base opcode
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 form).
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  // NOTE(review): '&&' binds tighter than '||', so the UseSSE<=1 guard covers
  // only the BoolTest::eq disjunct, unlike the fully parenthesized integer
  // cmov predicates above.  Presumably masked by the UseSSE gating on the
  // regDPR operands themselves -- confirm before relying on it.
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // NOTE(review): same operator-precedence caveat as cmovDDPR_reg_EQNE above.
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  // NOTE(review): same operator-precedence caveat as cmovDDPR_reg_EQNE above.
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // NOTE(review): same operator-precedence caveat as cmovDDPR_reg_EQNE above.
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  // Note the swapped order (src2, src1) fed to the encoding.
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
13380 // Just a wrapper for a normal branch, plus the predicate test 13381 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{ 13382 match(If cmp flags); 13383 effect(USE labl); 13384 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); 13385 ins_cost(300); 13386 expand %{ 13387 jmpCon(cmp,flags,labl); // JGT or JLE... 13388 %} 13389 %} 13390 13391 //====== 13392 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares. 13393 // Same as cmpUL_reg_flags_LEGT except must negate src 13394 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{ 13395 match(Set flags (CmpUL src zero)); 13396 effect(TEMP tmp); 13397 ins_cost(300); 13398 format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t" 13399 "CMP $tmp,$src.lo\n\t" 13400 "SBB $tmp,$src.hi\n\t" %} 13401 ins_encode(long_cmp_flags3(src, tmp)); 13402 ins_pipe(ialu_reg_reg_long); 13403 %} 13404 13405 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares. 13406 // Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands 13407 // requires a commuted test to get the same result. 13408 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{ 13409 match(Set flags (CmpUL src1 src2)); 13410 effect(TEMP tmp); 13411 ins_cost(300); 13412 format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t" 13413 "MOV $tmp,$src2.hi\n\t" 13414 "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %} 13415 ins_encode(long_cmp_flags2( src2, src1, tmp)); 13416 ins_pipe(ialu_cr_reg_reg); 13417 %} 13418 13419 // Unsigned long compares reg < zero/req OR reg >= zero/req. 
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only selected for GT/LE tests; LT/GE branches use the LTGE variants.
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  // Requires CMOV hardware; restricted to LE/GT tests on the commuted long flags.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  // A 64-bit conditional move is emitted as two 32-bit CMOVs (lo then hi)
  // under the same condition code.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  // Memory-source form: the hi word is loaded from src+4.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-compare (CmpUL flags) variants of the two rules above.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Long-compare flags driving a 32-bit integer CMOV (register and memory forms).
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  // Requires CMOV hardware; restricted to LE/GT tests on the commuted long flags.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Long-compare flags driving a pointer CMOV.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  // Requires CMOV hardware; restricted to LE/GT tests on the commuted long flags.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// FIX(review): the UseSSE term and the BoolTest disjunction in the four FP
// predicates below are now explicitly parenthesized.  Previously '&&' bound
// tighter than '||', so the UseSSE guard applied only to the first (le)
// disjunct and each rule could match a 'gt' test at any UseSSE level, making
// the UseSSE<=1 / UseSSE>=2 (and UseSSE==0 / UseSSE>=1) variants ambiguous
// for 'gt'.  The parenthesized form matches the cmovLL_*/cmovII_* predicates
// above and makes the paired variants mutually exclusive.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  // x87 (pre-SSE2) double conditional move via FCMOV.
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // SSE2 double conditional move.
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // x87 (no SSE) float conditional move via FCMOV.
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // SSE float conditional move.
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is pre-loaded with a placeholder oop before the call; presumably the
  // inline-cache machinery patches it -- see Java_Dynamic_Call (defined elsewhere).
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that does not touch the FPU: no float-stack clearing or FPU
// verification around the runtime call.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // Return address is discarded into EDX before the indirect jump.
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// RTM (hardware transactional memory) fast-lock path; selected only when the
// compilation uses RTM.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast-lock path (the common case).
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    // NOTE(review): post_pc is computed but not checked here -- confirm
    // whether a size/offset assertion on it was intended.
    address post_pc = __ pc();
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Fold a reload of a just-spilled value into a reuse of the spill slot:
// if the loaded register is the one just stored and the addresses match,
// the load is redundant and the original store is kept.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.