//
// Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32 Assembler::imm_operand
#define RELOC_DISP32 Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register.
// Register pairs are laid out two OptoReg slots apart in the masks above.
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// One-time adjustment of the allocatable register masks, called at VM startup.
void reg_mask_init() {
  if (Matcher::has_predicated_vectors()) {
    // Post-loop multi-versioning expects the mask to be present in the K1
    // register; until that is fixed, the RA should not be allocating the K1
    // register. Removing K1 from the mask prevents any accidental corruption
    // of the value held in K1.
    if (PostLoopMultiversioning) {
      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
    }
  }
}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Copy the 128-bit value (lo, hi) into the first 16-byte-aligned slot at or
// below 'adr', and return that aligned address. The pools below reserve one
// extra 128-bit slot so the alignment adjustment always stays in bounds.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Sign-mask constants clear the sign bit (for AbsF/AbsD); sign-flip
// constants toggle it (for NegF/NegD).
static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call by the pre-call reset
// sequence (fldcw when in 24-bit FP mode, vzeroupper when supported).
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
// Offset from the start of the call node's emission to the return address,
// i.e. past the pre-call resets plus the call instruction itself.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size in bytes of the FFree_Float_Stack_All stub sequence; set when it is
// first emitted (must happen before any runtime-call offset is queried).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  // Leaf calls that don't touch the FPU skip the float-stack flush.
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

// Native calls are not supported on this platform.
int MachCallNativeNode::ret_addr_offset() {
  ShouldNotCallThis();
  return -1;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5; // skip MOV instruction
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M byte: mode (f1), reg/opcode-extension (f2), r/m (f3).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte combined with a condition-code field.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // An embedded oop immediate must be a valid oop (0 and the non-oop
  // sentinel are allowed placeholders).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits [ESP+disp] addressing (ESP base forces a SIB byte), choosing the
// 8-bit displacement form when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d8 (cbuf, disp); // Displacement // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
    emit_d32(cbuf, disp); // Displacement // R/M byte
  }
}

// rRegI ereg, memory mem) %{ // emit_reg_mem
// Emit the ModR/M (+ optional SIB + displacement) bytes for a reg,mem
// operand pair. index == 0x4 means "no index"; base == -1 means absolute
// address. Displacements with relocations always use the 32-bit form.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else { // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else { // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) { // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else { // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else { // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Emit a reg-to-reg integer move (MOV r32, r/m32); elided when source and
// destination encodings are identical.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Patch EFLAGS after a comiss/ucomiss so an unordered (NaN) compare reads
// as 'less than' to subsequent branches.
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  // 7 6 5 4 3 2 1 0
  // |S|Z|r|A|r|P|r|C| (r - reserved bit)
  // 0 0 1 0 1 0 1 1 (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst': -1 for less-than or
// unordered, 0 for equal, 1 for greater-than.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the method prolog; must mirror the byte sequence produced by
// MachPrologNode::emit below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  // verified_entry() emits the whole prolog (stack bang, frame push, etc.).
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the method epilog; must mirror MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  // POP EBP (opcode 0x58 + register encoding)
  emit_opcode(cbuf, 0x58 | EBP_enc);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Register the poll site with the safepoint-stub table only for the
      // real emission pass, not the scratch sizing pass.
      code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//=============================================================================

// Coarse register classes used by the spill-copy logic below.
enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  if (r->is_KRegister()) return rc_kreg;
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or format, or size) a load/store between a register and an [ESP+offset]
// stack slot. Exactly one of three modes runs: emit into cbuf when cbuf != NULL,
// format into st when !do_size, and in all cases return the accumulated size.
// The returned byte count must match the bytes encode_RegMem emits.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// Spill/reload between an XMM register and an [ESP+offset] stack slot.
// A register pair (reg_lo+1 == reg_hi) is a 64-bit double move; otherwise a
// 32-bit float move.  Emits into cbuf when non-NULL, else prints to st
// (unless do_size).  Returns accumulated encoded size.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    //                          it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting: with AVX-512, displacements may compress to one byte
  // (disp8*N scaling); otherwise classic disp8/disp32 rules apply.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (float or double, by register-pair adjacency).
// Returns accumulated encoded size.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// 32-bit GPR -> XMM copy via MOVD.  Returns total encoded size (not
// accumulated into 'size'; callers return this value directly).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// 32-bit XMM -> GPR copy via MOVD.  Mirror of impl_movgpr2x_helper.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register copy: MOV r32, r/m32 (0x8B + ModRM = 2 bytes).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(cbuf ? *cbuf : *cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 FP register to an [ESP+offset] stack slot.  If the value is
// not already at the top of the FP stack, FLD pushes a copy first and the
// store becomes FSTP (store-and-pop); otherwise a plain FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op selects the ModRM reg field for impl_helper's encode_RegMem:
  // EBX_num (/3) encodes FSTP (store & pop), EDX_num (/2) encodes FST.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st);

// Vector stack-slot to stack-slot copy.  Small vectors (VecS/VecD) go through
// push/pop word moves; larger vectors bounce through xmm0, which is saved to
// and restored from scratch space just below ESP.
static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    switch (ireg) {
    case Op_VecS: // 32-bit: one word
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD: // 64-bit: two words
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX: // 128-bit: via xmm0, preserved below ESP
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY: // 256-bit
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ: // 512-bit
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    // NOTE(review): the VecD text prints "popq" and the VecZ text prints
    // "vmovdqu", while the emitted code uses popl/evmovdquq -- display-only
    // mismatch in the disassembly strings; confirm whether intended.
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}

// Central spill-copy dispatcher: classifies source and destination registers
// (rc_class) and emits the appropriate move.  Serves three roles depending on
// arguments (see format/emit/size below): with cbuf it emits code; with st it
// prints disassembly; the return value is the encoded size in bytes.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // Vector copies are handled entirely by the shared helpers (size is not
  // tracked for them; they return 0).
  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high half first so the low-half push does not read a slot
      // the copy has already overwritten.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
        emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
        emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD ); // FST ST(i)
        emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
      emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.
  // NOTE(review): the kreg cases below emit directly and return 0 rather than
  // an encoded size, unlike the other cases in this method -- confirm callers
  // never rely on the size for opmask copies.
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(src_first);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(dst_first);
    __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
// Print the spill copy: run implementation() in print-only mode (no cbuf).
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

// Emit the spill copy into the code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

// Size of the spill copy; delegates to the generic MachNode sizing.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}


//=============================================================================
#ifndef PRODUCT
// BoxLockNode materializes the address of a stack lock slot: LEA reg,[ESP+offset].
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Emit LEA reg,[ESP+offset]; picks the disp8 form when the offset fits.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// Must agree byte-for-byte with emit(): 7 bytes with disp32, 4 with disp8.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
// Unverified Entry Point: inline-cache check against the receiver's klass.
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Fixed size; cross-checked against the emitted bytes by the assert in emit().
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

// Vector calling convention not supported.
const bool Matcher::supports_vector_calling_convention() {
  return false;
}

// Never reached on this platform: vector calling convention is unsupported.
OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

// An argument register is spillable iff it can carry a Java argument.
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL -- never used on this platform
// (ShouldNotReachHere).
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL -- never used on this platform
// (ShouldNotReachHere).
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
// MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (selects 16-bit operand size).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32, 0 using the short 0xB8+rd immediate form.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    // normal case special case
    //
    // input : rax,: dividend min_int
    // reg: divisor -1
    //
    // output: rax,: quotient (= rax, idiv reg) min_int
    // rdx: remainder (= rax, irem reg) 0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80 cmp rax,80000000h
    // 0F 85 0B 00 00 00 jne normal_case
    // 33 D2 xor rdx,edx
    // 83 F9 FF cmp rcx,0FFh
    // 0F 84 03 00 00 00 je done
    // normal_case:
    // 99 cdq
    // F7 F9 idiv rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
    // 83 /7 ib: CMP r/m32, imm8 (sign-extended) -- i.e. cmp ecx, -1
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
    // normal_case:
    emit_opcode(cbuf,0x99); // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02); }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else { // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low word of a long immediate op: primary opcode, /secondary ModRM, imm.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else emit_d32(cbuf,con);
  %}

  // High word of a long immediate op: uses the tertiary opcode and the
  // paired high register of the long register pair.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else emit_d32(cbuf,con);
  %}

  enc_class OpcSReg (rRegI dst) %{ // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit value held in a register pair: BSWAP each half,
  // then XCHG the halves.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // x87 FCMOV: opcode depends on condition code and source FP register.
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  // Slow-path subtype check; on miss, optionally zeros EDI ($primary).
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  // Empty (or verify-empty) the x87 FPU stack before a call; first use
  // records the fixed size of this sequence for later size checks.
  enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty. Do cleanup now.
      masm.empty_FPU_stack();
    }
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                   runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
1762 __ ffree(0); 1763 } else if (rt == T_FLOAT) { 1764 __ lea(rsp, Address(rsp, -4)); 1765 __ fstp_s(Address(rsp, 0)); 1766 __ movflt(xmm0, Address(rsp, 0)); 1767 __ lea(rsp, Address(rsp, 4)); 1768 } else if (rt == T_DOUBLE) { 1769 __ lea(rsp, Address(rsp, -8)); 1770 __ fstp_d(Address(rsp, 0)); 1771 __ movdbl(xmm0, Address(rsp, 0)); 1772 __ lea(rsp, Address(rsp, 8)); 1773 } 1774 } 1775 %} 1776 1777 enc_class pre_call_resets %{ 1778 // If method sets FPU control word restore it here 1779 debug_only(int off0 = cbuf.insts_size()); 1780 if (ra_->C->in_24_bit_fp_mode()) { 1781 MacroAssembler _masm(&cbuf); 1782 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 1783 } 1784 // Clear upper bits of YMM registers when current compiled code uses 1785 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1786 MacroAssembler _masm(&cbuf); 1787 __ vzeroupper(); 1788 debug_only(int off1 = cbuf.insts_size()); 1789 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1790 %} 1791 1792 enc_class post_call_FPU %{ 1793 // If method sets FPU control word do it here also 1794 if (Compile::current()->in_24_bit_fp_mode()) { 1795 MacroAssembler masm(&cbuf); 1796 masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 1797 } 1798 %} 1799 1800 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1801 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1802 // who we intended to call. 1803 cbuf.set_insts_mark(); 1804 $$$emit8$primary; 1805 1806 if (!_method) { 1807 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1808 runtime_call_Relocation::spec(), 1809 RELOC_IMM32); 1810 } else { 1811 int method_index = resolved_method_index(cbuf); 1812 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1813 : static_call_Relocation::spec(method_index); 1814 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1815 rspec, RELOC_DISP32); 1816 // Emit stubs for static call. 1817 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1818 if (stub == NULL) { 1819 ciEnv::current()->record_failure("CodeCache is full"); 1820 return; 1821 } 1822 } 1823 %} 1824 1825 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1826 MacroAssembler _masm(&cbuf); 1827 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1828 %} 1829 1830 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1831 int disp = in_bytes(Method::from_compiled_offset()); 1832 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1833 1834 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1835 cbuf.set_insts_mark(); 1836 $$$emit8$primary; 1837 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1838 emit_d8(cbuf, disp); // Displacement 1839 1840 %} 1841 1842 // Following encoding is no longer used, but may be restored if calling 1843 // convention changes significantly. 
1844 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1845 // 1846 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1847 // // int ic_reg = Matcher::inline_cache_reg(); 1848 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1849 // // int imo_reg = Matcher::interpreter_method_reg(); 1850 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1851 // 1852 // // // Interpreter expects method_ptr in EBX, currently a callee-saved register, 1853 // // // so we load it immediately before the call 1854 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_ptr 1855 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1856 // 1857 // // xor rbp,ebp 1858 // emit_opcode(cbuf, 0x33); 1859 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1860 // 1861 // // CALL to interpreter. 1862 // cbuf.set_insts_mark(); 1863 // $$$emit8$primary; 1864 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1865 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1866 // %} 1867 1868 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1869 $$$emit8$primary; 1870 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1871 $$$emit8$shift$$constant; 1872 %} 1873 1874 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1875 // Load immediate does not have a zero or sign extended version 1876 // for 8-bit immediates 1877 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1878 $$$emit32$src$$constant; 1879 %} 1880 1881 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1882 // Load immediate does not have a zero or sign extended version 1883 // for 8-bit immediates 1884 emit_opcode(cbuf, $primary + $dst$$reg); 1885 $$$emit32$src$$constant; 1886 %} 1887 1888 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1889 // Load immediate does not have a zero or sign extended version 1890 // for 8-bit immediates 1891 int dst_enc = $dst$$reg; 1892 int src_con = $src$$constant & 0x0FFFFFFFFL; 1893 if (src_con == 0) { 1894 // xor dst, dst 1895 
emit_opcode(cbuf, 0x33); 1896 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1897 } else { 1898 emit_opcode(cbuf, $primary + dst_enc); 1899 emit_d32(cbuf, src_con); 1900 } 1901 %} 1902 1903 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 1904 // Load immediate does not have a zero or sign extended version 1905 // for 8-bit immediates 1906 int dst_enc = $dst$$reg + 2; 1907 int src_con = ((julong)($src$$constant)) >> 32; 1908 if (src_con == 0) { 1909 // xor dst, dst 1910 emit_opcode(cbuf, 0x33); 1911 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1912 } else { 1913 emit_opcode(cbuf, $primary + dst_enc); 1914 emit_d32(cbuf, src_con); 1915 } 1916 %} 1917 1918 1919 // Encode a reg-reg copy. If it is useless, then empty encoding. 1920 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 1921 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1922 %} 1923 1924 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 1925 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1926 %} 1927 1928 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1929 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1930 %} 1931 1932 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 1933 $$$emit8$primary; 1934 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1935 %} 1936 1937 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 1938 $$$emit8$secondary; 1939 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 1940 %} 1941 1942 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 1943 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1944 %} 1945 1946 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 1947 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg)); 1948 %} 1949 1950 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 1951 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg)); 1952 %} 1953 1954 enc_class Con32 (immI src) %{ // Con32(storeImmI) 1955 // Output immediate 1956 $$$emit32$src$$constant; 1957 %} 1958 1959 enc_class Con32FPR_as_bits(immFPR src) %{ // 
storeF_imm 1960 // Output Float immediate bits 1961 jfloat jf = $src$$constant; 1962 int jf_as_bits = jint_cast( jf ); 1963 emit_d32(cbuf, jf_as_bits); 1964 %} 1965 1966 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 1967 // Output Float immediate bits 1968 jfloat jf = $src$$constant; 1969 int jf_as_bits = jint_cast( jf ); 1970 emit_d32(cbuf, jf_as_bits); 1971 %} 1972 1973 enc_class Con16 (immI src) %{ // Con16(storeImmI) 1974 // Output immediate 1975 $$$emit16$src$$constant; 1976 %} 1977 1978 enc_class Con_d32(immI src) %{ 1979 emit_d32(cbuf,$src$$constant); 1980 %} 1981 1982 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 1983 // Output immediate memory reference 1984 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 1985 emit_d32(cbuf, 0x00); 1986 %} 1987 1988 enc_class lock_prefix( ) %{ 1989 emit_opcode(cbuf,0xF0); // [Lock] 1990 %} 1991 1992 // Cmp-xchg long value. 1993 // Note: we need to swap rbx, and rcx before and after the 1994 // cmpxchg8 instruction because the instruction uses 1995 // rcx as the high order word of the new value to store but 1996 // our register encoding uses rbx,. 
// Atomic 64-bit compare-and-exchange at [ESI] (LOCK CMPXCHG8B).
// The surrounding XCHG EBX,ECX swaps compensate for the register-encoding
// mismatch described in the comment above this enc_class.
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
  // [Lock]
  emit_opcode(cbuf,0xF0);
  // CMPXCHG8 [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xC7);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
%}

// Atomic 32-bit compare-and-exchange at [ESI] (LOCK CMPXCHG).
enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // CMPXCHG [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Atomic 8-bit compare-and-exchange at [ESI] (LOCK CMPXCHG, byte form 0xB0).
enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // CMPXCHGB [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB0);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Atomic 16-bit compare-and-exchange at [ESI]; the 0x66 operand-size
// prefix narrows CMPXCHG to 16 bits.
enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // 16-bit mode
  emit_opcode(cbuf, 0x66);

  // CMPXCHGW [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Materialize the current ZF condition as 0/1 in 'res'.  MOV does not
// modify flags, so the JNE still tests the preceding comparison; the
// short jump skips the 5-byte MOV res,1.
enc_class enc_flags_ne_to_boolean( iRegI res ) %{
  int res_encoding = $res$$reg;

  // MOV  res,0
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 0 );
  // JNE,s  fail
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 5 );
  // MOV  res,1
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 1 );
  // fail:
%}

// Record the current instruction start so memory operands can attach
// relocation info to it.
enc_class set_instruction_start( ) %{
  cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
%}

// Standard reg,mem addressing-mode encoding (base/index/scale/disp),
// preserving any relocation attached to the displacement.
enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = $ereg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc();
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// Like RegMem, but addresses the high word of a long in memory (disp+4)
// and pairs it with the high half of the register pair.  The displacement
// must not carry relocation info, since +4 would corrupt an oop.
enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + 4;      // Offset is 4 further in memory
  assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
%}

// Long shift by an immediate in 1..31: a double shift (SHLD/SHRD, the
// $tertiary opcode) moves bits across the word boundary, then a plain
// shift ($primary with /$secondary) finishes the other half.
// $tertiary == 0xA4 is SHLD (left); otherwise SHRD (right).
enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
  int r1, r2;
  if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,$tertiary);
  emit_rm(cbuf, 0x3, r1, r2);
  emit_d8(cbuf,$cnt$$constant);
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, r1);
  emit_d8(cbuf,$cnt$$constant);
%}

// Arithmetic right shift of a long by 32..63: copy hi into lo, shift lo
// by cnt-32 if needed, then SAR hi by 31 to replicate the sign bit.
enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
  emit_opcode( cbuf, 0x8B );    // Move
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  if( $cnt$$constant > 32 ) {   // Shift, if not by zero
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
  emit_d8(cbuf,31);
%}

// Logical shift of a long by 32..63: move the surviving word across,
// shift it by cnt-32 if needed, then zero the vacated word with XOR.
// $secondary == 0x5 means shift-right (hi -> lo); otherwise shift-left.
enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
  int r1, r2;
  if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

  emit_opcode( cbuf, 0x8B );    // Move r1,r2
  emit_rm(cbuf, 0x3, r1, r2);
  if( $cnt$$constant > 32 ) {   // Shift, if not by zero
    emit_opcode(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_opcode(cbuf,0x33);       // XOR r2,r2
  emit_rm(cbuf, 0x3, r2, r2);
%}

// Clone of RegMem but accepts an extra parameter to access each
// half of a double in memory; it never needs relocation info.
enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
  emit_opcode(cbuf,$opcode$$constant);
  int reg_encoding = $rm_reg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + $disp_for_half$$constant;
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
//
// Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
// and it never needs relocation information.
// Frequently used to move data between FPU's Stack Top and memory.
// Memory operand with an ADLC-constant reg/opcode field; asserts that no
// relocation (oop) is attached to the displacement.
enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
%}

// As RMopc_Mem_no_oop, but the displacement may carry relocation info
// (disp-as-oop when working with static globals).
enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc();  // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

// LEA-style addressing: base register plus constant displacement, with
// no index and no scale.
enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
  int reg_encoding = $dst$$reg;
  int base  = $src0$$reg;      // 0xFFFFFFFF indicates no base
  int index = 0x04;            // 0x04 indicates no index
  int scale = 0x00;            // 0x00 indicates no scale
  int displace = $src1$$constant; // 0x00 indicates no displacement
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// MIN: compare, then conditionally skip the 2-byte MOV when dst < src.
enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst < src around move
  emit_opcode(cbuf,0x7C);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// MAX: compare, then conditionally skip the 2-byte MOV when dst > src.
enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst > src around move
  emit_opcode(cbuf,0x7F);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// Store an x87 register to memory.
enc_class enc_FPR_store(memory mem, regDPR src) %{
  // If src is FPR1, we can just FST to store it.
  // Else we need to FLD it to FPR1, then FSTP to store/pop it.
  int reg_encoding = 0x2;       // Just store
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc();  // disp-as-oop when working with static globals
  if( $src$$reg != FPR1L_enc ) {
    reg_encoding = 0x3;         // Store & pop
    emit_opcode( cbuf, 0xD9 );  // FLD (i.e., push it)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
  }
  cbuf.set_insts_mark();        // Mark start of opcode for reloc info in mem operand
  emit_opcode(cbuf,$primary);
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// Two's-complement negate: NEG r32 (0xF7 /3).
enc_class neg_reg(rRegI dst) %{
  // NEG $dst
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
%}

// SETL — set the destination byte to 1 if the last compare was
// signed-less-than, else 0 (0x0F 0x9C).
enc_class setLT_reg(eCXRegI dst) %{
  // SETLT $dst
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0x9C);
  emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
%}

// Branch-free conditional add: p -= q; tmp = borrow ? -1 : 0 (SBB);
// tmp &= y; p += tmp.  Net effect: p = (p < q) ? p - q + y : p - q.
enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
  int tmpReg = $tmp$$reg;

  // SUB $p,$q
  emit_opcode(cbuf,0x2B);
  emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
  // SBB $tmp,$tmp
  emit_opcode(cbuf,0x1B);
  emit_rm(cbuf, 0x3, tmpReg, tmpReg);
  // AND $tmp,$y
  emit_opcode(cbuf,0x23);
  emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
  // ADD $p,$tmp
  emit_opcode(cbuf,0x03);
  emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
%}

// Variable long shift left by CL.  If the count is >= 32, first move
// lo into hi and clear lo; then SHLD/SHL handle the count mod 32.
enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.hi,$dst.lo
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
  // CLR $dst.lo
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
  // SHLD $dst.hi,$dst.lo,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xA5);
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
  // SHL  $dst.lo,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
%}

// Variable long logical shift right by CL.  If the count is >= 32, first
// move hi into lo and clear hi; then SHRD/SHR handle the count mod 32.
enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // CLR $dst.hi
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SHR  $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
%}

// Variable long arithmetic shift right by CL.  If the count is >= 32,
// move hi into lo and SAR hi by 31 to keep the sign; then SHRD/SAR
// handle the count mod 32.
enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x05);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
  // SAR $dst.hi,31
  emit_opcode(cbuf, 0xC1);
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
  emit_d8(cbuf, 0x1F );
// small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
  // SAR $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
%}


// ----------------- Encodings for floating point unit -----------------
// May leave result in FPU-TOS or FPU reg depending on opcodes
enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $src$$reg );
%}

// Pop argument in FPR0 with FSTP ST(0)
enc_class PopFPU() %{
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, 0xD8 );
%}

// !!!!! equivalent to Pop_Reg_F
enc_class Pop_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

// Push a double register onto the x87 stack: FLD ST(i-1).
enc_class Push_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
%}

// Multiply by the strict-fp subnormal bias-1 constant (80-bit real)
// to bring the value into range before a strict operation.
enc_class strictfp_bias1( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Multiply by the strict-fp subnormal bias-2 constant (80-bit real)
// to undo the bias-1 scaling after a strict operation.
enc_class strictfp_bias2( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Special case for moving an integer register to a stack slot.
enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{    // RegSS
  store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
%}

// Special case for moving a register to a stack slot.
2368 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2369 // Opcode already emitted 2370 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2371 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2372 emit_d32(cbuf, $dst$$disp); // Displacement 2373 %} 2374 2375 // Push the integer in stackSlot 'src' onto FP-stack 2376 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2377 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2378 %} 2379 2380 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2381 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2382 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2383 %} 2384 2385 // Same as Pop_Mem_F except for opcode 2386 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2387 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2388 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2389 %} 2390 2391 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2392 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2393 emit_d8( cbuf, 0xD8+$dst$$reg ); 2394 %} 2395 2396 enc_class Push_Reg_FPR( regFPR dst ) %{ 2397 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2398 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2399 %} 2400 2401 // Push FPU's float to a stack-slot, and pop FPU-stack 2402 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2403 int pop = 0x02; 2404 if ($src$$reg != FPR1L_enc) { 2405 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2406 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2407 pop = 0x03; 2408 } 2409 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2410 %} 2411 2412 // Push FPU's double to a stack-slot, and pop FPU-stack 2413 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2414 int pop = 0x02; 2415 if ($src$$reg != FPR1L_enc) { 2416 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2417 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2418 pop = 0x03; 2419 } 2420 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2421 %} 2422 2423 // Push FPU's double to 
a FPU-stack-slot, and pop FPU-stack 2424 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2425 int pop = 0xD0 - 1; // -1 since we skip FLD 2426 if ($src$$reg != FPR1L_enc) { 2427 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2428 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2429 pop = 0xD8; 2430 } 2431 emit_opcode( cbuf, 0xDD ); 2432 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2433 %} 2434 2435 2436 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2437 // load dst in FPR0 2438 emit_opcode( cbuf, 0xD9 ); 2439 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2440 if ($src$$reg != FPR1L_enc) { 2441 // fincstp 2442 emit_opcode (cbuf, 0xD9); 2443 emit_opcode (cbuf, 0xF7); 2444 // swap src with FPR1: 2445 // FXCH FPR1 with src 2446 emit_opcode(cbuf, 0xD9); 2447 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2448 // fdecstp 2449 emit_opcode (cbuf, 0xD9); 2450 emit_opcode (cbuf, 0xF6); 2451 } 2452 %} 2453 2454 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2455 MacroAssembler _masm(&cbuf); 2456 __ subptr(rsp, 8); 2457 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2458 __ fld_d(Address(rsp, 0)); 2459 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2460 __ fld_d(Address(rsp, 0)); 2461 %} 2462 2463 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2464 MacroAssembler _masm(&cbuf); 2465 __ subptr(rsp, 4); 2466 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2467 __ fld_s(Address(rsp, 0)); 2468 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2469 __ fld_s(Address(rsp, 0)); 2470 %} 2471 2472 enc_class Push_ResultD(regD dst) %{ 2473 MacroAssembler _masm(&cbuf); 2474 __ fstp_d(Address(rsp, 0)); 2475 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2476 __ addptr(rsp, 8); 2477 %} 2478 2479 enc_class Push_ResultF(regF dst, immI d8) %{ 2480 MacroAssembler _masm(&cbuf); 2481 __ fstp_s(Address(rsp, 0)); 2482 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2483 __ addptr(rsp, $d8$$constant); 2484 %} 2485 2486 enc_class Push_SrcD(regD src) %{ 2487 MacroAssembler _masm(&cbuf); 2488 __ subptr(rsp, 8); 
2489 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2490 __ fld_d(Address(rsp, 0)); 2491 %} 2492 2493 enc_class push_stack_temp_qword() %{ 2494 MacroAssembler _masm(&cbuf); 2495 __ subptr(rsp, 8); 2496 %} 2497 2498 enc_class pop_stack_temp_qword() %{ 2499 MacroAssembler _masm(&cbuf); 2500 __ addptr(rsp, 8); 2501 %} 2502 2503 enc_class push_xmm_to_fpr1(regD src) %{ 2504 MacroAssembler _masm(&cbuf); 2505 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2506 __ fld_d(Address(rsp, 0)); 2507 %} 2508 2509 enc_class Push_Result_Mod_DPR( regDPR src) %{ 2510 if ($src$$reg != FPR1L_enc) { 2511 // fincstp 2512 emit_opcode (cbuf, 0xD9); 2513 emit_opcode (cbuf, 0xF7); 2514 // FXCH FPR1 with src 2515 emit_opcode(cbuf, 0xD9); 2516 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2517 // fdecstp 2518 emit_opcode (cbuf, 0xD9); 2519 emit_opcode (cbuf, 0xF6); 2520 } 2521 // // following asm replaced with Pop_Reg_F or Pop_Mem_F 2522 // // FSTP FPR$dst$$reg 2523 // emit_opcode( cbuf, 0xDD ); 2524 // emit_d8( cbuf, 0xD8+$dst$$reg ); 2525 %} 2526 2527 enc_class fnstsw_sahf_skip_parity() %{ 2528 // fnstsw ax 2529 emit_opcode( cbuf, 0xDF ); 2530 emit_opcode( cbuf, 0xE0 ); 2531 // sahf 2532 emit_opcode( cbuf, 0x9E ); 2533 // jnp ::skip 2534 emit_opcode( cbuf, 0x7B ); 2535 emit_opcode( cbuf, 0x05 ); 2536 %} 2537 2538 enc_class emitModDPR() %{ 2539 // fprem must be iterative 2540 // :: loop 2541 // fprem 2542 emit_opcode( cbuf, 0xD9 ); 2543 emit_opcode( cbuf, 0xF8 ); 2544 // wait 2545 emit_opcode( cbuf, 0x9b ); 2546 // fnstsw ax 2547 emit_opcode( cbuf, 0xDF ); 2548 emit_opcode( cbuf, 0xE0 ); 2549 // sahf 2550 emit_opcode( cbuf, 0x9E ); 2551 // jp ::loop 2552 emit_opcode( cbuf, 0x0F ); 2553 emit_opcode( cbuf, 0x8A ); 2554 emit_opcode( cbuf, 0xF4 ); 2555 emit_opcode( cbuf, 0xFF ); 2556 emit_opcode( cbuf, 0xFF ); 2557 emit_opcode( cbuf, 0xFF ); 2558 %} 2559 2560 enc_class fpu_flags() %{ 2561 // fnstsw_ax 2562 emit_opcode( cbuf, 0xDF); 2563 emit_opcode( cbuf, 0xE0); 2564 // test ax,0x0400 2565 emit_opcode( cbuf, 
0x66 );                      // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );       // TEST AX,imm16
    emit_d16   ( cbuf, 0x0400 );     // test the unordered-compare bit of the FPU status word
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32  ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf - copy AH back into the integer flags
    emit_opcode( cbuf, 0x9E);
  %}

  // Fix up the integer condition codes after a float compare on P6-class
  // hardware: a NaN operand sets the parity flag, and in that case the
  // result is forced to look like the "less than" outcome (carry set).
  // NOTE(review): presumably paired with a compare that sets ZF/PF/CF
  // directly (FUCOMIP-style) — confirm against the instruct rules using it.
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  // Pseudo-code for the CmpF_Result encoding below:
  //   fnstsw_ax();
  //   sahf();
  //   movl(dst, nan_result);
  //   jcc(Assembler::parity, exit);
  //   movl(dst, less_result);
  //   jcc(Assembler::below, exit);
  //   movl(dst, equal_result);
  //   jcc(Assembler::equal, exit);
  //   movl(dst, greater_result);
  //
  // Values actually emitted by the encoding below:
  //   less_result     = -1
  //   greater_result  =  1
  //   equal_result    =  0
  //   nan_result      = -1
  // (an earlier version of this comment listed less/greater swapped; the
  //  immediates in the encoding are authoritative)

  // Materialize the three-way float-compare result (-1/0/1, NaN -> -1)
  // into an integer register from the x87 status flags.
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);   // MOV r32,imm32
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8  ( cbuf, 0x13 );                // displacement to the end of the sequence
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8  ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8  ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend a 32-bit int into a long register pair:
  // copy into both halves, then arithmetic-shift the high half by 31.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  // Long -> double: push the 64-bit value on the CPU stack, FILD it,
  // then pop the 8 bytes back off.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add  SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // High-word multiply: IMUL leaves the 64-bit product in EDX:EAX, then
  // shift EDX right by (cnt - 32) to extract the requested bits.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {          // cnt == 32 needs no shift at all
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  // Full 64x64 multiply using the schoolbook decomposition; the cross
  // terms are summed in $tmp and folded into the high half.
  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}

  // 64-bit multiply by a small (0..127) constant; the constant fits the
  // sign-extended imm8 form of IMUL.
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    // hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  // 64-bit divide: push both operands and call SharedRuntime::ldiv.
  // NOTE(review): HIGH_FROM_LOW is applied to the whole PUSH opcode byte
  // (0x50+reg); this relies on it being a simple additive register-pair
  // offset — confirm against its definition earlier in this file.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);      // pop the four pushed words
  %}

  // 64-bit remainder: same calling sequence as long_div, but into
  // SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Test a long against zero: OR the two halves together into $tmp.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  // Long equality compare: compare low halves, and only if they are equal
  // fall through to compare the high halves.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}

  // Signed long compare via subtract-with-borrow of the high halves.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}

  // Compare a long against zero: 0 - src via CMP/SBB against a zeroed tmp.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}

  // Sniff, sniff...
// smells like Gnu Superoptimizer
  // Negate a 64-bit register pair: negate both halves independently, then
  // SBB 0 into the high half to account for the borrow out of the low half.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,  $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);    // POP EDX
  %}

  // Jump to the shared rethrow stub.
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);            // POP EAX
    emit_opcode(cbuf,0x3D);            // CMP EAX,imm
    emit_d32   (cbuf,0x80000000);      //         0x80000000 == Intel's "invalid" marker
    emit_opcode(cbuf,0x75);            // JNE around_slow_call
    emit_d8    (cbuf,0x07);            // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );           // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);            // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Double -> long: same trick as DPR2I_encoding but with a 64-bit FISTP
  // and a two-register (EDX:EAX) result check.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);            // POP EAX
    emit_opcode(cbuf,0x5A);            // POP EDX
    emit_opcode(cbuf,0x81);            // CMP EDX,imm
    emit_d8    (cbuf,0xFA);            // rdx
    emit_d32   (cbuf,0x80000000);      //         0x80000000
    emit_opcode(cbuf,0x75);            // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);          // Size of slow_call
    emit_opcode(cbuf,0x85);            // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);            // 2/rax,/rax,
    emit_opcode(cbuf,0x75);            // JNE around_slow_call
    emit_d8    (cbuf,0x07);            // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );           // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);            // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADDP  ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long: 64-bit FILD from memory, then
  // store the value down into the destination stack slot.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);            // FILD64 [mem]
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();             // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);            // FISTP64 [mem]
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//        |        |        |  3
//        |        +--------+
//        V        |  old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF     +--------+
//        |       |  pad2  |  2   pad to align old SP
//        |       +--------+  1
//        |       | locks  |  0
//        |       +--------+----> OptoReg::stack0(), even aligned
//        |       |  pad1  | 11   pad to align new SP
//        |       +--------+
//        |       |        | 10
//        |       | spills |  9   spills
//        V       |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^       |  out   |  7
//        |       |  args  |  6   Holes in outgoing args owned by CALLEE
//      Owned by  +--------+
//      CALLEE    | new out|  6   Empty on Intel, window on Sparc
//        |   new |preserve|      Must be even-aligned.
//        |    SP-+--------+----> Matcher::_new_SP, even aligned
//        |       |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be nessecary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be nessecary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  // (indexed by ideal register type; low/high halves for register pairs)
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // Note: unlike c_return_value above, Java float results move to XMM0
  // already at UseSSE>=1 (C results only at UseSSE>=2).
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib
ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a
                                // compute_padding() function must be provided
                                // for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Signed 8-bit immediate (fits the imm8 instruction forms)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned 8-bit immediate
operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: fits in a sign-extended 32-bit value
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 path, UseSSE<=1)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 path)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 path)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE path)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.
// Zero and not -0.0 (bit-pattern test excludes negative zero)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Opmask (vector-mask) register operands: any k-register, then one
// operand per specific k1..k7 register class.
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K1()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K1));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K2()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K2));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K3()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K3));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K4()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K4));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K5()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K5));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K6()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K6));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K7()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K7));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register, excluding EAX ("nax")
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register, excluding EAX and EDX ("nadx")
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register, excluding ECX ("ncx")
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(esi_reg));
   match(reg);
   match(rRegI);

   format %{ "ESI" %}
   interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
3797 operand eRegP_no_EBP() %{ 3798 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3799 match(RegP); 3800 match(eAXRegP); 3801 match(eBXRegP); 3802 match(eCXRegP); 3803 match(eDIRegP); 3804 3805 op_cost(100); 3806 format %{ %} 3807 interface(REG_INTER); 3808 %} 3809 3810 operand naxRegP() %{ 3811 constraint(ALLOC_IN_RC(nax_reg)); 3812 match(RegP); 3813 match(eBXRegP); 3814 match(eDXRegP); 3815 match(eCXRegP); 3816 match(eSIRegP); 3817 match(eDIRegP); 3818 3819 format %{ %} 3820 interface(REG_INTER); 3821 %} 3822 3823 operand nabxRegP() %{ 3824 constraint(ALLOC_IN_RC(nabx_reg)); 3825 match(RegP); 3826 match(eCXRegP); 3827 match(eDXRegP); 3828 match(eSIRegP); 3829 match(eDIRegP); 3830 3831 format %{ %} 3832 interface(REG_INTER); 3833 %} 3834 3835 operand pRegP() %{ 3836 constraint(ALLOC_IN_RC(p_reg)); 3837 match(RegP); 3838 match(eBXRegP); 3839 match(eDXRegP); 3840 match(eSIRegP); 3841 match(eDIRegP); 3842 3843 format %{ %} 3844 interface(REG_INTER); 3845 %} 3846 3847 // Special Registers 3848 // Return a pointer value 3849 operand eAXRegP(eRegP reg) %{ 3850 constraint(ALLOC_IN_RC(eax_reg)); 3851 match(reg); 3852 format %{ "EAX" %} 3853 interface(REG_INTER); 3854 %} 3855 3856 // Used in AtomicAdd 3857 operand eBXRegP(eRegP reg) %{ 3858 constraint(ALLOC_IN_RC(ebx_reg)); 3859 match(reg); 3860 format %{ "EBX" %} 3861 interface(REG_INTER); 3862 %} 3863 3864 // Tail-call (interprocedural jump) to interpreter 3865 operand eCXRegP(eRegP reg) %{ 3866 constraint(ALLOC_IN_RC(ecx_reg)); 3867 match(reg); 3868 format %{ "ECX" %} 3869 interface(REG_INTER); 3870 %} 3871 3872 operand eDXRegP(eRegP reg) %{ 3873 constraint(ALLOC_IN_RC(edx_reg)); 3874 match(reg); 3875 format %{ "EDX" %} 3876 interface(REG_INTER); 3877 %} 3878 3879 operand eSIRegP(eRegP reg) %{ 3880 constraint(ALLOC_IN_RC(esi_reg)); 3881 match(reg); 3882 format %{ "ESI" %} 3883 interface(REG_INTER); 3884 %} 3885 3886 // Used in rep stosw 3887 operand eDIRegP(eRegP reg) %{ 3888 constraint(ALLOC_IN_RC(edi_reg)); 3889 
match(reg); 3890 format %{ "EDI" %} 3891 interface(REG_INTER); 3892 %} 3893 3894 operand eRegL() %{ 3895 constraint(ALLOC_IN_RC(long_reg)); 3896 match(RegL); 3897 match(eADXRegL); 3898 3899 format %{ %} 3900 interface(REG_INTER); 3901 %} 3902 3903 operand eADXRegL( eRegL reg ) %{ 3904 constraint(ALLOC_IN_RC(eadx_reg)); 3905 match(reg); 3906 3907 format %{ "EDX:EAX" %} 3908 interface(REG_INTER); 3909 %} 3910 3911 operand eBCXRegL( eRegL reg ) %{ 3912 constraint(ALLOC_IN_RC(ebcx_reg)); 3913 match(reg); 3914 3915 format %{ "EBX:ECX" %} 3916 interface(REG_INTER); 3917 %} 3918 3919 // Special case for integer high multiply 3920 operand eADXRegL_low_only() %{ 3921 constraint(ALLOC_IN_RC(eadx_reg)); 3922 match(RegL); 3923 3924 format %{ "EAX" %} 3925 interface(REG_INTER); 3926 %} 3927 3928 // Flags register, used as output of compare instructions 3929 operand rFlagsReg() %{ 3930 constraint(ALLOC_IN_RC(int_flags)); 3931 match(RegFlags); 3932 3933 format %{ "EFLAGS" %} 3934 interface(REG_INTER); 3935 %} 3936 3937 // Flags register, used as output of compare instructions 3938 operand eFlagsReg() %{ 3939 constraint(ALLOC_IN_RC(int_flags)); 3940 match(RegFlags); 3941 3942 format %{ "EFLAGS" %} 3943 interface(REG_INTER); 3944 %} 3945 3946 // Flags register, used as output of FLOATING POINT compare instructions 3947 operand eFlagsRegU() %{ 3948 constraint(ALLOC_IN_RC(int_flags)); 3949 match(RegFlags); 3950 3951 format %{ "EFLAGS_U" %} 3952 interface(REG_INTER); 3953 %} 3954 3955 operand eFlagsRegUCF() %{ 3956 constraint(ALLOC_IN_RC(int_flags)); 3957 match(RegFlags); 3958 predicate(false); 3959 3960 format %{ "EFLAGS_U_CF" %} 3961 interface(REG_INTER); 3962 %} 3963 3964 // Condition Code Register used by long compare 3965 operand flagsReg_long_LTGE() %{ 3966 constraint(ALLOC_IN_RC(int_flags)); 3967 match(RegFlags); 3968 format %{ "FLAGS_LTGE" %} 3969 interface(REG_INTER); 3970 %} 3971 operand flagsReg_long_EQNE() %{ 3972 constraint(ALLOC_IN_RC(int_flags)); 3973 match(RegFlags); 
3974 format %{ "FLAGS_EQNE" %} 3975 interface(REG_INTER); 3976 %} 3977 operand flagsReg_long_LEGT() %{ 3978 constraint(ALLOC_IN_RC(int_flags)); 3979 match(RegFlags); 3980 format %{ "FLAGS_LEGT" %} 3981 interface(REG_INTER); 3982 %} 3983 3984 // Condition Code Register used by unsigned long compare 3985 operand flagsReg_ulong_LTGE() %{ 3986 constraint(ALLOC_IN_RC(int_flags)); 3987 match(RegFlags); 3988 format %{ "FLAGS_U_LTGE" %} 3989 interface(REG_INTER); 3990 %} 3991 operand flagsReg_ulong_EQNE() %{ 3992 constraint(ALLOC_IN_RC(int_flags)); 3993 match(RegFlags); 3994 format %{ "FLAGS_U_EQNE" %} 3995 interface(REG_INTER); 3996 %} 3997 operand flagsReg_ulong_LEGT() %{ 3998 constraint(ALLOC_IN_RC(int_flags)); 3999 match(RegFlags); 4000 format %{ "FLAGS_U_LEGT" %} 4001 interface(REG_INTER); 4002 %} 4003 4004 // Float register operands 4005 operand regDPR() %{ 4006 predicate( UseSSE < 2 ); 4007 constraint(ALLOC_IN_RC(fp_dbl_reg)); 4008 match(RegD); 4009 match(regDPR1); 4010 match(regDPR2); 4011 format %{ %} 4012 interface(REG_INTER); 4013 %} 4014 4015 operand regDPR1(regDPR reg) %{ 4016 predicate( UseSSE < 2 ); 4017 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4018 match(reg); 4019 format %{ "FPR1" %} 4020 interface(REG_INTER); 4021 %} 4022 4023 operand regDPR2(regDPR reg) %{ 4024 predicate( UseSSE < 2 ); 4025 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4026 match(reg); 4027 format %{ "FPR2" %} 4028 interface(REG_INTER); 4029 %} 4030 4031 operand regnotDPR1(regDPR reg) %{ 4032 predicate( UseSSE < 2 ); 4033 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4034 match(reg); 4035 format %{ %} 4036 interface(REG_INTER); 4037 %} 4038 4039 // Float register operands 4040 operand regFPR() %{ 4041 predicate( UseSSE < 2 ); 4042 constraint(ALLOC_IN_RC(fp_flt_reg)); 4043 match(RegF); 4044 match(regFPR1); 4045 format %{ %} 4046 interface(REG_INTER); 4047 %} 4048 4049 // Float register operands 4050 operand regFPR1(regFPR reg) %{ 4051 predicate( UseSSE < 2 ); 4052 constraint(ALLOC_IN_RC(fp_flt_reg0)); 
4053 match(reg); 4054 format %{ "FPR1" %} 4055 interface(REG_INTER); 4056 %} 4057 4058 // XMM Float register operands 4059 operand regF() %{ 4060 predicate( UseSSE>=1 ); 4061 constraint(ALLOC_IN_RC(float_reg_legacy)); 4062 match(RegF); 4063 format %{ %} 4064 interface(REG_INTER); 4065 %} 4066 4067 operand legRegF() %{ 4068 predicate( UseSSE>=1 ); 4069 constraint(ALLOC_IN_RC(float_reg_legacy)); 4070 match(RegF); 4071 format %{ %} 4072 interface(REG_INTER); 4073 %} 4074 4075 // Float register operands 4076 operand vlRegF() %{ 4077 constraint(ALLOC_IN_RC(float_reg_vl)); 4078 match(RegF); 4079 4080 format %{ %} 4081 interface(REG_INTER); 4082 %} 4083 4084 // XMM Double register operands 4085 operand regD() %{ 4086 predicate( UseSSE>=2 ); 4087 constraint(ALLOC_IN_RC(double_reg_legacy)); 4088 match(RegD); 4089 format %{ %} 4090 interface(REG_INTER); 4091 %} 4092 4093 // Double register operands 4094 operand legRegD() %{ 4095 predicate( UseSSE>=2 ); 4096 constraint(ALLOC_IN_RC(double_reg_legacy)); 4097 match(RegD); 4098 format %{ %} 4099 interface(REG_INTER); 4100 %} 4101 4102 operand vlRegD() %{ 4103 constraint(ALLOC_IN_RC(double_reg_vl)); 4104 match(RegD); 4105 4106 format %{ %} 4107 interface(REG_INTER); 4108 %} 4109 4110 //----------Memory Operands---------------------------------------------------- 4111 // Direct Memory Operand 4112 operand direct(immP addr) %{ 4113 match(addr); 4114 4115 format %{ "[$addr]" %} 4116 interface(MEMORY_INTER) %{ 4117 base(0xFFFFFFFF); 4118 index(0x4); 4119 scale(0x0); 4120 disp($addr); 4121 %} 4122 %} 4123 4124 // Indirect Memory Operand 4125 operand indirect(eRegP reg) %{ 4126 constraint(ALLOC_IN_RC(int_reg)); 4127 match(reg); 4128 4129 format %{ "[$reg]" %} 4130 interface(MEMORY_INTER) %{ 4131 base($reg); 4132 index(0x4); 4133 scale(0x0); 4134 disp(0x0); 4135 %} 4136 %} 4137 4138 // Indirect Memory Plus Short Offset Operand 4139 operand indOffset8(eRegP reg, immI8 off) %{ 4140 match(AddP reg off); 4141 4142 format %{ "[$reg + $off]" 
%} 4143 interface(MEMORY_INTER) %{ 4144 base($reg); 4145 index(0x4); 4146 scale(0x0); 4147 disp($off); 4148 %} 4149 %} 4150 4151 // Indirect Memory Plus Long Offset Operand 4152 operand indOffset32(eRegP reg, immI off) %{ 4153 match(AddP reg off); 4154 4155 format %{ "[$reg + $off]" %} 4156 interface(MEMORY_INTER) %{ 4157 base($reg); 4158 index(0x4); 4159 scale(0x0); 4160 disp($off); 4161 %} 4162 %} 4163 4164 // Indirect Memory Plus Long Offset Operand 4165 operand indOffset32X(rRegI reg, immP off) %{ 4166 match(AddP off reg); 4167 4168 format %{ "[$reg + $off]" %} 4169 interface(MEMORY_INTER) %{ 4170 base($reg); 4171 index(0x4); 4172 scale(0x0); 4173 disp($off); 4174 %} 4175 %} 4176 4177 // Indirect Memory Plus Index Register Plus Offset Operand 4178 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4179 match(AddP (AddP reg ireg) off); 4180 4181 op_cost(10); 4182 format %{"[$reg + $off + $ireg]" %} 4183 interface(MEMORY_INTER) %{ 4184 base($reg); 4185 index($ireg); 4186 scale(0x0); 4187 disp($off); 4188 %} 4189 %} 4190 4191 // Indirect Memory Plus Index Register Plus Offset Operand 4192 operand indIndex(eRegP reg, rRegI ireg) %{ 4193 match(AddP reg ireg); 4194 4195 op_cost(10); 4196 format %{"[$reg + $ireg]" %} 4197 interface(MEMORY_INTER) %{ 4198 base($reg); 4199 index($ireg); 4200 scale(0x0); 4201 disp(0x0); 4202 %} 4203 %} 4204 4205 // // ------------------------------------------------------------------------- 4206 // // 486 architecture doesn't support "scale * index + offset" with out a base 4207 // // ------------------------------------------------------------------------- 4208 // // Scaled Memory Operands 4209 // // Indirect Memory Times Scale Plus Offset Operand 4210 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4211 // match(AddP off (LShiftI ireg scale)); 4212 // 4213 // op_cost(10); 4214 // format %{"[$off + $ireg << $scale]" %} 4215 // interface(MEMORY_INTER) %{ 4216 // base(0x4); 4217 // index($ireg); 4218 // 
scale($scale); 4219 // disp($off); 4220 // %} 4221 // %} 4222 4223 // Indirect Memory Times Scale Plus Index Register 4224 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4225 match(AddP reg (LShiftI ireg scale)); 4226 4227 op_cost(10); 4228 format %{"[$reg + $ireg << $scale]" %} 4229 interface(MEMORY_INTER) %{ 4230 base($reg); 4231 index($ireg); 4232 scale($scale); 4233 disp(0x0); 4234 %} 4235 %} 4236 4237 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4238 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4239 match(AddP (AddP reg (LShiftI ireg scale)) off); 4240 4241 op_cost(10); 4242 format %{"[$reg + $off + $ireg << $scale]" %} 4243 interface(MEMORY_INTER) %{ 4244 base($reg); 4245 index($ireg); 4246 scale($scale); 4247 disp($off); 4248 %} 4249 %} 4250 4251 //----------Load Long Memory Operands------------------------------------------ 4252 // The load-long idiom will use it's address expression again after loading 4253 // the first word of the long. If the load-long destination overlaps with 4254 // registers used in the addressing expression, the 2nd half will be loaded 4255 // from a clobbered address. Fix this by requiring that load-long use 4256 // address registers that do not overlap with the load-long target. 
// load-long support
// Address-register class for load-long: restricted to ESI so the address
// cannot overlap the long destination pair (EDX:EAX / EBX:ECX), per the
// overlap hazard described above.
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);       // discourage use outside load-long
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);       // 0x4 encodes "no index register"
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);       // no index register
    scale(0x0);
    disp($off);
  %}
%}

// The only addressing forms load-long instructions may use.
opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// Stack slot for a pointer value spilled during matching.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot for an int value spilled during matching.
operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot for a float value spilled during matching.
operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot for a double value spilled during matching.
operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot for a long value spilled during matching.
operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed): encodings are the x86 Jcc/SETcc condition codes.
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
// Unsigned comparison code: below/above condition-code encodings.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case.
// Restricted by predicate to the four inequality tests; eq/ne need fixup
// and are handled by cmpOpUCF2 below.
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
// (eq/ne only, complementing cmpOpUCF above).
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move.
// NOTE(review): these encodings are opcode bytes for the FCMOVcc forms, not
// Jcc condition codes — presumably the second opcode byte; confirm against
// the encoder before touching them.
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o");     // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares.
// Encodings are the signed cmpOp codes with less/greater (and le/ge)
// swapped, for use when the compare operands have been commuted.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares.
// Unsigned cmpOpU codes with below/above (and be/nb) swapped, for
// commuted compare operands.
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions (x86 encodings)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // Instruction size is measured in 1-byte units
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
4566 4567 // Integer ALU reg operation 4568 pipe_class ialu_reg(rRegI dst) %{ 4569 single_instruction; 4570 dst : S4(write); 4571 dst : S3(read); 4572 DECODE : S0; // any decoder 4573 ALU : S3; // any alu 4574 %} 4575 4576 // Long ALU reg operation 4577 pipe_class ialu_reg_long(eRegL dst) %{ 4578 instruction_count(2); 4579 dst : S4(write); 4580 dst : S3(read); 4581 DECODE : S0(2); // any 2 decoders 4582 ALU : S3(2); // both alus 4583 %} 4584 4585 // Integer ALU reg operation using big decoder 4586 pipe_class ialu_reg_fat(rRegI dst) %{ 4587 single_instruction; 4588 dst : S4(write); 4589 dst : S3(read); 4590 D0 : S0; // big decoder only 4591 ALU : S3; // any alu 4592 %} 4593 4594 // Long ALU reg operation using big decoder 4595 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4596 instruction_count(2); 4597 dst : S4(write); 4598 dst : S3(read); 4599 D0 : S0(2); // big decoder only; twice 4600 ALU : S3(2); // any 2 alus 4601 %} 4602 4603 // Integer ALU reg-reg operation 4604 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4605 single_instruction; 4606 dst : S4(write); 4607 src : S3(read); 4608 DECODE : S0; // any decoder 4609 ALU : S3; // any alu 4610 %} 4611 4612 // Long ALU reg-reg operation 4613 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4614 instruction_count(2); 4615 dst : S4(write); 4616 src : S3(read); 4617 DECODE : S0(2); // any 2 decoders 4618 ALU : S3(2); // both alus 4619 %} 4620 4621 // Integer ALU reg-reg operation 4622 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4623 single_instruction; 4624 dst : S4(write); 4625 src : S3(read); 4626 D0 : S0; // big decoder only 4627 ALU : S3; // any alu 4628 %} 4629 4630 // Long ALU reg-reg operation 4631 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4632 instruction_count(2); 4633 dst : S4(write); 4634 src : S3(read); 4635 D0 : S0(2); // big decoder only; twice 4636 ALU : S3(2); // both alus 4637 %} 4638 4639 // Integer ALU reg-mem operation 4640 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4641 single_instruction; 4642 dst : S5(write); 4643 mem : S3(read); 4644 D0 : S0; // big decoder only 4645 ALU : S4; // any alu 4646 MEM : S3; // any mem 4647 %} 4648 4649 // Long ALU reg-mem operation 4650 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4651 instruction_count(2); 4652 dst : S5(write); 4653 mem : S3(read); 4654 D0 : S0(2); // big decoder only; twice 4655 ALU : S4(2); // any 2 alus 4656 MEM : S3(2); // both mems 4657 %} 4658 4659 // Integer mem operation (prefetch) 4660 pipe_class ialu_mem(memory mem) 4661 %{ 4662 single_instruction; 4663 mem : S3(read); 4664 D0 : S0; // big decoder only 4665 MEM : S3; // any mem 4666 %} 4667 4668 // Integer Store to Memory 4669 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4670 single_instruction; 4671 mem : S3(read); 4672 src : S5(read); 4673 D0 : S0; // big decoder only 4674 ALU : S4; // any alu 4675 MEM : S3; 4676 %} 4677 4678 // Long Store to Memory 4679 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4680 instruction_count(2); 4681 mem : S3(read); 4682 src : S5(read); 4683 D0 : S0(2); // big decoder only; twice 4684 ALU : S4(2); // any 2 alus 4685 MEM : S3(2); // Both mems 4686 %} 4687 4688 // Integer Store to Memory 4689 pipe_class ialu_mem_imm(memory mem) %{ 4690 single_instruction; 4691 mem : S3(read); 4692 D0 : S0; // big decoder only 4693 ALU : S4; // any alu 4694 MEM : S3; 4695 %} 4696 4697 // Integer ALU0 reg-reg operation 4698 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4699 single_instruction; 4700 dst : S4(write); 4701 src : S3(read); 4702 D0 : S0; // Big decoder only 4703 ALU0 : S3; // only alu0 4704 %} 4705 4706 // Integer ALU0 reg-mem operation 4707 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4708 single_instruction; 4709 dst : S5(write); 4710 mem : S3(read); 4711 D0 : S0; // big decoder only 4712 ALU0 : S4; // ALU0 only 4713 MEM : S3; // any mem 4714 %} 4715 4716 // Integer ALU reg-reg operation 4717 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4718 single_instruction; 4719 cr : S4(write); 4720 src1 : S3(read); 4721 src2 : S3(read); 4722 DECODE : S0; // any decoder 4723 ALU : S3; // any alu 4724 %} 4725 4726 // Integer ALU reg-imm operation 4727 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4728 single_instruction; 4729 cr : S4(write); 4730 src1 : S3(read); 4731 DECODE : S0; // any decoder 4732 ALU : S3; // any alu 4733 %} 4734 4735 // Integer ALU reg-mem operation 4736 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4737 single_instruction; 4738 cr : S4(write); 4739 src1 : S3(read); 4740 src2 : S3(read); 4741 D0 : S0; // big decoder only 4742 ALU : S4; // any alu 4743 MEM : S3; 4744 %} 4745 4746 // Conditional move reg-reg 4747 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4748 instruction_count(4); 4749 y : S4(read); 4750 q : S3(read); 4751 p : S3(read); 4752 DECODE : S0(4); // any decoder 4753 %} 4754 4755 // Conditional move reg-reg 4756 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4757 single_instruction; 4758 dst : S4(write); 4759 src : S3(read); 4760 cr : S3(read); 4761 DECODE : S0; // any decoder 4762 %} 4763 4764 // Conditional move reg-mem 4765 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4766 single_instruction; 4767 dst : S4(write); 4768 src : S3(read); 4769 cr : S3(read); 4770 DECODE : S0; // any decoder 4771 MEM : S3; 4772 %} 4773 4774 // Conditional move reg-reg long 4775 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4776 single_instruction; 4777 dst : S4(write); 4778 src : S3(read); 4779 cr : S3(read); 4780 DECODE : S0(2); // any 2 decoders 4781 %} 4782 4783 // Conditional move double reg-reg 4784 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4785 single_instruction; 4786 dst : S4(write); 4787 src : S3(read); 4788 cr : S3(read); 4789 DECODE : S0; // any decoder 4790 %} 4791 4792 // Float reg-reg operation 4793 pipe_class fpu_reg(regDPR 
dst) %{
  instruction_count(2);
  dst    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation, two sources
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
  instruction_count(3);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  DECODE : S0(3);     // any 3 decoders
  FPU    : S3(2);
%}

// Float reg-reg operation, three sources
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S0(4);     // any 4 decoders
  FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S1(3);     // any 3 decoders
  D0     : S0;        // Big decoder only
  FPU    : S3(2);
  MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
  instruction_count(2);
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  DECODE : S1;        // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
  instruction_count(3);
  dst    : S5(write);
  src1   : S3(read);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  DECODE : S1(2);     // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2);
  src    : S5(read);
  mem    : S3(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S1;        // big decoder only
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// FPU op reading two FP registers plus a memory operand.
pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  mem    : S3(read);
  DECODE : S0(2);     // any decoder for FPU PUSH
  D0     : S1;        // big decoder only
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// FPU op reading one FP register and two memory operands.
pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  mem    : S4(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S0(2);     // big decoder only
  FPU    : S4;
  MEM    : S3(2);     // any mem
%}

// Memory-to-memory FP move; no FPU stage is consumed.
pipe_class fpu_mem_mem(memory dst, memory src1) %{
  instruction_count(2);
  src1   : S3(read);
  dst    : S4(read);
  D0     : S0(2);     // big decoder only
  MEM    : S3(2);     // any mem
%}

// FP op with two memory sources and a memory destination.
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  dst    : S4(read);
  D0     : S0(3);     // big decoder only
  FPU    : S4;
  MEM    : S3(3);     // any mem
%}

// FP op combining a register, a memory operand and a constant.
pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
  instruction_count(3);
  src1   : S4(read);
  mem    : S4(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S0(2);     // big decoder only
  FPU    : S4;
  MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
  instruction_count(2);
  dst    : S5(write);
  D0     : S0;        // big decoder only for the load
  DECODE : S1;        // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
  instruction_count(3);
  dst    : S5(write);
  src    : S3(read);
  D0     : S0;        // big decoder only for the load
  DECODE : S1(2);     // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
  single_instruction;
  BR : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
  single_instruction;
  cr : S1(read);
  BR : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
  instruction_count(1); force_serialization;
  fixed_latency(6);
  heap_ptr : S3(read);
  DECODE   : S0(3);
  D0       : S2;
  MEM      : S3;
  ALU      : S3(2);
  dst      : S5(write);
  BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
  instruction_count(10); multiple_bundles; force_serialization;
  fixed_latency(100);
  D0  : S0(2);
  MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
  instruction_count(0);
%}

// Define the class for the Nop node
define %{
  MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters.  The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Long byte reversal: swap bytes within each half, then exchange halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// BSWAP reverses all 32 bits; the logical shift brings the reversed
// 16-bit value down zero-extended.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Signed variant: arithmetic shift sign-extends the reversed short.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when LZCNT is unavailable.  BSR leaves dst undefined for a
// zero source, so that case is patched to -1; then nlz = 31 - bsr.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

// LZCNT sets CF when its source is zero; carry-clear here means the
// high word was non-zero and already gave the answer.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// BSR fallback for the long case: scan high word first, fall back to
// the low word, and map an all-zero input to 64 via the -1 patch.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// BSF fallback: BSF leaves dst undefined for a zero source, so the
// zero case is patched to 32 explicitly.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// TZCNT sets CF when its source is zero; carry-clear means the low
// word was non-zero and already produced the final count.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// BSF fallback for the long case; an all-zero input yields 32 + 32 = 64.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Long popcount on 32-bit: count each half separately and sum.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Build the two word addresses by hand; the second is at disp+4.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // high word of a ubyte is always zero
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 mask bits matter after the zero-extending byte load.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    // The shift pair is equivalent to sign-extending the low byte.
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // The 0xFF mask folds into a zero-extending byte load; no AND needed.
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 mask bits matter after the zero-extending word load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // replicate the sign bit into the high word
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two separate 32-bit loads; this form is only legal when the load
    // does not require atomic access (see the predicate above).
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via a 64-bit XMM move, spilled to a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load straight into a GPR pair: one 64-bit XMM
// load, then extract the two 32-bit halves.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Float
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);               /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);               /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Long constant: two 32-bit immediate moves.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Long constant zero: XOR both halves (shorter encoding than MOV 0).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    // FLDZ pushes +0.0 directly; no constant-table access needed.
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    // FLD1 pushes +1.0 directly; no constant-table access needed.
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
// Load a double constant from the constant table via the x87 stack.
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
// Materialize double 0.0 with FLDZ instead of a constant-table load.
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
// Materialize double 1.0 with FLD1 instead of a constant-table load.
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
// SSE2 variant: load double constant straight into an XMM register.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// XMM zero idiom for double 0.0: XORPD reg with itself.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load long stack slot: two 32-bit loads for the lo and hi halves.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load float stack slot onto the x87 stack, then pop into dst.
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load double stack slot onto the x87 stack, then pop into dst.
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

// No prefetch without SSE (unless PREFETCHW was explicitly requested):
// emit nothing.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short (16-bit operand-size prefix + 32-bit MOV opcode)
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long (non-atomic: two 32-bit stores; atomic case handled below)
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer (only the low 32-bit half is written)
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: a single 64-bit MOVSD gives the required atomicity.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// As above, but the source is a long register pair: pack lo/hi halves
// into one XMM register with PUNPCKLDQ before the atomic 64-bit store.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87; source must already be on top of the FP stack)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Copy double: regular XMM register to vector-leg register
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Copy double: vector-leg register to regular XMM register
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Copy float: regular XMM register to vector-leg register
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Copy float: vector-leg register to regular XMM register
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store Float (x87; source must already be on top of the FP stack)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// Fold the double->float conversion into the 32-bit store itself.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot (two 32-bit stores for the lo and hi halves)
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// Acquire is a no-op on x86 (loads are not reordered with other loads).
instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release is a no-op on x86 (stores are not reordered with other stores).
instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier: the locked ADD-to-stack idiom is cheaper than
// MFENCE on most x86 implementations. Clobbers flags, hence KILL cr.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elide the barrier when a following locked instruction already
// provides the StoreLoad ordering.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P is a no-op: int and pointer share the same register (EAX here).
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// Pre-P6 fallback: emulate CMOV with a short branch around a MOV.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
// x87 FCMOV only tests the unsigned condition codes, hence cmpOp_fcmov.
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Emulate the signed form with a branch around a stack move.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Emulate signed float CMOV with a branch around a stack move.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
// NOTE(review): format text says "# float" but this moves a double —
// disassembly annotation only; encoding is correct (movdbl).
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long cmov: one CMOV per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// One-byte INC for +1 (guarded by UseIncDec since INC/DEC can cause
// partial-flags stalls on some processors).
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: no flags clobbered, dst may differ from src0.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// One-byte DEC for -1 (see incI_eReg for the UseIncDec guard rationale).
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write add directly in memory.
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// Compiler-only type refinements: no code is emitted for any of the
// cast nodes below; they exist to carry type information in the IR.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castLL( eRegL dst ) %{
  match(Set dst (CastLL dst));
  format %{ "#castLL of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castFF( regF dst ) %{
  predicate(UseSSE >= 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castDD( regD dst ) %{
  predicate(UseSSE >= 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty
encoding*/ ); 7207 ins_cost(0); 7208 ins_pipe( empty ); 7209 %} 7210 7211 instruct castFF_PR( regFPR dst ) %{ 7212 predicate(UseSSE < 1); 7213 match(Set dst (CastFF dst)); 7214 format %{ "#castFF of $dst" %} 7215 ins_encode( /*empty encoding*/ ); 7216 ins_cost(0); 7217 ins_pipe( empty ); 7218 %} 7219 7220 instruct castDD_PR( regDPR dst ) %{ 7221 predicate(UseSSE < 2); 7222 match(Set dst (CastDD dst)); 7223 format %{ "#castDD of $dst" %} 7224 ins_encode( /*empty encoding*/ ); 7225 ins_cost(0); 7226 ins_pipe( empty ); 7227 %} 7228 7229 // Load-locked - same as a regular pointer load when used with compare-swap 7230 instruct loadPLocked(eRegP dst, memory mem) %{ 7231 match(Set dst (LoadPLocked mem)); 7232 7233 ins_cost(125); 7234 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7235 opcode(0x8B); 7236 ins_encode( OpcP, RegMem(dst,mem)); 7237 ins_pipe( ialu_reg_mem ); 7238 %} 7239 7240 // Conditional-store of the updated heap-top. 7241 // Used during allocation of the shared heap. 7242 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7243 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7244 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7245 // EAX is killed if there is contention, but then it's also unused. 7246 // In the common case of no contention, EAX holds the new oop address. 7247 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7248 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7249 ins_pipe( pipe_cmpxchg ); 7250 %} 7251 7252 // Conditional-store of an int value. 7253 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 
// LOCK CMPXCHG of an int; only the resulting flags are used.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high order word of the new value to store but
    //       our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS via CMPXCHG8B; res is 1 on success, 0 on failure.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS; res is 1 on success, 0 on failure.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte CAS; res is 1 on success, 0 on failure.
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Short CAS; res is 1 on success, 0 on failure.
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS; res is 1 on success, 0 on failure.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// 64-bit compare-and-exchange: oldval is updated in place with the
// value found in memory (CMPXCHG8B semantics).
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer compare-and-exchange; oldval receives the memory value.
instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte compare-and-exchange; oldval receives the memory value.
instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Short compare-and-exchange; oldval receives the memory value.
instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Int compare-and-exchange; oldval receives the memory value.
instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Atomic byte add where the fetched result is unused: plain locked ADD.
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic short add where the fetched result is unused: plain locked ADD.
instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add of a short.
instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic int add where the fetched result is unused: plain locked ADD.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add of an int.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
// Atomic exchange of a byte (XCHG is implicitly locked).
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange of a short.
instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange of an int.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange of a pointer.
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions

// Subtract register from register; flags are clobbered.
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract immediate from register.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05);  /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract a value loaded from memory from a register.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write: subtract a register from a memory word.
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negate: 0 - dst matched to the one-operand NEG form.
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03);  // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low word of EDX:EAX only;
// feeds the high-multiply patterns below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
//  (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Predicate checks that the long multiplicand is really a 32-bit
  // constant, so the 32x32->64 multiply applies.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Predicate checks that the long multiplicand is really a 32-bit
  // constant, so the 32x32->64 multiply applies.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Fused multiply-add-sum: dst = dst*src1 + src2*src3, built from the
// existing mul/add rules by expansion.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
           mulI_eReg(src2, src3, cr);
           addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned 32x32->64 multiply of two zero-extended ints.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
// Basic idea: lo(result) = lo(src * EAX)
//             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// Special-cases EAX == 0x80000000 / ECX == -1 (the only overflowing
// IDIV combination) before doing the plain CDQ/IDIV sequence.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
ins_pipe( ialu_reg_reg_alu0 ); 7811 %} 7812 7813 // Divide Register Long 7814 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7815 match(Set dst (DivL src1 src2)); 7816 effect(CALL); 7817 ins_cost(10000); 7818 format %{ "PUSH $src1.hi\n\t" 7819 "PUSH $src1.lo\n\t" 7820 "PUSH $src2.hi\n\t" 7821 "PUSH $src2.lo\n\t" 7822 "CALL SharedRuntime::ldiv\n\t" 7823 "ADD ESP,16" %} 7824 ins_encode( long_div(src1,src2) ); 7825 ins_pipe( pipe_slow ); 7826 %} 7827 7828 // Integer DIVMOD with Register, both quotient and mod results 7829 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7830 match(DivModI rax div); 7831 effect(KILL cr); 7832 size(26); 7833 ins_cost(30*100+10*100); 7834 format %{ "CMP EAX,0x80000000\n\t" 7835 "JNE,s normal\n\t" 7836 "XOR EDX,EDX\n\t" 7837 "CMP ECX,-1\n\t" 7838 "JE,s done\n" 7839 "normal: CDQ\n\t" 7840 "IDIV $div\n\t" 7841 "done:" %} 7842 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7843 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7844 ins_pipe( pipe_slow ); 7845 %} 7846 7847 // Integer MOD with Register 7848 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7849 match(Set rdx (ModI rax div)); 7850 effect(KILL rax, KILL cr); 7851 7852 size(26); 7853 ins_cost(300); 7854 format %{ "CDQ\n\t" 7855 "IDIV $div" %} 7856 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7857 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7858 ins_pipe( ialu_reg_reg_alu0 ); 7859 %} 7860 7861 // Remainder Register Long 7862 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7863 match(Set dst (ModL src1 src2)); 7864 effect(CALL); 7865 ins_cost(10000); 7866 format %{ "PUSH $src1.hi\n\t" 7867 "PUSH $src1.lo\n\t" 7868 "PUSH $src2.hi\n\t" 7869 "PUSH $src2.lo\n\t" 7870 "CALL SharedRuntime::lrem\n\t" 7871 "ADD ESP,16" %} 7872 ins_encode( long_mod(src1,src2) ); 7873 ins_pipe( pipe_slow ); 7874 %} 7875 7876 // Divide Register Long (no special case since divisor != -1) 7877 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI 
tmp, rRegI tmp2, eFlagsReg cr ) %{ 7878 match(Set dst (DivL dst imm)); 7879 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7880 ins_cost(1000); 7881 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7882 "XOR $tmp2,$tmp2\n\t" 7883 "CMP $tmp,EDX\n\t" 7884 "JA,s fast\n\t" 7885 "MOV $tmp2,EAX\n\t" 7886 "MOV EAX,EDX\n\t" 7887 "MOV EDX,0\n\t" 7888 "JLE,s pos\n\t" 7889 "LNEG EAX : $tmp2\n\t" 7890 "DIV $tmp # unsigned division\n\t" 7891 "XCHG EAX,$tmp2\n\t" 7892 "DIV $tmp\n\t" 7893 "LNEG $tmp2 : EAX\n\t" 7894 "JMP,s done\n" 7895 "pos:\n\t" 7896 "DIV $tmp\n\t" 7897 "XCHG EAX,$tmp2\n" 7898 "fast:\n\t" 7899 "DIV $tmp\n" 7900 "done:\n\t" 7901 "MOV EDX,$tmp2\n\t" 7902 "NEG EDX:EAX # if $imm < 0" %} 7903 ins_encode %{ 7904 int con = (int)$imm$$constant; 7905 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7906 int pcon = (con > 0) ? con : -con; 7907 Label Lfast, Lpos, Ldone; 7908 7909 __ movl($tmp$$Register, pcon); 7910 __ xorl($tmp2$$Register,$tmp2$$Register); 7911 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7912 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7913 7914 __ movl($tmp2$$Register, $dst$$Register); // save 7915 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7916 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7917 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7918 7919 // Negative dividend. 
7920 // convert value to positive to use unsigned division 7921 __ lneg($dst$$Register, $tmp2$$Register); 7922 __ divl($tmp$$Register); 7923 __ xchgl($dst$$Register, $tmp2$$Register); 7924 __ divl($tmp$$Register); 7925 // revert result back to negative 7926 __ lneg($tmp2$$Register, $dst$$Register); 7927 __ jmpb(Ldone); 7928 7929 __ bind(Lpos); 7930 __ divl($tmp$$Register); // Use unsigned division 7931 __ xchgl($dst$$Register, $tmp2$$Register); 7932 // Fallthrow for final divide, tmp2 has 32 bit hi result 7933 7934 __ bind(Lfast); 7935 // fast path: src is positive 7936 __ divl($tmp$$Register); // Use unsigned division 7937 7938 __ bind(Ldone); 7939 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7940 if (con < 0) { 7941 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7942 } 7943 %} 7944 ins_pipe( pipe_slow ); 7945 %} 7946 7947 // Remainder Register Long (remainder fit into 32 bits) 7948 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7949 match(Set dst (ModL dst imm)); 7950 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7951 ins_cost(1000); 7952 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7953 "CMP $tmp,EDX\n\t" 7954 "JA,s fast\n\t" 7955 "MOV $tmp2,EAX\n\t" 7956 "MOV EAX,EDX\n\t" 7957 "MOV EDX,0\n\t" 7958 "JLE,s pos\n\t" 7959 "LNEG EAX : $tmp2\n\t" 7960 "DIV $tmp # unsigned division\n\t" 7961 "MOV EAX,$tmp2\n\t" 7962 "DIV $tmp\n\t" 7963 "NEG EDX\n\t" 7964 "JMP,s done\n" 7965 "pos:\n\t" 7966 "DIV $tmp\n\t" 7967 "MOV EAX,$tmp2\n" 7968 "fast:\n\t" 7969 "DIV $tmp\n" 7970 "done:\n\t" 7971 "MOV EAX,EDX\n\t" 7972 "SAR EDX,31\n\t" %} 7973 ins_encode %{ 7974 int con = (int)$imm$$constant; 7975 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7976 int pcon = (con > 0) ? 
con : -con; 7977 Label Lfast, Lpos, Ldone; 7978 7979 __ movl($tmp$$Register, pcon); 7980 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7981 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7982 7983 __ movl($tmp2$$Register, $dst$$Register); // save 7984 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7985 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7986 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7987 7988 // Negative dividend. 7989 // convert value to positive to use unsigned division 7990 __ lneg($dst$$Register, $tmp2$$Register); 7991 __ divl($tmp$$Register); 7992 __ movl($dst$$Register, $tmp2$$Register); 7993 __ divl($tmp$$Register); 7994 // revert remainder back to negative 7995 __ negl(HIGH_FROM_LOW($dst$$Register)); 7996 __ jmpb(Ldone); 7997 7998 __ bind(Lpos); 7999 __ divl($tmp$$Register); 8000 __ movl($dst$$Register, $tmp2$$Register); 8001 8002 __ bind(Lfast); 8003 // fast path: src is positive 8004 __ divl($tmp$$Register); 8005 8006 __ bind(Ldone); 8007 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 8008 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 8009 8010 %} 8011 ins_pipe( pipe_slow ); 8012 %} 8013 8014 // Integer Shift Instructions 8015 // Shift Left by one 8016 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8017 match(Set dst (LShiftI dst shift)); 8018 effect(KILL cr); 8019 8020 size(2); 8021 format %{ "SHL $dst,$shift" %} 8022 opcode(0xD1, 0x4); /* D1 /4 */ 8023 ins_encode( OpcP, RegOpc( dst ) ); 8024 ins_pipe( ialu_reg ); 8025 %} 8026 8027 // Shift Left by 8-bit immediate 8028 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8029 match(Set dst (LShiftI dst shift)); 8030 effect(KILL cr); 8031 8032 size(3); 8033 format %{ "SHL $dst,$shift" %} 8034 opcode(0xC1, 0x4); /* C1 /4 ib */ 8035 ins_encode( RegOpcImm( dst, shift) ); 8036 ins_pipe( ialu_reg ); 8037 %} 8038 8039 // Shift Left by variable 8040 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
// Memory form: read-modify-write SAR directly on the memory operand.
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  // NOTE(review): register form but pipe class is ialu_mem_imm (siblings use
  // ialu_reg) — looks copy-pasted from the memory form; confirm intent.
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
// Memory form: read-modify-write SAR directly on the memory operand.
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
// Shift count must be in CL (eCXRegI), as required by the D3 /7 encoding.
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Matched as a single MOVSX (sign-extend byte to dword).
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
8148 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ 8149 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); 8150 8151 size(3); 8152 format %{ "MOVSX $dst,$src :16" %} 8153 ins_encode %{ 8154 __ movswl($dst$$Register, $src$$Register); 8155 %} 8156 ins_pipe(ialu_reg_reg); 8157 %} 8158 8159 8160 // Logical Shift Right by variable 8161 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8162 match(Set dst (URShiftI dst shift)); 8163 effect(KILL cr); 8164 8165 size(2); 8166 format %{ "SHR $dst,$shift" %} 8167 opcode(0xD3, 0x5); /* D3 /5 */ 8168 ins_encode( OpcP, RegOpc( dst ) ); 8169 ins_pipe( ialu_reg_reg ); 8170 %} 8171 8172 8173 //----------Logical Instructions----------------------------------------------- 8174 //----------Integer Logical Instructions--------------------------------------- 8175 // And Instructions 8176 // And Register with Register 8177 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8178 match(Set dst (AndI dst src)); 8179 effect(KILL cr); 8180 8181 size(2); 8182 format %{ "AND $dst,$src" %} 8183 opcode(0x23); 8184 ins_encode( OpcP, RegReg( dst, src) ); 8185 ins_pipe( ialu_reg_reg ); 8186 %} 8187 8188 // And Register with Immediate 8189 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8190 match(Set dst (AndI dst src)); 8191 effect(KILL cr); 8192 8193 format %{ "AND $dst,$src" %} 8194 opcode(0x81,0x04); /* Opcode 81 /4 */ 8195 // ins_encode( RegImm( dst, src) ); 8196 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8197 ins_pipe( ialu_reg ); 8198 %} 8199 8200 // And Register with Memory 8201 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8202 match(Set dst (AndI dst (LoadI src))); 8203 effect(KILL cr); 8204 8205 ins_cost(125); 8206 format %{ "AND $dst,$src" %} 8207 opcode(0x23); 8208 ins_encode( OpcP, RegMem( dst, src) ); 8209 ins_pipe( ialu_reg_mem ); 8210 %} 8211 8212 // And Memory with Register 8213 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8214 match(Set dst 
(StoreI dst (AndI (LoadI dst) src))); 8215 effect(KILL cr); 8216 8217 ins_cost(150); 8218 format %{ "AND $dst,$src" %} 8219 opcode(0x21); /* Opcode 21 /r */ 8220 ins_encode( OpcP, RegMem( src, dst ) ); 8221 ins_pipe( ialu_mem_reg ); 8222 %} 8223 8224 // And Memory with Immediate 8225 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8226 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8227 effect(KILL cr); 8228 8229 ins_cost(125); 8230 format %{ "AND $dst,$src" %} 8231 opcode(0x81, 0x4); /* Opcode 81 /4 id */ 8232 // ins_encode( MemImm( dst, src) ); 8233 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8234 ins_pipe( ialu_mem_imm ); 8235 %} 8236 8237 // BMI1 instructions 8238 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ 8239 match(Set dst (AndI (XorI src1 minus_1) src2)); 8240 predicate(UseBMI1Instructions); 8241 effect(KILL cr); 8242 8243 format %{ "ANDNL $dst, $src1, $src2" %} 8244 8245 ins_encode %{ 8246 __ andnl($dst$$Register, $src1$$Register, $src2$$Register); 8247 %} 8248 ins_pipe(ialu_reg); 8249 %} 8250 8251 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ 8252 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); 8253 predicate(UseBMI1Instructions); 8254 effect(KILL cr); 8255 8256 ins_cost(125); 8257 format %{ "ANDNL $dst, $src1, $src2" %} 8258 8259 ins_encode %{ 8260 __ andnl($dst$$Register, $src1$$Register, $src2$$Address); 8261 %} 8262 ins_pipe(ialu_reg_mem); 8263 %} 8264 8265 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{ 8266 match(Set dst (AndI (SubI imm_zero src) src)); 8267 predicate(UseBMI1Instructions); 8268 effect(KILL cr); 8269 8270 format %{ "BLSIL $dst, $src" %} 8271 8272 ins_encode %{ 8273 __ blsil($dst$$Register, $src$$Register); 8274 %} 8275 ins_pipe(ialu_reg); 8276 %} 8277 8278 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{ 8279 
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8280 predicate(UseBMI1Instructions); 8281 effect(KILL cr); 8282 8283 ins_cost(125); 8284 format %{ "BLSIL $dst, $src" %} 8285 8286 ins_encode %{ 8287 __ blsil($dst$$Register, $src$$Address); 8288 %} 8289 ins_pipe(ialu_reg_mem); 8290 %} 8291 8292 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8293 %{ 8294 match(Set dst (XorI (AddI src minus_1) src)); 8295 predicate(UseBMI1Instructions); 8296 effect(KILL cr); 8297 8298 format %{ "BLSMSKL $dst, $src" %} 8299 8300 ins_encode %{ 8301 __ blsmskl($dst$$Register, $src$$Register); 8302 %} 8303 8304 ins_pipe(ialu_reg); 8305 %} 8306 8307 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8308 %{ 8309 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8310 predicate(UseBMI1Instructions); 8311 effect(KILL cr); 8312 8313 ins_cost(125); 8314 format %{ "BLSMSKL $dst, $src" %} 8315 8316 ins_encode %{ 8317 __ blsmskl($dst$$Register, $src$$Address); 8318 %} 8319 8320 ins_pipe(ialu_reg_mem); 8321 %} 8322 8323 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8324 %{ 8325 match(Set dst (AndI (AddI src minus_1) src) ); 8326 predicate(UseBMI1Instructions); 8327 effect(KILL cr); 8328 8329 format %{ "BLSRL $dst, $src" %} 8330 8331 ins_encode %{ 8332 __ blsrl($dst$$Register, $src$$Register); 8333 %} 8334 8335 ins_pipe(ialu_reg); 8336 %} 8337 8338 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8339 %{ 8340 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8341 predicate(UseBMI1Instructions); 8342 effect(KILL cr); 8343 8344 ins_cost(125); 8345 format %{ "BLSRL $dst, $src" %} 8346 8347 ins_encode %{ 8348 __ blsrl($dst$$Register, $src$$Address); 8349 %} 8350 8351 ins_pipe(ialu_reg_mem); 8352 %} 8353 8354 // Or Instructions 8355 // Or Register with Register 8356 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8357 match(Set dst 
(OrI dst src)); 8358 effect(KILL cr); 8359 8360 size(2); 8361 format %{ "OR $dst,$src" %} 8362 opcode(0x0B); 8363 ins_encode( OpcP, RegReg( dst, src) ); 8364 ins_pipe( ialu_reg_reg ); 8365 %} 8366 8367 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8368 match(Set dst (OrI dst (CastP2X src))); 8369 effect(KILL cr); 8370 8371 size(2); 8372 format %{ "OR $dst,$src" %} 8373 opcode(0x0B); 8374 ins_encode( OpcP, RegReg( dst, src) ); 8375 ins_pipe( ialu_reg_reg ); 8376 %} 8377 8378 8379 // Or Register with Immediate 8380 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8381 match(Set dst (OrI dst src)); 8382 effect(KILL cr); 8383 8384 format %{ "OR $dst,$src" %} 8385 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8386 // ins_encode( RegImm( dst, src) ); 8387 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8388 ins_pipe( ialu_reg ); 8389 %} 8390 8391 // Or Register with Memory 8392 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8393 match(Set dst (OrI dst (LoadI src))); 8394 effect(KILL cr); 8395 8396 ins_cost(125); 8397 format %{ "OR $dst,$src" %} 8398 opcode(0x0B); 8399 ins_encode( OpcP, RegMem( dst, src) ); 8400 ins_pipe( ialu_reg_mem ); 8401 %} 8402 8403 // Or Memory with Register 8404 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8405 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8406 effect(KILL cr); 8407 8408 ins_cost(150); 8409 format %{ "OR $dst,$src" %} 8410 opcode(0x09); /* Opcode 09 /r */ 8411 ins_encode( OpcP, RegMem( src, dst ) ); 8412 ins_pipe( ialu_mem_reg ); 8413 %} 8414 8415 // Or Memory with Immediate 8416 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8417 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8418 effect(KILL cr); 8419 8420 ins_cost(125); 8421 format %{ "OR $dst,$src" %} 8422 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8423 // ins_encode( MemImm( dst, src) ); 8424 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8425 ins_pipe( ialu_mem_imm ); 
8426 %} 8427 8428 // ROL/ROR 8429 // ROL expand 8430 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8431 effect(USE_DEF dst, USE shift, KILL cr); 8432 8433 format %{ "ROL $dst, $shift" %} 8434 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8435 ins_encode( OpcP, RegOpc( dst )); 8436 ins_pipe( ialu_reg ); 8437 %} 8438 8439 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8440 effect(USE_DEF dst, USE shift, KILL cr); 8441 8442 format %{ "ROL $dst, $shift" %} 8443 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8444 ins_encode( RegOpcImm(dst, shift) ); 8445 ins_pipe(ialu_reg); 8446 %} 8447 8448 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8449 effect(USE_DEF dst, USE shift, KILL cr); 8450 8451 format %{ "ROL $dst, $shift" %} 8452 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8453 ins_encode(OpcP, RegOpc(dst)); 8454 ins_pipe( ialu_reg_reg ); 8455 %} 8456 // end of ROL expand 8457 8458 // ROL 32bit by one once 8459 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8460 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8461 8462 expand %{ 8463 rolI_eReg_imm1(dst, lshift, cr); 8464 %} 8465 %} 8466 8467 // ROL 32bit var by imm8 once 8468 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8469 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8470 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8471 8472 expand %{ 8473 rolI_eReg_imm8(dst, lshift, cr); 8474 %} 8475 %} 8476 8477 // ROL 32bit var by var once 8478 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8479 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8480 8481 expand %{ 8482 rolI_eReg_CL(dst, shift, cr); 8483 %} 8484 %} 8485 8486 // ROL 32bit var by var once 8487 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8488 match(Set dst ( OrI (LShiftI dst shift) 
(URShiftI dst (SubI c32 shift)))); 8489 8490 expand %{ 8491 rolI_eReg_CL(dst, shift, cr); 8492 %} 8493 %} 8494 8495 // ROR expand 8496 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8497 effect(USE_DEF dst, USE shift, KILL cr); 8498 8499 format %{ "ROR $dst, $shift" %} 8500 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8501 ins_encode( OpcP, RegOpc( dst ) ); 8502 ins_pipe( ialu_reg ); 8503 %} 8504 8505 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8506 effect (USE_DEF dst, USE shift, KILL cr); 8507 8508 format %{ "ROR $dst, $shift" %} 8509 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8510 ins_encode( RegOpcImm(dst, shift) ); 8511 ins_pipe( ialu_reg ); 8512 %} 8513 8514 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8515 effect(USE_DEF dst, USE shift, KILL cr); 8516 8517 format %{ "ROR $dst, $shift" %} 8518 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8519 ins_encode(OpcP, RegOpc(dst)); 8520 ins_pipe( ialu_reg_reg ); 8521 %} 8522 // end of ROR expand 8523 8524 // ROR right once 8525 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8526 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8527 8528 expand %{ 8529 rorI_eReg_imm1(dst, rshift, cr); 8530 %} 8531 %} 8532 8533 // ROR 32bit by immI8 once 8534 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8535 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8536 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8537 8538 expand %{ 8539 rorI_eReg_imm8(dst, rshift, cr); 8540 %} 8541 %} 8542 8543 // ROR 32bit var by var once 8544 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8545 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8546 8547 expand %{ 8548 rorI_eReg_CL(dst, shift, cr); 8549 %} 8550 %} 8551 8552 // ROR 32bit var by var once 8553 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, 
immI_32 c32, eFlagsReg cr) %{ 8554 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8555 8556 expand %{ 8557 rorI_eReg_CL(dst, shift, cr); 8558 %} 8559 %} 8560 8561 // Xor Instructions 8562 // Xor Register with Register 8563 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8564 match(Set dst (XorI dst src)); 8565 effect(KILL cr); 8566 8567 size(2); 8568 format %{ "XOR $dst,$src" %} 8569 opcode(0x33); 8570 ins_encode( OpcP, RegReg( dst, src) ); 8571 ins_pipe( ialu_reg_reg ); 8572 %} 8573 8574 // Xor Register with Immediate -1 8575 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8576 match(Set dst (XorI dst imm)); 8577 8578 size(2); 8579 format %{ "NOT $dst" %} 8580 ins_encode %{ 8581 __ notl($dst$$Register); 8582 %} 8583 ins_pipe( ialu_reg ); 8584 %} 8585 8586 // Xor Register with Immediate 8587 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8588 match(Set dst (XorI dst src)); 8589 effect(KILL cr); 8590 8591 format %{ "XOR $dst,$src" %} 8592 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8593 // ins_encode( RegImm( dst, src) ); 8594 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8595 ins_pipe( ialu_reg ); 8596 %} 8597 8598 // Xor Register with Memory 8599 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8600 match(Set dst (XorI dst (LoadI src))); 8601 effect(KILL cr); 8602 8603 ins_cost(125); 8604 format %{ "XOR $dst,$src" %} 8605 opcode(0x33); 8606 ins_encode( OpcP, RegMem(dst, src) ); 8607 ins_pipe( ialu_reg_mem ); 8608 %} 8609 8610 // Xor Memory with Register 8611 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8612 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8613 effect(KILL cr); 8614 8615 ins_cost(150); 8616 format %{ "XOR $dst,$src" %} 8617 opcode(0x31); /* Opcode 31 /r */ 8618 ins_encode( OpcP, RegMem( src, dst ) ); 8619 ins_pipe( ialu_mem_reg ); 8620 %} 8621 8622 // Xor Memory with Immediate 8623 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8624 match(Set 
dst (StoreI dst (XorI (LoadI dst) src))); 8625 effect(KILL cr); 8626 8627 ins_cost(125); 8628 format %{ "XOR $dst,$src" %} 8629 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8630 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8631 ins_pipe( ialu_mem_imm ); 8632 %} 8633 8634 //----------Convert Int to Boolean--------------------------------------------- 8635 8636 instruct movI_nocopy(rRegI dst, rRegI src) %{ 8637 effect( DEF dst, USE src ); 8638 format %{ "MOV $dst,$src" %} 8639 ins_encode( enc_Copy( dst, src) ); 8640 ins_pipe( ialu_reg_reg ); 8641 %} 8642 8643 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8644 effect( USE_DEF dst, USE src, KILL cr ); 8645 8646 size(4); 8647 format %{ "NEG $dst\n\t" 8648 "ADC $dst,$src" %} 8649 ins_encode( neg_reg(dst), 8650 OpcRegReg(0x13,dst,src) ); 8651 ins_pipe( ialu_reg_reg_long ); 8652 %} 8653 8654 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8655 match(Set dst (Conv2B src)); 8656 8657 expand %{ 8658 movI_nocopy(dst,src); 8659 ci2b(dst,src,cr); 8660 %} 8661 %} 8662 8663 instruct movP_nocopy(rRegI dst, eRegP src) %{ 8664 effect( DEF dst, USE src ); 8665 format %{ "MOV $dst,$src" %} 8666 ins_encode( enc_Copy( dst, src) ); 8667 ins_pipe( ialu_reg_reg ); 8668 %} 8669 8670 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8671 effect( USE_DEF dst, USE src, KILL cr ); 8672 format %{ "NEG $dst\n\t" 8673 "ADC $dst,$src" %} 8674 ins_encode( neg_reg(dst), 8675 OpcRegReg(0x13,dst,src) ); 8676 ins_pipe( ialu_reg_reg_long ); 8677 %} 8678 8679 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8680 match(Set dst (Conv2B src)); 8681 8682 expand %{ 8683 movP_nocopy(dst,src); 8684 cp2b(dst,src,cr); 8685 %} 8686 %} 8687 8688 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8689 match(Set dst (CmpLTMask p q)); 8690 effect(KILL cr); 8691 ins_cost(400); 8692 8693 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8694 format %{ "XOR $dst,$dst\n\t" 8695 
"CMP $p,$q\n\t" 8696 "SETlt $dst\n\t" 8697 "NEG $dst" %} 8698 ins_encode %{ 8699 Register Rp = $p$$Register; 8700 Register Rq = $q$$Register; 8701 Register Rd = $dst$$Register; 8702 Label done; 8703 __ xorl(Rd, Rd); 8704 __ cmpl(Rp, Rq); 8705 __ setb(Assembler::less, Rd); 8706 __ negl(Rd); 8707 %} 8708 8709 ins_pipe(pipe_slow); 8710 %} 8711 8712 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{ 8713 match(Set dst (CmpLTMask dst zero)); 8714 effect(DEF dst, KILL cr); 8715 ins_cost(100); 8716 8717 format %{ "SAR $dst,31\t# cmpLTMask0" %} 8718 ins_encode %{ 8719 __ sarl($dst$$Register, 31); 8720 %} 8721 ins_pipe(ialu_reg); 8722 %} 8723 8724 /* better to save a register than avoid a branch */ 8725 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8726 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 8727 effect(KILL cr); 8728 ins_cost(400); 8729 format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" 8730 "JGE done\n\t" 8731 "ADD $p,$y\n" 8732 "done: " %} 8733 ins_encode %{ 8734 Register Rp = $p$$Register; 8735 Register Rq = $q$$Register; 8736 Register Ry = $y$$Register; 8737 Label done; 8738 __ subl(Rp, Rq); 8739 __ jccb(Assembler::greaterEqual, done); 8740 __ addl(Rp, Ry); 8741 __ bind(done); 8742 %} 8743 8744 ins_pipe(pipe_cmplt); 8745 %} 8746 8747 /* better to save a register than avoid a branch */ 8748 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8749 match(Set y (AndI (CmpLTMask p q) y)); 8750 effect(KILL cr); 8751 8752 ins_cost(300); 8753 8754 format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" 8755 "JLT done\n\t" 8756 "XORL $y, $y\n" 8757 "done: " %} 8758 ins_encode %{ 8759 Register Rp = $p$$Register; 8760 Register Rq = $q$$Register; 8761 Register Ry = $y$$Register; 8762 Label done; 8763 __ cmpl(Rp, Rq); 8764 __ jccb(Assembler::less, done); 8765 __ xorl(Ry, Ry); 8766 __ bind(done); 8767 %} 8768 8769 ins_pipe(pipe_cmplt); 8770 %} 8771 8772 /* If I enable this, I encourage spilling in the inner loop of compress. 
8773 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8774 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8775 */ 8776 //----------Overflow Math Instructions----------------------------------------- 8777 8778 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8779 %{ 8780 match(Set cr (OverflowAddI op1 op2)); 8781 effect(DEF cr, USE_KILL op1, USE op2); 8782 8783 format %{ "ADD $op1, $op2\t# overflow check int" %} 8784 8785 ins_encode %{ 8786 __ addl($op1$$Register, $op2$$Register); 8787 %} 8788 ins_pipe(ialu_reg_reg); 8789 %} 8790 8791 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) 8792 %{ 8793 match(Set cr (OverflowAddI op1 op2)); 8794 effect(DEF cr, USE_KILL op1, USE op2); 8795 8796 format %{ "ADD $op1, $op2\t# overflow check int" %} 8797 8798 ins_encode %{ 8799 __ addl($op1$$Register, $op2$$constant); 8800 %} 8801 ins_pipe(ialu_reg_reg); 8802 %} 8803 8804 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) 8805 %{ 8806 match(Set cr (OverflowSubI op1 op2)); 8807 8808 format %{ "CMP $op1, $op2\t# overflow check int" %} 8809 ins_encode %{ 8810 __ cmpl($op1$$Register, $op2$$Register); 8811 %} 8812 ins_pipe(ialu_reg_reg); 8813 %} 8814 8815 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) 8816 %{ 8817 match(Set cr (OverflowSubI op1 op2)); 8818 8819 format %{ "CMP $op1, $op2\t# overflow check int" %} 8820 ins_encode %{ 8821 __ cmpl($op1$$Register, $op2$$constant); 8822 %} 8823 ins_pipe(ialu_reg_reg); 8824 %} 8825 8826 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2) 8827 %{ 8828 match(Set cr (OverflowSubI zero op2)); 8829 effect(DEF cr, USE_KILL op2); 8830 8831 format %{ "NEG $op2\t# overflow check int" %} 8832 ins_encode %{ 8833 __ negl($op2$$Register); 8834 %} 8835 ins_pipe(ialu_reg_reg); 8836 %} 8837 8838 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8839 %{ 8840 match(Set cr (OverflowMulI op1 op2)); 8841 
effect(DEF cr, USE_KILL op1, USE op2); 8842 8843 format %{ "IMUL $op1, $op2\t# overflow check int" %} 8844 ins_encode %{ 8845 __ imull($op1$$Register, $op2$$Register); 8846 %} 8847 ins_pipe(ialu_reg_reg_alu0); 8848 %} 8849 8850 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) 8851 %{ 8852 match(Set cr (OverflowMulI op1 op2)); 8853 effect(DEF cr, TEMP tmp, USE op1, USE op2); 8854 8855 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} 8856 ins_encode %{ 8857 __ imull($tmp$$Register, $op1$$Register, $op2$$constant); 8858 %} 8859 ins_pipe(ialu_reg_reg_alu0); 8860 %} 8861 8862 // Integer Absolute Instructions 8863 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr) 8864 %{ 8865 match(Set dst (AbsI src)); 8866 effect(TEMP dst, TEMP tmp, KILL cr); 8867 format %{ "movl $tmp, $src\n\t" 8868 "sarl $tmp, 31\n\t" 8869 "movl $dst, $src\n\t" 8870 "xorl $dst, $tmp\n\t" 8871 "subl $dst, $tmp\n" 8872 %} 8873 ins_encode %{ 8874 __ movl($tmp$$Register, $src$$Register); 8875 __ sarl($tmp$$Register, 31); 8876 __ movl($dst$$Register, $src$$Register); 8877 __ xorl($dst$$Register, $tmp$$Register); 8878 __ subl($dst$$Register, $tmp$$Register); 8879 %} 8880 8881 ins_pipe(ialu_reg_reg); 8882 %} 8883 8884 //----------Long Instructions------------------------------------------------ 8885 // Add Long Register with Register 8886 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8887 match(Set dst (AddL dst src)); 8888 effect(KILL cr); 8889 ins_cost(200); 8890 format %{ "ADD $dst.lo,$src.lo\n\t" 8891 "ADC $dst.hi,$src.hi" %} 8892 opcode(0x03, 0x13); 8893 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8894 ins_pipe( ialu_reg_reg_long ); 8895 %} 8896 8897 // Add Long Register with Immediate 8898 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8899 match(Set dst (AddL dst src)); 8900 effect(KILL cr); 8901 format %{ "ADD $dst.lo,$src.lo\n\t" 8902 "ADC $dst.hi,$src.hi" %} 8903 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 
*/ 8904 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8905 ins_pipe( ialu_reg_long ); 8906 %} 8907 8908 // Add Long Register with Memory 8909 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8910 match(Set dst (AddL dst (LoadL mem))); 8911 effect(KILL cr); 8912 ins_cost(125); 8913 format %{ "ADD $dst.lo,$mem\n\t" 8914 "ADC $dst.hi,$mem+4" %} 8915 opcode(0x03, 0x13); 8916 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8917 ins_pipe( ialu_reg_long_mem ); 8918 %} 8919 8920 // Subtract Long Register with Register. 8921 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8922 match(Set dst (SubL dst src)); 8923 effect(KILL cr); 8924 ins_cost(200); 8925 format %{ "SUB $dst.lo,$src.lo\n\t" 8926 "SBB $dst.hi,$src.hi" %} 8927 opcode(0x2B, 0x1B); 8928 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8929 ins_pipe( ialu_reg_reg_long ); 8930 %} 8931 8932 // Subtract Long Register with Immediate 8933 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8934 match(Set dst (SubL dst src)); 8935 effect(KILL cr); 8936 format %{ "SUB $dst.lo,$src.lo\n\t" 8937 "SBB $dst.hi,$src.hi" %} 8938 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8939 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8940 ins_pipe( ialu_reg_long ); 8941 %} 8942 8943 // Subtract Long Register with Memory 8944 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8945 match(Set dst (SubL dst (LoadL mem))); 8946 effect(KILL cr); 8947 ins_cost(125); 8948 format %{ "SUB $dst.lo,$mem\n\t" 8949 "SBB $dst.hi,$mem+4" %} 8950 opcode(0x2B, 0x1B); 8951 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8952 ins_pipe( ialu_reg_long_mem ); 8953 %} 8954 8955 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8956 match(Set dst (SubL zero dst)); 8957 effect(KILL cr); 8958 ins_cost(300); 8959 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8960 ins_encode( 
neg_long(dst) ); 8961 ins_pipe( ialu_reg_reg_long ); 8962 %} 8963 8964 // And Long Register with Register 8965 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8966 match(Set dst (AndL dst src)); 8967 effect(KILL cr); 8968 format %{ "AND $dst.lo,$src.lo\n\t" 8969 "AND $dst.hi,$src.hi" %} 8970 opcode(0x23,0x23); 8971 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8972 ins_pipe( ialu_reg_reg_long ); 8973 %} 8974 8975 // And Long Register with Immediate 8976 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8977 match(Set dst (AndL dst src)); 8978 effect(KILL cr); 8979 format %{ "AND $dst.lo,$src.lo\n\t" 8980 "AND $dst.hi,$src.hi" %} 8981 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8982 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8983 ins_pipe( ialu_reg_long ); 8984 %} 8985 8986 // And Long Register with Memory 8987 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8988 match(Set dst (AndL dst (LoadL mem))); 8989 effect(KILL cr); 8990 ins_cost(125); 8991 format %{ "AND $dst.lo,$mem\n\t" 8992 "AND $dst.hi,$mem+4" %} 8993 opcode(0x23, 0x23); 8994 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8995 ins_pipe( ialu_reg_long_mem ); 8996 %} 8997 8998 // BMI1 instructions 8999 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 9000 match(Set dst (AndL (XorL src1 minus_1) src2)); 9001 predicate(UseBMI1Instructions); 9002 effect(KILL cr, TEMP dst); 9003 9004 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 9005 "ANDNL $dst.hi, $src1.hi, $src2.hi" 9006 %} 9007 9008 ins_encode %{ 9009 Register Rdst = $dst$$Register; 9010 Register Rsrc1 = $src1$$Register; 9011 Register Rsrc2 = $src2$$Register; 9012 __ andnl(Rdst, Rsrc1, Rsrc2); 9013 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 9014 %} 9015 ins_pipe(ialu_reg_reg_long); 9016 %} 9017 9018 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, 
immL_M1 minus_1, eFlagsReg cr) %{ 9019 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 9020 predicate(UseBMI1Instructions); 9021 effect(KILL cr, TEMP dst); 9022 9023 ins_cost(125); 9024 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 9025 "ANDNL $dst.hi, $src1.hi, $src2+4" 9026 %} 9027 9028 ins_encode %{ 9029 Register Rdst = $dst$$Register; 9030 Register Rsrc1 = $src1$$Register; 9031 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 9032 9033 __ andnl(Rdst, Rsrc1, $src2$$Address); 9034 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 9035 %} 9036 ins_pipe(ialu_reg_mem); 9037 %} 9038 9039 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 9040 match(Set dst (AndL (SubL imm_zero src) src)); 9041 predicate(UseBMI1Instructions); 9042 effect(KILL cr, TEMP dst); 9043 9044 format %{ "MOVL $dst.hi, 0\n\t" 9045 "BLSIL $dst.lo, $src.lo\n\t" 9046 "JNZ done\n\t" 9047 "BLSIL $dst.hi, $src.hi\n" 9048 "done:" 9049 %} 9050 9051 ins_encode %{ 9052 Label done; 9053 Register Rdst = $dst$$Register; 9054 Register Rsrc = $src$$Register; 9055 __ movl(HIGH_FROM_LOW(Rdst), 0); 9056 __ blsil(Rdst, Rsrc); 9057 __ jccb(Assembler::notZero, done); 9058 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9059 __ bind(done); 9060 %} 9061 ins_pipe(ialu_reg); 9062 %} 9063 9064 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 9065 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 9066 predicate(UseBMI1Instructions); 9067 effect(KILL cr, TEMP dst); 9068 9069 ins_cost(125); 9070 format %{ "MOVL $dst.hi, 0\n\t" 9071 "BLSIL $dst.lo, $src\n\t" 9072 "JNZ done\n\t" 9073 "BLSIL $dst.hi, $src+4\n" 9074 "done:" 9075 %} 9076 9077 ins_encode %{ 9078 Label done; 9079 Register Rdst = $dst$$Register; 9080 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9081 9082 __ movl(HIGH_FROM_LOW(Rdst), 0); 9083 
__ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSMSK long: dst = src ^ (src - 1), a mask covering bit 0 up to and
// including the lowest set bit.  Built from two 32-bit BLSMSK ops; the
// high half is only touched when the low half carried (JNC skips it).
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// BLSMSK long, memory operand; the high word of the source is at $src+4.
instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR long: dst = src & (src - 1), i.e. reset the lowest set bit.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

// BLSR long, memory operand.
instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ),
              Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// NOT does not modify EFLAGS, so no flags effect is declared.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// ADD/ADC propagates the shifted-out bit of the low word into the high word.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63: low word moves to high, low is cleared.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable count in ECX.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31 (logical)
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63 (logical): high word moves to low, high cleared.
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable count in ECX (logical).
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31 (arithmetic)
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63 (arithmetic): high word is sign-filled.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable count in ECX.
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// P6 compare when callers only consume the carry-flag subset (no NaN fixup).
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}


instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}

// Add 1.0 from the constant stack of the FPU (FLD1 avoids a constant load).
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// General double constant; 0.0 and 1.0 are excluded (cheaper forms exist).
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}

instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (MulD dst src));
  ins_cost(1); // Select this instruction for all FP double multiplies

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Fixed: a stray duplicate "predicate(UseSSE<=1);" preceded the match
  // rule.  An instruct carries a single predicate; the strict-FP predicate
  // below already includes the UseSSE<=1 condition (mirrors
  // strictfp_mulDPR_reg above).
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all FP double divides

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 double remainder: round-trips the operands through the x87 stack
// because FPREM has no XMM equivalent; the FPREM loop retries until the
// partial remainder is complete (C2 status flag clear, tested via JP).
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

instruct atanDPR_reg(regDPR dst,
regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
//     fcompp();
//     fwait(); fnstsw_ax();
//     sahf();
//     movl(dst, unordered_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//   exit:

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// P6 compare when callers only consume the carry-flag subset (no NaN fixup).
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst,
regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Absolute value: FABS operates on the top-of-stack register (regFPR1).
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Negate: FCHS flips the sign of the top-of-stack register.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst
(MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE operands, but FPREM has no XMM equivalent, so bounce through the x87 stack.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr); // FNSTSW AX / SAHF loop clobbers EAX and EFLAGS
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted. Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// x87 source, XMM destination: round via a store to the stack.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      // Source is not on top of the FPU stack: load it, then store/pop.
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      // Source is already TOS: store without popping.
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSD2SI yields 0x80000000 on overflow/NaN; that sentinel routes
    // the value through the d2i_wrapper stub for Java-correct results.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    // Switch the FPU to truncating (round-toward-zero) mode for the store.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 is the overflow/NaN sentinel: take the wrapper path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSS2SI yields 0x80000000 on overflow/NaN; fall into the wrapper then.
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // Truncating rounding mode for the Java-semantics FIST store.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 sentinel => overflow/NaN, go through d2l_wrapper.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); 11034 ins_pipe( fpu_reg_mem ); 11035 %} 11036 11037 instruct convI2D_reg(regD dst, rRegI src) %{ 11038 predicate( UseSSE>=2 && !UseXmmI2D ); 11039 match(Set dst (ConvI2D src)); 11040 format %{ "CVTSI2SD $dst,$src" %} 11041 ins_encode %{ 11042 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); 11043 %} 11044 ins_pipe( pipe_slow ); 11045 %} 11046 11047 instruct convI2D_mem(regD dst, memory mem) %{ 11048 predicate( UseSSE>=2 ); 11049 match(Set dst (ConvI2D (LoadI mem))); 11050 format %{ "CVTSI2SD $dst,$mem" %} 11051 ins_encode %{ 11052 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); 11053 %} 11054 ins_pipe( pipe_slow ); 11055 %} 11056 11057 instruct convXI2D_reg(regD dst, rRegI src) 11058 %{ 11059 predicate( UseSSE>=2 && UseXmmI2D ); 11060 match(Set dst (ConvI2D src)); 11061 11062 format %{ "MOVD $dst,$src\n\t" 11063 "CVTDQ2PD $dst,$dst\t# i2d" %} 11064 ins_encode %{ 11065 __ movdl($dst$$XMMRegister, $src$$Register); 11066 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 11067 %} 11068 ins_pipe(pipe_slow); // XXX 11069 %} 11070 11071 instruct convI2DPR_mem(regDPR dst, memory mem) %{ 11072 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 11073 match(Set dst (ConvI2D (LoadI mem))); 11074 format %{ "FILD $mem\n\t" 11075 "FSTP $dst" %} 11076 opcode(0xDB); /* DB /0 */ 11077 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11078 Pop_Reg_DPR(dst)); 11079 ins_pipe( fpu_reg_mem ); 11080 %} 11081 11082 // Convert a byte to a float; no rounding step needed. 
// Convert a byte-range int (x & 255) to float on the x87 stack; no
// 24-bit rounding step is needed because the value fits exactly.
// The predicate pattern-matches the input as an AndI with constant 255.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD   $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
// (memory-operand variant: folds the LoadI into FILD).
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD   $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD   $mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
// NOTE: the predicate parses as UseSSE==1 || (UseSSE>=2 && !UseXmmI2F)
// because && binds tighter than || — i.e. SSE1, or SSE2+ without the
// MOVD/CVTDQ2PS path (convXI2F_reg) enabled.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Alternative int-to-float conversion: MOVD into XMM then CVTDQ2PS.
// Chosen when the UseXmmI2F flag is set (SSE2 required).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD  $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy into both halves of the long register
// pair, then arithmetic-shift the high half by 31 to replicate the sign.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src\n\t"
            "SAR    $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
// Matches (ConvI2L src) & 0xFFFFFFFF: copy the low half, XOR the high
// half to zero. Flags are clobbered by the XOR.
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV    $dst.lo,$src\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
// Matches src & 0xFFFFFFFF on a long: keep the low word, zero the high.
// NOTE(review): the format string carries a trailing "\n\t" — cosmetic
// only, it affects disassembly printing, not code generation.
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long to double via the x87 FPU (no SSE2): push both halves, FILD the
// 64-bit integer from the stack, pop the stack, store with D-rounding.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to double with SSE2: FILD/FSTP through the stack, then MOVSD the
// result into the XMM destination.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Long to float with SSE1+: FILD/FSTP_S through the stack (the single-
// precision store performs the rounding), then MOVSS into XMM.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Long to float via the x87 FPU (no predicate: fallback form); the
// FSTP_S store performs the F-rounding.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to int: simply copy the low 32-bit half of the register pair.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV    $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Raw bit-move: reinterpret a float spilled on the stack as an int by
// loading the 32 bits into a GPR.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Raw bit-move, x87 path: store the float register to the stack slot so
// its bits become visible as an int.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Raw bit-move, SSE path: MOVSS the XMM float to the stack slot.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit-move register-to-register: MOVD XMM -> GPR (SSE2, cheapest form).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit-move int -> float stack slot: plain 32-bit store.
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


// Raw bit-move int stack slot -> x87 float register. Uses the no-oop
// memory encoding since the slot holds raw bits, not a heap pointer.
instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Raw bit-move int stack slot -> XMM float register via MOVSS.
instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit-move register-to-register: MOVD GPR -> XMM (SSE2).
instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit-move: double stack slot -> long register pair (two 32-bit loads).
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// Raw bit-move: x87 double register -> long stack slot via FST_D.
instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Raw bit-move: XMM double -> long stack slot via MOVSD (SSE2).
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit-move register-to-register: low 32 bits via MOVD, then shuffle
// the upper 32 bits down with PSHUFLW(0x4E) into a temp and MOVD that
// into the high half of the long pair (SSE2).
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD   $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit-move: long register pair -> double stack slot (two 32-bit stores).
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// Raw bit-move: long stack slot -> x87 double register via FLD_D.
instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


// Raw bit-move: long stack slot -> XMM double with MOVSD, used when the
// CPU prefers the load form that clears the upper half of the register.
instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Same bit-move for CPUs where the partial-register MOVLPD form is
// preferred (!UseXmmLoadAndClearUpper); movdbl() picks the encoding.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit-move register-to-register: MOVD each 32-bit half into XMM
// registers, then interleave them with PUNPCKLDQ (SSE2).
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD   $dst,$src.lo\n\t"
            "MOVD   $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
// Small ClearArray non-AVX512.
// Clear a small (non-large) array: count in ECX, base in EDI, EAX is
// zeroed and used as the store value; delegates to MacroAssembler::
// clear_mem with is_large=false and no opmask register (knoreg).
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
    $$emit$$"JG     LARGE\n\t"
    $$emit$$"SHL    ECX, 1\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JS     DONE\t# Zero length\n\t"
    $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JGE    LOOP\n\t"
    $$emit$$"JMP    DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV     RDI,RAX\n\t"
       $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD     0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB     0x8,RCX\n\t"
       $$emit$$"JGE     L_loop\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JL      L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD     0x20,RAX\n\t"
       $$emit$$"SUB     0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JLE     L_end\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
       $$emit$$"ADD     0x8,RAX\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"JGE     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 non-constant length.
// Same as rep_stos but for UseAVX > 2: takes a legacy-encodable XMM temp
// and an opmask temp (ktmp) that is passed through to clear_mem.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
    $$emit$$"JG     LARGE\n\t"
    $$emit$$"SHL    ECX, 1\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JS     DONE\t# Zero length\n\t"
    $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC    ECX\n\t"
    $$emit$$"JGE    LOOP\n\t"
    $$emit$$"JMP    DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV     RDI,RAX\n\t"
       $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD     0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB     0x8,RCX\n\t"
       $$emit$$"JGE     L_loop\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JL      L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD     0x20,RAX\n\t"
       $$emit$$"SUB     0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JLE     L_end\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
       $$emit$$"ADD     0x8,RAX\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"JGE     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray non-AVX512.
// Same contract as rep_stos but clear_mem is invoked with is_large=true,
// so the short-array fast path is skipped.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
       $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD     0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB     0x8,RCX\n\t"
       $$emit$$"JGE     L_loop\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JL      L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD     0x20,RAX\n\t"
       $$emit$$"SUB     0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JLE     L_end\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
       $$emit$$"ADD     0x8,RAX\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"JGE     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray AVX512.
// Large variant for UseAVX > 2: opmask temp (ktmp) forwarded to clear_mem.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
       $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
       $$emit$$"JMPQ    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
       $$emit$$"ADD     0x40,RAX\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"SUB     0x8,RCX\n\t"
       $$emit$$"JGE     L_loop\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JL      L_tail\n\t"
       $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
       $$emit$$"ADD     0x20,RAX\n\t"
       $$emit$$"SUB     0x4,RCX\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"ADD     0x4,RCX\n\t"
       $$emit$$"JLE     L_end\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
       $$emit$$"ADD     0x8,RAX\n\t"
       $$emit$$"DEC     RCX\n\t"
       $$emit$$"JGE     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 constant length.
// Length is a compile-time constant ($cnt$$constant), so the count
// register constraint is relaxed (any eRegP base / rRegI zero temp).
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
              ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Compare two Latin-1 (byte[]) strings; non-AVX512 path (no opmask).
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Compare two Latin-1 strings; AVX512VLBW path with an opmask temp.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Compare two UTF-16 (char[]) strings; non-AVX512 path.
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Compare two UTF-16 strings; AVX512VLBW path with an opmask temp.
instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Mixed Latin-1 vs UTF-16 compare (str1 Latin-1, str2 UTF-16); non-AVX512.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Mixed Latin-1 vs UTF-16 compare; AVX512VLBW path with an opmask temp.
instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Mixed UTF-16 vs Latin-1 compare (str1 UTF-16, str2 Latin-1); non-AVX512.
// Note: operand-to-register assignment differs from the LU forms (str1 in
// ESI, str2 in EDI) and the arguments are passed to string_compare in
// swapped order (str2/cnt2 first) — the UL encoding expects that order.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Mixed UTF-16 vs Latin-1 compare; AVX512VLBW path (same swapped-argument
// convention as string_compareUL) with an opmask temp.
instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
// Delegates to arrays_equals with is_array_equ=false; the trailing
// boolean 'false /* char */' selects byte-granularity comparison.
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}

// String equals, AVX512VLBW path with an opmask temp.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size.
// Latin-1 needle with constant length: needles of >= 16 byte elements
// take the no-stack-copy path (string_indexofC8).
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// UTF-16 needle with constant length: threshold is 8 char elements.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// UTF-16 haystack / Latin-1 needle (UL) with constant length.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Latin-1 substring search with variable needle length; (-1) marks the
// length as non-constant for string_indexof.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// UTF-16 substring search with variable needle length.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// UTF-16 haystack / Latin-1 needle search with variable needle length.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Find a single char in a UTF-16 string (SSE4.2).
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Find a single char in a Latin-1 string (SSE4.2); body continues below.
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    __
stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 12036 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 12037 %} 12038 ins_pipe( pipe_slow ); 12039 %} 12040 12041 12042 // fast array equals 12043 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12044 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 12045 %{ 12046 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 12047 match(Set result (AryEq ary1 ary2)); 12048 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12049 //ins_cost(300); 12050 12051 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12052 ins_encode %{ 12053 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12054 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12055 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg); 12056 %} 12057 ins_pipe( pipe_slow ); 12058 %} 12059 12060 instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12061 regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 12062 %{ 12063 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 12064 match(Set result (AryEq ary1 ary2)); 12065 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12066 //ins_cost(300); 12067 12068 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12069 ins_encode %{ 12070 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12071 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12072 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister); 12073 %} 12074 ins_pipe( pipe_slow ); 12075 %} 12076 12077 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12078 regD tmp1, regD tmp2, eCXRegI 
tmp3, eBXRegI tmp4, eFlagsReg cr) 12079 %{ 12080 predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 12081 match(Set result (AryEq ary1 ary2)); 12082 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12083 //ins_cost(300); 12084 12085 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12086 ins_encode %{ 12087 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12088 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12089 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg); 12090 %} 12091 ins_pipe( pipe_slow ); 12092 %} 12093 12094 instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 12095 regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 12096 %{ 12097 predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 12098 match(Set result (AryEq ary1 ary2)); 12099 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 12100 //ins_cost(300); 12101 12102 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 12103 ins_encode %{ 12104 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 12105 $tmp3$$Register, $result$$Register, $tmp4$$Register, 12106 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister); 12107 %} 12108 ins_pipe( pipe_slow ); 12109 %} 12110 12111 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result, 12112 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) 12113 %{ 12114 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); 12115 match(Set result (HasNegatives ary1 len)); 12116 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 12117 12118 format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 12119 ins_encode %{ 12120 __ 
has_negatives($ary1$$Register, $len$$Register, 12121 $result$$Register, $tmp3$$Register, 12122 $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg); 12123 %} 12124 ins_pipe( pipe_slow ); 12125 %} 12126 12127 instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result, 12128 regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr) 12129 %{ 12130 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); 12131 match(Set result (HasNegatives ary1 len)); 12132 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 12133 12134 format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 12135 ins_encode %{ 12136 __ has_negatives($ary1$$Register, $len$$Register, 12137 $result$$Register, $tmp3$$Register, 12138 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 12139 %} 12140 ins_pipe( pipe_slow ); 12141 %} 12142 12143 12144 // fast char[] to byte[] compression 12145 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, 12146 regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12147 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); 12148 match(Set result (StrCompressedCopy src (Binary dst len))); 12149 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12150 12151 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 12152 ins_encode %{ 12153 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, 12154 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12155 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, 12156 knoreg, knoreg); 12157 %} 12158 ins_pipe( pipe_slow ); 12159 %} 12160 12161 instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, 12162 regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI 
tmp5, eAXRegI result, eFlagsReg cr) %{ 12163 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); 12164 match(Set result (StrCompressedCopy src (Binary dst len))); 12165 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12166 12167 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 12168 ins_encode %{ 12169 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, 12170 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12171 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, 12172 $ktmp1$$KRegister, $ktmp2$$KRegister); 12173 %} 12174 ins_pipe( pipe_slow ); 12175 %} 12176 12177 // fast byte[] to char[] inflation 12178 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 12179 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ 12180 predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); 12181 match(Set dummy (StrInflatedCopy src (Binary dst len))); 12182 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 12183 12184 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 12185 ins_encode %{ 12186 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, 12187 $tmp1$$XMMRegister, $tmp2$$Register, knoreg); 12188 %} 12189 ins_pipe( pipe_slow ); 12190 %} 12191 12192 instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 12193 regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{ 12194 predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); 12195 match(Set dummy (StrInflatedCopy src (Binary dst len))); 12196 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 12197 12198 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 12199 ins_encode %{ 12200 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, 12201 
$tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister); 12202 %} 12203 ins_pipe( pipe_slow ); 12204 %} 12205 12206 // encode char[] to byte[] in ISO_8859_1 12207 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 12208 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 12209 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12210 predicate(!((EncodeISOArrayNode*)n)->is_ascii()); 12211 match(Set result (EncodeISOArray src (Binary dst len))); 12212 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12213 12214 format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 12215 ins_encode %{ 12216 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 12217 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12218 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false); 12219 %} 12220 ins_pipe( pipe_slow ); 12221 %} 12222 12223 // encode char[] to byte[] in ASCII 12224 instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len, 12225 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 12226 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 12227 predicate(((EncodeISOArrayNode*)n)->is_ascii()); 12228 match(Set result (EncodeISOArray src (Binary dst len))); 12229 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 12230 12231 format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 12232 ins_encode %{ 12233 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 12234 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 12235 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true); 12236 %} 12237 ins_pipe( pipe_slow ); 12238 %} 12239 12240 //----------Control Flow Instructions------------------------------------------ 12241 // Signed compare Instructions 12242 instruct 
compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 12243 match(Set cr (CmpI op1 op2)); 12244 effect( DEF cr, USE op1, USE op2 ); 12245 format %{ "CMP $op1,$op2" %} 12246 opcode(0x3B); /* Opcode 3B /r */ 12247 ins_encode( OpcP, RegReg( op1, op2) ); 12248 ins_pipe( ialu_cr_reg_reg ); 12249 %} 12250 12251 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 12252 match(Set cr (CmpI op1 op2)); 12253 effect( DEF cr, USE op1 ); 12254 format %{ "CMP $op1,$op2" %} 12255 opcode(0x81,0x07); /* Opcode 81 /7 */ 12256 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 12257 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 12258 ins_pipe( ialu_cr_reg_imm ); 12259 %} 12260 12261 // Cisc-spilled version of cmpI_eReg 12262 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 12263 match(Set cr (CmpI op1 (LoadI op2))); 12264 12265 format %{ "CMP $op1,$op2" %} 12266 ins_cost(500); 12267 opcode(0x3B); /* Opcode 3B /r */ 12268 ins_encode( OpcP, RegMem( op1, op2) ); 12269 ins_pipe( ialu_cr_reg_mem ); 12270 %} 12271 12272 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{ 12273 match(Set cr (CmpI src zero)); 12274 effect( DEF cr, USE src ); 12275 12276 format %{ "TEST $src,$src" %} 12277 opcode(0x85); 12278 ins_encode( OpcP, RegReg( src, src ) ); 12279 ins_pipe( ialu_cr_reg_imm ); 12280 %} 12281 12282 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{ 12283 match(Set cr (CmpI (AndI src con) zero)); 12284 12285 format %{ "TEST $src,$con" %} 12286 opcode(0xF7,0x00); 12287 ins_encode( OpcP, RegOpc(src), Con32(con) ); 12288 ins_pipe( ialu_cr_reg_imm ); 12289 %} 12290 12291 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{ 12292 match(Set cr (CmpI (AndI src mem) zero)); 12293 12294 format %{ "TEST $src,$mem" %} 12295 opcode(0x85); 12296 ins_encode( OpcP, RegMem( src, mem ) ); 12297 ins_pipe( ialu_cr_reg_mem ); 12298 %} 12299 12300 // Unsigned compare Instructions; really, same as signed except they 12301 
// produce an eFlagsRegU instead of eFlagsReg. 12302 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ 12303 match(Set cr (CmpU op1 op2)); 12304 12305 format %{ "CMPu $op1,$op2" %} 12306 opcode(0x3B); /* Opcode 3B /r */ 12307 ins_encode( OpcP, RegReg( op1, op2) ); 12308 ins_pipe( ialu_cr_reg_reg ); 12309 %} 12310 12311 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ 12312 match(Set cr (CmpU op1 op2)); 12313 12314 format %{ "CMPu $op1,$op2" %} 12315 opcode(0x81,0x07); /* Opcode 81 /7 */ 12316 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 12317 ins_pipe( ialu_cr_reg_imm ); 12318 %} 12319 12320 // // Cisc-spilled version of cmpU_eReg 12321 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ 12322 match(Set cr (CmpU op1 (LoadI op2))); 12323 12324 format %{ "CMPu $op1,$op2" %} 12325 ins_cost(500); 12326 opcode(0x3B); /* Opcode 3B /r */ 12327 ins_encode( OpcP, RegMem( op1, op2) ); 12328 ins_pipe( ialu_cr_reg_mem ); 12329 %} 12330 12331 // // Cisc-spilled version of cmpU_eReg 12332 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ 12333 // match(Set cr (CmpU (LoadI op1) op2)); 12334 // 12335 // format %{ "CMPu $op1,$op2" %} 12336 // ins_cost(500); 12337 // opcode(0x39); /* Opcode 39 /r */ 12338 // ins_encode( OpcP, RegMem( op1, op2) ); 12339 //%} 12340 12341 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{ 12342 match(Set cr (CmpU src zero)); 12343 12344 format %{ "TESTu $src,$src" %} 12345 opcode(0x85); 12346 ins_encode( OpcP, RegReg( src, src ) ); 12347 ins_pipe( ialu_cr_reg_imm ); 12348 %} 12349 12350 // Unsigned pointer compare Instructions 12351 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{ 12352 match(Set cr (CmpP op1 op2)); 12353 12354 format %{ "CMPu $op1,$op2" %} 12355 opcode(0x3B); /* Opcode 3B /r */ 12356 ins_encode( OpcP, RegReg( op1, op2) ); 12357 ins_pipe( ialu_cr_reg_reg ); 12358 %} 12359 12360 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{ 12361 
match(Set cr (CmpP op1 op2)); 12362 12363 format %{ "CMPu $op1,$op2" %} 12364 opcode(0x81,0x07); /* Opcode 81 /7 */ 12365 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 12366 ins_pipe( ialu_cr_reg_imm ); 12367 %} 12368 12369 // // Cisc-spilled version of cmpP_eReg 12370 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{ 12371 match(Set cr (CmpP op1 (LoadP op2))); 12372 12373 format %{ "CMPu $op1,$op2" %} 12374 ins_cost(500); 12375 opcode(0x3B); /* Opcode 3B /r */ 12376 ins_encode( OpcP, RegMem( op1, op2) ); 12377 ins_pipe( ialu_cr_reg_mem ); 12378 %} 12379 12380 // // Cisc-spilled version of cmpP_eReg 12381 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{ 12382 // match(Set cr (CmpP (LoadP op1) op2)); 12383 // 12384 // format %{ "CMPu $op1,$op2" %} 12385 // ins_cost(500); 12386 // opcode(0x39); /* Opcode 39 /r */ 12387 // ins_encode( OpcP, RegMem( op1, op2) ); 12388 //%} 12389 12390 // Compare raw pointer (used in out-of-heap check). 12391 // Only works because non-oop pointers must be raw pointers 12392 // and raw pointers have no anti-dependencies. 12393 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{ 12394 predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none ); 12395 match(Set cr (CmpP op1 (LoadP op2))); 12396 12397 format %{ "CMPu $op1,$op2" %} 12398 opcode(0x3B); /* Opcode 3B /r */ 12399 ins_encode( OpcP, RegMem( op1, op2) ); 12400 ins_pipe( ialu_cr_reg_mem ); 12401 %} 12402 12403 // 12404 // This will generate a signed flags result. This should be ok 12405 // since any compare to a zero should be eq/neq. 12406 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{ 12407 match(Set cr (CmpP src zero)); 12408 12409 format %{ "TEST $src,$src" %} 12410 opcode(0x85); 12411 ins_encode( OpcP, RegReg( src, src ) ); 12412 ins_pipe( ialu_cr_reg_imm ); 12413 %} 12414 12415 // Cisc-spilled version of testP_reg 12416 // This will generate a signed flags result. 
This should be ok 12417 // since any compare to a zero should be eq/neq. 12418 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{ 12419 match(Set cr (CmpP (LoadP op) zero)); 12420 12421 format %{ "TEST $op,0xFFFFFFFF" %} 12422 ins_cost(500); 12423 opcode(0xF7); /* Opcode F7 /0 */ 12424 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 12425 ins_pipe( ialu_cr_reg_imm ); 12426 %} 12427 12428 // Yanked all unsigned pointer compare operations. 12429 // Pointer compares are done with CmpP which is already unsigned. 12430 12431 //----------Max and Min-------------------------------------------------------- 12432 // Min Instructions 12433 //// 12434 // *** Min and Max using the conditional move are slower than the 12435 // *** branch version on a Pentium III. 12436 // // Conditional move for min 12437 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 12438 // effect( USE_DEF op2, USE op1, USE cr ); 12439 // format %{ "CMOVlt $op2,$op1\t! min" %} 12440 // opcode(0x4C,0x0F); 12441 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 12442 // ins_pipe( pipe_cmov_reg ); 12443 //%} 12444 // 12445 //// Min Register with Register (P6 version) 12446 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 12447 // predicate(VM_Version::supports_cmov() ); 12448 // match(Set op2 (MinI op1 op2)); 12449 // ins_cost(200); 12450 // expand %{ 12451 // eFlagsReg cr; 12452 // compI_eReg(cr,op1,op2); 12453 // cmovI_reg_lt(op2,op1,cr); 12454 // %} 12455 //%} 12456 12457 // Min Register with Register (generic version) 12458 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 12459 match(Set dst (MinI dst src)); 12460 effect(KILL flags); 12461 ins_cost(300); 12462 12463 format %{ "MIN $dst,$src" %} 12464 opcode(0xCC); 12465 ins_encode( min_enc(dst,src) ); 12466 ins_pipe( pipe_slow ); 12467 %} 12468 12469 // Max Register with Register 12470 // *** Min and Max using the conditional move are slower than the 12471 // *** branch version on a Pentium III. 
12472 // // Conditional move for max 12473 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 12474 // effect( USE_DEF op2, USE op1, USE cr ); 12475 // format %{ "CMOVgt $op2,$op1\t! max" %} 12476 // opcode(0x4F,0x0F); 12477 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 12478 // ins_pipe( pipe_cmov_reg ); 12479 //%} 12480 // 12481 // // Max Register with Register (P6 version) 12482 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ 12483 // predicate(VM_Version::supports_cmov() ); 12484 // match(Set op2 (MaxI op1 op2)); 12485 // ins_cost(200); 12486 // expand %{ 12487 // eFlagsReg cr; 12488 // compI_eReg(cr,op1,op2); 12489 // cmovI_reg_gt(op2,op1,cr); 12490 // %} 12491 //%} 12492 12493 // Max Register with Register (generic version) 12494 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 12495 match(Set dst (MaxI dst src)); 12496 effect(KILL flags); 12497 ins_cost(300); 12498 12499 format %{ "MAX $dst,$src" %} 12500 opcode(0xCC); 12501 ins_encode( max_enc(dst,src) ); 12502 ins_pipe( pipe_slow ); 12503 %} 12504 12505 // ============================================================================ 12506 // Counted Loop limit node which represents exact final iterator value. 12507 // Note: the resulting value should fit into integer range since 12508 // counted loops have limit check on overflow. 12509 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ 12510 match(Set limit (LoopLimit (Binary init limit) stride)); 12511 effect(TEMP limit_hi, TEMP tmp, KILL flags); 12512 ins_cost(300); 12513 12514 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} 12515 ins_encode %{ 12516 int strd = (int)$stride$$constant; 12517 assert(strd != 1 && strd != -1, "sanity"); 12518 int m1 = (strd > 0) ? 
1 : -1; 12519 // Convert limit to long (EAX:EDX) 12520 __ cdql(); 12521 // Convert init to long (init:tmp) 12522 __ movl($tmp$$Register, $init$$Register); 12523 __ sarl($tmp$$Register, 31); 12524 // $limit - $init 12525 __ subl($limit$$Register, $init$$Register); 12526 __ sbbl($limit_hi$$Register, $tmp$$Register); 12527 // + ($stride - 1) 12528 if (strd > 0) { 12529 __ addl($limit$$Register, (strd - 1)); 12530 __ adcl($limit_hi$$Register, 0); 12531 __ movl($tmp$$Register, strd); 12532 } else { 12533 __ addl($limit$$Register, (strd + 1)); 12534 __ adcl($limit_hi$$Register, -1); 12535 __ lneg($limit_hi$$Register, $limit$$Register); 12536 __ movl($tmp$$Register, -strd); 12537 } 12538 // signed division: (EAX:EDX) / pos_stride 12539 __ idivl($tmp$$Register); 12540 if (strd < 0) { 12541 // restore sign 12542 __ negl($tmp$$Register); 12543 } 12544 // (EAX) * stride 12545 __ mull($tmp$$Register); 12546 // + init (ignore upper bits) 12547 __ addl($limit$$Register, $init$$Register); 12548 %} 12549 ins_pipe( pipe_slow ); 12550 %} 12551 12552 // ============================================================================ 12553 // Branch Instructions 12554 // Jump Table 12555 instruct jumpXtnd(rRegI switch_val) %{ 12556 match(Jump switch_val); 12557 ins_cost(350); 12558 format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} 12559 ins_encode %{ 12560 // Jump to Address(table_base + switch_reg) 12561 Address index(noreg, $switch_val$$Register, Address::times_1); 12562 __ jump(ArrayAddress($constantaddress, index)); 12563 %} 12564 ins_pipe(pipe_jmp); 12565 %} 12566 12567 // Jump Direct - Label defines a relative address from JMP+1 12568 instruct jmpDir(label labl) %{ 12569 match(Goto); 12570 effect(USE labl); 12571 12572 ins_cost(300); 12573 format %{ "JMP $labl" %} 12574 size(5); 12575 ins_encode %{ 12576 Label* L = $labl$$label; 12577 __ jmp(*L, false); // Always long jump 12578 %} 12579 ins_pipe( pipe_jmp ); 12580 %} 12581 12582 // Jump Direct Conditional - Label defines a
// Long-form (6-byte) conditional branches for If/CountedLoopEnd, the masked
// loop-end variants that also restore the vector mask (post-loop
// multiversioning), the unsigned/CF-based conditional branches, and the two
// PartialSubtypeCheck encodings.  All jcc(..., false) calls force the rel32
// form so the fixed size(6)/size(10) declarations stay correct.
// NOTE(review): the jmpConUCF2 long form declares no size(); presumably the
// ADLC computes it from the encoding — confirm before relying on it.
relative address from Jcc+1 12583 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{ 12584 match(If cop cr); 12585 effect(USE labl); 12586 12587 ins_cost(300); 12588 format %{ "J$cop $labl" %} 12589 size(6); 12590 ins_encode %{ 12591 Label* L = $labl$$label; 12592 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12593 %} 12594 ins_pipe( pipe_jcc ); 12595 %} 12596 12597 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12598 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{ 12599 predicate(!n->has_vector_mask_set()); 12600 match(CountedLoopEnd cop cr); 12601 effect(USE labl); 12602 12603 ins_cost(300); 12604 format %{ "J$cop $labl\t# Loop end" %} 12605 size(6); 12606 ins_encode %{ 12607 Label* L = $labl$$label; 12608 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12609 %} 12610 ins_pipe( pipe_jcc ); 12611 %} 12612 12613 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12614 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12615 predicate(!n->has_vector_mask_set()); 12616 match(CountedLoopEnd cop cmp); 12617 effect(USE labl); 12618 12619 ins_cost(300); 12620 format %{ "J$cop,u $labl\t# Loop end" %} 12621 size(6); 12622 ins_encode %{ 12623 Label* L = $labl$$label; 12624 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12625 %} 12626 ins_pipe( pipe_jcc ); 12627 %} 12628 12629 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12630 predicate(!n->has_vector_mask_set()); 12631 match(CountedLoopEnd cop cmp); 12632 effect(USE labl); 12633 12634 ins_cost(200); 12635 format %{ "J$cop,u $labl\t# Loop end" %} 12636 size(6); 12637 ins_encode %{ 12638 Label* L = $labl$$label; 12639 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12640 %} 12641 ins_pipe( pipe_jcc ); 12642 %} 12643 12644 // mask version 12645 // Jump Direct Conditional - Label defines a relative address
from Jcc+1 12646 // Bounded mask operand used in following pattern is needed for 12647 // post-loop multiversioning. 12648 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{ 12649 predicate(PostLoopMultiversioning && n->has_vector_mask_set()); 12650 match(CountedLoopEnd cop cr); 12651 effect(USE labl, TEMP ktmp); 12652 12653 ins_cost(400); 12654 format %{ "J$cop $labl\t# Loop end\n\t" 12655 "restorevectmask \t# vector mask restore for loops" %} 12656 size(10); 12657 ins_encode %{ 12658 Label* L = $labl$$label; 12659 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12660 __ restorevectmask($ktmp$$KRegister); 12661 %} 12662 ins_pipe( pipe_jcc ); 12663 %} 12664 12665 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12666 // Bounded mask operand used in following pattern is needed for 12667 // post-loop multiversioning. 12668 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{ 12669 predicate(PostLoopMultiversioning && n->has_vector_mask_set()); 12670 match(CountedLoopEnd cop cmp); 12671 effect(USE labl, TEMP ktmp); 12672 12673 ins_cost(400); 12674 format %{ "J$cop,u $labl\t# Loop end\n\t" 12675 "restorevectmask \t# vector mask restore for loops" %} 12676 size(10); 12677 ins_encode %{ 12678 Label* L = $labl$$label; 12679 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12680 __ restorevectmask($ktmp$$KRegister); 12681 %} 12682 ins_pipe( pipe_jcc ); 12683 %} 12684 12685 // Bounded mask operand used in following pattern is needed for 12686 // post-loop multiversioning.
12687 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{ 12688 predicate(PostLoopMultiversioning && n->has_vector_mask_set()); 12689 match(CountedLoopEnd cop cmp); 12690 effect(USE labl, TEMP ktmp); 12691 12692 ins_cost(300); 12693 format %{ "J$cop,u $labl\t# Loop end\n\t" 12694 "restorevectmask \t# vector mask restore for loops" %} 12695 size(10); 12696 ins_encode %{ 12697 Label* L = $labl$$label; 12698 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12699 __ restorevectmask($ktmp$$KRegister); 12700 %} 12701 ins_pipe( pipe_jcc ); 12702 %} 12703 12704 // Jump Direct Conditional - using unsigned comparison 12705 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12706 match(If cop cmp); 12707 effect(USE labl); 12708 12709 ins_cost(300); 12710 format %{ "J$cop,u $labl" %} 12711 size(6); 12712 ins_encode %{ 12713 Label* L = $labl$$label; 12714 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12715 %} 12716 ins_pipe(pipe_jcc); 12717 %} 12718 12719 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12720 match(If cop cmp); 12721 effect(USE labl); 12722 12723 ins_cost(200); 12724 format %{ "J$cop,u $labl" %} 12725 size(6); 12726 ins_encode %{ 12727 Label* L = $labl$$label; 12728 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12729 %} 12730 ins_pipe(pipe_jcc); 12731 %} 12732 12733 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12734 match(If cop cmp); 12735 effect(USE labl); 12736 12737 ins_cost(200); 12738 format %{ $$template 12739 if ($cop$$cmpcode == Assembler::notEqual) { 12740 $$emit$$"JP,u $labl\n\t" 12741 $$emit$$"J$cop,u $labl" 12742 } else { 12743 $$emit$$"JP,u done\n\t" 12744 $$emit$$"J$cop,u $labl\n\t" 12745 $$emit$$"done:" 12746 } 12747 %} 12748 ins_encode %{ 12749 Label* l = $labl$$label; 12750 if ($cop$$cmpcode == Assembler::notEqual) { 12751 __ jcc(Assembler::parity, *l,
false); 12752 __ jcc(Assembler::notEqual, *l, false); 12753 } else if ($cop$$cmpcode == Assembler::equal) { 12754 Label done; 12755 __ jccb(Assembler::parity, done); 12756 __ jcc(Assembler::equal, *l, false); 12757 __ bind(done); 12758 } else { 12759 ShouldNotReachHere(); 12760 } 12761 %} 12762 ins_pipe(pipe_jcc); 12763 %} 12764 12765 // ============================================================================ 12766 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass 12767 // array for an instance of the superklass. Set a hidden internal cache on a 12768 // hit (cache is checked with exposed code in gen_subtype_check()). Return 12769 // NZ for a miss or zero for a hit. The encoding ALSO sets flags. 12770 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{ 12771 match(Set result (PartialSubtypeCheck sub super)); 12772 effect( KILL rcx, KILL cr ); 12773 12774 ins_cost(1100); // slightly larger than the next version 12775 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12776 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12777 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12778 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12779 "JNE,s miss\t\t# Missed: EDI not-zero\n\t" 12780 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" 12781 "XOR $result,$result\t\t Hit: EDI zero\n\t" 12782 "miss:\t" %} 12783 12784 opcode(0x1); // Force a XOR of EDI 12785 ins_encode( enc_PartialSubtypeCheck() ); 12786 ins_pipe( pipe_slow ); 12787 %} 12788 12789 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{ 12790 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 12791 effect( KILL rcx, KILL result ); 12792 12793 ins_cost(1000); 12794 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12795 "MOV
ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12796 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12797 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12798 "JNE,s miss\t\t# Missed: flags NZ\n\t" 12799 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" 12800 "miss:\t" %} 12801 12802 opcode(0x0); // No need to XOR EDI 12803 ins_encode( enc_PartialSubtypeCheck() ); 12804 ins_pipe( pipe_slow ); 12805 %} 12806 12807 // ============================================================================ 12808 // Branch Instructions -- short offset versions 12809 // 12810 // These instructions are used to replace jumps of a long offset (the default 12811 // match) with jumps of a shorter offset. These instructions are all tagged 12812 // with the ins_short_branch attribute, which causes the ADLC to suppress the 12813 // match rules in general matching. Instead, the ADLC generates a conversion 12814 // method in the MachNode which can be used to do in-place replacement of the 12815 // long variant with the shorter variant. The compiler will determine if a 12816 // branch can be taken by the is_short_branch_offset() predicate in the machine 12817 // specific code section of the file.
// Short-offset (rel8, 2-byte; 4-byte for the two-jump UCF2 case) variants of
// the branches above.  Each is tagged ins_short_branch(1) so the ADLC emits
// only a conversion method, not a general match rule; the compiler swaps
// these in when the target is within is_short_branch_offset() range.
// The emitted jmpb/jccb calls use the 1-byte displacement encodings, which
// is what the fixed size(2)/size(4) declarations rely on.
12818 12819 // Jump Direct - Label defines a relative address from JMP+1 12820 instruct jmpDir_short(label labl) %{ 12821 match(Goto); 12822 effect(USE labl); 12823 12824 ins_cost(300); 12825 format %{ "JMP,s $labl" %} 12826 size(2); 12827 ins_encode %{ 12828 Label* L = $labl$$label; 12829 __ jmpb(*L); 12830 %} 12831 ins_pipe( pipe_jmp ); 12832 ins_short_branch(1); 12833 %} 12834 12835 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12836 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{ 12837 match(If cop cr); 12838 effect(USE labl); 12839 12840 ins_cost(300); 12841 format %{ "J$cop,s $labl" %} 12842 size(2); 12843 ins_encode %{ 12844 Label* L = $labl$$label; 12845 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12846 %} 12847 ins_pipe( pipe_jcc ); 12848 ins_short_branch(1); 12849 %} 12850 12851 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12852 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{ 12853 match(CountedLoopEnd cop cr); 12854 effect(USE labl); 12855 12856 ins_cost(300); 12857 format %{ "J$cop,s $labl\t# Loop end" %} 12858 size(2); 12859 ins_encode %{ 12860 Label* L = $labl$$label; 12861 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12862 %} 12863 ins_pipe( pipe_jcc ); 12864 ins_short_branch(1); 12865 %} 12866 12867 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12868 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12869 match(CountedLoopEnd cop cmp); 12870 effect(USE labl); 12871 12872 ins_cost(300); 12873 format %{ "J$cop,us $labl\t# Loop end" %} 12874 size(2); 12875 ins_encode %{ 12876 Label* L = $labl$$label; 12877 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12878 %} 12879 ins_pipe( pipe_jcc ); 12880 ins_short_branch(1); 12881 %} 12882 12883 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12884 match(CountedLoopEnd cop cmp); 12885 effect(USE labl); 12886 12887 ins_cost(300);
12888 format %{ "J$cop,us $labl\t# Loop end" %} 12889 size(2); 12890 ins_encode %{ 12891 Label* L = $labl$$label; 12892 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12893 %} 12894 ins_pipe( pipe_jcc ); 12895 ins_short_branch(1); 12896 %} 12897 12898 // Jump Direct Conditional - using unsigned comparison 12899 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12900 match(If cop cmp); 12901 effect(USE labl); 12902 12903 ins_cost(300); 12904 format %{ "J$cop,us $labl" %} 12905 size(2); 12906 ins_encode %{ 12907 Label* L = $labl$$label; 12908 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12909 %} 12910 ins_pipe( pipe_jcc ); 12911 ins_short_branch(1); 12912 %} 12913 12914 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12915 match(If cop cmp); 12916 effect(USE labl); 12917 12918 ins_cost(300); 12919 format %{ "J$cop,us $labl" %} 12920 size(2); 12921 ins_encode %{ 12922 Label* L = $labl$$label; 12923 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12924 %} 12925 ins_pipe( pipe_jcc ); 12926 ins_short_branch(1); 12927 %} 12928 12929 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12930 match(If cop cmp); 12931 effect(USE labl); 12932 12933 ins_cost(300); 12934 format %{ $$template 12935 if ($cop$$cmpcode == Assembler::notEqual) { 12936 $$emit$$"JP,u,s $labl\n\t" 12937 $$emit$$"J$cop,u,s $labl" 12938 } else { 12939 $$emit$$"JP,u,s done\n\t" 12940 $$emit$$"J$cop,u,s $labl\n\t" 12941 $$emit$$"done:" 12942 } 12943 %} 12944 size(4); 12945 ins_encode %{ 12946 Label* l = $labl$$label; 12947 if ($cop$$cmpcode == Assembler::notEqual) { 12948 __ jccb(Assembler::parity, *l); 12949 __ jccb(Assembler::notEqual, *l); 12950 } else if ($cop$$cmpcode == Assembler::equal) { 12951 Label done; 12952 __ jccb(Assembler::parity, done); 12953 __ jccb(Assembler::equal, *l); 12954 __ bind(done); 12955 } else { 12956 ShouldNotReachHere(); 12957 } 12958 %} 12959 ins_pipe(pipe_jcc); 12960 ins_short_branch(1); 12961 %}
// Long (64-bit in two 32-bit registers) compare section.  cmpL3_reg_reg
// materializes the full -1/0/+1 CmpL3 result in a register: compare the high
// halves signed, then the low halves unsigned, branching to set the result.
// cmpL_zero_flags_LTGE sets flags for LT/GE-vs-zero by testing only the sign
// bit of the high half (TEST src.hi,src.hi) — hence "NOT GOOD FOR EQ/NE".
12962 12963 // ============================================================================ 12964 // Long Compare 12965 // 12966 // Currently we hold longs in 2 registers. Comparing such values efficiently 12967 // is tricky. The flavor of compare used depends on whether we are testing 12968 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit. 12969 // The GE test is the negated LT test. The LE test can be had by commuting 12970 // the operands (yielding a GE test) and then negating; negate again for the 12971 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the 12972 // NE test is negated from that. 12973 12974 // Due to a shortcoming in the ADLC, it mixes up expressions like: 12975 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the 12976 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections 12977 // are collapsed internally in the ADLC's dfa-gen code. The match for 12978 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the 12979 // foo match ends up with the wrong leaf. One fix is to not match both 12980 // reg-reg and reg-zero forms of long-compare. This is unfortunate because 12981 // both forms beat the trinary form of long-compare and both are very useful 12982 // on Intel which has so few registers. 12983 12984 // Manifest a CmpL result in an integer register. Very painful. 12985 // This is the test to avoid.
12986 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{ 12987 match(Set dst (CmpL3 src1 src2)); 12988 effect( KILL flags ); 12989 ins_cost(1000); 12990 format %{ "XOR $dst,$dst\n\t" 12991 "CMP $src1.hi,$src2.hi\n\t" 12992 "JLT,s m_one\n\t" 12993 "JGT,s p_one\n\t" 12994 "CMP $src1.lo,$src2.lo\n\t" 12995 "JB,s m_one\n\t" 12996 "JEQ,s done\n" 12997 "p_one:\tINC $dst\n\t" 12998 "JMP,s done\n" 12999 "m_one:\tDEC $dst\n" 13000 "done:" %} 13001 ins_encode %{ 13002 Label p_one, m_one, done; 13003 __ xorptr($dst$$Register, $dst$$Register); 13004 __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register)); 13005 __ jccb(Assembler::less, m_one); 13006 __ jccb(Assembler::greater, p_one); 13007 __ cmpl($src1$$Register, $src2$$Register); 13008 __ jccb(Assembler::below, m_one); 13009 __ jccb(Assembler::equal, done); 13010 __ bind(p_one); 13011 __ incrementl($dst$$Register); 13012 __ jmpb(done); 13013 __ bind(m_one); 13014 __ decrementl($dst$$Register); 13015 __ bind(done); 13016 %} 13017 ins_pipe( pipe_slow ); 13018 %} 13019 13020 //====== 13021 // Manifest a CmpL result in the normal flags. Only good for LT or GE 13022 // compares. Can be used for LE or GT compares by reversing arguments. 13023 // NOT GOOD FOR EQ/NE tests. 13024 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ 13025 match( Set flags (CmpL src zero )); 13026 ins_cost(100); 13027 format %{ "TEST $src.hi,$src.hi" %} 13028 opcode(0x85); 13029 ins_encode( OpcP, RegReg_Hi2( src, src ) ); 13030 ins_pipe( ialu_cr_reg_reg ); 13031 %} 13032 13033 // Manifest a CmpL result in the normal flags. Only good for LT or GE 13034 // compares. Can be used for LE or GT compares by reversing arguments. 13035 // NOT GOOD FOR EQ/NE tests.
// LT/GE flavor of long compare: CMP the low halves, then SBB the high halves
// into a TEMP register so the sign/overflow flags reflect the full 64-bit
// signed (CmpL) or unsigned (CmpUL) compare.  The cmpL_LTGE/cmpUL_LTGE
// wrappers only match when the Bool test is lt or ge (see predicate), then
// expand into the plain jmpCon branch.  The cmovLL_* rules conditionally
// move both 32-bit halves with two CMOVcc's, guarded by
// VM_Version::supports_cmov() and the same lt/ge Bool-test predicate.
13036 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 13037 match( Set flags (CmpL src1 src2 )); 13038 effect( TEMP tmp ); 13039 ins_cost(300); 13040 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" 13041 "MOV $tmp,$src1.hi\n\t" 13042 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %} 13043 ins_encode( long_cmp_flags2( src1, src2, tmp ) ); 13044 ins_pipe( ialu_cr_reg_reg ); 13045 %} 13046 13047 // Long compares reg < zero/req OR reg >= zero/req. 13048 // Just a wrapper for a normal branch, plus the predicate test. 13049 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{ 13050 match(If cmp flags); 13051 effect(USE labl); 13052 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 13053 expand %{ 13054 jmpCon(cmp,flags,labl); // JLT or JGE... 13055 %} 13056 %} 13057 13058 //====== 13059 // Manifest a CmpUL result in the normal flags. Only good for LT or GE 13060 // compares. Can be used for LE or GT compares by reversing arguments. 13061 // NOT GOOD FOR EQ/NE tests. 13062 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{ 13063 match(Set flags (CmpUL src zero)); 13064 ins_cost(100); 13065 format %{ "TEST $src.hi,$src.hi" %} 13066 opcode(0x85); 13067 ins_encode(OpcP, RegReg_Hi2(src, src)); 13068 ins_pipe(ialu_cr_reg_reg); 13069 %} 13070 13071 // Manifest a CmpUL result in the normal flags. Only good for LT or GE 13072 // compares. Can be used for LE or GT compares by reversing arguments. 13073 // NOT GOOD FOR EQ/NE tests. 13074 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{ 13075 match(Set flags (CmpUL src1 src2)); 13076 effect(TEMP tmp); 13077 ins_cost(300); 13078 format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" 13079 "MOV $tmp,$src1.hi\n\t" 13080 "SBB $tmp,$src2.hi\t!
Compute flags for unsigned long compare" %} 13081 ins_encode(long_cmp_flags2(src1, src2, tmp)); 13082 ins_pipe(ialu_cr_reg_reg); 13083 %} 13084 13085 // Unsigned long compares reg < zero/req OR reg >= zero/req. 13086 // Just a wrapper for a normal branch, plus the predicate test. 13087 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{ 13088 match(If cmp flags); 13089 effect(USE labl); 13090 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); 13091 expand %{ 13092 jmpCon(cmp, flags, labl); // JLT or JGE... 13093 %} 13094 %} 13095 13096 // Compare 2 longs and CMOVE longs. 13097 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{ 13098 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 13099 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 13100 ins_cost(400); 13101 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13102 "CMOV$cmp $dst.hi,$src.hi" %} 13103 opcode(0x0F,0x40); 13104 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 13105 ins_pipe( pipe_cmov_reg_long ); 13106 %} 13107 13108 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{ 13109 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 13110 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 13111 ins_cost(500); 13112 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13113 "CMOV$cmp $dst.hi,$src.hi" %} 13114 opcode(0x0F,0x40); 13115 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 13116 ins_pipe( pipe_cmov_reg_long ); 13117 %} 13118 13119 // Compare 2 longs and CMOVE ints.
// Int CMOV on a long LT/GE compare result: single CMOVcc (0F 40+cc), reg-reg
// and reg-mem forms.  Guarded by supports_cmov() and a Bool test of lt/ge.
13120 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{ 13121 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 13122 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 13123 ins_cost(200); 13124 format %{ "CMOV$cmp $dst,$src" %} 13125 opcode(0x0F,0x40); 13126 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 13127 ins_pipe( pipe_cmov_reg ); 13128 %} 13129 13130 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{ 13131 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 13132 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 13133 ins_cost(250); 13134 format %{ "CMOV$cmp $dst,$src" %} 13135 opcode(0x0F,0x40); 13136 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 13137 ins_pipe( pipe_cmov_mem ); 13138 %} 13139 13140 // Compare 2 longs and CMOVE ints.
// Pointer and floating-point CMOVs on a long LT/GE compare, then the EQ/NE
// flavor of long compare (OR of halves for compare-to-zero, CMP/JNE/CMP for
// reg-reg) and its branch wrapper.  The FP variants expand into fcmov*
// pseudo-instructions selected by the UseSSE level.
// NOTE(review): in the FP predicates, && binds tighter than ||, so e.g.
// "UseSSE<=1 && ...lt || ...ge" only applies the UseSSE guard to the lt arm —
// presumably "UseSSE<=1 && (lt || ge)" was intended; confirm against ADLC
// rule-selection behavior before changing (this matches the long-standing
// upstream text).
13141 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{ 13142 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); 13143 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); 13144 ins_cost(200); 13145 format %{ "CMOV$cmp $dst,$src" %} 13146 opcode(0x0F,0x40); 13147 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 13148 ins_pipe( pipe_cmov_reg ); 13149 %} 13150 13151 // Compare 2 longs and CMOVE doubles 13152 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{ 13153 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 13154 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13155 ins_cost(200); 13156 expand %{ 13157 fcmovDPR_regS(cmp,flags,dst,src); 13158 %} 13159 %} 13160 13161 // Compare 2 longs and CMOVE doubles 13162 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ 13163 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 13164 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13165 ins_cost(200); 13166 expand %{ 13167 fcmovD_regS(cmp,flags,dst,src); 13168 %} 13169 %} 13170 13171 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{ 13172 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 13173 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13174 ins_cost(200); 13175 expand %{ 13176 fcmovFPR_regS(cmp,flags,dst,src); 13177 %} 13178 %} 13179 13180 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF
src) %{ 13181 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 13182 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13183 ins_cost(200); 13184 expand %{ 13185 fcmovF_regS(cmp,flags,dst,src); 13186 %} 13187 %} 13188 13189 //====== 13190 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. 13191 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{ 13192 match( Set flags (CmpL src zero )); 13193 effect(TEMP tmp); 13194 ins_cost(200); 13195 format %{ "MOV $tmp,$src.lo\n\t" 13196 "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %} 13197 ins_encode( long_cmp_flags0( src, tmp ) ); 13198 ins_pipe( ialu_reg_reg_long ); 13199 %} 13200 13201 // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. 13202 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{ 13203 match( Set flags (CmpL src1 src2 )); 13204 ins_cost(200+300); 13205 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" 13206 "JNE,s skip\n\t" 13207 "CMP $src1.hi,$src2.hi\n\t" 13208 "skip:\t" %} 13209 ins_encode( long_cmp_flags1( src1, src2 ) ); 13210 ins_pipe( ialu_cr_reg_reg ); 13211 %} 13212 13213 // Long compare reg == zero/reg OR reg != zero/reg 13214 // Just a wrapper for a normal branch, plus the predicate test. 13215 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{ 13216 match(If cmp flags); 13217 effect(USE labl); 13218 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 13219 expand %{ 13220 jmpCon(cmp,flags,labl); // JEQ or JNE... 13221 %} 13222 %} 13223 13224 //====== 13225 // Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// Unsigned-long EQ/NE flavor: OR the halves into a TEMP for the vs-zero
// case, CMP-low/JNE-skip/CMP-high for reg-reg, plus the branch wrapper that
// only matches when the Bool test is eq or ne.  Mirrors the signed cmpL_*
// EQNE rules above (same long_cmp_flags0/1 encodings).
13226 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{ 13227 match(Set flags (CmpUL src zero)); 13228 effect(TEMP tmp); 13229 ins_cost(200); 13230 format %{ "MOV $tmp,$src.lo\n\t" 13231 "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %} 13232 ins_encode(long_cmp_flags0(src, tmp)); 13233 ins_pipe(ialu_reg_reg_long); 13234 %} 13235 13236 // Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares. 13237 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{ 13238 match(Set flags (CmpUL src1 src2)); 13239 ins_cost(200+300); 13240 format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" 13241 "JNE,s skip\n\t" 13242 "CMP $src1.hi,$src2.hi\n\t" 13243 "skip:\t" %} 13244 ins_encode(long_cmp_flags1(src1, src2)); 13245 ins_pipe(ialu_cr_reg_reg); 13246 %} 13247 13248 // Unsigned long compare reg == zero/reg OR reg != zero/reg 13249 // Just a wrapper for a normal branch, plus the predicate test. 13250 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{ 13251 match(If cmp flags); 13252 effect(USE labl); 13253 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); 13254 expand %{ 13255 jmpCon(cmp, flags, labl); // JEQ or JNE... 13256 %} 13257 %} 13258 13259 // Compare 2 longs and CMOVE longs.
// Long CMOV on a long EQ/NE compare result: two CMOVcc's, one per 32-bit
// half (reg-reg and reg-mem forms).  Guarded by supports_cmov() and a Bool
// test of eq/ne.
13260 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{ 13261 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 13262 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13263 ins_cost(400); 13264 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13265 "CMOV$cmp $dst.hi,$src.hi" %} 13266 opcode(0x0F,0x40); 13267 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 13268 ins_pipe( pipe_cmov_reg_long ); 13269 %} 13270 13271 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{ 13272 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 13273 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13274 ins_cost(500); 13275 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13276 "CMOV$cmp $dst.hi,$src.hi" %} 13277 opcode(0x0F,0x40); 13278 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 13279 ins_pipe( pipe_cmov_reg_long ); 13280 %} 13281 13282 // Compare 2 longs and CMOVE ints.
// Int CMOV on a long EQ/NE compare result: single CMOVcc, reg-reg and
// reg-mem forms.  Guarded by supports_cmov() and a Bool test of eq/ne.
13283 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{ 13284 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13285 match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); 13286 ins_cost(200); 13287 format %{ "CMOV$cmp $dst,$src" %} 13288 opcode(0x0F,0x40); 13289 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 13290 ins_pipe( pipe_cmov_reg ); 13291 %} 13292 13293 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{ 13294 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13295 match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); 13296 ins_cost(250); 13297 format %{ "CMOV$cmp $dst,$src" %} 13298 opcode(0x0F,0x40); 13299 ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); 13300 ins_pipe( pipe_cmov_mem ); 13301 %} 13302 13303 // Compare 2 longs and CMOVE ints.
// Pointer and floating-point CMOVs on a long EQ/NE compare, then the LE/GT
// flavor of signed/unsigned long compare, which swaps (commutes) the
// operands of the SBB sequence so a commuted branch test gives LE/GT.
// NOTE(review): as in the LTGE section, the FP predicates read
// "UseSSE.. && ...eq || ...ne" — && binds tighter than ||, so the UseSSE
// guard covers only the eq arm; presumably "(eq || ne)" was intended.
// Matches upstream text; confirm with ADLC before changing.
13304 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{ 13305 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13306 match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); 13307 ins_cost(200); 13308 format %{ "CMOV$cmp $dst,$src" %} 13309 opcode(0x0F,0x40); 13310 ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); 13311 ins_pipe( pipe_cmov_reg ); 13312 %} 13313 13314 // Compare 2 longs and CMOVE doubles 13315 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{ 13316 predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 13317 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13318 ins_cost(200); 13319 expand %{ 13320 fcmovDPR_regS(cmp,flags,dst,src); 13321 %} 13322 %} 13323 13324 // Compare 2 longs and CMOVE doubles 13325 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ 13326 predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 13327 match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); 13328 ins_cost(200); 13329 expand %{ 13330 fcmovD_regS(cmp,flags,dst,src); 13331 %} 13332 %} 13333 13334 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{ 13335 predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 13336 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13337 ins_cost(200); 13338 expand %{ 13339 fcmovFPR_regS(cmp,flags,dst,src); 13340 %} 13341 %} 13342 13343 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF
src) %{ 13344 predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); 13345 match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); 13346 ins_cost(200); 13347 expand %{ 13348 fcmovF_regS(cmp,flags,dst,src); 13349 %} 13350 %} 13351 13352 //====== 13353 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. 13354 // Same as cmpL_reg_flags_LEGT except must negate src 13355 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{ 13356 match( Set flags (CmpL src zero )); 13357 effect( TEMP tmp ); 13358 ins_cost(300); 13359 format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t" 13360 "CMP $tmp,$src.lo\n\t" 13361 "SBB $tmp,$src.hi\n\t" %} 13362 ins_encode( long_cmp_flags3(src, tmp) ); 13363 ins_pipe( ialu_reg_reg_long ); 13364 %} 13365 13366 // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. 13367 // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands 13368 // requires a commuted test to get the same result. 13369 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 13370 match( Set flags (CmpL src1 src2 )); 13371 effect( TEMP tmp ); 13372 ins_cost(300); 13373 format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t" 13374 "MOV $tmp,$src2.hi\n\t" 13375 "SBB $tmp,$src1.hi\t! Compute flags for long compare" %} 13376 ins_encode( long_cmp_flags2( src2, src1, tmp ) ); 13377 ins_pipe( ialu_cr_reg_reg ); 13378 %} 13379 13380 // Long compares reg < zero/req OR reg >= zero/req.
// LE/GT branch wrapper (uses cmpOp_commute, since the LEGT flag rules swap
// operands), plus the unsigned-long LEGT flag-manifestation rules that
// mirror the signed ones: negate-via-XOR/CMP/SBB for the vs-zero case and a
// swapped-operand CMP/SBB for reg-reg.
13381 // Just a wrapper for a normal branch, plus the predicate test 13382 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{ 13383 match(If cmp flags); 13384 effect(USE labl); 13385 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); 13386 ins_cost(300); 13387 expand %{ 13388 jmpCon(cmp,flags,labl); // JGT or JLE... 13389 %} 13390 %} 13391 13392 //====== 13393 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares. 13394 // Same as cmpUL_reg_flags_LEGT except must negate src 13395 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{ 13396 match(Set flags (CmpUL src zero)); 13397 effect(TEMP tmp); 13398 ins_cost(300); 13399 format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t" 13400 "CMP $tmp,$src.lo\n\t" 13401 "SBB $tmp,$src.hi\n\t" %} 13402 ins_encode(long_cmp_flags3(src, tmp)); 13403 ins_pipe(ialu_reg_reg_long); 13404 %} 13405 13406 // Manifest a CmpUL result in the normal flags. Only good for LE or GT compares. 13407 // Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands 13408 // requires a commuted test to get the same result. 13409 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{ 13410 match(Set flags (CmpUL src1 src2)); 13411 effect(TEMP tmp); 13412 ins_cost(300); 13413 format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t" 13414 "MOV $tmp,$src2.hi\n\t" 13415 "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %} 13416 ins_encode(long_cmp_flags2( src2, src1, tmp)); 13417 ins_pipe(ialu_cr_reg_reg); 13418 %} 13419 13420 // Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only GT/LE tests pair with the unsigned LE/GT flags class.
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Conditional move of a 64-bit value as two 32-bit CMOVcc's (lo then hi);
// requires hardware CMOV support.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above but the source long comes from memory (two CMOVcc loads,
// the hi half at offset +4).
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags variant of cmovLL_reg_LEGT (cmpOpU / ulong flags class).
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags variant of cmovLL_mem_LEGT.
instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Conditional move of an int selected by a long LE/GT compare; requires
// hardware CMOV support.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above but the int source comes from memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Conditional move of a pointer selected by a long LE/GT compare; requires
// hardware CMOV support.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// FIX: parenthesize the BoolTest disjunction in the four FP predicates below.
// They previously read "UseSSE<op>N && test==le || test==gt", which C++
// parses as "(UseSSE... && le) || gt", so the UseSSE guard never applied to
// the GT disjunct and the x87 and SSE rules could both claim a GT CMove
// regardless of the UseSSE setting.  The supports_cmov() rules above already
// parenthesize "( le || gt )"; these now do the same.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  // x87 double CMove (UseSSE<=1) via the fcmovDPR expansion.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // SSE2 double CMove (UseSSE>=2) via the fcmovD expansion.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // x87 float CMove (UseSSE==0) via the fcmovFPR expansion.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // SSE float CMove (UseSSE>=1) via the fcmovF expansion.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
// Dynamic (inline-cache) Java call: EAX is pre-loaded with the IC sentinel
// (oop)-1 before the call, per the format string below.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that does not touch the FPU: no FFREE / FPU bookkeeping emitted.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
// Indirect jump (FF /4) to the tail-call target; EBX carries the method ptr.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// Pops the return address into EDX (dummy) first, then does the indirect jump;
// the exception oop is pinned in EAX by the eAXRegP operand.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast-path monitor enter when RTM (restricted transactional memory) is in
// use; needs extra temps (cx1/cx2) and passes the RTM counters/profile data
// through to MacroAssembler::fast_lock.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Fast-path monitor enter without RTM; unused RTM arguments are passed as
// noreg/NULL/false.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

// Fast-path monitor exit (single rule for both RTM and non-RTM; the flag is
// forwarded to MacroAssembler::fast_unlock at runtime).
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
// GC poll: TEST EAX against the thread-local polling page; the relocation
// lets the VM find and arm the poll.
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    // NOTE(review): post_pc is computed but never used — confirm whether a
    // (post_pc - pre_pc) size check was intended here.
    address post_pc = __ pc();
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...]
//  );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Fold a reload of a just-stored spill slot: if the loaded value was stored
// from the same register to the same memory, re-emit only the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.