1 // 2 // Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // architecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes
// (for the GPRs below it is simply the x86 register number).

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_with_ebp and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_with_ebp and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_with_ebp and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI)
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_with_ebp and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_with_ebp and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
// Single-precision FP stack slots (low halves only; FPR0 excluded — see above).
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

// Double-precision FP stack slots (low/high register halves paired).
reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

// Singleton classes pinning a value to a specific FP stack slot.
reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
// All double slots except FPR1 (used when an operand must not alias slot 0).
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Remove the opmask register K1 from the allocatable set when post-loop
// multi-versioning is enabled (it expects its mask to live in K1).
void reg_mask_init() {
  if (Matcher::has_predicated_vectors()) {
    // Post-loop multi-versioning expects the mask to be present in the K1
    // register; until that is fixed, the RA should not be allocating K1.
    // This prevents any accidental corruption of the value held in K1.
    if (PostLoopMultiversioning) {
      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
      const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
    }
  }
}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Copy a 128-bit (lo, hi) pair into the 16-byte-aligned slot containing 'adr'
// and return the aligned address; used to build SSE mask operands below.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Sign-bit masks for AbsF/AbsD (clear sign) and NegF/NegD (flip sign).
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call (FLDCW restore and/or
// VZEROUPPER), needed to compute return-address offsets below.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

// Size in bytes of the FFree_Float_Stack_All stub call; recorded when the
// stub is first emitted, consumed by MachCallRuntimeNode::ret_addr_offset().
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  // Non-leaf-FP runtime calls additionally clear the x87 stack first.
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

// Native calls are not emitted via this node on 32-bit x86.
int MachCallNativeNode::ret_addr_offset() {
  ShouldNotCallThis();
  return -1;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
// Like the static variant above, but must also skip the 5-byte MOV that
// loads the inline-cache value ahead of the dynamic call.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M (or SIB) byte: mod in bits 7-6, reg/opcode in 5-3, r/m in 2-0.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition-code field OR'd in (e.g. Jcc, SETcc).
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // An oop-typed immediate must be a real oop (or NULL / the non-oop sentinel).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store.
// Emits [ESP+disp] addressing with an 8-bit displacement when it fits,
// otherwise a 32-bit displacement.  ESP as base always requires a SIB byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);                      // 8-bit displacement
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);                      // 32-bit displacement
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModR/M (+ optional SIB + displacement) bytes for a general
// [base + index*scale + displace] memory operand.  index == 0x4 means
// "no index"; base == -1 flags an absolute 32-bit address.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Emit a register-to-register MOV (0x8B /r); a self-copy emits nothing.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Patch up EFLAGS after a comiss/ucomiss so that an unordered (NaN)
// comparison reads as 'less than' (CF set, ZF/PF cleared).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst':
// -1 for less-than or unordered, 0 for equal, 1 for greater-than.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the method prolog; must mirror the code emitted by
// MachPrologNode::emit() / MacroAssembler::verified_entry().
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;  // EBP is pushed explicitly below
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;  // EBP is stored into the new frame instead
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  // verified_entry() emits the whole prolog: stack bang, EBP save,
  // frame allocation, and (optionally) the 24-bit FPU control word load.
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the method epilog; must mirror MachEpilogNode::emit() below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  // ADD ESP, #framesize — imm32 form (0x81) when the constant does not fit
  // in a signed byte, imm8 form (0x83) otherwise.
  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    // During size estimation no stub is registered; a dummy label suffices.
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//============================================================================= 747 748 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack }; 749 static enum RC rc_class( OptoReg::Name reg ) { 750 751 if( !OptoReg::is_valid(reg) ) return rc_bad; 752 if (OptoReg::is_stack(reg)) return rc_stack; 753 754 VMReg r = OptoReg::as_VMReg(reg); 755 if (r->is_Register()) return rc_int; 756 if (r->is_FloatRegister()) { 757 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 758 return rc_float; 759 } 760 if (r->is_KRegister()) return rc_kreg; 761 assert(r->is_XMMRegister(), "must be"); 762 return rc_xmm; 763 } 764 765 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, 766 int opcode, const char *op_str, int size, outputStream* st ) { 767 if( cbuf ) { 768 emit_opcode (*cbuf, opcode ); 769 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 770 #ifndef PRODUCT 771 } else if( !do_size ) { 772 if( size != 0 ) st->print("\n\t"); 773 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 774 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 775 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 776 } else { // FLD, FST, PUSH, POP 777 st->print("%s [ESP + #%d]",op_str,offset); 778 } 779 #endif 780 } 781 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 782 return size+3+offset_size; 783 } 784 785 // Helper for XMM registers. Extra opcode bits, limited syntax. 
// XMM spill/fill helper: moves a float (single slot) or double (adjacent
// register pair) between an XMM register and [ESP+offset].  Also computes
// the encoded size, including the VEX/EVEX prefix and EVEX compressed-disp8.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    // Adjacent pair => 64-bit (double) move.
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting: with EVEX (UseAVX > 2) the displacement may compress
  // to one byte even when > 127, per the compressed-disp8 rules.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (float or adjacent-pair double); returns the
// accumulated encoded size.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3; // MOVAPS without prefix
  return size + sz;
}

// GPR -> XMM copy via MOVD (32-bit only on this port).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX > 2) ? 6 : 4; // EVEX needs two extra prefix bytes
}


// XMM -> GPR copy via MOVD (32-bit only on this port).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX > 2) ? 6 : 4; // EVEX needs two extra prefix bytes
}

// GPR -> GPR copy: MOV r32,r/m32 (0x8B) with a register-direct ModRM.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 register to [ESP+offset].  If the source is not already on
// top of the FP stack, FLD it first and use the store-and-pop form.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // EBX_num/EDX_num are passed only for their register encodings (3 and 2,
  // per the reg_def table), which select the ModRM /digit: /3 = FSTP (store
  // and pop), /2 = FST (store, no pop).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st);

// Vector stack-to-stack copy.  Small vectors go through push/pop pairs;
// larger vectors bounce through xmm0, which is saved to and restored from
// scratch space just below ESP.
static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    switch (ireg) {
    case Op_VecS: // 32-bit: one push/pop pair
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD: // 64-bit: two 32-bit push/pop pairs
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX: // 128-bit: via xmm0, saved at [rsp-16]
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY: // 256-bit: via xmm0, saved at [rsp-32]
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ: // 512-bit: via xmm0, saved at [rsp-64]
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      // NOTE(review): format says "popq" but the emit path uses popl on this
      // 32-bit port -- looks like text carried over from x86_64; confirm.
      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu  xmm0, [rsp + #%d]\n\t"
                "movdqu  [rsp + #%d], xmm0\n\t"
                "movdqu  xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): prints "vmovdqu" while the emit path uses evmovdquq;
      // cosmetic mismatch in the debug listing only.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}

// Shared worker for MachSpillCopyNode::format/emit/size.  Classifies the
// source/destination (first and second 32-bit halves) into register classes
// and dispatches to the matching move helper.  With cbuf == NULL it either
// formats (do_size false) or just sizes the copy; the returned value is the
// accumulated encoded size (0 where the path does not track size).
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // Vector copies (but not vector-mask copies) take a dedicated path.
  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high half first so the low-half push does not clobber it.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
     } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
     }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                         st->print(             "FST %s",                            Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(src_first);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(dst_first);
    __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}


//=============================================================================
#ifndef PRODUCT
// Print the stack address computed for the lock box.
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Emit LEA reg,[ESP+offset]; disp32 form when the offset does not fit in
// a signed byte, disp8 form otherwise (must agree with size() below).
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// 7 bytes with a 32-bit displacement, 4 with an 8-bit one.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP    EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: inline-cache check.  Compares the expected klass
// (EAX) with the receiver's klass and jumps to the IC-miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
     nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Fixed size, checked against the actual emission in the assert above.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

// Vector calling convention not supported.
const bool Matcher::supports_vector_calling_convention() {
  return false;
}

OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  Unimplemented();
  return OptoRegPair(0, 0);
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if(  reg == ECX_num   || reg == EDX_num   ) return true;
  if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
         (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// Recognizes two patterns: AndL with a constant mask whose high word is
// zero, and a ConL constant whose high word is zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.   CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (0x66): switch to 16-bit operand size.
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Register-direct ModRM byte for a dst,src register pair.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Explicit opcode followed by a register-direct ModRM byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,0 via the 0xB8+rd short form with a zero immediate.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32 ,imm32
    emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);                     // je done
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1621 // Check for 8-bit immediate, and set sign extend bit in opcode 1622 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1623 emit_opcode(cbuf, $primary | 0x02); } 1624 else { // If 32-bit immediate 1625 emit_opcode(cbuf, $primary); 1626 } 1627 // Emit r/m byte with secondary opcode, after primary opcode. 1628 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1629 %} 1630 1631 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1632 // Check for 8-bit immediate, and set sign extend bit in opcode 1633 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1634 $$$emit8$imm$$constant; 1635 } 1636 else { // If 32-bit immediate 1637 // Output immediate 1638 $$$emit32$imm$$constant; 1639 } 1640 %} 1641 1642 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1643 // Emit primary opcode and set sign-extend bit 1644 // Check for 8-bit immediate, and set sign extend bit in opcode 1645 int con = (int)$imm$$constant; // Throw away top bits 1646 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1647 // Emit r/m byte with secondary opcode, after primary opcode. 1648 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1649 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1650 else emit_d32(cbuf,con); 1651 %} 1652 1653 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1654 // Emit primary opcode and set sign-extend bit 1655 // Check for 8-bit immediate, and set sign extend bit in opcode 1656 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1657 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1658 // Emit r/m byte with tertiary opcode, after primary opcode. 
1659 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1660 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1661 else emit_d32(cbuf,con); 1662 %} 1663 1664 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1665 emit_cc(cbuf, $secondary, $dst$$reg ); 1666 %} 1667 1668 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1669 int destlo = $dst$$reg; 1670 int desthi = HIGH_FROM_LOW(destlo); 1671 // bswap lo 1672 emit_opcode(cbuf, 0x0F); 1673 emit_cc(cbuf, 0xC8, destlo); 1674 // bswap hi 1675 emit_opcode(cbuf, 0x0F); 1676 emit_cc(cbuf, 0xC8, desthi); 1677 // xchg lo and hi 1678 emit_opcode(cbuf, 0x87); 1679 emit_rm(cbuf, 0x3, destlo, desthi); 1680 %} 1681 1682 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1683 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1684 %} 1685 1686 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1687 $$$emit8$primary; 1688 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1689 %} 1690 1691 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1692 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1693 emit_d8(cbuf, op >> 8 ); 1694 emit_d8(cbuf, op & 255); 1695 %} 1696 1697 // emulate a CMOV with a conditional branch around a MOV 1698 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1699 // Invert sense of branch from sense of CMOV 1700 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1701 emit_d8( cbuf, $brOffs$$constant ); 1702 %} 1703 1704 enc_class enc_PartialSubtypeCheck( ) %{ 1705 Register Redi = as_Register(EDI_enc); // result register 1706 Register Reax = as_Register(EAX_enc); // super class 1707 Register Recx = as_Register(ECX_enc); // killed 1708 Register Resi = as_Register(ESI_enc); // sub class 1709 Label miss; 1710 1711 MacroAssembler _masm(&cbuf); 1712 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1713 NULL, &miss, 1714 /*set_cond_codes:*/ true); 1715 if ($primary) { 1716 __ xorptr(Redi, Redi); 1717 } 1718 __ bind(miss); 1719 %} 1720 1721 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1722 
MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // Record (first time) or check (afterwards) the size of this sequence,
    // so later size predictions stay honest.
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                   runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // Move the x87 float result into xmm0 via a stack temp.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        // Move the x87 double result into xmm0 via a stack temp.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}

  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if (ra_->C->in_24_bit_fp_mode()) {
      // Note: this _masm is scoped to the if-block; the one below is separate.
      MacroAssembler _masm(&cbuf);
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler _masm(&cbuf);
    __ vzeroupper();
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;

    if (!_method) {
      // Not a Java method: direct runtime call.
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     rspec, RELOC_DISP32);
      // Emit stubs for static call.
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);             // Displacement

  %}

  // Following encoding is no longer used, but may be restored if calling
  // convention changes significantly.
  // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //     // int ic_reg     = Matcher::inline_cache_reg();
  //     // int ic_encode  = Matcher::_regEncode[ic_reg];
  //     // int imo_reg    = Matcher::interpreter_method_reg();
  //     // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //     // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
  //     // // so we load it immediately before the call
  //     // emit_opcode(cbuf, 0x8B);  // MOV    imo_reg,ic_reg  # method_ptr
  //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode );  // R/M byte
  //
  //     // xor rbp,ebp
  //     emit_opcode(cbuf, 0x33);
  //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //     // CALL to interpreter.
  //     cbuf.set_insts_mark();
  //     $$$emit8$primary;
  //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
  //   %}

  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc  = $dst$$reg;
    int src_con  = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    // NOTE(review): high half is addressed as reg+2 here rather than via
    // HIGH_FROM_LOW as elsewhere in this block -- confirm they agree.
    int dst_enc  = $dst$$reg + 2;
    int src_con  = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // NOTE(review): duplicate of the RegReg enc_class defined earlier in this
  // encode block -- confirm adlc accepts the redefinition.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  enc_class lock_prefix( ) %{
    emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB0);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    emit_opcode(cbuf,0xF0);

    // 16-bit mode
    emit_opcode(cbuf, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize ZF!=0 as a 0/1 boolean in 'res' (branch around MOV res,1).
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Long shift by constant 1..31: SHLD/SHRD across the pair ($tertiary),
  // then shift the remaining half ($primary//$secondary).
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by 32..63.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical long shift by 32..63; the vacated half is cleared below.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
emit_opcode(cbuf,0x33);     // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move   (0x7C = JL rel8, skips the 2-byte MOV)
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move   (0x7F = JG rel8, skips the 2-byte MOV)
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst      (F7 /3)
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst    (0F 9C)
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branchless p = (p < q) ? p+y : p  via SUB/SBB/AND/ADD.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32   (F7 /0 imm32 against ECX)
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
// small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;                      // FST (no pop) if src already at TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Spill two XMM doubles through a stack temp and push both onto the x87 stack.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 result back into an XMM register (double).
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop the x87 result back into an XMM register (float); d8 is the
  // stack-temp size to release.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp  ::loop   (rel32 = -12, back to the fprem)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf,
0x66 ); // operand-size prefix for 16-bit immediate 2568 emit_opcode( cbuf, 0xA9 ); 2569 emit_d16 ( cbuf, 0x0400 ); 2570 // // // This sequence works, but stalls for 12-16 cycles on PPro 2571 // // test rax,0x0400 2572 // emit_opcode( cbuf, 0xA9 ); 2573 // emit_d32 ( cbuf, 0x00000400 ); 2574 // 2575 // jz exit (no unordered comparison) 2576 emit_opcode( cbuf, 0x74 ); 2577 emit_d8 ( cbuf, 0x02 ); 2578 // mov ah,1 - treat as LT case (set carry flag) 2579 emit_opcode( cbuf, 0xB4 ); 2580 emit_d8 ( cbuf, 0x01 ); 2581 // sahf 2582 emit_opcode( cbuf, 0x9E); 2583 %} 2584 2585 enc_class cmpF_P6_fixup() %{ 2586 // Fixup the integer flags in case comparison involved a NaN 2587 // 2588 // JNP exit (no unordered comparison, P-flag is set by NaN) 2589 emit_opcode( cbuf, 0x7B ); 2590 emit_d8 ( cbuf, 0x03 ); 2591 // MOV AH,1 - treat as LT case (set carry flag) 2592 emit_opcode( cbuf, 0xB4 ); 2593 emit_d8 ( cbuf, 0x01 ); 2594 // SAHF 2595 emit_opcode( cbuf, 0x9E); 2596 // NOP // target for branch to avoid branch to branch 2597 emit_opcode( cbuf, 0x90); 2598 %} 2599 2600 // fnstsw_ax(); 2601 // sahf(); 2602 // movl(dst, nan_result); 2603 // jcc(Assembler::parity, exit); 2604 // movl(dst, less_result); 2605 // jcc(Assembler::below, exit); 2606 // movl(dst, equal_result); 2607 // jcc(Assembler::equal, exit); 2608 // movl(dst, greater_result); 2609 2610 // less_result = 1; 2611 // greater_result = -1; 2612 // equal_result = 0; 2613 // nan_result = -1; 2614 2615 enc_class CmpF_Result(rRegI dst) %{ 2616 // fnstsw_ax(); 2617 emit_opcode( cbuf, 0xDF); 2618 emit_opcode( cbuf, 0xE0); 2619 // sahf 2620 emit_opcode( cbuf, 0x9E); 2621 // movl(dst, nan_result); 2622 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2623 emit_d32( cbuf, -1 ); 2624 // jcc(Assembler::parity, exit); 2625 emit_opcode( cbuf, 0x7A ); 2626 emit_d8 ( cbuf, 0x13 ); 2627 // movl(dst, less_result); 2628 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2629 emit_d32( cbuf, -1 ); 2630 // jcc(Assembler::below, exit); 2631 emit_opcode( cbuf, 0x72 ); 
2632 emit_d8 ( cbuf, 0x0C ); 2633 // movl(dst, equal_result); 2634 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2635 emit_d32( cbuf, 0 ); 2636 // jcc(Assembler::equal, exit); 2637 emit_opcode( cbuf, 0x74 ); 2638 emit_d8 ( cbuf, 0x05 ); 2639 // movl(dst, greater_result); 2640 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2641 emit_d32( cbuf, 1 ); 2642 %} 2643 2644 2645 // Compare the longs and set flags 2646 // BROKEN! Do Not use as-is 2647 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2648 // CMP $src1.hi,$src2.hi 2649 emit_opcode( cbuf, 0x3B ); 2650 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2651 // JNE,s done 2652 emit_opcode(cbuf,0x75); 2653 emit_d8(cbuf, 2 ); 2654 // CMP $src1.lo,$src2.lo 2655 emit_opcode( cbuf, 0x3B ); 2656 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2657 // done: 2658 %} 2659 2660 enc_class convert_int_long( regL dst, rRegI src ) %{ 2661 // mov $dst.lo,$src 2662 int dst_encoding = $dst$$reg; 2663 int src_encoding = $src$$reg; 2664 encode_Copy( cbuf, dst_encoding , src_encoding ); 2665 // mov $dst.hi,$src 2666 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2667 // sar $dst.hi,31 2668 emit_opcode( cbuf, 0xC1 ); 2669 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2670 emit_d8(cbuf, 0x1F ); 2671 %} 2672 2673 enc_class convert_long_double( eRegL src ) %{ 2674 // push $src.hi 2675 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2676 // push $src.lo 2677 emit_opcode(cbuf, 0x50+$src$$reg ); 2678 // fild 64-bits at [SP] 2679 emit_opcode(cbuf,0xdf); 2680 emit_d8(cbuf, 0x6C); 2681 emit_d8(cbuf, 0x24); 2682 emit_d8(cbuf, 0x00); 2683 // pop stack 2684 emit_opcode(cbuf, 0x83); // add SP, #8 2685 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2686 emit_d8(cbuf, 0x8); 2687 %} 2688 2689 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2690 // IMUL EDX:EAX,$src1 2691 emit_opcode( cbuf, 0xF7 ); 2692 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2693 // SAR 
EDX,$cnt-32 2694 int shift_count = ((int)$cnt$$constant) - 32; 2695 if (shift_count > 0) { 2696 emit_opcode(cbuf, 0xC1); 2697 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2698 emit_d8(cbuf, shift_count); 2699 } 2700 %} 2701 2702 // this version doesn't have add sp, 8 2703 enc_class convert_long_double2( eRegL src ) %{ 2704 // push $src.hi 2705 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2706 // push $src.lo 2707 emit_opcode(cbuf, 0x50+$src$$reg ); 2708 // fild 64-bits at [SP] 2709 emit_opcode(cbuf,0xdf); 2710 emit_d8(cbuf, 0x6C); 2711 emit_d8(cbuf, 0x24); 2712 emit_d8(cbuf, 0x00); 2713 %} 2714 2715 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2716 // Basic idea: long = (long)int * (long)int 2717 // IMUL EDX:EAX, src 2718 emit_opcode( cbuf, 0xF7 ); 2719 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2720 %} 2721 2722 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2723 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2724 // MUL EDX:EAX, src 2725 emit_opcode( cbuf, 0xF7 ); 2726 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2727 %} 2728 2729 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2730 // Basic idea: lo(result) = lo(x_lo * y_lo) 2731 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2732 // MOV $tmp,$src.lo 2733 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2734 // IMUL $tmp,EDX 2735 emit_opcode( cbuf, 0x0F ); 2736 emit_opcode( cbuf, 0xAF ); 2737 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2738 // MOV EDX,$src.hi 2739 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2740 // IMUL EDX,EAX 2741 emit_opcode( cbuf, 0x0F ); 2742 emit_opcode( cbuf, 0xAF ); 2743 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2744 // ADD $tmp,EDX 2745 emit_opcode( cbuf, 0x03 ); 2746 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2747 // MUL EDX:EAX,$src.lo 2748 emit_opcode( cbuf, 0xF7 ); 2749 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2750 // ADD EDX,ESI 2751 emit_opcode( 
cbuf, 0x03 ); 2752 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2753 %} 2754 2755 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2756 // Basic idea: lo(result) = lo(src * y_lo) 2757 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2758 // IMUL $tmp,EDX,$src 2759 emit_opcode( cbuf, 0x6B ); 2760 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2761 emit_d8( cbuf, (int)$src$$constant ); 2762 // MOV EDX,$src 2763 emit_opcode(cbuf, 0xB8 + EDX_enc); 2764 emit_d32( cbuf, (int)$src$$constant ); 2765 // MUL EDX:EAX,EDX 2766 emit_opcode( cbuf, 0xF7 ); 2767 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2768 // ADD EDX,ESI 2769 emit_opcode( cbuf, 0x03 ); 2770 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2771 %} 2772 2773 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2774 // PUSH src1.hi 2775 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2776 // PUSH src1.lo 2777 emit_opcode(cbuf, 0x50+$src1$$reg ); 2778 // PUSH src2.hi 2779 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2780 // PUSH src2.lo 2781 emit_opcode(cbuf, 0x50+$src2$$reg ); 2782 // CALL directly to the runtime 2783 cbuf.set_insts_mark(); 2784 emit_opcode(cbuf,0xE8); // Call into runtime 2785 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2786 // Restore stack 2787 emit_opcode(cbuf, 0x83); // add SP, #framesize 2788 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2789 emit_d8(cbuf, 4*4); 2790 %} 2791 2792 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2793 // PUSH src1.hi 2794 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2795 // PUSH src1.lo 2796 emit_opcode(cbuf, 0x50+$src1$$reg ); 2797 // PUSH src2.hi 2798 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2799 // PUSH src2.lo 2800 emit_opcode(cbuf, 0x50+$src2$$reg ); 2801 // CALL directly to the runtime 2802 cbuf.set_insts_mark(); 2803 emit_opcode(cbuf,0xE8); // Call into runtime 2804 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2805 // Restore stack 2806 emit_opcode(cbuf, 0x83); // add SP, #framesize 2807 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2808 emit_d8(cbuf, 4*4); 2809 %} 2810 2811 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2812 // MOV $tmp,$src.lo 2813 emit_opcode(cbuf, 0x8B); 2814 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2815 // OR $tmp,$src.hi 2816 emit_opcode(cbuf, 0x0B); 2817 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2818 %} 2819 2820 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2821 // CMP $src1.lo,$src2.lo 2822 emit_opcode( cbuf, 0x3B ); 2823 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2824 // JNE,s skip 2825 emit_cc(cbuf, 0x70, 0x5); 2826 emit_d8(cbuf,2); 2827 // CMP $src1.hi,$src2.hi 2828 emit_opcode( cbuf, 0x3B ); 2829 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2830 %} 2831 2832 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2833 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2834 emit_opcode( cbuf, 0x3B ); 2835 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2836 // MOV $tmp,$src1.hi 2837 emit_opcode( cbuf, 0x8B ); 2838 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2839 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2840 emit_opcode( cbuf, 0x1B ); 2841 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2842 %} 2843 2844 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2845 // XOR $tmp,$tmp 2846 emit_opcode(cbuf,0x33); // XOR 2847 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2848 // CMP $tmp,$src.lo 2849 emit_opcode( cbuf, 0x3B ); 2850 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2851 // SBB $tmp,$src.hi 2852 emit_opcode( cbuf, 0x1B ); 2853 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2854 %} 2855 2856 // Sniff, sniff... 
smells like Gnu Superoptimizer 2857 enc_class neg_long( eRegL dst ) %{ 2858 emit_opcode(cbuf,0xF7); // NEG hi 2859 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2860 emit_opcode(cbuf,0xF7); // NEG lo 2861 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2862 emit_opcode(cbuf,0x83); // SBB hi,0 2863 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2864 emit_d8 (cbuf,0 ); 2865 %} 2866 2867 enc_class enc_pop_rdx() %{ 2868 emit_opcode(cbuf,0x5A); 2869 %} 2870 2871 enc_class enc_rethrow() %{ 2872 cbuf.set_insts_mark(); 2873 emit_opcode(cbuf, 0xE9); // jmp entry 2874 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2875 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2876 %} 2877 2878 2879 // Convert a double to an int. Java semantics require we do complex 2880 // manglelations in the corner cases. So we set the rounding mode to 2881 // 'zero', store the darned double down as an int, and reset the 2882 // rounding mode to 'nearest'. The hardware throws an exception which 2883 // patches up the correct value directly to the stack. 2884 enc_class DPR2I_encoding( regDPR src ) %{ 2885 // Flip to round-to-zero mode. We attempted to allow invalid-op 2886 // exceptions here, so that a NAN or other corner-case value will 2887 // thrown an exception (but normal values get converted at full speed). 2888 // However, I2C adapters and other float-stack manglers leave pending 2889 // invalid-op exceptions hanging. We would have to clear them before 2890 // enabling them and that is more expensive than just testing for the 2891 // invalid value Intel stores down in the corner cases. 2892 emit_opcode(cbuf,0xD9); // FLDCW trunc 2893 emit_opcode(cbuf,0x2D); 2894 emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); 2895 // Allocate a word 2896 emit_opcode(cbuf,0x83); // SUB ESP,4 2897 emit_opcode(cbuf,0xEC); 2898 emit_d8(cbuf,0x04); 2899 // Encoding assumes a double has been pushed into FPR0. 
2900 // Store down the double as an int, popping the FPU stack 2901 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2902 emit_opcode(cbuf,0x1C); 2903 emit_d8(cbuf,0x24); 2904 // Restore the rounding mode; mask the exception 2905 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2906 emit_opcode(cbuf,0x2D); 2907 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2908 ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() 2909 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); 2910 2911 // Load the converted int; adjust CPU stack 2912 emit_opcode(cbuf,0x58); // POP EAX 2913 emit_opcode(cbuf,0x3D); // CMP EAX,imm 2914 emit_d32 (cbuf,0x80000000); // 0x80000000 2915 emit_opcode(cbuf,0x75); // JNE around_slow_call 2916 emit_d8 (cbuf,0x07); // Size of slow_call 2917 // Push src onto stack slow-path 2918 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2919 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2920 // CALL directly to the runtime 2921 cbuf.set_insts_mark(); 2922 emit_opcode(cbuf,0xE8); // Call into runtime 2923 emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2924 // Carry on here... 2925 %} 2926 2927 enc_class DPR2L_encoding( regDPR src ) %{ 2928 emit_opcode(cbuf,0xD9); // FLDCW trunc 2929 emit_opcode(cbuf,0x2D); 2930 emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); 2931 // Allocate a word 2932 emit_opcode(cbuf,0x83); // SUB ESP,8 2933 emit_opcode(cbuf,0xEC); 2934 emit_d8(cbuf,0x08); 2935 // Encoding assumes a double has been pushed into FPR0. 2936 // Store down the double as a long, popping the FPU stack 2937 emit_opcode(cbuf,0xDF); // FISTP [ESP] 2938 emit_opcode(cbuf,0x3C); 2939 emit_d8(cbuf,0x24); 2940 // Restore the rounding mode; mask the exception 2941 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2942 emit_opcode(cbuf,0x2D); 2943 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2944 ? 
(int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() 2945 : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); 2946 2947 // Load the converted int; adjust CPU stack 2948 emit_opcode(cbuf,0x58); // POP EAX 2949 emit_opcode(cbuf,0x5A); // POP EDX 2950 emit_opcode(cbuf,0x81); // CMP EDX,imm 2951 emit_d8 (cbuf,0xFA); // rdx 2952 emit_d32 (cbuf,0x80000000); // 0x80000000 2953 emit_opcode(cbuf,0x75); // JNE around_slow_call 2954 emit_d8 (cbuf,0x07+4); // Size of slow_call 2955 emit_opcode(cbuf,0x85); // TEST EAX,EAX 2956 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 2957 emit_opcode(cbuf,0x75); // JNE around_slow_call 2958 emit_d8 (cbuf,0x07); // Size of slow_call 2959 // Push src onto stack slow-path 2960 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2961 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2962 // CALL directly to the runtime 2963 cbuf.set_insts_mark(); 2964 emit_opcode(cbuf,0xE8); // Call into runtime 2965 emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2966 // Carry on here... 
2967 %} 2968 2969 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 2970 // Operand was loaded from memory into fp ST (stack top) 2971 // FMUL ST,$src /* D8 C8+i */ 2972 emit_opcode(cbuf, 0xD8); 2973 emit_opcode(cbuf, 0xC8 + $src1$$reg); 2974 %} 2975 2976 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 2977 // FADDP ST,src2 /* D8 C0+i */ 2978 emit_opcode(cbuf, 0xD8); 2979 emit_opcode(cbuf, 0xC0 + $src2$$reg); 2980 //could use FADDP src2,fpST /* DE C0+i */ 2981 %} 2982 2983 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 2984 // FADDP src2,ST /* DE C0+i */ 2985 emit_opcode(cbuf, 0xDE); 2986 emit_opcode(cbuf, 0xC0 + $src2$$reg); 2987 %} 2988 2989 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 2990 // Operand has been loaded into fp ST (stack top) 2991 // FSUB ST,$src1 2992 emit_opcode(cbuf, 0xD8); 2993 emit_opcode(cbuf, 0xE0 + $src1$$reg); 2994 2995 // FDIV 2996 emit_opcode(cbuf, 0xD8); 2997 emit_opcode(cbuf, 0xF0 + $src2$$reg); 2998 %} 2999 3000 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3001 // Operand was loaded from memory into fp ST (stack top) 3002 // FADD ST,$src /* D8 C0+i */ 3003 emit_opcode(cbuf, 0xD8); 3004 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3005 3006 // FMUL ST,src2 /* D8 C*+i */ 3007 emit_opcode(cbuf, 0xD8); 3008 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3009 %} 3010 3011 3012 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3013 // Operand was loaded from memory into fp ST (stack top) 3014 // FADD ST,$src /* D8 C0+i */ 3015 emit_opcode(cbuf, 0xD8); 3016 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3017 3018 // FMULP src2,ST /* DE C8+i */ 3019 emit_opcode(cbuf, 0xDE); 3020 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3021 %} 3022 3023 // Atomically load the volatile long 3024 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3025 emit_opcode(cbuf,0xDF); 3026 int rm_byte_opcode = 0x05; 3027 int base = $mem$$base; 3028 int index = $mem$$index; 3029 int scale = $mem$$scale; 3030 int displace = $mem$$disp; 3031 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3032 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3033 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3034 %} 3035 3036 // Volatile Store Long. Must be atomic, so move it into 3037 // the FP TOS and then do a 64-bit FIST. Has to probe the 3038 // target address before the store (for null-ptr checks) 3039 // so the memory operand is used twice in the encoding. 3040 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3041 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3042 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3043 emit_opcode(cbuf,0xDF); 3044 int rm_byte_opcode = 0x07; 3045 int base = $mem$$base; 3046 int index = $mem$$index; 3047 int scale = $mem$$scale; 3048 int displace = $mem$$disp; 3049 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3050 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3051 %} 3052 3053 %} 3054 3055 3056 //----------FRAME-------------------------------------------------------------- 3057 // Definition of frame structure and management information. 3058 // 3059 // S T A C K L A Y O U T Allocators stack-slot number 3060 // | (to get allocators register number 3061 // G Owned by | | v add OptoReg::stack0()) 3062 // r CALLER | | 3063 // o | +--------+ pad to even-align allocators stack-slot 3064 // w V | pad0 | numbers; owned by CALLER 3065 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned 3066 // h ^ | in | 5 3067 // | | args | 4 Holes in incoming args owned by SELF 3068 // | | | | 3 3069 // | | +--------+ 3070 // V | | old out| Empty on Intel, window on Sparc 3071 // | old |preserve| Must be even aligned. 3072 // | SP-+--------+----> Matcher::_old_SP, even aligned 3073 // | | in | 3 area for Intel ret address 3074 // Owned by |preserve| Empty on Sparc. 
//     SELF    +--------+
//        |    |  pad2  | 2   pad to align old SP
//        |    +--------+ 1
//        |    | locks  | 0
//        |    +--------+----> OptoReg::stack0(), even aligned
//        |    |  pad1  | 11   pad to align new SP
//        |    +--------+
//        |    |        | 10
//        |    | spills |  9   spills
//        V    |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^    |  out   |  7
//        |    |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by  +--------+
//    CALLEE   | new out|  6   Empty on Intel, window on Sparc
//        | new |preserve|     Must be even-aligned.
//        |  SP-+--------+----> Matcher::_new_SP, even aligned
//        |    |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be nessecary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be nessecary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib
ins_cost(100); // Required cost attribute 3183 ins_attrib ins_size(8); // Required size attribute (in bits) 3184 ins_attrib ins_short_branch(0); // Required flag: is this instruction a 3185 // non-matching short branch variant of some 3186 // long branch? 3187 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) 3188 // specifies the alignment that some part of the instruction (not 3189 // necessarily the start) requires. If > 1, a compute_padding() 3190 // function must be provided for the instruction 3191 3192 //----------OPERANDS----------------------------------------------------------- 3193 // Operand definitions must precede instruction definitions for correct parsing 3194 // in the ADLC because operands constitute user defined types which are used in 3195 // instruction definitions. 3196 3197 //----------Simple Operands---------------------------------------------------- 3198 // Immediate Operands 3199 // Integer Immediate 3200 operand immI() %{ 3201 match(ConI); 3202 3203 op_cost(10); 3204 format %{ %} 3205 interface(CONST_INTER); 3206 %} 3207 3208 // Constant for test vs zero 3209 operand immI_0() %{ 3210 predicate(n->get_int() == 0); 3211 match(ConI); 3212 3213 op_cost(0); 3214 format %{ %} 3215 interface(CONST_INTER); 3216 %} 3217 3218 // Constant for increment 3219 operand immI_1() %{ 3220 predicate(n->get_int() == 1); 3221 match(ConI); 3222 3223 op_cost(0); 3224 format %{ %} 3225 interface(CONST_INTER); 3226 %} 3227 3228 // Constant for decrement 3229 operand immI_M1() %{ 3230 predicate(n->get_int() == -1); 3231 match(ConI); 3232 3233 op_cost(0); 3234 format %{ %} 3235 interface(CONST_INTER); 3236 %} 3237 3238 // Valid scale values for addressing modes 3239 operand immI2() %{ 3240 predicate(0 <= n->get_int() && (n->get_int() <= 3)); 3241 match(ConI); 3242 3243 format %{ %} 3244 interface(CONST_INTER); 3245 %} 3246 3247 operand immI8() %{ 3248 predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); 3249 match(ConI); 
3250 3251 op_cost(5); 3252 format %{ %} 3253 interface(CONST_INTER); 3254 %} 3255 3256 operand immU8() %{ 3257 predicate((0 <= n->get_int()) && (n->get_int() <= 255)); 3258 match(ConI); 3259 3260 op_cost(5); 3261 format %{ %} 3262 interface(CONST_INTER); 3263 %} 3264 3265 operand immI16() %{ 3266 predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); 3267 match(ConI); 3268 3269 op_cost(10); 3270 format %{ %} 3271 interface(CONST_INTER); 3272 %} 3273 3274 // Int Immediate non-negative 3275 operand immU31() 3276 %{ 3277 predicate(n->get_int() >= 0); 3278 match(ConI); 3279 3280 op_cost(0); 3281 format %{ %} 3282 interface(CONST_INTER); 3283 %} 3284 3285 // Constant for long shifts 3286 operand immI_32() %{ 3287 predicate( n->get_int() == 32 ); 3288 match(ConI); 3289 3290 op_cost(0); 3291 format %{ %} 3292 interface(CONST_INTER); 3293 %} 3294 3295 operand immI_1_31() %{ 3296 predicate( n->get_int() >= 1 && n->get_int() <= 31 ); 3297 match(ConI); 3298 3299 op_cost(0); 3300 format %{ %} 3301 interface(CONST_INTER); 3302 %} 3303 3304 operand immI_32_63() %{ 3305 predicate( n->get_int() >= 32 && n->get_int() <= 63 ); 3306 match(ConI); 3307 op_cost(0); 3308 3309 format %{ %} 3310 interface(CONST_INTER); 3311 %} 3312 3313 operand immI_2() %{ 3314 predicate( n->get_int() == 2 ); 3315 match(ConI); 3316 3317 op_cost(0); 3318 format %{ %} 3319 interface(CONST_INTER); 3320 %} 3321 3322 operand immI_3() %{ 3323 predicate( n->get_int() == 3 ); 3324 match(ConI); 3325 3326 op_cost(0); 3327 format %{ %} 3328 interface(CONST_INTER); 3329 %} 3330 3331 operand immI_4() 3332 %{ 3333 predicate(n->get_int() == 4); 3334 match(ConI); 3335 3336 op_cost(0); 3337 format %{ %} 3338 interface(CONST_INTER); 3339 %} 3340 3341 operand immI_8() 3342 %{ 3343 predicate(n->get_int() == 8); 3344 match(ConI); 3345 3346 op_cost(0); 3347 format %{ %} 3348 interface(CONST_INTER); 3349 %} 3350 3351 // Pointer Immediate 3352 operand immP() %{ 3353 match(ConP); 3354 3355 op_cost(10); 3356 format %{ %} 
3357 interface(CONST_INTER); 3358 %} 3359 3360 // NULL Pointer Immediate 3361 operand immP0() %{ 3362 predicate( n->get_ptr() == 0 ); 3363 match(ConP); 3364 op_cost(0); 3365 3366 format %{ %} 3367 interface(CONST_INTER); 3368 %} 3369 3370 // Long Immediate 3371 operand immL() %{ 3372 match(ConL); 3373 3374 op_cost(20); 3375 format %{ %} 3376 interface(CONST_INTER); 3377 %} 3378 3379 // Long Immediate zero 3380 operand immL0() %{ 3381 predicate( n->get_long() == 0L ); 3382 match(ConL); 3383 op_cost(0); 3384 3385 format %{ %} 3386 interface(CONST_INTER); 3387 %} 3388 3389 // Long Immediate zero 3390 operand immL_M1() %{ 3391 predicate( n->get_long() == -1L ); 3392 match(ConL); 3393 op_cost(0); 3394 3395 format %{ %} 3396 interface(CONST_INTER); 3397 %} 3398 3399 // Long immediate from 0 to 127. 3400 // Used for a shorter form of long mul by 10. 3401 operand immL_127() %{ 3402 predicate((0 <= n->get_long()) && (n->get_long() <= 127)); 3403 match(ConL); 3404 op_cost(0); 3405 3406 format %{ %} 3407 interface(CONST_INTER); 3408 %} 3409 3410 // Long Immediate: low 32-bit mask 3411 operand immL_32bits() %{ 3412 predicate(n->get_long() == 0xFFFFFFFFL); 3413 match(ConL); 3414 op_cost(0); 3415 3416 format %{ %} 3417 interface(CONST_INTER); 3418 %} 3419 3420 // Long Immediate: low 32-bit mask 3421 operand immL32() %{ 3422 predicate(n->get_long() == (int)(n->get_long())); 3423 match(ConL); 3424 op_cost(20); 3425 3426 format %{ %} 3427 interface(CONST_INTER); 3428 %} 3429 3430 //Double Immediate zero 3431 operand immDPR0() %{ 3432 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3433 // bug that generates code such that NaNs compare equal to 0.0 3434 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); 3435 match(ConD); 3436 3437 op_cost(5); 3438 format %{ %} 3439 interface(CONST_INTER); 3440 %} 3441 3442 // Double Immediate one 3443 operand immDPR1() %{ 3444 predicate( UseSSE<=1 && n->getd() == 1.0 ); 3445 match(ConD); 3446 3447 
op_cost(5); 3448 format %{ %} 3449 interface(CONST_INTER); 3450 %} 3451 3452 // Double Immediate 3453 operand immDPR() %{ 3454 predicate(UseSSE<=1); 3455 match(ConD); 3456 3457 op_cost(5); 3458 format %{ %} 3459 interface(CONST_INTER); 3460 %} 3461 3462 operand immD() %{ 3463 predicate(UseSSE>=2); 3464 match(ConD); 3465 3466 op_cost(5); 3467 format %{ %} 3468 interface(CONST_INTER); 3469 %} 3470 3471 // Double Immediate zero 3472 operand immD0() %{ 3473 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3474 // bug that generates code such that NaNs compare equal to 0.0 AND do not 3475 // compare equal to -0.0. 3476 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 3477 match(ConD); 3478 3479 format %{ %} 3480 interface(CONST_INTER); 3481 %} 3482 3483 // Float Immediate zero 3484 operand immFPR0() %{ 3485 predicate(UseSSE == 0 && n->getf() == 0.0F); 3486 match(ConF); 3487 3488 op_cost(5); 3489 format %{ %} 3490 interface(CONST_INTER); 3491 %} 3492 3493 // Float Immediate one 3494 operand immFPR1() %{ 3495 predicate(UseSSE == 0 && n->getf() == 1.0F); 3496 match(ConF); 3497 3498 op_cost(5); 3499 format %{ %} 3500 interface(CONST_INTER); 3501 %} 3502 3503 // Float Immediate 3504 operand immFPR() %{ 3505 predicate( UseSSE == 0 ); 3506 match(ConF); 3507 3508 op_cost(5); 3509 format %{ %} 3510 interface(CONST_INTER); 3511 %} 3512 3513 // Float Immediate 3514 operand immF() %{ 3515 predicate(UseSSE >= 1); 3516 match(ConF); 3517 3518 op_cost(5); 3519 format %{ %} 3520 interface(CONST_INTER); 3521 %} 3522 3523 // Float Immediate zero. 
// Zero and not -0.0
// (jint_cast == 0 requires the all-zero bit pattern, so -0.0 is rejected)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
// 16 and 24 are the shift distances used by the sign-extension idiom
// (shift left then arithmetic shift right) for short/byte values.
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Vector-mask register operands (register classes vectmask_reg*,
// presumably AVX-512 opmask registers — confirm against register block).
// kReg allocates from the whole mask class; kReg_K1..kReg_K7 each pin a
// specific mask register.
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K1()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K1));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K2()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K2));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K3()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K3));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K4()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K4));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K5()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K5));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K6()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K6));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K7()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K7));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Register Operands
// Integer Register
// Any allocatable 32-bit integer register; the extra match() lines let the
// matcher accept the more constrained register operands below in its place.
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
// Only the byte-addressable registers (EAX/EBX/ECX/EDX per int_x_reg).
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Fixed single-register operands, used by instructions with implicit
// register requirements (e.g. mul/div, shifts by CL, string ops).
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register excluding EAX (nax_reg class).
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding EAX and EDX (nadx_reg class).
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding ECX (ncx_reg class).
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
// Any register usable to hold a pointer, including SP/BP (any_reg class).
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

// General-purpose pointer register (int_reg class).
operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Same register class as eRegP; alternate name kept for rule readability.
operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
// Pointer register excluding EBP; see note above about implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  // Higher cost than eRegP so this restricted class is only chosen when needed.
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX (nax_reg class).
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX and EBX (nabx_reg class).
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register from the p_reg class.
operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long value held in a pair of 32-bit registers (long_reg class).
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Fixed EDX:EAX pair, the implicit operand pair of mul/div instructions.
operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

// Fixed EBX:ECX pair.
operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
// Allocated as the EDX:EAX pair but only the low half (EAX) is named.
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// Flags for FP compares that need no unordered fixup; predicate(false)
// keeps the matcher from selecting this operand directly — it is only
// substituted explicitly by rules that know the compare form.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands
// x87 FPU stack registers, only used when SSE2 doubles are unavailable.
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

// Any x87 double register except FPR1.
operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Legacy-encodable (non-extended) XMM float register.
operand legRegF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand vlRegF() %{
   constraint(ALLOC_IN_RC(float_reg_vl));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand legRegD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

operand vlRegD() %{
   constraint(ALLOC_IN_RC(double_reg_vl));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
// Absolute address; base 0xFFFFFFFF and index 0x4 are the ADLC encodings
// for "no base register" / "no index register".
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// Integer base register with a pointer-constant displacement (AddP off reg).
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" with out a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.
4259 4260 // load-long support 4261 operand load_long_RegP() %{ 4262 constraint(ALLOC_IN_RC(esi_reg)); 4263 match(RegP); 4264 match(eSIRegP); 4265 op_cost(100); 4266 format %{ %} 4267 interface(REG_INTER); 4268 %} 4269 4270 // Indirect Memory Operand Long 4271 operand load_long_indirect(load_long_RegP reg) %{ 4272 constraint(ALLOC_IN_RC(esi_reg)); 4273 match(reg); 4274 4275 format %{ "[$reg]" %} 4276 interface(MEMORY_INTER) %{ 4277 base($reg); 4278 index(0x4); 4279 scale(0x0); 4280 disp(0x0); 4281 %} 4282 %} 4283 4284 // Indirect Memory Plus Long Offset Operand 4285 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4286 match(AddP reg off); 4287 4288 format %{ "[$reg + $off]" %} 4289 interface(MEMORY_INTER) %{ 4290 base($reg); 4291 index(0x4); 4292 scale(0x0); 4293 disp($off); 4294 %} 4295 %} 4296 4297 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4298 4299 4300 //----------Special Memory Operands-------------------------------------------- 4301 // Stack Slot Operand - This operand is used for loading and storing temporary 4302 // values on the stack where a match requires a value to 4303 // flow through memory. 
4304 operand stackSlotP(sRegP reg) %{ 4305 constraint(ALLOC_IN_RC(stack_slots)); 4306 // No match rule because this operand is only generated in matching 4307 format %{ "[$reg]" %} 4308 interface(MEMORY_INTER) %{ 4309 base(0x4); // ESP 4310 index(0x4); // No Index 4311 scale(0x0); // No Scale 4312 disp($reg); // Stack Offset 4313 %} 4314 %} 4315 4316 operand stackSlotI(sRegI reg) %{ 4317 constraint(ALLOC_IN_RC(stack_slots)); 4318 // No match rule because this operand is only generated in matching 4319 format %{ "[$reg]" %} 4320 interface(MEMORY_INTER) %{ 4321 base(0x4); // ESP 4322 index(0x4); // No Index 4323 scale(0x0); // No Scale 4324 disp($reg); // Stack Offset 4325 %} 4326 %} 4327 4328 operand stackSlotF(sRegF reg) %{ 4329 constraint(ALLOC_IN_RC(stack_slots)); 4330 // No match rule because this operand is only generated in matching 4331 format %{ "[$reg]" %} 4332 interface(MEMORY_INTER) %{ 4333 base(0x4); // ESP 4334 index(0x4); // No Index 4335 scale(0x0); // No Scale 4336 disp($reg); // Stack Offset 4337 %} 4338 %} 4339 4340 operand stackSlotD(sRegD reg) %{ 4341 constraint(ALLOC_IN_RC(stack_slots)); 4342 // No match rule because this operand is only generated in matching 4343 format %{ "[$reg]" %} 4344 interface(MEMORY_INTER) %{ 4345 base(0x4); // ESP 4346 index(0x4); // No Index 4347 scale(0x0); // No Scale 4348 disp($reg); // Stack Offset 4349 %} 4350 %} 4351 4352 operand stackSlotL(sRegL reg) %{ 4353 constraint(ALLOC_IN_RC(stack_slots)); 4354 // No match rule because this operand is only generated in matching 4355 format %{ "[$reg]" %} 4356 interface(MEMORY_INTER) %{ 4357 base(0x4); // ESP 4358 index(0x4); // No Index 4359 scale(0x0); // No Scale 4360 disp($reg); // Stack Offset 4361 %} 4362 %} 4363 4364 //----------Conditional Branch Operands---------------------------------------- 4365 // Comparison Op - This is the operation of the comparison, and is limited to 4366 // the following set of codes: 4367 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) 
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
// The hex values are the x86 condition-code (tttn) encodings placed into
// Jcc/SETcc/CMOVcc opcodes (signed forms: l/ge/le/g).
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
// (only the strict/relational tests; eq/ne need the cmpOpUCF2 fixup below).
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
// Values here are opcode bits for the FCMOVcc forms rather than tttn codes;
// overflow/no_overflow are excluded by the predicate because FCMOV has no
// such conditions.
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
// Each condition is swapped with its mirror (l<->g, le<->ge) because the
// multi-instruction long-compare sequences test the operands in reversed order.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares
// Unsigned counterpart of cmpOp_commute (b<->nbe, be<->nb swapped).
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
4525 pipeline %{ 4526 4527 //----------ATTRIBUTES--------------------------------------------------------- 4528 attributes %{ 4529 variable_size_instructions; // Fixed size instructions 4530 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle 4531 instruction_unit_size = 1; // An instruction is 1 bytes long 4532 instruction_fetch_unit_size = 16; // The processor fetches one line 4533 instruction_fetch_units = 1; // of 16 bytes 4534 4535 // List of nop instructions 4536 nops( MachNop ); 4537 %} 4538 4539 //----------RESOURCES---------------------------------------------------------- 4540 // Resources are the functional units available to the machine 4541 4542 // Generic P2/P3 pipeline 4543 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of 4544 // 3 instructions decoded per cycle. 4545 // 2 load/store ops per cycle, 1 branch, 1 FPU, 4546 // 2 ALU op, only ALU0 handles mul/div instructions. 4547 resources( D0, D1, D2, DECODE = D0 | D1 | D2, 4548 MS0, MS1, MEM = MS0 | MS1, 4549 BR, FPU, 4550 ALU0, ALU1, ALU = ALU0 | ALU1 ); 4551 4552 //----------PIPELINE DESCRIPTION----------------------------------------------- 4553 // Pipeline Description specifies the stages in the machine's pipeline 4554 4555 // Generic P2/P3 pipeline 4556 pipe_desc(S0, S1, S2, S3, S4, S5); 4557 4558 //----------PIPELINE CLASSES--------------------------------------------------- 4559 // Pipeline Classes describe the stages in which input and output are 4560 // referenced by the hardware pipeline. 4561 4562 // Naming convention: ialu or fpu 4563 // Then: _reg 4564 // Then: _reg if there is a 2nd register 4565 // Then: _long if it's a pair of instructions implementing a long 4566 // Then: _fat if it requires the big decoder 4567 // Or: _mem if it requires the big decoder and a memory unit. 
4568 4569 // Integer ALU reg operation 4570 pipe_class ialu_reg(rRegI dst) %{ 4571 single_instruction; 4572 dst : S4(write); 4573 dst : S3(read); 4574 DECODE : S0; // any decoder 4575 ALU : S3; // any alu 4576 %} 4577 4578 // Long ALU reg operation 4579 pipe_class ialu_reg_long(eRegL dst) %{ 4580 instruction_count(2); 4581 dst : S4(write); 4582 dst : S3(read); 4583 DECODE : S0(2); // any 2 decoders 4584 ALU : S3(2); // both alus 4585 %} 4586 4587 // Integer ALU reg operation using big decoder 4588 pipe_class ialu_reg_fat(rRegI dst) %{ 4589 single_instruction; 4590 dst : S4(write); 4591 dst : S3(read); 4592 D0 : S0; // big decoder only 4593 ALU : S3; // any alu 4594 %} 4595 4596 // Long ALU reg operation using big decoder 4597 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4598 instruction_count(2); 4599 dst : S4(write); 4600 dst : S3(read); 4601 D0 : S0(2); // big decoder only; twice 4602 ALU : S3(2); // any 2 alus 4603 %} 4604 4605 // Integer ALU reg-reg operation 4606 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4607 single_instruction; 4608 dst : S4(write); 4609 src : S3(read); 4610 DECODE : S0; // any decoder 4611 ALU : S3; // any alu 4612 %} 4613 4614 // Long ALU reg-reg operation 4615 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4616 instruction_count(2); 4617 dst : S4(write); 4618 src : S3(read); 4619 DECODE : S0(2); // any 2 decoders 4620 ALU : S3(2); // both alus 4621 %} 4622 4623 // Integer ALU reg-reg operation 4624 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4625 single_instruction; 4626 dst : S4(write); 4627 src : S3(read); 4628 D0 : S0; // big decoder only 4629 ALU : S3; // any alu 4630 %} 4631 4632 // Long ALU reg-reg operation 4633 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4634 instruction_count(2); 4635 dst : S4(write); 4636 src : S3(read); 4637 D0 : S0(2); // big decoder only; twice 4638 ALU : S3(2); // both alus 4639 %} 4640 4641 // Integer ALU reg-mem operation 4642 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4643 single_instruction; 4644 dst : S5(write); 4645 mem : S3(read); 4646 D0 : S0; // big decoder only 4647 ALU : S4; // any alu 4648 MEM : S3; // any mem 4649 %} 4650 4651 // Long ALU reg-mem operation 4652 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4653 instruction_count(2); 4654 dst : S5(write); 4655 mem : S3(read); 4656 D0 : S0(2); // big decoder only; twice 4657 ALU : S4(2); // any 2 alus 4658 MEM : S3(2); // both mems 4659 %} 4660 4661 // Integer mem operation (prefetch) 4662 pipe_class ialu_mem(memory mem) 4663 %{ 4664 single_instruction; 4665 mem : S3(read); 4666 D0 : S0; // big decoder only 4667 MEM : S3; // any mem 4668 %} 4669 4670 // Integer Store to Memory 4671 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4672 single_instruction; 4673 mem : S3(read); 4674 src : S5(read); 4675 D0 : S0; // big decoder only 4676 ALU : S4; // any alu 4677 MEM : S3; 4678 %} 4679 4680 // Long Store to Memory 4681 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4682 instruction_count(2); 4683 mem : S3(read); 4684 src : S5(read); 4685 D0 : S0(2); // big decoder only; twice 4686 ALU : S4(2); // any 2 alus 4687 MEM : S3(2); // Both mems 4688 %} 4689 4690 // Integer Store to Memory 4691 pipe_class ialu_mem_imm(memory mem) %{ 4692 single_instruction; 4693 mem : S3(read); 4694 D0 : S0; // big decoder only 4695 ALU : S4; // any alu 4696 MEM : S3; 4697 %} 4698 4699 // Integer ALU0 reg-reg operation 4700 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4701 single_instruction; 4702 dst : S4(write); 4703 src : S3(read); 4704 D0 : S0; // Big decoder only 4705 ALU0 : S3; // only alu0 4706 %} 4707 4708 // Integer ALU0 reg-mem operation 4709 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4710 single_instruction; 4711 dst : S5(write); 4712 mem : S3(read); 4713 D0 : S0; // big decoder only 4714 ALU0 : S4; // ALU0 only 4715 MEM : S3; // any mem 4716 %} 4717 4718 // Integer ALU reg-reg operation 4719 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{
  single_instruction;
  cr     : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
  single_instruction;
  cr     : S4(write);
  src1   : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
  single_instruction;
  cr     : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
  instruction_count(4);
  y      : S4(read);
  q      : S3(read);
  p      : S3(read);
  DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0;        // any decoder
  MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR dst) %{
  instruction_count(2);
  dst    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
  instruction_count(3);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  DECODE : S0(3);     // any 3 decoders
  FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S0(4);     // any 4 decoders (four decode slots are requested)
  FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S1(3);     // any 3 decoders
  D0     : S0;        // Big decoder only
  FPU    : S3(2);
  MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
  instruction_count(2);
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  DECODE : S1;        // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
  instruction_count(3);
  dst    : S5(write);
  src1   : S3(read);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  DECODE : S1(2);     // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
  instruction_count(2);
  src    : S5(read);
  mem    : S3(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S1;        // big decoder only
  FPU    : S4;
  MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  mem    : S3(read);
  DECODE : S0(2);     // any decoder for FPU PUSH
  D0     : S1;        // big decoder only
  FPU    : S4;
  MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  mem    : S4(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S0(2);     // big decoder only
  FPU    : S4;
  MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem(memory dst, memory src1) %{
  instruction_count(2);
  src1   : S3(read);
  dst    : S4(read);
  D0     : S0(2);     // big decoder only
  MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  dst    : S4(read);
  D0     : S0(3);     // big decoder only
  FPU    : S4;
  MEM    : S3(3);     // any mem
%}

pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
  instruction_count(3);
  src1   : S4(read);
  mem    : S4(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S0(2);     // big decoder only
  FPU    : S4;
  MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
  instruction_count(2);
  dst    : S5(write);
  D0     : S0;        // big decoder only for the load
  DECODE : S1;        // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
  instruction_count(3);
  dst    : S5(write);
  src    : S3(read);
  D0     : S0;        // big decoder only for the load
  DECODE : S1(2);     // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
  single_instruction;
  BR   : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
  single_instruction;
  cr   : S1(read);
  BR   : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
  instruction_count(1); force_serialization;
  fixed_latency(6);
  heap_ptr : S3(read);
  DECODE   : S0(3);
  D0       : S2;
  MEM      : S3;
  ALU      : S3(2);
  dst      : S5(write);
  BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
  instruction_count(10); multiple_bundles; force_serialization;
  fixed_latency(100);
  D0  : S0(2);
  MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
  instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided. These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.
//               The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);  // SHR clobbers the flags

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);  // SAR clobbers the flags

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when LZCNT is unavailable: BSR plus a zero-input fixup.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
      "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    // BSR sets ZF (and leaves dst undefined) for a zero input; substitute -1
    // so the final 31 - dst yields 32 in that case.
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);  // dst = 31 - bit index of highest set bit
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    // LZCNT sets CF when its input is zero; only then scan the low word.
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when LZCNT is unavailable: BSR on each 32-bit half.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
      "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
      "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);     // bit index found in the high word
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);             // whole long was zero: force result 64
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);  // dst = 63 - bit index of highest set bit
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when TZCNT is unavailable: BSF plus a zero-input fixup.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    // BSF sets ZF (and leaves dst undefined) for a zero input; return 32.
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    // TZCNT sets CF when its input is zero; only then scan the high word.
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when TZCNT is unavailable: BSF on each 32-bit half.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);     // whole long was zero: 32 + 32 = 64 below
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    // Count each 32-bit half separately and sum the counts.
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Build the low- and high-word addresses by hand (disp and disp+4).
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);  // XOR clobbers the flags

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));  // high word is always zero
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 mask bits matter after the zero-extending byte load.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    // (x << 24) >> 24 of a short load is just a sign-extending byte load.
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    // Masking a ushort with 0xFF is just a zero-extending byte load.
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 mask bits matter after the zero-extending short load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);  // replicate the sign bit into the high word
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads; not atomic (guarded by the predicate above).
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic long load via a 64-bit SSE2 move bounced through a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic long load directly into a register pair via XMM.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);   // shift the high word down for the second MOVD
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Float
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);  // XOR clobbers the flags

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);               /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);               /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();   // push +0.0 instead of reading the constant table
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();   // push +1.0 instead of reading the constant table
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
6031 instruct loadConDPR(regDPR dst, immDPR con) %{ 6032 match(Set dst con); 6033 ins_cost(125); 6034 6035 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 6036 "FSTP $dst" %} 6037 ins_encode %{ 6038 __ fld_d($constantaddress($con)); 6039 __ fstp_d($dst$$reg); 6040 %} 6041 ins_pipe(fpu_reg_con); 6042 %} 6043 6044 // The instruction usage is guarded by predicate in operand immDPR0(). 6045 instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 6046 match(Set dst con); 6047 ins_cost(125); 6048 6049 format %{ "FLDZ ST\n\t" 6050 "FSTP $dst" %} 6051 ins_encode %{ 6052 __ fldz(); 6053 __ fstp_d($dst$$reg); 6054 %} 6055 ins_pipe(fpu_reg_con); 6056 %} 6057 6058 // The instruction usage is guarded by predicate in operand immDPR1(). 6059 instruct loadConDPR1(regDPR dst, immDPR1 con) %{ 6060 match(Set dst con); 6061 ins_cost(125); 6062 6063 format %{ "FLD1 ST\n\t" 6064 "FSTP $dst" %} 6065 ins_encode %{ 6066 __ fld1(); 6067 __ fstp_d($dst$$reg); 6068 %} 6069 ins_pipe(fpu_reg_con); 6070 %} 6071 6072 // The instruction usage is guarded by predicate in operand immD(). 6073 instruct loadConD(regD dst, immD con) %{ 6074 match(Set dst con); 6075 ins_cost(125); 6076 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 6077 ins_encode %{ 6078 __ movdbl($dst$$XMMRegister, $constantaddress($con)); 6079 %} 6080 ins_pipe(pipe_slow); 6081 %} 6082 6083 // The instruction usage is guarded by predicate in operand immD0(). 
6084 instruct loadConD0(regD dst, immD0 src) %{ 6085 match(Set dst src); 6086 ins_cost(100); 6087 format %{ "XORPD $dst,$dst\t# double 0.0" %} 6088 ins_encode %{ 6089 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); 6090 %} 6091 ins_pipe( pipe_slow ); 6092 %} 6093 6094 // Load Stack Slot 6095 instruct loadSSI(rRegI dst, stackSlotI src) %{ 6096 match(Set dst src); 6097 ins_cost(125); 6098 6099 format %{ "MOV $dst,$src" %} 6100 opcode(0x8B); 6101 ins_encode( OpcP, RegMem(dst,src)); 6102 ins_pipe( ialu_reg_mem ); 6103 %} 6104 6105 instruct loadSSL(eRegL dst, stackSlotL src) %{ 6106 match(Set dst src); 6107 6108 ins_cost(200); 6109 format %{ "MOV $dst,$src.lo\n\t" 6110 "MOV $dst+4,$src.hi" %} 6111 opcode(0x8B, 0x8B); 6112 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) ); 6113 ins_pipe( ialu_mem_long_reg ); 6114 %} 6115 6116 // Load Stack Slot 6117 instruct loadSSP(eRegP dst, stackSlotP src) %{ 6118 match(Set dst src); 6119 ins_cost(125); 6120 6121 format %{ "MOV $dst,$src" %} 6122 opcode(0x8B); 6123 ins_encode( OpcP, RegMem(dst,src)); 6124 ins_pipe( ialu_reg_mem ); 6125 %} 6126 6127 // Load Stack Slot 6128 instruct loadSSF(regFPR dst, stackSlotF src) %{ 6129 match(Set dst src); 6130 ins_cost(125); 6131 6132 format %{ "FLD_S $src\n\t" 6133 "FSTP $dst" %} 6134 opcode(0xD9); /* D9 /0, FLD m32real */ 6135 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6136 Pop_Reg_FPR(dst) ); 6137 ins_pipe( fpu_reg_mem ); 6138 %} 6139 6140 // Load Stack Slot 6141 instruct loadSSD(regDPR dst, stackSlotD src) %{ 6142 match(Set dst src); 6143 ins_cost(125); 6144 6145 format %{ "FLD_D $src\n\t" 6146 "FSTP $dst" %} 6147 opcode(0xDD); /* DD /0, FLD m64real */ 6148 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6149 Pop_Reg_DPR(dst) ); 6150 ins_pipe( fpu_reg_mem ); 6151 %} 6152 6153 // Prefetch instructions for allocation. 6154 // Must be safe to execute with invalid address (cannot fault). 

// The prefetch variant is selected by the AllocatePrefetchInstr flag:
// 0 => PREFETCHNTA, 1 => PREFETCHT0, 2 => PREFETCHT2, 3 => PREFETCHW.
// Without SSE (and unless PREFETCHW was requested) no prefetch is emitted.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// 0x66 operand-size prefix turns the 32-bit MOV into a 16-bit store.
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic two-instruction store (lo then hi half); the atomic
// variants below handle require_atomic_access() stores.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// Only the low 32 bits are stored; the ConvL2I truncation is free.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 atomic variant: bounce the spilled long through an XMM temp and
// store it with a single 64-bit MOVSD.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 atomic variant with the source still in a GPR pair: assemble the
// 64-bit value in an XMM temp via two MOVDs + PUNPCKLDQ, then MOVSD it.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double
// x87 store; src must already be on top of the FPU stack (regDPR1).
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Double
// Register copies between the regD and vector-legal vlRegD classes.
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Store Float
// x87 store; src must already be on top of the FPU stack (regFPR1).
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// Double->float conversion is folded into the 32-bit FST_S store.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
// On x86 only StoreLoad needs a real instruction; the other barriers
// are satisfied by the hardware memory model and encode to nothing.

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// StoreLoad barrier: emitted as a locked ADD to the stack (kills flags).
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elided when a following store/load already provides the barrier.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P is a no-op here: src and dst are constrained to the same
// register (EAX), so no code is emitted.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// jmov* variants emulate CMOV with a short branch on pre-CMOV hardware.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
// x87 FCMOV; dst must be the FPU top-of-stack register (regDPR1).
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed compares therefore fall back to a branch-around-move sequence.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed-compare variant for the x87 float case: FCMOV has no signed forms,
// so branch around a plain register move instead.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// UCF flags form: simply delegates to the unsigned version via expand.
instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: performed as two 32-bit CMOVs (lo half, then hi half).
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of constant +1: one-byte INC encoding, only when UseIncDec is enabled.
instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA; note: no KILL cr — LEA does not touch flags.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of constant -1: one-byte DEC encoding, only when UseIncDec is enabled.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write: add a register directly into memory.
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// The cast* instructs below emit no code (empty encodings, zero cost/size):
// they only carry type information through the matcher.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castLL( eRegL dst ) %{
  match(Set dst (CastLL dst));
  format %{ "#castLL of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castFF( regF dst ) %{
  predicate(UseSSE >= 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castDD( regD dst ) %{
  predicate(UseSSE >= 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// x87 (pre-SSE) register-class flavors of the float/double casts.
instruct castFF_PR( regFPR dst ) %{
  predicate(UseSSE < 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

instruct castDD_PR( regDPR dst ) %{
  predicate(UseSSE < 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS (requires CMPXCHG8B, hence supports_cx8). The
// enc_flags_ne_to_boolean encoder materializes ZF as 0/1 into $res
// (see the MOV/JNE/MOV pattern in the format).
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange variants: the result is the witnessed value, produced
// in oldval's register(s) (Set oldval ...) rather than as a boolean.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddB whose result is unused: a locked ADD is enough, no XADD needed.
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
// Byte atomic add returning the old value: locked XADD with an
// 8-bit-capable register (xRegI).
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddS whose result is unused: a locked word ADD suffices.
instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI whose result is unused: a locked ADD suffices.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
// GetAndSet via XCHG. NOTE: XCHG with a memory operand asserts LOCK
// implicitly (Intel SDM), so no explicit lock() is emitted here.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write: subtract a register directly in memory.
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// 0 - dst is matched as a NEG.
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Loads only the low word (EAX) of a long constant; used as an input to
// the multiply-high patterns below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only applies when the long constant operand actually fits in 32 bits.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// MulAddS2I: dst = dst*src1 + src2*src3, expanded into two IMULs and an ADD.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  // hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// Special-cases EAX==0x80000000 with divisor -1 (see format): IDIV would
// raise the #DE overflow fault on that combination (Intel SDM).
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (DivL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (ModL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    // Divide by |imm| using unsigned 32-bit DIVs, then fix the sign at the end.
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ?
con : -con; 7979 Label Lfast, Lpos, Ldone; 7980 7981 __ movl($tmp$$Register, pcon); 7982 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7983 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7984 7985 __ movl($tmp2$$Register, $dst$$Register); // save 7986 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7987 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7988 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7989 7990 // Negative dividend. 7991 // convert value to positive to use unsigned division 7992 __ lneg($dst$$Register, $tmp2$$Register); 7993 __ divl($tmp$$Register); 7994 __ movl($dst$$Register, $tmp2$$Register); 7995 __ divl($tmp$$Register); 7996 // revert remainder back to negative 7997 __ negl(HIGH_FROM_LOW($dst$$Register)); 7998 __ jmpb(Ldone); 7999 8000 __ bind(Lpos); 8001 __ divl($tmp$$Register); 8002 __ movl($dst$$Register, $tmp2$$Register); 8003 8004 __ bind(Lfast); 8005 // fast path: src is positive 8006 __ divl($tmp$$Register); 8007 8008 __ bind(Ldone); 8009 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 8010 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 8011 8012 %} 8013 ins_pipe( pipe_slow ); 8014 %} 8015 8016 // Integer Shift Instructions 8017 // Shift Left by one 8018 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8019 match(Set dst (LShiftI dst shift)); 8020 effect(KILL cr); 8021 8022 size(2); 8023 format %{ "SHL $dst,$shift" %} 8024 opcode(0xD1, 0x4); /* D1 /4 */ 8025 ins_encode( OpcP, RegOpc( dst ) ); 8026 ins_pipe( ialu_reg ); 8027 %} 8028 8029 // Shift Left by 8-bit immediate 8030 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8031 match(Set dst (LShiftI dst shift)); 8032 effect(KILL cr); 8033 8034 size(3); 8035 format %{ "SHL $dst,$shift" %} 8036 opcode(0xC1, 0x4); /* C1 /4 ib */ 8037 ins_encode( RegOpcImm( dst, shift) ); 8038 ins_pipe( ialu_reg ); 8039 %} 8040 8041 // Shift Left by variable 8042 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one, directly on a memory operand
// (read-modify-write form: StoreI(dst, RShiftI(LoadI(dst), 1))).
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  // NOTE(review): register-form SAR scheduled on the ialu_mem_imm pipe class —
  // looks copy-pasted from the memory form below; confirm this is intended.
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate, directly on a memory operand
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable (shift count pinned to CL by eCXRegI)
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Matched as a sign-extending byte move (MOVSX) instead of two shifts.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Matched as a sign-extending 16-bit move (MOVSX) instead of two shifts.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable (shift count pinned to CL by eCXRegI)
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register (read-modify-write on the memory operand)
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate (read-modify-write on the memory operand)
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from the (x ^ -1) & y ideal pattern.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// ANDN with a memory second operand (same (x ^ -1) & load(y) pattern).
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, dst = (-src) & src.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// BLSI with a memory operand (both AndI inputs are the same load).
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8282 predicate(UseBMI1Instructions); 8283 effect(KILL cr); 8284 8285 ins_cost(125); 8286 format %{ "BLSIL $dst, $src" %} 8287 8288 ins_encode %{ 8289 __ blsil($dst$$Register, $src$$Address); 8290 %} 8291 ins_pipe(ialu_reg_mem); 8292 %} 8293 8294 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8295 %{ 8296 match(Set dst (XorI (AddI src minus_1) src)); 8297 predicate(UseBMI1Instructions); 8298 effect(KILL cr); 8299 8300 format %{ "BLSMSKL $dst, $src" %} 8301 8302 ins_encode %{ 8303 __ blsmskl($dst$$Register, $src$$Register); 8304 %} 8305 8306 ins_pipe(ialu_reg); 8307 %} 8308 8309 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8310 %{ 8311 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8312 predicate(UseBMI1Instructions); 8313 effect(KILL cr); 8314 8315 ins_cost(125); 8316 format %{ "BLSMSKL $dst, $src" %} 8317 8318 ins_encode %{ 8319 __ blsmskl($dst$$Register, $src$$Address); 8320 %} 8321 8322 ins_pipe(ialu_reg_mem); 8323 %} 8324 8325 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8326 %{ 8327 match(Set dst (AndI (AddI src minus_1) src) ); 8328 predicate(UseBMI1Instructions); 8329 effect(KILL cr); 8330 8331 format %{ "BLSRL $dst, $src" %} 8332 8333 ins_encode %{ 8334 __ blsrl($dst$$Register, $src$$Register); 8335 %} 8336 8337 ins_pipe(ialu_reg); 8338 %} 8339 8340 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8341 %{ 8342 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8343 predicate(UseBMI1Instructions); 8344 effect(KILL cr); 8345 8346 ins_cost(125); 8347 format %{ "BLSRL $dst, $src" %} 8348 8349 ins_encode %{ 8350 __ blsrl($dst$$Register, $src$$Address); 8351 %} 8352 8353 ins_pipe(ialu_reg_mem); 8354 %} 8355 8356 // Or Instructions 8357 // Or Register with Register 8358 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8359 match(Set dst 
(OrI dst src)); 8360 effect(KILL cr); 8361 8362 size(2); 8363 format %{ "OR $dst,$src" %} 8364 opcode(0x0B); 8365 ins_encode( OpcP, RegReg( dst, src) ); 8366 ins_pipe( ialu_reg_reg ); 8367 %} 8368 8369 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8370 match(Set dst (OrI dst (CastP2X src))); 8371 effect(KILL cr); 8372 8373 size(2); 8374 format %{ "OR $dst,$src" %} 8375 opcode(0x0B); 8376 ins_encode( OpcP, RegReg( dst, src) ); 8377 ins_pipe( ialu_reg_reg ); 8378 %} 8379 8380 8381 // Or Register with Immediate 8382 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8383 match(Set dst (OrI dst src)); 8384 effect(KILL cr); 8385 8386 format %{ "OR $dst,$src" %} 8387 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8388 // ins_encode( RegImm( dst, src) ); 8389 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8390 ins_pipe( ialu_reg ); 8391 %} 8392 8393 // Or Register with Memory 8394 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8395 match(Set dst (OrI dst (LoadI src))); 8396 effect(KILL cr); 8397 8398 ins_cost(125); 8399 format %{ "OR $dst,$src" %} 8400 opcode(0x0B); 8401 ins_encode( OpcP, RegMem( dst, src) ); 8402 ins_pipe( ialu_reg_mem ); 8403 %} 8404 8405 // Or Memory with Register 8406 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8407 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8408 effect(KILL cr); 8409 8410 ins_cost(150); 8411 format %{ "OR $dst,$src" %} 8412 opcode(0x09); /* Opcode 09 /r */ 8413 ins_encode( OpcP, RegMem( src, dst ) ); 8414 ins_pipe( ialu_mem_reg ); 8415 %} 8416 8417 // Or Memory with Immediate 8418 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8419 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8420 effect(KILL cr); 8421 8422 ins_cost(125); 8423 format %{ "OR $dst,$src" %} 8424 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8425 // ins_encode( MemImm( dst, src) ); 8426 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8427 ins_pipe( ialu_mem_imm ); 
8428 %} 8429 8430 // ROL/ROR 8431 // ROL expand 8432 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8433 effect(USE_DEF dst, USE shift, KILL cr); 8434 8435 format %{ "ROL $dst, $shift" %} 8436 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8437 ins_encode( OpcP, RegOpc( dst )); 8438 ins_pipe( ialu_reg ); 8439 %} 8440 8441 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8442 effect(USE_DEF dst, USE shift, KILL cr); 8443 8444 format %{ "ROL $dst, $shift" %} 8445 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8446 ins_encode( RegOpcImm(dst, shift) ); 8447 ins_pipe(ialu_reg); 8448 %} 8449 8450 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8451 effect(USE_DEF dst, USE shift, KILL cr); 8452 8453 format %{ "ROL $dst, $shift" %} 8454 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8455 ins_encode(OpcP, RegOpc(dst)); 8456 ins_pipe( ialu_reg_reg ); 8457 %} 8458 // end of ROL expand 8459 8460 // ROL 32bit by one once 8461 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8462 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8463 8464 expand %{ 8465 rolI_eReg_imm1(dst, lshift, cr); 8466 %} 8467 %} 8468 8469 // ROL 32bit var by imm8 once 8470 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8471 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8472 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8473 8474 expand %{ 8475 rolI_eReg_imm8(dst, lshift, cr); 8476 %} 8477 %} 8478 8479 // ROL 32bit var by var once 8480 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8481 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8482 8483 expand %{ 8484 rolI_eReg_CL(dst, shift, cr); 8485 %} 8486 %} 8487 8488 // ROL 32bit var by var once 8489 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8490 match(Set dst ( OrI (LShiftI dst shift) 
(URShiftI dst (SubI c32 shift)))); 8491 8492 expand %{ 8493 rolI_eReg_CL(dst, shift, cr); 8494 %} 8495 %} 8496 8497 // ROR expand 8498 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8499 effect(USE_DEF dst, USE shift, KILL cr); 8500 8501 format %{ "ROR $dst, $shift" %} 8502 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8503 ins_encode( OpcP, RegOpc( dst ) ); 8504 ins_pipe( ialu_reg ); 8505 %} 8506 8507 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8508 effect (USE_DEF dst, USE shift, KILL cr); 8509 8510 format %{ "ROR $dst, $shift" %} 8511 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8512 ins_encode( RegOpcImm(dst, shift) ); 8513 ins_pipe( ialu_reg ); 8514 %} 8515 8516 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8517 effect(USE_DEF dst, USE shift, KILL cr); 8518 8519 format %{ "ROR $dst, $shift" %} 8520 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8521 ins_encode(OpcP, RegOpc(dst)); 8522 ins_pipe( ialu_reg_reg ); 8523 %} 8524 // end of ROR expand 8525 8526 // ROR right once 8527 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8528 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8529 8530 expand %{ 8531 rorI_eReg_imm1(dst, rshift, cr); 8532 %} 8533 %} 8534 8535 // ROR 32bit by immI8 once 8536 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8537 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8538 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8539 8540 expand %{ 8541 rorI_eReg_imm8(dst, rshift, cr); 8542 %} 8543 %} 8544 8545 // ROR 32bit var by var once 8546 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ 8547 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8548 8549 expand %{ 8550 rorI_eReg_CL(dst, shift, cr); 8551 %} 8552 %} 8553 8554 // ROR 32bit var by var once 8555 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, 
immI_32 c32, eFlagsReg cr) %{ 8556 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8557 8558 expand %{ 8559 rorI_eReg_CL(dst, shift, cr); 8560 %} 8561 %} 8562 8563 // Xor Instructions 8564 // Xor Register with Register 8565 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8566 match(Set dst (XorI dst src)); 8567 effect(KILL cr); 8568 8569 size(2); 8570 format %{ "XOR $dst,$src" %} 8571 opcode(0x33); 8572 ins_encode( OpcP, RegReg( dst, src) ); 8573 ins_pipe( ialu_reg_reg ); 8574 %} 8575 8576 // Xor Register with Immediate -1 8577 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8578 match(Set dst (XorI dst imm)); 8579 8580 size(2); 8581 format %{ "NOT $dst" %} 8582 ins_encode %{ 8583 __ notl($dst$$Register); 8584 %} 8585 ins_pipe( ialu_reg ); 8586 %} 8587 8588 // Xor Register with Immediate 8589 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8590 match(Set dst (XorI dst src)); 8591 effect(KILL cr); 8592 8593 format %{ "XOR $dst,$src" %} 8594 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8595 // ins_encode( RegImm( dst, src) ); 8596 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8597 ins_pipe( ialu_reg ); 8598 %} 8599 8600 // Xor Register with Memory 8601 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8602 match(Set dst (XorI dst (LoadI src))); 8603 effect(KILL cr); 8604 8605 ins_cost(125); 8606 format %{ "XOR $dst,$src" %} 8607 opcode(0x33); 8608 ins_encode( OpcP, RegMem(dst, src) ); 8609 ins_pipe( ialu_reg_mem ); 8610 %} 8611 8612 // Xor Memory with Register 8613 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8614 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8615 effect(KILL cr); 8616 8617 ins_cost(150); 8618 format %{ "XOR $dst,$src" %} 8619 opcode(0x31); /* Opcode 31 /r */ 8620 ins_encode( OpcP, RegMem( src, dst ) ); 8621 ins_pipe( ialu_mem_reg ); 8622 %} 8623 8624 // Xor Memory with Immediate 8625 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8626 match(Set 
dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Plain register copy used as the first half of the convI2B expansion below.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG/ADC trick: after NEG dst, CF is set iff dst was non-zero;
// ADC dst,src then yields a non-zero value exactly when src != 0.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B on an int: expands to copy + NEG/ADC (see ci2b).
instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer copy used as the first half of the convP2B expansion below.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Same NEG/ADC trick as ci2b, for a pointer source.
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B on a pointer: expands to copy + NEG/ADC (see cp2b).
instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// CmpLTMask: dst = (p < q) ? -1 : 0, computed branch-free with SETlt + NEG.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Removed unused 'Label done' — leftover from an earlier branchy
    // implementation; this sequence is straight-line.
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);   // Rd = (p < q) ? 1 : 0
    __ negl(Rd);                    // 1 -> -1 (all-ones mask), 0 -> 0
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: just replicate the sign bit with SAR.
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// p = (p-q) + ((p-q < 0) ? y : 0): SUB sets flags, branch skips the ADD.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// y = (p < q) ? y : 0: when p < q the all-ones mask keeps y, so only the
// p >= q path needs the XOR-to-zero.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These produce only the flags result (Set cr ...); a conditional branch
// elsewhere consumes OF.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  // op1 is destroyed by the ADD that computes the flags; pinned to EAX
  // (presumably to match the addExact intrinsic's result register — confirm).
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtract overflow check: CMP computes op1-op2 flags without clobbering op1.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Negation overflow check: 0 - op2 matched as NEG, which destroys op2.
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Multiply overflow check: two-operand IMUL destroys op1 (pinned to EAX).
instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Immediate form uses three-operand IMUL into a TEMP, leaving op1 intact.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Absolute Instructions
// Branch-free abs: tmp = src >> 31 (all-ones if negative, else zero);
// dst = (src ^ tmp) - tmp.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
  %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

//----------Long Instructions------------------------------------------------
// 64-bit values live in register pairs (.lo/.hi); ops use ADD/ADC style
// carry chaining across the two halves.
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2
*/ 8906 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8907 ins_pipe( ialu_reg_long ); 8908 %} 8909 8910 // Add Long Register with Memory 8911 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8912 match(Set dst (AddL dst (LoadL mem))); 8913 effect(KILL cr); 8914 ins_cost(125); 8915 format %{ "ADD $dst.lo,$mem\n\t" 8916 "ADC $dst.hi,$mem+4" %} 8917 opcode(0x03, 0x13); 8918 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8919 ins_pipe( ialu_reg_long_mem ); 8920 %} 8921 8922 // Subtract Long Register with Register. 8923 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8924 match(Set dst (SubL dst src)); 8925 effect(KILL cr); 8926 ins_cost(200); 8927 format %{ "SUB $dst.lo,$src.lo\n\t" 8928 "SBB $dst.hi,$src.hi" %} 8929 opcode(0x2B, 0x1B); 8930 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8931 ins_pipe( ialu_reg_reg_long ); 8932 %} 8933 8934 // Subtract Long Register with Immediate 8935 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8936 match(Set dst (SubL dst src)); 8937 effect(KILL cr); 8938 format %{ "SUB $dst.lo,$src.lo\n\t" 8939 "SBB $dst.hi,$src.hi" %} 8940 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8941 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8942 ins_pipe( ialu_reg_long ); 8943 %} 8944 8945 // Subtract Long Register with Memory 8946 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8947 match(Set dst (SubL dst (LoadL mem))); 8948 effect(KILL cr); 8949 ins_cost(125); 8950 format %{ "SUB $dst.lo,$mem\n\t" 8951 "SBB $dst.hi,$mem+4" %} 8952 opcode(0x2B, 0x1B); 8953 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8954 ins_pipe( ialu_reg_long_mem ); 8955 %} 8956 8957 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8958 match(Set dst (SubL zero dst)); 8959 effect(KILL cr); 8960 ins_cost(300); 8961 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8962 ins_encode( 
neg_long(dst) ); 8963 ins_pipe( ialu_reg_reg_long ); 8964 %} 8965 8966 // And Long Register with Register 8967 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8968 match(Set dst (AndL dst src)); 8969 effect(KILL cr); 8970 format %{ "AND $dst.lo,$src.lo\n\t" 8971 "AND $dst.hi,$src.hi" %} 8972 opcode(0x23,0x23); 8973 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8974 ins_pipe( ialu_reg_reg_long ); 8975 %} 8976 8977 // And Long Register with Immediate 8978 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8979 match(Set dst (AndL dst src)); 8980 effect(KILL cr); 8981 format %{ "AND $dst.lo,$src.lo\n\t" 8982 "AND $dst.hi,$src.hi" %} 8983 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8984 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8985 ins_pipe( ialu_reg_long ); 8986 %} 8987 8988 // And Long Register with Memory 8989 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8990 match(Set dst (AndL dst (LoadL mem))); 8991 effect(KILL cr); 8992 ins_cost(125); 8993 format %{ "AND $dst.lo,$mem\n\t" 8994 "AND $dst.hi,$mem+4" %} 8995 opcode(0x23, 0x23); 8996 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8997 ins_pipe( ialu_reg_long_mem ); 8998 %} 8999 9000 // BMI1 instructions 9001 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 9002 match(Set dst (AndL (XorL src1 minus_1) src2)); 9003 predicate(UseBMI1Instructions); 9004 effect(KILL cr, TEMP dst); 9005 9006 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 9007 "ANDNL $dst.hi, $src1.hi, $src2.hi" 9008 %} 9009 9010 ins_encode %{ 9011 Register Rdst = $dst$$Register; 9012 Register Rsrc1 = $src1$$Register; 9013 Register Rsrc2 = $src2$$Register; 9014 __ andnl(Rdst, Rsrc1, Rsrc2); 9015 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 9016 %} 9017 ins_pipe(ialu_reg_reg_long); 9018 %} 9019 9020 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, 
immL_M1 minus_1, eFlagsReg cr) %{ 9021 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 9022 predicate(UseBMI1Instructions); 9023 effect(KILL cr, TEMP dst); 9024 9025 ins_cost(125); 9026 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 9027 "ANDNL $dst.hi, $src1.hi, $src2+4" 9028 %} 9029 9030 ins_encode %{ 9031 Register Rdst = $dst$$Register; 9032 Register Rsrc1 = $src1$$Register; 9033 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 9034 9035 __ andnl(Rdst, Rsrc1, $src2$$Address); 9036 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); 9037 %} 9038 ins_pipe(ialu_reg_mem); 9039 %} 9040 9041 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 9042 match(Set dst (AndL (SubL imm_zero src) src)); 9043 predicate(UseBMI1Instructions); 9044 effect(KILL cr, TEMP dst); 9045 9046 format %{ "MOVL $dst.hi, 0\n\t" 9047 "BLSIL $dst.lo, $src.lo\n\t" 9048 "JNZ done\n\t" 9049 "BLSIL $dst.hi, $src.hi\n" 9050 "done:" 9051 %} 9052 9053 ins_encode %{ 9054 Label done; 9055 Register Rdst = $dst$$Register; 9056 Register Rsrc = $src$$Register; 9057 __ movl(HIGH_FROM_LOW(Rdst), 0); 9058 __ blsil(Rdst, Rsrc); 9059 __ jccb(Assembler::notZero, done); 9060 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9061 __ bind(done); 9062 %} 9063 ins_pipe(ialu_reg); 9064 %} 9065 9066 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 9067 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 9068 predicate(UseBMI1Instructions); 9069 effect(KILL cr, TEMP dst); 9070 9071 ins_cost(125); 9072 format %{ "MOVL $dst.hi, 0\n\t" 9073 "BLSIL $dst.lo, $src\n\t" 9074 "JNZ done\n\t" 9075 "BLSIL $dst.hi, $src+4\n" 9076 "done:" 9077 %} 9078 9079 ins_encode %{ 9080 Label done; 9081 Register Rdst = $dst$$Register; 9082 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9083 9084 __ movl(HIGH_FROM_LOW(Rdst), 0); 9085 
__ blsil(Rdst, $src$$Address); 9086 __ jccb(Assembler::notZero, done); 9087 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 9088 __ bind(done); 9089 %} 9090 ins_pipe(ialu_reg_mem); 9091 %} 9092 9093 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9094 %{ 9095 match(Set dst (XorL (AddL src minus_1) src)); 9096 predicate(UseBMI1Instructions); 9097 effect(KILL cr, TEMP dst); 9098 9099 format %{ "MOVL $dst.hi, 0\n\t" 9100 "BLSMSKL $dst.lo, $src.lo\n\t" 9101 "JNC done\n\t" 9102 "BLSMSKL $dst.hi, $src.hi\n" 9103 "done:" 9104 %} 9105 9106 ins_encode %{ 9107 Label done; 9108 Register Rdst = $dst$$Register; 9109 Register Rsrc = $src$$Register; 9110 __ movl(HIGH_FROM_LOW(Rdst), 0); 9111 __ blsmskl(Rdst, Rsrc); 9112 __ jccb(Assembler::carryClear, done); 9113 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9114 __ bind(done); 9115 %} 9116 9117 ins_pipe(ialu_reg); 9118 %} 9119 9120 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9121 %{ 9122 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 9123 predicate(UseBMI1Instructions); 9124 effect(KILL cr, TEMP dst); 9125 9126 ins_cost(125); 9127 format %{ "MOVL $dst.hi, 0\n\t" 9128 "BLSMSKL $dst.lo, $src\n\t" 9129 "JNC done\n\t" 9130 "BLSMSKL $dst.hi, $src+4\n" 9131 "done:" 9132 %} 9133 9134 ins_encode %{ 9135 Label done; 9136 Register Rdst = $dst$$Register; 9137 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9138 9139 __ movl(HIGH_FROM_LOW(Rdst), 0); 9140 __ blsmskl(Rdst, $src$$Address); 9141 __ jccb(Assembler::carryClear, done); 9142 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 9143 __ bind(done); 9144 %} 9145 9146 ins_pipe(ialu_reg_mem); 9147 %} 9148 9149 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 9150 %{ 9151 match(Set dst (AndL (AddL src minus_1) src) ); 9152 predicate(UseBMI1Instructions); 9153 effect(KILL cr, TEMP dst); 9154 9155 format %{ "MOVL $dst.hi, $src.hi\n\t" 
9156 "BLSRL $dst.lo, $src.lo\n\t" 9157 "JNC done\n\t" 9158 "BLSRL $dst.hi, $src.hi\n" 9159 "done:" 9160 %} 9161 9162 ins_encode %{ 9163 Label done; 9164 Register Rdst = $dst$$Register; 9165 Register Rsrc = $src$$Register; 9166 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9167 __ blsrl(Rdst, Rsrc); 9168 __ jccb(Assembler::carryClear, done); 9169 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9170 __ bind(done); 9171 %} 9172 9173 ins_pipe(ialu_reg); 9174 %} 9175 9176 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9177 %{ 9178 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9179 predicate(UseBMI1Instructions); 9180 effect(KILL cr, TEMP dst); 9181 9182 ins_cost(125); 9183 format %{ "MOVL $dst.hi, $src+4\n\t" 9184 "BLSRL $dst.lo, $src\n\t" 9185 "JNC done\n\t" 9186 "BLSRL $dst.hi, $src+4\n" 9187 "done:" 9188 %} 9189 9190 ins_encode %{ 9191 Label done; 9192 Register Rdst = $dst$$Register; 9193 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9194 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9195 __ blsrl(Rdst, $src$$Address); 9196 __ jccb(Assembler::carryClear, done); 9197 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9198 __ bind(done); 9199 %} 9200 9201 ins_pipe(ialu_reg_mem); 9202 %} 9203 9204 // Or Long Register with Register 9205 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9206 match(Set dst (OrL dst src)); 9207 effect(KILL cr); 9208 format %{ "OR $dst.lo,$src.lo\n\t" 9209 "OR $dst.hi,$src.hi" %} 9210 opcode(0x0B,0x0B); 9211 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9212 ins_pipe( ialu_reg_reg_long ); 9213 %} 9214 9215 // Or Long Register with Immediate 9216 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9217 match(Set dst (OrL dst src)); 9218 effect(KILL cr); 9219 format %{ "OR $dst.lo,$src.lo\n\t" 9220 "OR $dst.hi,$src.hi" %} 9221 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9222 ins_encode( Long_OpcSErm_Lo( dst, src ), 
Long_OpcSErm_Hi( dst, src ) ); 9223 ins_pipe( ialu_reg_long ); 9224 %} 9225 9226 // Or Long Register with Memory 9227 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9228 match(Set dst (OrL dst (LoadL mem))); 9229 effect(KILL cr); 9230 ins_cost(125); 9231 format %{ "OR $dst.lo,$mem\n\t" 9232 "OR $dst.hi,$mem+4" %} 9233 opcode(0x0B,0x0B); 9234 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9235 ins_pipe( ialu_reg_long_mem ); 9236 %} 9237 9238 // Xor Long Register with Register 9239 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9240 match(Set dst (XorL dst src)); 9241 effect(KILL cr); 9242 format %{ "XOR $dst.lo,$src.lo\n\t" 9243 "XOR $dst.hi,$src.hi" %} 9244 opcode(0x33,0x33); 9245 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9246 ins_pipe( ialu_reg_reg_long ); 9247 %} 9248 9249 // Xor Long Register with Immediate -1 9250 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9251 match(Set dst (XorL dst imm)); 9252 format %{ "NOT $dst.lo\n\t" 9253 "NOT $dst.hi" %} 9254 ins_encode %{ 9255 __ notl($dst$$Register); 9256 __ notl(HIGH_FROM_LOW($dst$$Register)); 9257 %} 9258 ins_pipe( ialu_reg_long ); 9259 %} 9260 9261 // Xor Long Register with Immediate 9262 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9263 match(Set dst (XorL dst src)); 9264 effect(KILL cr); 9265 format %{ "XOR $dst.lo,$src.lo\n\t" 9266 "XOR $dst.hi,$src.hi" %} 9267 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9268 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9269 ins_pipe( ialu_reg_long ); 9270 %} 9271 9272 // Xor Long Register with Memory 9273 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9274 match(Set dst (XorL dst (LoadL mem))); 9275 effect(KILL cr); 9276 ins_cost(125); 9277 format %{ "XOR $dst.lo,$mem\n\t" 9278 "XOR $dst.hi,$mem+4" %} 9279 opcode(0x33,0x33); 9280 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9281 ins_pipe( ialu_reg_long_mem ); 
9282 %} 9283 9284 // Shift Left Long by 1 9285 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9286 predicate(UseNewLongLShift); 9287 match(Set dst (LShiftL dst cnt)); 9288 effect(KILL cr); 9289 ins_cost(100); 9290 format %{ "ADD $dst.lo,$dst.lo\n\t" 9291 "ADC $dst.hi,$dst.hi" %} 9292 ins_encode %{ 9293 __ addl($dst$$Register,$dst$$Register); 9294 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9295 %} 9296 ins_pipe( ialu_reg_long ); 9297 %} 9298 9299 // Shift Left Long by 2 9300 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9301 predicate(UseNewLongLShift); 9302 match(Set dst (LShiftL dst cnt)); 9303 effect(KILL cr); 9304 ins_cost(100); 9305 format %{ "ADD $dst.lo,$dst.lo\n\t" 9306 "ADC $dst.hi,$dst.hi\n\t" 9307 "ADD $dst.lo,$dst.lo\n\t" 9308 "ADC $dst.hi,$dst.hi" %} 9309 ins_encode %{ 9310 __ addl($dst$$Register,$dst$$Register); 9311 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9312 __ addl($dst$$Register,$dst$$Register); 9313 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9314 %} 9315 ins_pipe( ialu_reg_long ); 9316 %} 9317 9318 // Shift Left Long by 3 9319 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9320 predicate(UseNewLongLShift); 9321 match(Set dst (LShiftL dst cnt)); 9322 effect(KILL cr); 9323 ins_cost(100); 9324 format %{ "ADD $dst.lo,$dst.lo\n\t" 9325 "ADC $dst.hi,$dst.hi\n\t" 9326 "ADD $dst.lo,$dst.lo\n\t" 9327 "ADC $dst.hi,$dst.hi\n\t" 9328 "ADD $dst.lo,$dst.lo\n\t" 9329 "ADC $dst.hi,$dst.hi" %} 9330 ins_encode %{ 9331 __ addl($dst$$Register,$dst$$Register); 9332 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9333 __ addl($dst$$Register,$dst$$Register); 9334 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9335 __ addl($dst$$Register,$dst$$Register); 9336 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9337 %} 9338 ins_pipe( ialu_reg_long ); 9339 %} 9340 9341 // Shift Left 
Long by 1-31 9342 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9343 match(Set dst (LShiftL dst cnt)); 9344 effect(KILL cr); 9345 ins_cost(200); 9346 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9347 "SHL $dst.lo,$cnt" %} 9348 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9349 ins_encode( move_long_small_shift(dst,cnt) ); 9350 ins_pipe( ialu_reg_long ); 9351 %} 9352 9353 // Shift Left Long by 32-63 9354 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9355 match(Set dst (LShiftL dst cnt)); 9356 effect(KILL cr); 9357 ins_cost(300); 9358 format %{ "MOV $dst.hi,$dst.lo\n" 9359 "\tSHL $dst.hi,$cnt-32\n" 9360 "\tXOR $dst.lo,$dst.lo" %} 9361 opcode(0xC1, 0x4); /* C1 /4 ib */ 9362 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9363 ins_pipe( ialu_reg_long ); 9364 %} 9365 9366 // Shift Left Long by variable 9367 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9368 match(Set dst (LShiftL dst shift)); 9369 effect(KILL cr); 9370 ins_cost(500+200); 9371 size(17); 9372 format %{ "TEST $shift,32\n\t" 9373 "JEQ,s small\n\t" 9374 "MOV $dst.hi,$dst.lo\n\t" 9375 "XOR $dst.lo,$dst.lo\n" 9376 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9377 "SHL $dst.lo,$shift" %} 9378 ins_encode( shift_left_long( dst, shift ) ); 9379 ins_pipe( pipe_slow ); 9380 %} 9381 9382 // Shift Right Long by 1-31 9383 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9384 match(Set dst (URShiftL dst cnt)); 9385 effect(KILL cr); 9386 ins_cost(200); 9387 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9388 "SHR $dst.hi,$cnt" %} 9389 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9390 ins_encode( move_long_small_shift(dst,cnt) ); 9391 ins_pipe( ialu_reg_long ); 9392 %} 9393 9394 // Shift Right Long by 32-63 9395 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9396 match(Set dst (URShiftL dst cnt)); 9397 effect(KILL cr); 9398 ins_cost(300); 9399 format %{ "MOV $dst.lo,$dst.hi\n" 9400 "\tSHR $dst.lo,$cnt-32\n" 9401 "\tXOR 
$dst.hi,$dst.hi" %} 9402 opcode(0xC1, 0x5); /* C1 /5 ib */ 9403 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9404 ins_pipe( ialu_reg_long ); 9405 %} 9406 9407 // Shift Right Long by variable 9408 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9409 match(Set dst (URShiftL dst shift)); 9410 effect(KILL cr); 9411 ins_cost(600); 9412 size(17); 9413 format %{ "TEST $shift,32\n\t" 9414 "JEQ,s small\n\t" 9415 "MOV $dst.lo,$dst.hi\n\t" 9416 "XOR $dst.hi,$dst.hi\n" 9417 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9418 "SHR $dst.hi,$shift" %} 9419 ins_encode( shift_right_long( dst, shift ) ); 9420 ins_pipe( pipe_slow ); 9421 %} 9422 9423 // Shift Right Long by 1-31 9424 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9425 match(Set dst (RShiftL dst cnt)); 9426 effect(KILL cr); 9427 ins_cost(200); 9428 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9429 "SAR $dst.hi,$cnt" %} 9430 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9431 ins_encode( move_long_small_shift(dst,cnt) ); 9432 ins_pipe( ialu_reg_long ); 9433 %} 9434 9435 // Shift Right Long by 32-63 9436 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9437 match(Set dst (RShiftL dst cnt)); 9438 effect(KILL cr); 9439 ins_cost(300); 9440 format %{ "MOV $dst.lo,$dst.hi\n" 9441 "\tSAR $dst.lo,$cnt-32\n" 9442 "\tSAR $dst.hi,31" %} 9443 opcode(0xC1, 0x7); /* C1 /7 ib */ 9444 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9445 ins_pipe( ialu_reg_long ); 9446 %} 9447 9448 // Shift Right arithmetic Long by variable 9449 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9450 match(Set dst (RShiftL dst shift)); 9451 effect(KILL cr); 9452 ins_cost(600); 9453 size(18); 9454 format %{ "TEST $shift,32\n\t" 9455 "JEQ,s small\n\t" 9456 "MOV $dst.lo,$dst.hi\n\t" 9457 "SAR $dst.hi,31\n" 9458 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9459 "SAR $dst.hi,$shift" %} 9460 ins_encode( shift_right_arith_long( dst, shift ) ); 9461 ins_pipe( pipe_slow ); 9462 %} 9463 9464 
9465 //----------Double Instructions------------------------------------------------ 9466 // Double Math 9467 9468 // Compare & branch 9469 9470 // P6 version of float compare, sets condition codes in EFLAGS 9471 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9472 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9473 match(Set cr (CmpD src1 src2)); 9474 effect(KILL rax); 9475 ins_cost(150); 9476 format %{ "FLD $src1\n\t" 9477 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9478 "JNP exit\n\t" 9479 "MOV ah,1 // saw a NaN, set CF\n\t" 9480 "SAHF\n" 9481 "exit:\tNOP // avoid branch to branch" %} 9482 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9483 ins_encode( Push_Reg_DPR(src1), 9484 OpcP, RegOpc(src2), 9485 cmpF_P6_fixup ); 9486 ins_pipe( pipe_slow ); 9487 %} 9488 9489 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9490 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9491 match(Set cr (CmpD src1 src2)); 9492 ins_cost(150); 9493 format %{ "FLD $src1\n\t" 9494 "FUCOMIP ST,$src2 // P6 instruction" %} 9495 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9496 ins_encode( Push_Reg_DPR(src1), 9497 OpcP, RegOpc(src2)); 9498 ins_pipe( pipe_slow ); 9499 %} 9500 9501 // Compare & branch 9502 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9503 predicate(UseSSE<=1); 9504 match(Set cr (CmpD src1 src2)); 9505 effect(KILL rax); 9506 ins_cost(200); 9507 format %{ "FLD $src1\n\t" 9508 "FCOMp $src2\n\t" 9509 "FNSTSW AX\n\t" 9510 "TEST AX,0x400\n\t" 9511 "JZ,s flags\n\t" 9512 "MOV AH,1\t# unordered treat as LT\n" 9513 "flags:\tSAHF" %} 9514 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9515 ins_encode( Push_Reg_DPR(src1), 9516 OpcP, RegOpc(src2), 9517 fpu_flags); 9518 ins_pipe( pipe_slow ); 9519 %} 9520 9521 // Compare vs zero into -1,0,1 9522 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9523 predicate(UseSSE<=1); 9524 match(Set dst (CmpD3 src1 zero)); 9525 effect(KILL 
cr, KILL rax); 9526 ins_cost(280); 9527 format %{ "FTSTD $dst,$src1" %} 9528 opcode(0xE4, 0xD9); 9529 ins_encode( Push_Reg_DPR(src1), 9530 OpcS, OpcP, PopFPU, 9531 CmpF_Result(dst)); 9532 ins_pipe( pipe_slow ); 9533 %} 9534 9535 // Compare into -1,0,1 9536 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9537 predicate(UseSSE<=1); 9538 match(Set dst (CmpD3 src1 src2)); 9539 effect(KILL cr, KILL rax); 9540 ins_cost(300); 9541 format %{ "FCMPD $dst,$src1,$src2" %} 9542 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9543 ins_encode( Push_Reg_DPR(src1), 9544 OpcP, RegOpc(src2), 9545 CmpF_Result(dst)); 9546 ins_pipe( pipe_slow ); 9547 %} 9548 9549 // float compare and set condition codes in EFLAGS by XMM regs 9550 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9551 predicate(UseSSE>=2); 9552 match(Set cr (CmpD src1 src2)); 9553 ins_cost(145); 9554 format %{ "UCOMISD $src1,$src2\n\t" 9555 "JNP,s exit\n\t" 9556 "PUSHF\t# saw NaN, set CF\n\t" 9557 "AND [rsp], #0xffffff2b\n\t" 9558 "POPF\n" 9559 "exit:" %} 9560 ins_encode %{ 9561 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9562 emit_cmpfp_fixup(_masm); 9563 %} 9564 ins_pipe( pipe_slow ); 9565 %} 9566 9567 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9568 predicate(UseSSE>=2); 9569 match(Set cr (CmpD src1 src2)); 9570 ins_cost(100); 9571 format %{ "UCOMISD $src1,$src2" %} 9572 ins_encode %{ 9573 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9574 %} 9575 ins_pipe( pipe_slow ); 9576 %} 9577 9578 // float compare and set condition codes in EFLAGS by XMM regs 9579 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9580 predicate(UseSSE>=2); 9581 match(Set cr (CmpD src1 (LoadD src2))); 9582 ins_cost(145); 9583 format %{ "UCOMISD $src1,$src2\n\t" 9584 "JNP,s exit\n\t" 9585 "PUSHF\t# saw NaN, set CF\n\t" 9586 "AND [rsp], #0xffffff2b\n\t" 9587 "POPF\n" 9588 "exit:" %} 9589 ins_encode %{ 9590 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9591 
emit_cmpfp_fixup(_masm); 9592 %} 9593 ins_pipe( pipe_slow ); 9594 %} 9595 9596 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9597 predicate(UseSSE>=2); 9598 match(Set cr (CmpD src1 (LoadD src2))); 9599 ins_cost(100); 9600 format %{ "UCOMISD $src1,$src2" %} 9601 ins_encode %{ 9602 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9603 %} 9604 ins_pipe( pipe_slow ); 9605 %} 9606 9607 // Compare into -1,0,1 in XMM 9608 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9609 predicate(UseSSE>=2); 9610 match(Set dst (CmpD3 src1 src2)); 9611 effect(KILL cr); 9612 ins_cost(255); 9613 format %{ "UCOMISD $src1, $src2\n\t" 9614 "MOV $dst, #-1\n\t" 9615 "JP,s done\n\t" 9616 "JB,s done\n\t" 9617 "SETNE $dst\n\t" 9618 "MOVZB $dst, $dst\n" 9619 "done:" %} 9620 ins_encode %{ 9621 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9622 emit_cmpfp3(_masm, $dst$$Register); 9623 %} 9624 ins_pipe( pipe_slow ); 9625 %} 9626 9627 // Compare into -1,0,1 in XMM and memory 9628 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9629 predicate(UseSSE>=2); 9630 match(Set dst (CmpD3 src1 (LoadD src2))); 9631 effect(KILL cr); 9632 ins_cost(275); 9633 format %{ "UCOMISD $src1, $src2\n\t" 9634 "MOV $dst, #-1\n\t" 9635 "JP,s done\n\t" 9636 "JB,s done\n\t" 9637 "SETNE $dst\n\t" 9638 "MOVZB $dst, $dst\n" 9639 "done:" %} 9640 ins_encode %{ 9641 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9642 emit_cmpfp3(_masm, $dst$$Register); 9643 %} 9644 ins_pipe( pipe_slow ); 9645 %} 9646 9647 9648 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9649 predicate (UseSSE <=1); 9650 match(Set dst (SubD dst src)); 9651 9652 format %{ "FLD $src\n\t" 9653 "DSUBp $dst,ST" %} 9654 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9655 ins_cost(150); 9656 ins_encode( Push_Reg_DPR(src), 9657 OpcP, RegOpc(dst) ); 9658 ins_pipe( fpu_reg_reg ); 9659 %} 9660 9661 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9662 predicate (UseSSE <=1); 9663 
match(Set dst (RoundDouble (SubD src1 src2))); 9664 ins_cost(250); 9665 9666 format %{ "FLD $src2\n\t" 9667 "DSUB ST,$src1\n\t" 9668 "FSTP_D $dst\t# D-round" %} 9669 opcode(0xD8, 0x5); 9670 ins_encode( Push_Reg_DPR(src2), 9671 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9672 ins_pipe( fpu_mem_reg_reg ); 9673 %} 9674 9675 9676 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9677 predicate (UseSSE <=1); 9678 match(Set dst (SubD dst (LoadD src))); 9679 ins_cost(150); 9680 9681 format %{ "FLD $src\n\t" 9682 "DSUBp $dst,ST" %} 9683 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9684 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9685 OpcP, RegOpc(dst) ); 9686 ins_pipe( fpu_reg_mem ); 9687 %} 9688 9689 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9690 predicate (UseSSE<=1); 9691 match(Set dst (AbsD src)); 9692 ins_cost(100); 9693 format %{ "FABS" %} 9694 opcode(0xE1, 0xD9); 9695 ins_encode( OpcS, OpcP ); 9696 ins_pipe( fpu_reg_reg ); 9697 %} 9698 9699 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9700 predicate(UseSSE<=1); 9701 match(Set dst (NegD src)); 9702 ins_cost(100); 9703 format %{ "FCHS" %} 9704 opcode(0xE0, 0xD9); 9705 ins_encode( OpcS, OpcP ); 9706 ins_pipe( fpu_reg_reg ); 9707 %} 9708 9709 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9710 predicate(UseSSE<=1); 9711 match(Set dst (AddD dst src)); 9712 format %{ "FLD $src\n\t" 9713 "DADD $dst,ST" %} 9714 size(4); 9715 ins_cost(150); 9716 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9717 ins_encode( Push_Reg_DPR(src), 9718 OpcP, RegOpc(dst) ); 9719 ins_pipe( fpu_reg_reg ); 9720 %} 9721 9722 9723 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9724 predicate(UseSSE<=1); 9725 match(Set dst (RoundDouble (AddD src1 src2))); 9726 ins_cost(250); 9727 9728 format %{ "FLD $src2\n\t" 9729 "DADD ST,$src1\n\t" 9730 "FSTP_D $dst\t# D-round" %} 9731 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9732 ins_encode( Push_Reg_DPR(src2), 9733 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9734 ins_pipe( 
fpu_mem_reg_reg ); 9735 %} 9736 9737 9738 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9739 predicate(UseSSE<=1); 9740 match(Set dst (AddD dst (LoadD src))); 9741 ins_cost(150); 9742 9743 format %{ "FLD $src\n\t" 9744 "DADDp $dst,ST" %} 9745 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9746 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9747 OpcP, RegOpc(dst) ); 9748 ins_pipe( fpu_reg_mem ); 9749 %} 9750 9751 // add-to-memory 9752 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9753 predicate(UseSSE<=1); 9754 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9755 ins_cost(150); 9756 9757 format %{ "FLD_D $dst\n\t" 9758 "DADD ST,$src\n\t" 9759 "FST_D $dst" %} 9760 opcode(0xDD, 0x0); 9761 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9762 Opcode(0xD8), RegOpc(src), 9763 set_instruction_start, 9764 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9765 ins_pipe( fpu_reg_mem ); 9766 %} 9767 9768 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9769 predicate(UseSSE<=1); 9770 match(Set dst (AddD dst con)); 9771 ins_cost(125); 9772 format %{ "FLD1\n\t" 9773 "DADDp $dst,ST" %} 9774 ins_encode %{ 9775 __ fld1(); 9776 __ faddp($dst$$reg); 9777 %} 9778 ins_pipe(fpu_reg); 9779 %} 9780 9781 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9782 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9783 match(Set dst (AddD dst con)); 9784 ins_cost(200); 9785 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9786 "DADDp $dst,ST" %} 9787 ins_encode %{ 9788 __ fld_d($constantaddress($con)); 9789 __ faddp($dst$$reg); 9790 %} 9791 ins_pipe(fpu_reg_mem); 9792 %} 9793 9794 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9795 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9796 match(Set dst (RoundDouble (AddD src con))); 9797 ins_cost(200); 9798 format %{ "FLD_D [$constantaddress]\t# load from 
constant table: double=$con\n\t" 9799 "DADD ST,$src\n\t" 9800 "FSTP_D $dst\t# D-round" %} 9801 ins_encode %{ 9802 __ fld_d($constantaddress($con)); 9803 __ fadd($src$$reg); 9804 __ fstp_d(Address(rsp, $dst$$disp)); 9805 %} 9806 ins_pipe(fpu_mem_reg_con); 9807 %} 9808 9809 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9810 predicate(UseSSE<=1); 9811 match(Set dst (MulD dst src)); 9812 format %{ "FLD $src\n\t" 9813 "DMULp $dst,ST" %} 9814 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9815 ins_cost(150); 9816 ins_encode( Push_Reg_DPR(src), 9817 OpcP, RegOpc(dst) ); 9818 ins_pipe( fpu_reg_reg ); 9819 %} 9820 9821 // Strict FP instruction biases argument before multiply then 9822 // biases result to avoid double rounding of subnormals. 9823 // 9824 // scale arg1 by multiplying arg1 by 2^(-15360) 9825 // load arg2 9826 // multiply scaled arg1 by arg2 9827 // rescale product by 2^(15360) 9828 // 9829 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9830 predicate( UseSSE<=1 && Compile::current()->has_method() ); 9831 match(Set dst (MulD dst src)); 9832 ins_cost(1); // Select this instruction for all FP double multiplies 9833 9834 format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" 9835 "DMULp $dst,ST\n\t" 9836 "FLD $src\n\t" 9837 "DMULp $dst,ST\n\t" 9838 "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" 9839 "DMULp $dst,ST\n\t" %} 9840 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9841 ins_encode( strictfp_bias1(dst), 9842 Push_Reg_DPR(src), 9843 OpcP, RegOpc(dst), 9844 strictfp_bias2(dst) ); 9845 ins_pipe( fpu_reg_reg ); 9846 %} 9847 9848 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9849 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9850 match(Set dst (MulD dst con)); 9851 ins_cost(200); 9852 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9853 "DMULp $dst,ST" %} 9854 ins_encode %{ 9855 __ fld_d($constantaddress($con)); 9856 __ fmulp($dst$$reg); 9857 %} 9858 
ins_pipe(fpu_reg_mem); 9859 %} 9860 9861 9862 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9863 predicate( UseSSE<=1 ); 9864 match(Set dst (MulD dst (LoadD src))); 9865 ins_cost(200); 9866 format %{ "FLD_D $src\n\t" 9867 "DMULp $dst,ST" %} 9868 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9869 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9870 OpcP, RegOpc(dst) ); 9871 ins_pipe( fpu_reg_mem ); 9872 %} 9873 9874 // 9875 // Cisc-alternate to reg-reg multiply 9876 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9877 predicate( UseSSE<=1 ); 9878 match(Set dst (MulD src (LoadD mem))); 9879 ins_cost(250); 9880 format %{ "FLD_D $mem\n\t" 9881 "DMUL ST,$src\n\t" 9882 "FSTP_D $dst" %} 9883 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9884 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9885 OpcReg_FPR(src), 9886 Pop_Reg_DPR(dst) ); 9887 ins_pipe( fpu_reg_reg_mem ); 9888 %} 9889 9890 9891 // MACRO3 -- addDPR a mulDPR 9892 // This instruction is a '2-address' instruction in that the result goes 9893 // back to src2. This eliminates a move from the macro; possibly the 9894 // register allocator will have to add it back (and maybe not). 
9895 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9896 predicate( UseSSE<=1 ); 9897 match(Set src2 (AddD (MulD src0 src1) src2)); 9898 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9899 "DMUL ST,$src1\n\t" 9900 "DADDp $src2,ST" %} 9901 ins_cost(250); 9902 opcode(0xDD); /* LoadD DD /0 */ 9903 ins_encode( Push_Reg_FPR(src0), 9904 FMul_ST_reg(src1), 9905 FAddP_reg_ST(src2) ); 9906 ins_pipe( fpu_reg_reg_reg ); 9907 %} 9908 9909 9910 // MACRO3 -- subDPR a mulDPR 9911 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9912 predicate( UseSSE<=1 ); 9913 match(Set src2 (SubD (MulD src0 src1) src2)); 9914 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9915 "DMUL ST,$src1\n\t" 9916 "DSUBRp $src2,ST" %} 9917 ins_cost(250); 9918 ins_encode( Push_Reg_FPR(src0), 9919 FMul_ST_reg(src1), 9920 Opcode(0xDE), Opc_plus(0xE0,src2)); 9921 ins_pipe( fpu_reg_reg_reg ); 9922 %} 9923 9924 9925 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9926 predicate( UseSSE<=1 ); 9927 match(Set dst (DivD dst src)); 9928 9929 format %{ "FLD $src\n\t" 9930 "FDIVp $dst,ST" %} 9931 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9932 ins_cost(150); 9933 ins_encode( Push_Reg_DPR(src), 9934 OpcP, RegOpc(dst) ); 9935 ins_pipe( fpu_reg_reg ); 9936 %} 9937 9938 // Strict FP instruction biases argument before division then 9939 // biases result, to avoid double rounding of subnormals. 
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// Strict-FP version of the x87 double divide. The operand is biased down
// before the divide and the quotient re-biased afterwards so subnormal
// results are rounded exactly once (x87 registers are 80-bit wide).
// Only selected when compiling an actual Java method (has_method()),
// since stub code does not require strict-fp semantics.
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(01); // deliberately tiny cost: always preferred for strict-fp divides

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Double-precision remainder via x87 FPREM, non-SSE path.
// The FPREM loop and status-word inspection live in emitModDPR(),
// which clobbers EAX (FNSTSW AX) and EFLAGS (SAHF) — hence the KILLs.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Double-precision remainder for the SSE2 path: XMM operands are bounced
// through the stack into the x87 unit (FPREM has no SSE equivalent), the
// partial-remainder loop runs until C2 clears, and the result is moved back.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

instruct atanDPR_reg(regDPR dst,
                     regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// AtanD for XMM doubles: operands pass through the FPU via
// Push_SrcD/Push_ResultD, which adjust ESP (hence KILL cr).
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// SqrtD with doubles on the FPU stack.
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
//     fcompp();
//     fwait(); fnstsw_ax();
//     sahf();
//     movl(dst, unordered_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//   exit:

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode(
              Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// P6 float compare when only the carry flag is consumed (eFlagsRegUCF),
// so no NaN fixup sequence is emitted.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// As above but only the carry flag is consumed, so no NaN fixup.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand variant, carry-flag-only consumer.
instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3
                 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst,
                      regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// AbsF constrained to the top-of-stack register (FABS has no operand).
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// NegF constrained to the top-of-stack register (FCHS has no operand).
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not
// round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst
            (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

//
// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF
                      dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// ModF for XMM floats: operands bounce through the FPU stack
// (FPREM loop, see format below).
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted.  Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS
            $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is not already at the top of the FPU stack, load a copy
    // first; otherwise store the top-of-stack value directly.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister,
                 $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 is the hardware's "invalid/overflow" result; take the
    // slow path through d2i_wrapper to get Java corner-case semantics.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW
            trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // Result 0x80000000:00000000 marks overflow/NaN; redo via d2l_wrapper.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    // 0x80000000 flags overflow/NaN; take the slow path via d2i_wrapper.
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // Spill the XMM float and convert via x87 FIST with the FPU control
    // word forced to truncate (Java narrowing rounds toward zero).
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // FIST stores 0x8000000000000000 on overflow/NaN; on that sentinel fall
    // through to the d2l_wrapper stub for the exact Java corner cases.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert an int (in a stack slot) to a double on the x87 stack.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Int-to-double with the load folded into the conversion.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Int-to-double via MOVD + packed convert, selected by UseXmmI2D.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// FILD directly from memory; not used when 24-bit rounding is selected.
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// Matches ConvI2F whose input is (AndI x 255): the value fits in a byte,
// so it is exactly representable even in 24-bit precision mode.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Int-to-float via MOVD + packed convert, selected by UseXmmI2F.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy into both halves, arithmetic-shift the high.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long-to-double via x87 FILD on the pushed 64-bit value; result D-rounded
// through the destination stack slot.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Long-to-double: x87 does the conversion, then bounce through the stack
// into an XMM register.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Long-to-float: x87 conversion, F-rounded by the FSTP_S store, then moved
// into an XMM register.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Long-to-float into a stack slot (no predicate: usable at any UseSSE level).
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long-to-int is just the low half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Raw bit moves between float and int views (no conversion).
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);  /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit moves between double and long views (no conversion); the long
// occupies a register pair on 32-bit x86.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// XMM double -> register pair: low word via MOVD, high word extracted by
// shuffling it into lane 0 of tmp first.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);  /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Same load, chosen when clearing the upper half is undesirable.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Register pair -> XMM double: move both halves and interleave.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// =======================================================================
// fast clearing of an array
// Small ClearArray non-AVX512.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large == false, no AVX512 mask register available (knoreg).
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 non-constant length.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // Same as rep_stos but with an AVX512 opmask temp for the masked tail.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray non-AVX512.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // is_large == true: skip the short-length inline loop entirely.
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray AVX512.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 constant length.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // Length is a compile-time constant here, so the masked-store overload
    // of clear_mem is used (no runtime count register).
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// String compare, byte[]/byte[] (Latin1/Latin1), pre-AVX512VLBW.
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Same as above with an AVX512 opmask temp.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, char[]/char[] (UTF-16/UTF-16).
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, Latin1 vs UTF-16.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, UTF-16 vs Latin1. Note the operands are passed to
// string_compare swapped (str2/cnt2 first) — presumably the macro assembler
// implements UL in terms of the LU comparison; confirm in macroAssembler.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // first arg false == StrEquals (not ArraysEquals); second-to-last false == byte elements.
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}

instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// Mixed-encoding (UL) constant-size substring search; substring is Latin-1,
// haystack is UTF-16 — same 8-element threshold as the UU rule.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length substring search, Latin-1 (LL).
// The (-1) constant-length argument tells the stub cnt2 is a runtime value.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length substring search, UTF-16 (UU).
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Variable-length substring search, mixed encoding (UL).
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// Single-char search in a UTF-16 string (StrIndexOfChar, encoding U).
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Single-char search in a Latin-1 string (encoding L).
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// fast array equals
// byte[] variant (AryEq encoding LL); arrays_equals is called with
// is_array_equ == true so lengths are read from the array headers.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX variant of array_equalsB (real opmask temp).
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// char[] variant (AryEq encoding UU) — note the `true /* char */` flag.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX variant of array_equalsC.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Detect any negative byte in a byte[] (used for Latin-1/ASCII checks).
// Non-EVEX fallback: predicate also requires !supports_bmi2(), i.e. the EVEX
// rule below needs both AVX512VLBW and BMI2; two knoreg placeholders match the
// two opmask parameters of the stub.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX variant of has_negatives: two live opmask temps.
instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}


// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX variant of string_compress (AVX512VLBW + BMI2; two opmask temps).
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
// `Universe dummy` result operand: StrInflatedCopy produces no value.
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX variant of string_inflate.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
// Shares the EncodeISOArray node with the ASCII rule below; the is_ascii()
// predicate splits them, and the trailing bool selects the stub mode.
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  // OpcSErm picks the sign-extended 8-bit form when op2 fits in a byte.
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP    $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare-with-zero folded into TEST reg,reg (shorter encoding, same flags).
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & con) == 0 folded into a single TEST reg,imm.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST   $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & mem) == 0 folded into TEST reg,mem.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST   $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
// Historical mem-on-left form kept for reference; intentionally disabled.
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare-with-zero as TEST reg,reg.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu  $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
// Disabled mem-on-left variant, kept for reference.
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  // relocInfo::none restricts this rule to non-relocatable (raw) pointers.
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST   $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
// opcode(0xCC) is a placeholder; the real bytes come from the min_enc routine.
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN    $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
12474 // // Conditional move for max 12475 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 12476 // effect( USE_DEF op2, USE op1, USE cr ); 12477 // format %{ "CMOVgt $op2,$op1\t! max" %} 12478 // opcode(0x4F,0x0F); 12479 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 12480 // ins_pipe( pipe_cmov_reg ); 12481 //%} 12482 // 12483 // // Max Register with Register (P6 version) 12484 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ 12485 // predicate(VM_Version::supports_cmov() ); 12486 // match(Set op2 (MaxI op1 op2)); 12487 // ins_cost(200); 12488 // expand %{ 12489 // eFlagsReg cr; 12490 // compI_eReg(cr,op1,op2); 12491 // cmovI_reg_gt(op2,op1,cr); 12492 // %} 12493 //%} 12494 12495 // Max Register with Register (generic version) 12496 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 12497 match(Set dst (MaxI dst src)); 12498 effect(KILL flags); 12499 ins_cost(300); 12500 12501 format %{ "MAX $dst,$src" %} 12502 opcode(0xCC); 12503 ins_encode( max_enc(dst,src) ); 12504 ins_pipe( pipe_slow ); 12505 %} 12506 12507 // ============================================================================ 12508 // Counted Loop limit node which represents exact final iterator value. 12509 // Note: the resulting value should fit into integer range since 12510 // counted loops have limit check on overflow. 12511 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ 12512 match(Set limit (LoopLimit (Binary init limit) stride)); 12513 effect(TEMP limit_hi, TEMP tmp, KILL flags); 12514 ins_cost(300); 12515 12516 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} 12517 ins_encode %{ 12518 int strd = (int)$stride$$constant; 12519 assert(strd != 1 && strd != -1, "sanity"); 12520 int m1 = (strd > 0) ? 
1 : -1; 12521 // Convert limit to long (EAX:EDX) 12522 __ cdql(); 12523 // Convert init to long (init:tmp) 12524 __ movl($tmp$$Register, $init$$Register); 12525 __ sarl($tmp$$Register, 31); 12526 // $limit - $init 12527 __ subl($limit$$Register, $init$$Register); 12528 __ sbbl($limit_hi$$Register, $tmp$$Register); 12529 // + ($stride - 1) 12530 if (strd > 0) { 12531 __ addl($limit$$Register, (strd - 1)); 12532 __ adcl($limit_hi$$Register, 0); 12533 __ movl($tmp$$Register, strd); 12534 } else { 12535 __ addl($limit$$Register, (strd + 1)); 12536 __ adcl($limit_hi$$Register, -1); 12537 __ lneg($limit_hi$$Register, $limit$$Register); 12538 __ movl($tmp$$Register, -strd); 12539 } 12540 // signed devision: (EAX:EDX) / pos_stride 12541 __ idivl($tmp$$Register); 12542 if (strd < 0) { 12543 // restore sign 12544 __ negl($tmp$$Register); 12545 } 12546 // (EAX) * stride 12547 __ mull($tmp$$Register); 12548 // + init (ignore upper bits) 12549 __ addl($limit$$Register, $init$$Register); 12550 %} 12551 ins_pipe( pipe_slow ); 12552 %} 12553 12554 // ============================================================================ 12555 // Branch Instructions 12556 // Jump Table 12557 instruct jumpXtnd(rRegI switch_val) %{ 12558 match(Jump switch_val); 12559 ins_cost(350); 12560 format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} 12561 ins_encode %{ 12562 // Jump to Address(table_base + switch_reg) 12563 Address index(noreg, $switch_val$$Register, Address::times_1); 12564 __ jump(ArrayAddress($constantaddress, index)); 12565 %} 12566 ins_pipe(pipe_jmp); 12567 %} 12568 12569 // Jump Direct - Label defines a relative address from JMP+1 12570 instruct jmpDir(label labl) %{ 12571 match(Goto); 12572 effect(USE labl); 12573 12574 ins_cost(300); 12575 format %{ "JMP $labl" %} 12576 size(5); 12577 ins_encode %{ 12578 Label* L = $labl$$label; 12579 __ jmp(*L, false); // Always long jump 12580 %} 12581 ins_pipe( pipe_jmp ); 12582 %} 12583 12584 // Jump Direct Conditional - Label defines a 
// relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6); // NOTE(review): presumably 2-byte 0F 8x opcode + rel32 — confirm against Assembler::jcc
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  // Only when no vector mask is live; the *_and_restoreMask variants
  // below handle the has_vector_mask_set() case.
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Unsigned-comparison variant of jmpLoopEnd.
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Unsigned variant for carry-flag-only comparisons; cheaper (ins_cost 200).
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Bounded mask operand used in following pattern is needed for
// post-loop multiversioning.
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl, TEMP ktmp);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    // Restore the vector mask clobbered inside the post loop.
    __ restorevectmask($ktmp$$KRegister);
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Bounded mask operand used in following pattern is needed for
// post-loop multiversioning.
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl, TEMP ktmp);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask($ktmp$$KRegister);
  %}
  ins_pipe( pipe_jcc );
%}

// Bounded mask operand used in following pattern is needed for
// post-loop multiversioning.
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
  predicate(PostLoopMultiversioning && n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl, TEMP ktmp);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    // Restore the vector mask clobbered inside the post loop.
    __ restorevectmask($ktmp$$KRegister);
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Float-compare conditional branch that must also check the parity flag
// (unordered result). eq needs "if !parity && equal"; ne needs
// "if parity || notEqual".
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Unordered (PF set) counts as not-equal: branch on either flag.
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Unordered must NOT take the equal branch: skip over it on PF.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Same scan, but consumers only look at the flags (compare vs. NULL),
// so the XOR of EDI can be skipped (opcode 0x0 below).
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2); // opcode + rel8
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short-branch version of jmpConUCF2: two jccb's (parity + condition),
// hence size(4) = two 2-byte short jumps.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Unordered (PF set) counts as not-equal.
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Unordered must NOT take the equal branch.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}
12964 12965 // ============================================================================ 12966 // Long Compare 12967 // 12968 // Currently we hold longs in 2 registers. Comparing such values efficiently 12969 // is tricky. The flavor of compare used depends on whether we are testing 12970 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit. 12971 // The GE test is the negated LT test. The LE test can be had by commuting 12972 // the operands (yielding a GE test) and then negating; negate again for the 12973 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the 12974 // NE test is negated from that. 12975 12976 // Due to a shortcoming in the ADLC, it mixes up expressions like: 12977 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the 12978 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections 12979 // are collapsed internally in the ADLC's dfa-gen code. The match for 12980 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the 12981 // foo match ends up with the wrong leaf. One fix is to not match both 12982 // reg-reg and reg-zero forms of long-compare. This is unfortunate because 12983 // both forms beat the trinary form of long-compare and both are very useful 12984 // on Intel which has so few registers. 12985 12986 // Manifest a CmpL result in an integer register. Very painful. 12987 // This is the test to avoid. 
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // Signed compare of the high words decides unless they are equal;
    // then an unsigned compare of the low words breaks the tie.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);  // dst = +1 (src1 > src2)
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);  // dst = -1 (src1 < src2)
    __ bind(done);                  // dst stays 0 on equality
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  // Sign of the high word alone decides "< 0" vs ">= 0".
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE pointers.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the BoolTest disjunction below is parenthesized so the UseSSE
// guard applies to BOTH arms. The previous form "UseSSE<=1 && lt || ge"
// parsed as "(UseSSE<=1 && lt) || ge" because && binds tighter than ||,
// letting the GE arm match regardless of UseSSE — inconsistent with the
// cmovLL/cmovII/cmovPP predicates above.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  // UseSSE guard parenthesized with the BoolTest disjunction (see note above).
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  // UseSSE guard parenthesized with the BoolTest disjunction (see note above).
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  // UseSSE guard parenthesized with the BoolTest disjunction (see note above).
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE pointers.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the BoolTest disjunction below is parenthesized so the UseSSE
// guard applies to BOTH arms. The previous form "UseSSE<=1 && eq || ne"
// parsed as "(UseSSE<=1 && eq) || ne" because && binds tighter than ||,
// letting the NE arm match regardless of UseSSE — inconsistent with the
// cmovLL/cmovII/cmovPP predicates above.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // UseSSE guard parenthesized with the BoolTest disjunction (see note above).
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  // UseSSE guard parenthesized with the BoolTest disjunction (see note above).
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // UseSSE guard parenthesized with the BoolTest disjunction (see note above).
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Flags came from a swapped-operand compare: only the commuted GT/LE tests apply.
  predicate(_kids[0]->_kids ? false : false); // placeholder removed below — see original
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl); // JGT or JLE...
  %}
%}
// Compare 2 longs and CMOVE a 32-bit int on the commuted (LE/GT) result.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  // Requires hardware CMOV; only the commuted LE/GT tests may use LEGT flags.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);                   // CMOVcc r32, r/m32 (condition folded in by enc_cmov)
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source form of the above; conditionally loads the int from memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Compare 2 longs and CMOVE a pointer on the commuted (LE/GT) result.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 form, UseSSE <= 1)
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  // FIX(review): parenthesize the BoolTest disjunction.  The predicate was
  // written "UseSSE<=1 && le || gt", which C++ precedence parses as
  // "(UseSSE<=1 && le) || gt", so the UseSSE guard never applied to the GT
  // arm — inconsistent with the explicitly parenthesized cmovLL/cmovII/cmovPP
  // predicates above.  The same fix is applied to the three rules below.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (SSE2 form, UseSSE >= 2)
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // FIX(review): parenthesized — see cmovDDPR_reg_LEGT above.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, UseSSE == 0)
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // FIX(review): parenthesized — see cmovDDPR_reg_LEGT above.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form, UseSSE >= 1)
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // FIX(review): parenthesized — see cmovDDPR_reg_LEGT above.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
// Dispatches through the inline cache: EAX carries the cached-oop sentinel.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that touches no FP state, so the x87 stack is left untouched.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);                        // near return, no immediate
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{
  match(TailCall jump_target method_ptr);
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */  // indirect near jump through register
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // The return address is discarded into EDX before the indirect jump.
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// RTM (transactional) fast-lock path; selected only when the compile uses RTM.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    // On 32-bit there is no thread register, so load it explicitly.
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Stack-locking fast path (non-RTM, non-lightweight locking modes).
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(LockingMode != LM_LIGHTWEIGHT && !Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    // No RTM state in this variant: rtm counters/method-data are NULL,
    // use_rtm/profile_rtm are false.
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  predicate(LockingMode != LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Lightweight-locking (LM_LIGHTWEIGHT) fast path; EAX is required by the
// CAS in the macro-assembler helper.
instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastLock object box));
  effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{
  predicate(LockingMode == LM_LIGHTWEIGHT);
  match(Set cr (FastUnlock object eax_reg));
  effect(TEMP tmp, USE_KILL eax_reg, TEMP thread);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    // TEST EAX, [poll] — a faulting load on the guard page triggers the safepoint.
    __ testl(rax, Address($poll$$Register, 0));
    address post_pc = __ pc();
    // 0x85 is the TEST r/m32,r32 opcode; verifies the expected 2-byte encoding.
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Fold a load of a just-stored value into a re-store of that value:
// a store immediately followed by a load of the same slot becomes the store alone.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.