//
// Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
// For the general registers below it is the standard x86 3-bit register
// number (EAX=0, ECX=1, EDX=2, EBX=3, ESP=4, EBP=5, ESI=6, EDI=7).

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// ESP is never allocated (NS/NS); it is only referenced explicitly by
// stack-manipulating encodings.
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
// Each x87 FPU register is described as two 32-bit halves (xL/xH) so that a
// double occupies an adjacent, aligned pair of 32-bit chunks.  FPR0 has no
// VMReg (VMRegImpl::Bad()) because it is never visible to the allocator.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI,   ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs.
// Pairs are listed low-half first; they match the Long pairs named in the
// alloc_class comment above (EDX:EAX, EBX:ECX, EDI:EBP).
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register.
// The +2 matches the L/H layout of the register definitions above: the high
// half of a pair lives two encodings above the low half.
#define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
#define HIGH_FROM_LOW_ENC(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Nothing to initialize on 32-bit x86 (kept for interface parity with other
// platforms' .ad files).
void reg_mask_init() {}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Copies the 64-bit pattern (lo, hi) into the first 16-byte-aligned slot at
// or below 'adr' and returns that aligned address.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool pointer is the 16-byte-aligned home of one 128-bit mask inside
// fp_signmask_pool (the extra leading pair absorbs the alignment slop).
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call for FPU-mode restore
// and/or AVX cleanup; used to adjust call offsets and padding below.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All sequence; set elsewhere once it has been
// emitted (-1 means "not yet emitted", checked by the assert below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModRM-format byte: f1 in bits 7..6 (mode), f2 in bits 5..3, f3 in
// bits 2..0.  Also used for SIB bytes (scale/index/base share the layout).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition code OR-ed into it.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Sanity-check embedded oop constants (debug builds only).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store.
// Emits opcode + ModRM + SIB + displacement for an [ESP+disp] operand,
// choosing the 8-bit displacement form when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModRM/SIB/displacement bytes for a register+memory operand.
// index == 0x4 is the x86 "no index" encoding; base == -1 flags an absolute
// 32-bit address (ModRM mode 0, r/m 101).
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Emit a reg-to-reg MOV (0x8B /r); a self-move is elided entirely.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

void
// Patch EFLAGS after comiss/ucomiss so an unordered (NaN) compare reads as
// 'less than' (see the bit diagram in the body).
emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst': -1 (less or unordered),
// 0 (equal), or 1 (greater), from the flags left by a comiss/ucomiss.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the method prolog for debug output; must mirror the frame
// bookkeeping done by MachPrologNode::emit below.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  int max_monitors = C->method() != NULL ?
                     C->max_monitors() : 0;
  // verified_entry emits the whole frame setup (stack bang, push/sub,
  // optional 24-bit FPU control word, stub-frame handling).
  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL, max_monitors);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the method epilog; must mirror MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize  (imm32 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize  (imm8 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//=============================================================================

// Coarse register classes used by the spill-copy code below.
enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
// Classify an allocator register name as int/x87-float/xmm/kreg/stack.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  if (r->is_KRegister()) return rc_kreg;
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or just format / size) a single [ESP+offset] load or store.
// Returns the accumulated estimated instruction size.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Spill or reload one XMM register at [ESP + offset].  An adjacent
// (reg_lo, reg_hi) pair is moved as a 64-bit double (MOVSD/MOVLPD),
// otherwise as a 32-bit float (MOVSS).  Returns the accumulated encoding
// size, accounting for SSE/AVX/EVEX prefixes and, under EVEX, compressed
// displacements.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {   // adjacent pair => 64-bit move
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Displacement size: under EVEX (UseAVX > 2) the displacement may compress
  // to one byte; otherwise imm8 vs imm32 depends on the raw offset.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM register-to-register copy.  An adjacent pair moves as a double,
// otherwise as a float.  Returns the accumulated encoding size.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Move a 32-bit general purpose register into an XMM register (MOVD).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// Move an XMM register into a 32-bit general purpose register (MOVD).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register copy: MOV r32,r/m32 (0x8B /r), 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 floating point register to [ESP + offset].  If the value is
// not already in FPR1 (top of stack) it is first duplicated with FLD and
// stored with a popping FSTP; otherwise a non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // The "register" passed down to impl_helper() is really the ModRM reg
  // field (sub-opcode): EBX_num encodes /3 (FSTP, store & pop) and EDX_num
  // encodes /2 (FST, store without pop).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st);

// Vector stack-slot to stack-slot copy.  32/64-bit vectors are moved with
// PUSH/POP pairs; 128-bit and wider vectors are bounced through xmm0, which
// is saved to and restored from the area just below ESP.
static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    switch (ireg) {
    case Op_VecS: // one 32-bit push/pop
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD: // two 32-bit pushes/pops
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX: // bounce through xmm0
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      // NOTE(review): the emitted code above uses popl, but this debug
      // listing prints popq -- confirm against the emitted instructions.
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}

// Workhorse for spill copies.  Depending on the arguments it either emits
// code (cbuf != NULL), pretty-prints it (cbuf == NULL and !do_size), or just
// accumulates the encoding size.  Dispatches on the rc_class() of the first
// (and, for 64-bit values, second) source/destination registers.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // Vector spills go through the dedicated vector helpers (vector masks
  // fall through to the kreg paths further below).
  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high half first so it is not clobbered by the low-half POP.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( src_first_rc == rc_stack && dst_first_rc == rc_int )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                         st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP is 2+2 bytes; a lone FST is 2.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // FLD from memory (opcode+modrm+sib+disp) plus the 2-byte FSTP.
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg: bounce through a temporary stack slot
  // carved out just below ESP.
  if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // AVX-512 opmask specific spilling.
  if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(src_first);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    int offset = ra_->reg2offset(dst_first);
    __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
    Unimplemented();
    return 0;
  }

  if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
    assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
    assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
    MacroAssembler _masm(cbuf);
    __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
    return 0;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  // cbuf == NULL, do_size == false selects pretty-print mode.
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// LEA reg,[ESP + offset]: materialize the address of the lock's stack slot.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    // mod == 0x2: 32-bit displacement form
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    // mod == 0x1: 8-bit displacement form
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// Must agree byte-for-byte with the two LEA forms emitted above.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;  // opcode + modrm + sib + disp32
  }
  else {
    return 4;  // opcode + modrm + sib + disp8
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: inline-cache check (receiver klass vs. cached
// klass in EAX), jumping to the IC-miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Fixed size, cross-checked by the assert in emit() above.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

// Vector calling convention not supported.
1393 const bool Matcher::supports_vector_calling_convention() { 1394 return false; 1395 } 1396 1397 OptoRegPair Matcher::vector_return_value(uint ideal_reg) { 1398 Unimplemented(); 1399 return OptoRegPair(0, 0); 1400 } 1401 1402 // Is this branch offset short enough that a short branch can be used? 1403 // 1404 // NOTE: If the platform does not provide any short branch variants, then 1405 // this method should return false for offset 0. 1406 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1407 // The passed offset is relative to address of the branch. 1408 // On 86 a branch displacement is calculated relative to address 1409 // of a next instruction. 1410 offset -= br_size; 1411 1412 // the short version of jmpConUCF2 contains multiple branches, 1413 // making the reach slightly less 1414 if (rule == jmpConUCF2_rule) 1415 return (-126 <= offset && offset <= 125); 1416 return (-128 <= offset && offset <= 127); 1417 } 1418 1419 // Return whether or not this register is ever used as an argument. This 1420 // function is used on startup to build the trampoline stubs in generateOptoStub. 1421 // Registers not mentioned will be killed by the VM call in the trampoline, and 1422 // arguments in those registers not be available to the callee. 1423 bool Matcher::can_be_java_arg( int reg ) { 1424 if( reg == ECX_num || reg == EDX_num ) return true; 1425 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1426 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1427 return false; 1428 } 1429 1430 bool Matcher::is_spillable_arg( int reg ) { 1431 return can_be_java_arg(reg); 1432 } 1433 1434 uint Matcher::int_pressure_limit() 1435 { 1436 return (INTPRESSURE == -1) ? 6 : INTPRESSURE; 1437 } 1438 1439 uint Matcher::float_pressure_limit() 1440 { 1441 return (FLOATPRESSURE == -1) ? 
6 : FLOATPRESSURE; 1442 } 1443 1444 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1445 // Use hardware integer DIV instruction when 1446 // it is faster than a code which use multiply. 1447 // Only when constant divisor fits into 32 bit 1448 // (min_jint is excluded to get only correct 1449 // positive 32 bit values from negative). 1450 return VM_Version::has_fast_idiv() && 1451 (divisor == (int)divisor && divisor != min_jint); 1452 } 1453 1454 // Register for DIVI projection of divmodI 1455 RegMask Matcher::divI_proj_mask() { 1456 return EAX_REG_mask(); 1457 } 1458 1459 // Register for MODI projection of divmodI 1460 RegMask Matcher::modI_proj_mask() { 1461 return EDX_REG_mask(); 1462 } 1463 1464 // Register for DIVL projection of divmodL 1465 RegMask Matcher::divL_proj_mask() { 1466 ShouldNotReachHere(); 1467 return RegMask(); 1468 } 1469 1470 // Register for MODL projection of divmodL 1471 RegMask Matcher::modL_proj_mask() { 1472 ShouldNotReachHere(); 1473 return RegMask(); 1474 } 1475 1476 const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1477 return NO_REG_mask(); 1478 } 1479 1480 // Returns true if the high 32 bits of the value is known to be zero. 1481 bool is_operand_hi32_zero(Node* n) { 1482 int opc = n->Opcode(); 1483 if (opc == Op_AndL) { 1484 Node* o2 = n->in(2); 1485 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1486 return true; 1487 } 1488 } 1489 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1490 return true; 1491 } 1492 return false; 1493 } 1494 1495 %} 1496 1497 //----------ENCODING BLOCK----------------------------------------------------- 1498 // This block specifies the encoding classes used by the compiler to output 1499 // byte streams. Encoding classes generate functions which are called by 1500 // Machine Instruction Nodes in order to generate the bit encoding of the 1501 // instruction. Operands specify their base encoding interface with the 1502 // interface keyword. 
// There are currently four supported interfaces:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.  MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.
  // In future, we can generalize this by adding a syntax that specifies the
  // sizes of fields in an order, so that the adlc can build the emit
  // functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (selects 16-bit operands).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // ModRM byte for a register-to-register form (mod == 0x3).
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,imm32 with a zero immediate.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32 ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                        special case
    //
    // input : rax,: dividend                     min_int
    //         reg:  divisor                      -1
    //
    // output: rax,: quotient  (= rax, idiv reg)  min_int
    //         rdx:  remainder (= rax, irem reg)  0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    // normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low 32 bits of a long-immediate op: primary opcode, /secondary ModRM on
  // the low register, then imm8 or imm32.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // High 32 bits of a long-immediate op: primary opcode, /tertiary ModRM on
  // the paired high register, then imm8 or imm32.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // 64-bit byte swap: BSWAP each half, then exchange the halves.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW_ENC(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // x87 FCMOV: the two opcode bytes are built from the condition code and
  // the source FP register number.
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    // $primary selects whether a miss zeroes the result register.
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1720 MacroAssembler masm(&cbuf); 1721 int start = masm.offset(); 1722 if (UseSSE >= 2) { 1723 if (VerifyFPU) { 1724 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1725 } 1726 } else { 1727 // External c_calling_convention expects the FPU stack to be 'clean'. 1728 // Compiled code leaves it dirty. Do cleanup now. 1729 masm.empty_FPU_stack(); 1730 } 1731 if (sizeof_FFree_Float_Stack_All == -1) { 1732 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1733 } else { 1734 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1735 } 1736 %} 1737 1738 enc_class Verify_FPU_For_Leaf %{ 1739 if( VerifyFPU ) { 1740 MacroAssembler masm(&cbuf); 1741 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1742 } 1743 %} 1744 1745 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1746 // This is the instruction starting address for relocation info. 1747 MacroAssembler _masm(&cbuf); 1748 cbuf.set_insts_mark(); 1749 $$$emit8$primary; 1750 // CALL directly to the runtime 1751 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1752 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1753 __ post_call_nop(); 1754 1755 if (UseSSE >= 2) { 1756 MacroAssembler _masm(&cbuf); 1757 BasicType rt = tf()->return_type(); 1758 1759 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1760 // A C runtime call where the return value is unused. In SSE2+ 1761 // mode the result needs to be removed from the FPU stack. It's 1762 // likely that this function call could be removed by the 1763 // optimizer if the C function is a pure function. 
1764 __ ffree(0); 1765 } else if (rt == T_FLOAT) { 1766 __ lea(rsp, Address(rsp, -4)); 1767 __ fstp_s(Address(rsp, 0)); 1768 __ movflt(xmm0, Address(rsp, 0)); 1769 __ lea(rsp, Address(rsp, 4)); 1770 } else if (rt == T_DOUBLE) { 1771 __ lea(rsp, Address(rsp, -8)); 1772 __ fstp_d(Address(rsp, 0)); 1773 __ movdbl(xmm0, Address(rsp, 0)); 1774 __ lea(rsp, Address(rsp, 8)); 1775 } 1776 } 1777 %} 1778 1779 enc_class pre_call_resets %{ 1780 // If method sets FPU control word restore it here 1781 debug_only(int off0 = cbuf.insts_size()); 1782 if (ra_->C->in_24_bit_fp_mode()) { 1783 MacroAssembler _masm(&cbuf); 1784 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 1785 } 1786 // Clear upper bits of YMM registers when current compiled code uses 1787 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1788 MacroAssembler _masm(&cbuf); 1789 __ vzeroupper(); 1790 debug_only(int off1 = cbuf.insts_size()); 1791 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1792 %} 1793 1794 enc_class post_call_FPU %{ 1795 // If method sets FPU control word do it here also 1796 if (Compile::current()->in_24_bit_fp_mode()) { 1797 MacroAssembler masm(&cbuf); 1798 masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 1799 } 1800 %} 1801 1802 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1803 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1804 // who we intended to call. 1805 MacroAssembler _masm(&cbuf); 1806 cbuf.set_insts_mark(); 1807 $$$emit8$primary; 1808 1809 if (!_method) { 1810 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1811 runtime_call_Relocation::spec(), 1812 RELOC_IMM32); 1813 __ post_call_nop(); 1814 } else { 1815 int method_index = resolved_method_index(cbuf); 1816 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1817 : static_call_Relocation::spec(method_index); 1818 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1819 rspec, RELOC_DISP32); 1820 __ post_call_nop(); 1821 address mark = cbuf.insts_mark(); 1822 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) { 1823 // Calls of the same statically bound method can share 1824 // a stub to the interpreter. 1825 cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off()); 1826 } else { 1827 // Emit stubs for static call. 1828 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark); 1829 if (stub == NULL) { 1830 ciEnv::current()->record_failure("CodeCache is full"); 1831 return; 1832 } 1833 } 1834 } 1835 %} 1836 1837 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1838 MacroAssembler _masm(&cbuf); 1839 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1840 __ post_call_nop(); 1841 %} 1842 1843 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1844 int disp = in_bytes(Method::from_compiled_offset()); 1845 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1846 1847 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1848 MacroAssembler _masm(&cbuf); 1849 cbuf.set_insts_mark(); 1850 $$$emit8$primary; 1851 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1852 emit_d8(cbuf, disp); // Displacement 1853 __ post_call_nop(); 1854 %} 1855 1856 // Following encoding is no longer used, but may be restored if calling 1857 // convention changes significantly. 
1858 // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) 1859 // 1860 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL 1861 // // int ic_reg = Matcher::inline_cache_reg(); 1862 // // int ic_encode = Matcher::_regEncode[ic_reg]; 1863 // // int imo_reg = Matcher::interpreter_method_reg(); 1864 // // int imo_encode = Matcher::_regEncode[imo_reg]; 1865 // 1866 // // // Interpreter expects method_ptr in EBX, currently a callee-saved register, 1867 // // // so we load it immediately before the call 1868 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_ptr 1869 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte 1870 // 1871 // // xor rbp,ebp 1872 // emit_opcode(cbuf, 0x33); 1873 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); 1874 // 1875 // // CALL to interpreter. 1876 // cbuf.set_insts_mark(); 1877 // $$$emit8$primary; 1878 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), 1879 // runtime_call_Relocation::spec(), RELOC_IMM32 ); 1880 // %} 1881 1882 enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR 1883 $$$emit8$primary; 1884 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1885 $$$emit8$shift$$constant; 1886 %} 1887 1888 enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate 1889 // Load immediate does not have a zero or sign extended version 1890 // for 8-bit immediates 1891 emit_opcode(cbuf, 0xB8 + $dst$$reg); 1892 $$$emit32$src$$constant; 1893 %} 1894 1895 enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate 1896 // Load immediate does not have a zero or sign extended version 1897 // for 8-bit immediates 1898 emit_opcode(cbuf, $primary + $dst$$reg); 1899 $$$emit32$src$$constant; 1900 %} 1901 1902 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate 1903 // Load immediate does not have a zero or sign extended version 1904 // for 8-bit immediates 1905 int dst_enc = $dst$$reg; 1906 int src_con = $src$$constant & 0x0FFFFFFFFL; 1907 if (src_con == 0) { 1908 // xor dst, dst 1909 
emit_opcode(cbuf, 0x33); 1910 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1911 } else { 1912 emit_opcode(cbuf, $primary + dst_enc); 1913 emit_d32(cbuf, src_con); 1914 } 1915 %} 1916 1917 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate 1918 // Load immediate does not have a zero or sign extended version 1919 // for 8-bit immediates 1920 int dst_enc = $dst$$reg + 2; 1921 int src_con = ((julong)($src$$constant)) >> 32; 1922 if (src_con == 0) { 1923 // xor dst, dst 1924 emit_opcode(cbuf, 0x33); 1925 emit_rm(cbuf, 0x3, dst_enc, dst_enc); 1926 } else { 1927 emit_opcode(cbuf, $primary + dst_enc); 1928 emit_d32(cbuf, src_con); 1929 } 1930 %} 1931 1932 1933 // Encode a reg-reg copy. If it is useless, then empty encoding. 1934 enc_class enc_Copy( rRegI dst, rRegI src ) %{ 1935 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1936 %} 1937 1938 enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ 1939 encode_Copy( cbuf, $dst$$reg, $src$$reg ); 1940 %} 1941 1942 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1943 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1944 %} 1945 1946 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) 1947 $$$emit8$primary; 1948 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1949 %} 1950 1951 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) 1952 $$$emit8$secondary; 1953 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); 1954 %} 1955 1956 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) 1957 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1958 %} 1959 1960 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) 1961 emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); 1962 %} 1963 1964 enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ 1965 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg)); 1966 %} 1967 1968 enc_class Con32 (immI src) %{ // Con32(storeImmI) 1969 // Output immediate 1970 $$$emit32$src$$constant; 1971 %} 1972 1973 enc_class 
Con32FPR_as_bits(immFPR src) %{ // storeF_imm 1974 // Output Float immediate bits 1975 jfloat jf = $src$$constant; 1976 int jf_as_bits = jint_cast( jf ); 1977 emit_d32(cbuf, jf_as_bits); 1978 %} 1979 1980 enc_class Con32F_as_bits(immF src) %{ // storeX_imm 1981 // Output Float immediate bits 1982 jfloat jf = $src$$constant; 1983 int jf_as_bits = jint_cast( jf ); 1984 emit_d32(cbuf, jf_as_bits); 1985 %} 1986 1987 enc_class Con16 (immI src) %{ // Con16(storeImmI) 1988 // Output immediate 1989 $$$emit16$src$$constant; 1990 %} 1991 1992 enc_class Con_d32(immI src) %{ 1993 emit_d32(cbuf,$src$$constant); 1994 %} 1995 1996 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) 1997 // Output immediate memory reference 1998 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 ); 1999 emit_d32(cbuf, 0x00); 2000 %} 2001 2002 enc_class lock_prefix( ) %{ 2003 emit_opcode(cbuf,0xF0); // [Lock] 2004 %} 2005 2006 // Cmp-xchg long value. 2007 // Note: we need to swap rbx, and rcx before and after the 2008 // cmpxchg8 instruction because the instruction uses 2009 // rcx as the high order word of the new value to store but 2010 // our register encoding uses rbx,. 
// Compare-and-exchange a 64-bit value at [ESI].
// CMPXCHG8B requires the new value in EDX:ECX, but this port's long-register
// pairing supplies it in EDX:EBX, so EBX and ECX are swapped around the
// locked instruction (see note above) and swapped back afterwards.
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

  // XCHG rbx,ecx  (87 /r with mod=11, reg=EBX(3), rm=ECX(1) -> modrm D9)
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
  // [Lock]
  emit_opcode(cbuf,0xF0);
  // CMPXCHG8 [Eptr]  -- 0F C7 /1, mod=00 so [ESI] with no displacement
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xC7);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  // XCHG rbx,ecx  -- restore original register contents
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
%}

// LOCK CMPXCHG dword at [ESI]: compare EAX with [ESI]; reg field 1 (ECX)
// supplies the new value.
enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // CMPXCHG [Eptr]  -- 0F B1 /r, mod=00 memory form
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// LOCK CMPXCHG byte at [ESI]: byte form (0F B0) of the encoding above.
enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // CMPXCHGB [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB0);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// LOCK CMPXCHG word at [ESI]: dword opcode narrowed to 16 bits by the
// operand-size prefix.
enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // 16-bit mode (0x66 operand-size prefix)
  emit_opcode(cbuf, 0x66);

  // CMPXCHGW [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Materialize ZF==1 (equal) as a boolean in $res without disturbing flags:
// MOV is used (not XOR) precisely because it leaves EFLAGS intact.
enc_class enc_flags_ne_to_boolean( iRegI res ) %{
  int res_encoding = $res$$reg;

  // MOV res,0   (B8+r imm32; does not touch flags)
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 0 );
  // JNE,s fail  -- skips the following 5-byte MOV (B8+r + imm32)
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 5 );
  // MOV res,1
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 1 );
  // fail:
%}

// Record the current emit position so a memory operand in the following
// instruction can carry relocation info.
enc_class set_instruction_start( ) %{
  cbuf.set_insts_mark();               // Mark start of opcode for reloc info in mem operand
%}

// Emit the mod/reg/rm (+SIB/disp) bytes for a register + memory operand pair,
// propagating any relocation the displacement needs (e.g. disp-as-oop).
enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = $ereg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc();
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// Like RegMem, but addresses the HIGH half of a long: uses the hi register of
// the pair and the memory word 4 bytes further on. Cannot carry an oop
// relocation since the displacement is adjusted.
enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + 4;      // Offset is 4 further in memory
  assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
%}

// Long shift by a constant 1..31: a double shift (SHLD/SHRD, chosen by the
// instruct's $tertiary opcode) moves bits across the register pair, then a
// plain shift ($primary/$secondary) finishes the originating half.
// $tertiary == 0xA4 is SHLD (left), so r1/r2 operand order flips with the
// shift direction.
enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
  int r1, r2;
  if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,$tertiary);
  emit_rm(cbuf, 0x3, r1, r2);
  emit_d8(cbuf,$cnt$$constant);
  emit_d8(cbuf,$primary);              // plain shift opcode (emitted as a raw byte)
  emit_rm(cbuf, 0x3, $secondary, r1);
  emit_d8(cbuf,$cnt$$constant);
%}

// Arithmetic right shift of a long by a constant 32..63: move hi into lo,
// optionally shift lo by (cnt-32), then SAR the hi word by 31 to replicate
// the sign bit across the upper half.
enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
  emit_opcode( cbuf, 0x8B ); // Move
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
  emit_d8(cbuf,31);
%}

// Logical long shift by a constant 32..63: copy one half into the other,
// optionally shift it by (cnt-32), and clear the vacated half with XOR.
// $secondary == 0x5 is the SHR /5 form (right shift), which selects the
// operand order.
enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
  int r1, r2;
  if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }

  emit_opcode( cbuf, 0x8B ); // Move r1,r2
  emit_rm(cbuf, 0x3, r1, r2);
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_opcode(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_opcode(cbuf,0x33); // XOR r2,r2
  emit_rm(cbuf, 0x3, r2, r2);
%}

// Clone of RegMem but accepts an extra parameter to access each
// half of a double in memory; it never needs relocation info.
// The full opcode byte is passed in as an ADLC constant.
enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
  emit_opcode(cbuf,$opcode$$constant);
  int reg_encoding = $rm_reg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + $disp_for_half$$constant;  // select low (0) or high (4) half
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
//
// Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
// and it never needs relocation information.
// Frequently used to move data between FPU's Stack Top and memory.
// Mod/rm emission with a constant reg/opcode field; asserts the memory
// operand carries no relocation (no oops allowed here).
enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
%}

// Same as above, but forwards whatever relocation the displacement needs.
enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

// LEA-style address computation: dst = src0 + src1 (no index, no scale).
enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
  int reg_encoding = $dst$$reg;
  int base = $src0$$reg;      // 0xFFFFFFFF indicates no base
  int index = 0x04;           // 0x04 indicates no index
  int scale = 0x00;           // 0x00 indicates no scale
  int displace = $src1$$constant; // 0x00 indicates no displacement
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// dst = min(dst, src): compare, then JL (0x7C) over the 2-byte MOV so the
// smaller value stays in dst.
enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst < src around move
  emit_opcode(cbuf,0x7C);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// dst = max(dst, src): same shape as min_enc but branches with JG (0x7F).
enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst > src around move
  emit_opcode(cbuf,0x7F);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

// Store an x87 double register to memory.
enc_class enc_FPR_store(memory mem, regDPR src) %{
  // If src is FPR1, we can just FST to store it.
  // Else we need to FLD it to FPR1, then FSTP to store/pop it.
  int reg_encoding = 0x2; // Just store
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  if( $src$$reg != FPR1L_enc ) {
    reg_encoding = 0x3;  // Store & pop
    emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
  }
  cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
  emit_opcode(cbuf,$primary);
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// Two's-complement negate: F7 /3.
enc_class neg_reg(rRegI dst) %{
  // NEG $dst
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
%}

// SETL (set byte if less, 0F 9C) into the low byte of $dst.
enc_class setLT_reg(eCXRegI dst) %{
  // SETLT $dst
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0x9C);
  emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
%}

// Branchless conditional add: p -= q; SBB turns the borrow into an all-ones
// or all-zeros mask in tmp; masking y and adding gives p += (p<q ? y : 0)
// without a branch.
enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
  int tmpReg = $tmp$$reg;

  // SUB $p,$q
  emit_opcode(cbuf,0x2B);
  emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
  // SBB $tmp,$tmp   -- tmp = carry ? -1 : 0
  emit_opcode(cbuf,0x1B);
  emit_rm(cbuf, 0x3, tmpReg, tmpReg);
  // AND $tmp,$y
  emit_opcode(cbuf,0x23);
  emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
  // ADD $p,$tmp
  emit_opcode(cbuf,0x03);
  emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
%}

// Variable left shift of a long by CL. If bit 5 of the count is set
// (shift >= 32), move lo into hi and clear lo first; SHLD/SHL then handle
// the remaining count mod 32. The JE skips the 4-byte MOV+XOR pair.
enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.hi,$dst.lo
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
  // CLR $dst.lo
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
  // small:
  // SHLD $dst.hi,$dst.lo,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xA5);
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
  // SHL $dst.lo,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
%}

// Variable logical right shift of a long by CL: mirror image of
// shift_left_long (hi moves into lo, hi is cleared, SHRD/SHR finish).
enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
  // CLR $dst.hi
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
  // small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
  // SHR $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
%}

// Variable arithmetic right shift of a long by CL: like shift_right_long but
// the hi word is sign-filled with SAR hi,31 instead of cleared, so the JE
// displacement is 5 bytes (MOV + 3-byte SAR).
enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x05);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
  // SAR $dst.hi,31
  emit_opcode(cbuf, 0xC1);
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
  emit_d8(cbuf, 0x1F );
  // small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
  // SAR $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
%}


// ----------------- Encodings for floating point unit -----------------
// May leave result in FPU-TOS or FPU reg depending on opcodes
enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $src$$reg );
%}

// Pop argument in FPR0 with FSTP ST(0)
enc_class PopFPU() %{
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, 0xD8 );
%}

// !!!!! equivalent to Pop_Reg_F
// Store top-of-stack into ST(i) and pop the x87 stack.
enc_class Pop_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

// Push ST(i-1) onto the x87 stack (FLD duplicates it into the new TOS).
enc_class Push_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
%}

// Multiply ST(dst) by the strictfp bias1 constant (loaded as an 80-bit real
// from a stub-routine address) to pre-scale and avoid double rounding of
// subnormals; paired with strictfp_bias2 below.
enc_class strictfp_bias1( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Undo the pre-scaling applied by strictfp_bias1 (multiplies by the
// reciprocal bias constant).
enc_class strictfp_bias2( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Special case for moving an integer register to a stack slot.
enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
%}

// Special case for moving a register to a stack slot.
2382 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2383 // Opcode already emitted 2384 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2385 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2386 emit_d32(cbuf, $dst$$disp); // Displacement 2387 %} 2388 2389 // Push the integer in stackSlot 'src' onto FP-stack 2390 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2391 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2392 %} 2393 2394 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2395 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2396 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2397 %} 2398 2399 // Same as Pop_Mem_F except for opcode 2400 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2401 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2402 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2403 %} 2404 2405 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2406 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2407 emit_d8( cbuf, 0xD8+$dst$$reg ); 2408 %} 2409 2410 enc_class Push_Reg_FPR( regFPR dst ) %{ 2411 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2412 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2413 %} 2414 2415 // Push FPU's float to a stack-slot, and pop FPU-stack 2416 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2417 int pop = 0x02; 2418 if ($src$$reg != FPR1L_enc) { 2419 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2420 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2421 pop = 0x03; 2422 } 2423 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2424 %} 2425 2426 // Push FPU's double to a stack-slot, and pop FPU-stack 2427 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2428 int pop = 0x02; 2429 if ($src$$reg != FPR1L_enc) { 2430 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2431 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2432 pop = 0x03; 2433 } 2434 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2435 %} 2436 2437 // Push FPU's double to 
a FPU-stack-slot, and pop FPU-stack 2438 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2439 int pop = 0xD0 - 1; // -1 since we skip FLD 2440 if ($src$$reg != FPR1L_enc) { 2441 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2442 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2443 pop = 0xD8; 2444 } 2445 emit_opcode( cbuf, 0xDD ); 2446 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2447 %} 2448 2449 2450 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2451 // load dst in FPR0 2452 emit_opcode( cbuf, 0xD9 ); 2453 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2454 if ($src$$reg != FPR1L_enc) { 2455 // fincstp 2456 emit_opcode (cbuf, 0xD9); 2457 emit_opcode (cbuf, 0xF7); 2458 // swap src with FPR1: 2459 // FXCH FPR1 with src 2460 emit_opcode(cbuf, 0xD9); 2461 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2462 // fdecstp 2463 emit_opcode (cbuf, 0xD9); 2464 emit_opcode (cbuf, 0xF6); 2465 } 2466 %} 2467 2468 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2469 MacroAssembler _masm(&cbuf); 2470 __ subptr(rsp, 8); 2471 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2472 __ fld_d(Address(rsp, 0)); 2473 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2474 __ fld_d(Address(rsp, 0)); 2475 %} 2476 2477 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2478 MacroAssembler _masm(&cbuf); 2479 __ subptr(rsp, 4); 2480 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2481 __ fld_s(Address(rsp, 0)); 2482 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2483 __ fld_s(Address(rsp, 0)); 2484 %} 2485 2486 enc_class Push_ResultD(regD dst) %{ 2487 MacroAssembler _masm(&cbuf); 2488 __ fstp_d(Address(rsp, 0)); 2489 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2490 __ addptr(rsp, 8); 2491 %} 2492 2493 enc_class Push_ResultF(regF dst, immI d8) %{ 2494 MacroAssembler _masm(&cbuf); 2495 __ fstp_s(Address(rsp, 0)); 2496 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2497 __ addptr(rsp, $d8$$constant); 2498 %} 2499 2500 enc_class Push_SrcD(regD src) %{ 2501 MacroAssembler _masm(&cbuf); 2502 __ subptr(rsp, 8); 
2503 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2504 __ fld_d(Address(rsp, 0)); 2505 %} 2506 2507 enc_class push_stack_temp_qword() %{ 2508 MacroAssembler _masm(&cbuf); 2509 __ subptr(rsp, 8); 2510 %} 2511 2512 enc_class pop_stack_temp_qword() %{ 2513 MacroAssembler _masm(&cbuf); 2514 __ addptr(rsp, 8); 2515 %} 2516 2517 enc_class push_xmm_to_fpr1(regD src) %{ 2518 MacroAssembler _masm(&cbuf); 2519 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2520 __ fld_d(Address(rsp, 0)); 2521 %} 2522 2523 enc_class Push_Result_Mod_DPR( regDPR src) %{ 2524 if ($src$$reg != FPR1L_enc) { 2525 // fincstp 2526 emit_opcode (cbuf, 0xD9); 2527 emit_opcode (cbuf, 0xF7); 2528 // FXCH FPR1 with src 2529 emit_opcode(cbuf, 0xD9); 2530 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2531 // fdecstp 2532 emit_opcode (cbuf, 0xD9); 2533 emit_opcode (cbuf, 0xF6); 2534 } 2535 // // following asm replaced with Pop_Reg_F or Pop_Mem_F 2536 // // FSTP FPR$dst$$reg 2537 // emit_opcode( cbuf, 0xDD ); 2538 // emit_d8( cbuf, 0xD8+$dst$$reg ); 2539 %} 2540 2541 enc_class fnstsw_sahf_skip_parity() %{ 2542 // fnstsw ax 2543 emit_opcode( cbuf, 0xDF ); 2544 emit_opcode( cbuf, 0xE0 ); 2545 // sahf 2546 emit_opcode( cbuf, 0x9E ); 2547 // jnp ::skip 2548 emit_opcode( cbuf, 0x7B ); 2549 emit_opcode( cbuf, 0x05 ); 2550 %} 2551 2552 enc_class emitModDPR() %{ 2553 // fprem must be iterative 2554 // :: loop 2555 // fprem 2556 emit_opcode( cbuf, 0xD9 ); 2557 emit_opcode( cbuf, 0xF8 ); 2558 // wait 2559 emit_opcode( cbuf, 0x9b ); 2560 // fnstsw ax 2561 emit_opcode( cbuf, 0xDF ); 2562 emit_opcode( cbuf, 0xE0 ); 2563 // sahf 2564 emit_opcode( cbuf, 0x9E ); 2565 // jp ::loop 2566 emit_opcode( cbuf, 0x0F ); 2567 emit_opcode( cbuf, 0x8A ); 2568 emit_opcode( cbuf, 0xF4 ); 2569 emit_opcode( cbuf, 0xFF ); 2570 emit_opcode( cbuf, 0xFF ); 2571 emit_opcode( cbuf, 0xFF ); 2572 %} 2573 2574 enc_class fpu_flags() %{ 2575 // fnstsw_ax 2576 emit_opcode( cbuf, 0xDF); 2577 emit_opcode( cbuf, 0xE0); 2578 // test ax,0x0400 2579 emit_opcode( cbuf, 
0x66 ); // operand-size prefix for 16-bit immediate 2580 emit_opcode( cbuf, 0xA9 ); 2581 emit_d16 ( cbuf, 0x0400 ); 2582 // // // This sequence works, but stalls for 12-16 cycles on PPro 2583 // // test rax,0x0400 2584 // emit_opcode( cbuf, 0xA9 ); 2585 // emit_d32 ( cbuf, 0x00000400 ); 2586 // 2587 // jz exit (no unordered comparison) 2588 emit_opcode( cbuf, 0x74 ); 2589 emit_d8 ( cbuf, 0x02 ); 2590 // mov ah,1 - treat as LT case (set carry flag) 2591 emit_opcode( cbuf, 0xB4 ); 2592 emit_d8 ( cbuf, 0x01 ); 2593 // sahf 2594 emit_opcode( cbuf, 0x9E); 2595 %} 2596 2597 enc_class cmpF_P6_fixup() %{ 2598 // Fixup the integer flags in case comparison involved a NaN 2599 // 2600 // JNP exit (no unordered comparison, P-flag is set by NaN) 2601 emit_opcode( cbuf, 0x7B ); 2602 emit_d8 ( cbuf, 0x03 ); 2603 // MOV AH,1 - treat as LT case (set carry flag) 2604 emit_opcode( cbuf, 0xB4 ); 2605 emit_d8 ( cbuf, 0x01 ); 2606 // SAHF 2607 emit_opcode( cbuf, 0x9E); 2608 // NOP // target for branch to avoid branch to branch 2609 emit_opcode( cbuf, 0x90); 2610 %} 2611 2612 // fnstsw_ax(); 2613 // sahf(); 2614 // movl(dst, nan_result); 2615 // jcc(Assembler::parity, exit); 2616 // movl(dst, less_result); 2617 // jcc(Assembler::below, exit); 2618 // movl(dst, equal_result); 2619 // jcc(Assembler::equal, exit); 2620 // movl(dst, greater_result); 2621 2622 // less_result = 1; 2623 // greater_result = -1; 2624 // equal_result = 0; 2625 // nan_result = -1; 2626 2627 enc_class CmpF_Result(rRegI dst) %{ 2628 // fnstsw_ax(); 2629 emit_opcode( cbuf, 0xDF); 2630 emit_opcode( cbuf, 0xE0); 2631 // sahf 2632 emit_opcode( cbuf, 0x9E); 2633 // movl(dst, nan_result); 2634 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2635 emit_d32( cbuf, -1 ); 2636 // jcc(Assembler::parity, exit); 2637 emit_opcode( cbuf, 0x7A ); 2638 emit_d8 ( cbuf, 0x13 ); 2639 // movl(dst, less_result); 2640 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2641 emit_d32( cbuf, -1 ); 2642 // jcc(Assembler::below, exit); 2643 emit_opcode( cbuf, 0x72 ); 
    emit_d8 ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8 ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend a 32-bit int into a long register pair (copy lo, copy hi,
  // then arithmetic-shift the hi half right by 31 to replicate the sign bit).
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  // Push the 64-bit long onto the CPU stack and FILD it onto the FPU stack,
  // then pop the 8 scratch bytes back off the CPU stack.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // 64-bit multiply of EDX:EAX by $src1, then shift the high half (EDX)
  // right by $cnt-32 so $dst receives bits [cnt, cnt+31] of the product.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MUL    EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
  %}

  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL    EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  // 64-bit divide via runtime call: push both long args (hi then lo),
  // call SharedRuntime::ldiv, then pop the 16 argument bytes.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf, 0x50+$src2$$reg );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Restore stack
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // 64-bit remainder via runtime call; same calling sequence as long_div
  // but targets SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf, 0x50+$src1$$reg );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf, 0x50+$src2$$reg );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Restore stack
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Test a long against zero: OR the two halves together into $tmp.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // Long equality compare: compare lo halves, skip the hi compare if
  // they already differ.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
  %}

  // Signed long compare: CMP the lo halves, then SBB the hi halves through
  // $tmp so the flags reflect the full 64-bit comparison.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
                      ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
                      : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);            // POP EAX
    // 0x80000000 is the "integer indefinite" value the FPU stores on
    // overflow/NaN; only that case takes the slow runtime path.
    emit_opcode(cbuf,0x3D);            // CMP EAX,imm
    emit_d32 (cbuf,0x80000000);        //         0x80000000
    emit_opcode(cbuf,0x75);            // JNE around_slow_call
    emit_d8  (cbuf,0x07);              // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );           // FLD     ST(i)
    emit_d8  (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);            // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Carry on here...
  %}

  // Convert a double on the FPU stack to a long (EDX:EAX), with the same
  // round-to-zero / slow-path-on-indefinite scheme as DPR2I_encoding.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
                      ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
                      : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);            // POP EAX
    emit_opcode(cbuf,0x5A);            // POP EDX
    // The long "indefinite" result is 0x8000000000000000: hi == 0x80000000
    // and lo == 0 together select the slow runtime path.
    emit_opcode(cbuf,0x81);            // CMP EDX,imm
    emit_d8  (cbuf,0xFA);              // rdx
    emit_d32 (cbuf,0x80000000);        //         0x80000000
    emit_opcode(cbuf,0x75);            // JNE around_slow_call
    emit_d8  (cbuf,0x07+4);            // Size of slow_call
    emit_opcode(cbuf,0x85);            // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);            // 2/rax,/rax,
    emit_opcode(cbuf,0x75);            // JNE around_slow_call
    emit_d8  (cbuf,0x07);              // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );           // FLD     ST(i)
    emit_d8  (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);            // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADDP  ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//        SELF    +--------+
//        |       |  pad2  |  2   pad to align old SP
//        |       +--------+  1
//        |       | locks  |  0
//        |       +--------+----> OptoReg::stack0(), even aligned
//        |       |  pad1  | 11   pad to align new SP
//        |       +--------+
//        |       |        | 10
//        |       | spills |  9   spills
//        V       |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^       |  out   |  7
//        |       |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by     +--------+
//     CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |   new |preserve|      Must be even-aligned.
//        |    SP-+--------+----> Matcher::_new_SP, even aligned
//        |       |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // Note: unlike the C convention above, compiled-Java code returns a
  // float in XMM0 whenever UseSSE>=1 (doubles still require UseSSE>=2).
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one (all bits set)
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: fits in a signed 32-bit value
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K1()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K1));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K2()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K2));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K3()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K3));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K4()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K4));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K5()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K5));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K6()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K6));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K7()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K7));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register excluding EAX ("nax")
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding EAX and EDX ("nadx")
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding ECX ("ncx")
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(esi_reg));
   match(reg);
   match(rRegI);

   format %{ "ESI" %}
   interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// nax_reg class pointer operand (no eAXRegP in the match list).
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// nabx_reg class pointer operand (no eAXRegP/eBXRegP in the match list).
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long value held in a pair of 32-bit registers.
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// NOTE(review): operand is named eBDPRegL and uses class ebpd_reg, but the
// format prints "EBP:EDI" — confirm the intended register pairing.
operand eBDPRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebpd_reg));
  match(reg);

  format %{ "EBP:EDI" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply; the format shows only EAX
// (the low half of the EDX:EAX pair).
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand rFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// predicate(false): never matched directly; selected only via explicit
// operand references in instructions.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}

operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}

operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Condition Code Register used by unsigned long compare
operand flagsReg_ulong_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LTGE" %}
  interface(REG_INTER);
%}

operand flagsReg_ulong_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_EQNE" %}
  interface(REG_INTER);
%}

operand flagsReg_ulong_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_U_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands (x87 stack registers; only used when UseSSE < 2)
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

operand legRegF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
// NOTE(review): no UseSSE predicate here, unlike regF/legRegF above.
operand vlRegF() %{
  constraint(ALLOC_IN_RC(float_reg_vl));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Double register operands
operand legRegD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// NOTE(review): no UseSSE predicate here, unlike regD/legRegD above.
operand vlRegD() %{
  constraint(ALLOC_IN_RC(double_reg_vl));
  match(RegD);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand - absolute address from a pointer immediate.
// base 0xFFFFFFFF and index 0x4 are the "no base"/"no index" sentinels
// (see the stackSlot operands below, where 0x4 is commented "No Index").
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// Note the reversed match order (AddP off reg): pointer immediate as the
// displacement, integer register as the base.
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" with out a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use it's address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
// Address register for load-long: pinned to ESI so it cannot overlap the
// long destination pair (see the comment block above).
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code - signed compare; encodings are the x86 condition-code
// nibbles used in Jcc/SETcc (e.g. 0x4 = "e", 0xC = "l").
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move.
// Excludes overflow/no_overflow tests (see predicate); those two encodings
// below are placeholders only.
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o");     // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares.
// Each condition is swapped relative to cmpOp (less -> "g", etc.).
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code used in unsigned long compares.
// Each condition is swapped relative to cmpOpU (less -> "nbe", etc.).
operand cmpOpU_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x7, "nbe");
    greater_equal(0x6, "be");
    less_equal(0x3, "nb");
    greater(0x2, "b");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
// Note: same as 'memory' above but without indOffset32X (the oop-offset form).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions (x86)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation
// NOTE(review): src is declared as 'memory' here despite the reg-reg name.
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2);
    src : S5(read);
    mem : S3(read);
    DECODE : S0; // any decoder for FPU PUSH
    D0 : S1; // big decoder only
    FPU : S4;
    MEM : S3; // any mem
%}

// FPU op with one memory operand and two FPU register sources.
pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1 : S3(read);
    src2 : S3(read);
    mem : S3(read);
    DECODE : S0(2); // any decoder for FPU PUSH
    D0 : S1; // big decoder only
    FPU : S4;
    MEM : S3; // any mem
%}

// FPU op with a memory operand, an FPU register source and a memory source.
pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1 : S3(read);
    src2 : S3(read);
    mem : S4(read);
    DECODE : S0; // any decoder for FPU PUSH
    D0 : S0(2); // big decoder only
    FPU : S4;
    MEM : S3(2); // any mem
%}

// FPU memory-to-memory op: two memory accesses, no FPU stage booked.
pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1 : S3(read);
    dst : S4(read);
    D0 : S0(2); // big decoder only
    MEM : S3(2); // any mem
%}

// FPU op touching three memory operands.
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1 : S3(read);
    src2 : S3(read);
    dst : S4(read);
    D0 : S0(3); // big decoder only
    FPU : S4;
    MEM : S3(3); // any mem
%}

// FPU op with a memory operand, an FPU register source and a constant.
pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1 : S4(read);
    mem : S4(read);
    DECODE : S0; // any decoder for FPU PUSH
    D0 : S0(2); // big decoder only
    FPU : S4;
    MEM : S3(2); // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst : S5(write);
    D0 : S0; // big decoder only for the load
    DECODE : S1; // any decoder for FPU POP
    FPU : S4;
    MEM : S3; // any mem
%}

// Float load constant combined with an FPU register source.
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst : S5(write);
    src : S3(read);
    D0 : S0; // big decoder only for the load
    DECODE : S1(2); // any decoder for FPU POP
    FPU : S4;
    MEM : S3; // any mem
%}

// Unconditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR : S3;
%}

// Conditional branch; reads the condition codes produced earlier.
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr : S1(read);
    BR : S3;
%}

// Allocation idiom (serializing compare-and-exchange on a heap pointer).
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE : S0(3);
    D0 : S2;
    MEM : S3;
    ALU : S3(2);
    dst : S5(write);
    BR : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0 : S0(2);
    MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%} // end of pipeline description

//----------INSTRUCTIONS-------------------------------------------------------
//
// match -- States which machine-independent subtree may be replaced
// by this instruction.
// ins_cost -- The estimated cost of this instruction is used by instruction
// selection to identify a minimum cost tree of machine
// instructions that matches a tree of machine-independent
// instructions.
// format -- A string providing the disassembly for this instruction.
// The value of an instruction's operand may be inserted
// by referring to it with a '$' prefix.
// opcode -- Three instruction opcodes may be provided. These are referred
// to within an encode class as $primary, $secondary, and $tertiary
// respectively.
// The primary opcode is commonly used to
// indicate the type of machine instruction, while secondary
// and tertiary are often used for prefix options or addressing
// modes.
// ins_encode -- A list of encode classes with parameters. The encode class
// name must have been defined in an 'enc_class' specification
// in the encode section of the architecture description.

// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Load Float
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    // Never emitted: this node is removed before code emission (see note above).
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    // Never emitted: removed during post-selection cleanup.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    // Never emitted: removed during post-selection cleanup.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    // Never emitted: removed during post-selection cleanup.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}



// Load Double
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    // Never emitted: removed during post-selection cleanup.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    // Never emitted: removed during post-selection cleanup.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    // Never emitted: removed during post-selection cleanup.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    // Never emitted: removed during post-selection cleanup.
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

//----------BSWAP-Instruction--------------------------------------------------
// Reverse the byte order of a 32-bit value in place with BSWAP.
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Byte-reverse a 64-bit value held in a register pair: swap bytes within
// each half, then exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  // bswap_long_bytes is an enc_class defined elsewhere in this file.
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Byte-reverse the low 16 bits, zero-extended: BSWAP then logical shift
// right brings the two swapped bytes down with zero fill. KILL cr: the
// shift clobbers the flags.
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Byte-reverse the low 16 bits, sign-extended: as above but the arithmetic
// shift propagates the sign of the swapped value.
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

// Direct LZCNT form, used when the CPU supports it (see predicate).
instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback without LZCNT: BSR gives the index of the highest set bit
// (undefined on zero input, hence the -1 substitution), then 31 - index
// is computed as NEG + ADD 31. Zero input yields -(-1) + 31 = 32.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Long variant with LZCNT on the register pair: if the high word is
// non-zero LZCNT sets CF clear(?) appropriately — the carry flag set by
// LZCNT on an all-zero input routes us to also count the low word and
// add 32. TEMP dst: dst must not alias src while the high word is probed.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    // HIGH_FROM_LOW maps the low half of the long register pair to its
    // paired high register (addresses $src.hi per the format string).
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long fallback via BSR: probe the high word first; if it is zero, fall
// back to the low word (substituting -1 when that is zero too), then map
// the bit index to a leading-zero count with NEG + ADD 63.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Direct TZCNT form, used when the CPU supports it (see predicate).
instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback without TZCNT: BSF leaves dst undefined on zero input, so a
// zero source is patched to the full width (32).
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long variant with TZCNT: count the low word; TZCNT's carry flag (set on
// an all-zero input) routes us to also count the high word and add 32.
// TEMP dst: dst must not alias src while both halves are probed.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    // HIGH_FROM_LOW maps the low half of the long register pair to its
    // paired high register (addresses $src.hi per the format string).
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long fallback via BSF: probe the low word; if zero, probe the high word
// (substituting 32 when that is zero too — giving 64 total) and add 32.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

// POPCNT of a 32-bit register; guarded by the CPU-feature flag.
instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// POPCNT directly from memory, folding the load into the instruction.
instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// 64-bit popcount on a register pair: count each 32-bit half and sum.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    // HIGH_FROM_LOW addresses the paired high register ($src.hi).
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Same as above but counting the two 32-bit words directly from memory
// (the high word lives at $mem + 4 on this little-endian target).
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // SAR by 7 (not 31) suffices: after MOVSX8 the top 25 bits already
    // hold the sign, so shifting any of them down fills the high word.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    // Zero-extension: the high word of an unsigned byte is always zero.
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 mask bits matter after a zero-extended byte load.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// Matches the (x << 24) >> 24 idiom folded over a short load and turns it
// into a single sign-extending byte load.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // SAR by 15 suffices: MOVSX left the top 17 bits as copies of the sign.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// Matches the (x << 24) >> 24 idiom over an unsigned-short load and folds
// it into a single sign-extending byte load.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    // Zero-extension: high word of an unsigned 16-bit value is zero.
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// With a 0xFF mask only the low byte survives, so a zero-extending byte
// load replaces the load+and entirely.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 mask bits matter after a zero-extended short load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // SAR 31 replicates the sign bit across the high word.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
// With a 0xFF mask only the low byte survives, so a zero-extending byte
// load replaces the load+and entirely.
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
// Likewise: a 0xFFFF mask reduces to a zero-extending 16-bit load.
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
// A 31-bit mask clears the sign bit, so the result is non-negative and
// the high word can simply be zeroed instead of sign-extended.
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  // Non-atomic path only; the volatile variants below handle atomic access.
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads: low word at $mem, high word at $mem + 4.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// Atomic 64-bit load via the x87 unit (pre-SSE2 path): a single FILD/FISTP
// pair moves 8 bytes indivisibly through the FPU to a stack slot.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  // enc_loadL_volatile is an enc_class defined elsewhere in this file.
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic 64-bit load via SSE2: one 8-byte MOVSD through an XMM temp,
// landing in a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic 64-bit load via SSE2 into a GP register pair: the XMM temp is
// split with MOVD (low word) and PSRLQ+MOVD (high word).
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B); // MOV r32, r/m32
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B); // MOV r32, r/m32
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B); // MOV r32, r/m32
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 path, UseSSE<=1): push from memory, pop into the
// allocated FPU stack register.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// As loadD but for CPUs where clearing the upper half is not preferred
// (see the UseXmmLoadAndClearUpper predicates on both variants).
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 path, UseSSE==0): push from memory, pop into the
// allocated FPU stack register.
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address (8-bit displacement addressing form).
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D); // LEA r32, m
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Effective Address (32-bit displacement addressing form).
instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D); // LEA r32, m
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Effective Address (base + index + offset addressing form).
instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D); // LEA r32, m
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Effective Address (base + scaled index addressing form).
instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D); // LEA r32, m
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Effective Address (base + scaled index + offset addressing form).
instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D); // LEA r32, m
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR reg,reg is shorter than MOV reg,0 but clobbers the flags (KILL cr).
instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

// Load pointer constant.
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant: one 32-bit immediate move per half of the pair.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long constant zero: XOR both halves (clobbers flags, KILL cr).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
// x87 float constant: load from the constant table, pop into dst.
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
// x87 float 0.0: FLDZ pushes the constant directly, no table access.
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
// x87 float 1.0: FLD1 pushes the constant directly, no table access.
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
"FSTP $dst" %} 6219 opcode(0xDD); /* DD /0, FLD m64real */ 6220 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6221 Pop_Reg_DPR(dst) ); 6222 ins_pipe( fpu_reg_mem ); 6223 %} 6224 6225 // Prefetch instructions for allocation. 6226 // Must be safe to execute with invalid address (cannot fault). 6227 6228 instruct prefetchAlloc0( memory mem ) %{ 6229 predicate(UseSSE==0 && AllocatePrefetchInstr!=3); 6230 match(PrefetchAllocation mem); 6231 ins_cost(0); 6232 size(0); 6233 format %{ "Prefetch allocation (non-SSE is empty encoding)" %} 6234 ins_encode(); 6235 ins_pipe(empty); 6236 %} 6237 6238 instruct prefetchAlloc( memory mem ) %{ 6239 predicate(AllocatePrefetchInstr==3); 6240 match( PrefetchAllocation mem ); 6241 ins_cost(100); 6242 6243 format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} 6244 ins_encode %{ 6245 __ prefetchw($mem$$Address); 6246 %} 6247 ins_pipe(ialu_mem); 6248 %} 6249 6250 instruct prefetchAllocNTA( memory mem ) %{ 6251 predicate(UseSSE>=1 && AllocatePrefetchInstr==0); 6252 match(PrefetchAllocation mem); 6253 ins_cost(100); 6254 6255 format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} 6256 ins_encode %{ 6257 __ prefetchnta($mem$$Address); 6258 %} 6259 ins_pipe(ialu_mem); 6260 %} 6261 6262 instruct prefetchAllocT0( memory mem ) %{ 6263 predicate(UseSSE>=1 && AllocatePrefetchInstr==1); 6264 match(PrefetchAllocation mem); 6265 ins_cost(100); 6266 6267 format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %} 6268 ins_encode %{ 6269 __ prefetcht0($mem$$Address); 6270 %} 6271 ins_pipe(ialu_mem); 6272 %} 6273 6274 instruct prefetchAllocT2( memory mem ) %{ 6275 predicate(UseSSE>=1 && AllocatePrefetchInstr==2); 6276 match(PrefetchAllocation mem); 6277 ins_cost(100); 6278 6279 format %{ "PREFETCHT2 $mem\t! 
Prefetch allocation into L2 cache for write" %} 6280 ins_encode %{ 6281 __ prefetcht2($mem$$Address); 6282 %} 6283 ins_pipe(ialu_mem); 6284 %} 6285 6286 //----------Store Instructions------------------------------------------------- 6287 6288 // Store Byte 6289 instruct storeB(memory mem, xRegI src) %{ 6290 match(Set mem (StoreB mem src)); 6291 6292 ins_cost(125); 6293 format %{ "MOV8 $mem,$src" %} 6294 opcode(0x88); 6295 ins_encode( OpcP, RegMem( src, mem ) ); 6296 ins_pipe( ialu_mem_reg ); 6297 %} 6298 6299 // Store Char/Short 6300 instruct storeC(memory mem, rRegI src) %{ 6301 match(Set mem (StoreC mem src)); 6302 6303 ins_cost(125); 6304 format %{ "MOV16 $mem,$src" %} 6305 opcode(0x89, 0x66); 6306 ins_encode( OpcS, OpcP, RegMem( src, mem ) ); 6307 ins_pipe( ialu_mem_reg ); 6308 %} 6309 6310 // Store Integer 6311 instruct storeI(memory mem, rRegI src) %{ 6312 match(Set mem (StoreI mem src)); 6313 6314 ins_cost(125); 6315 format %{ "MOV $mem,$src" %} 6316 opcode(0x89); 6317 ins_encode( OpcP, RegMem( src, mem ) ); 6318 ins_pipe( ialu_mem_reg ); 6319 %} 6320 6321 // Store Long 6322 instruct storeL(long_memory mem, eRegL src) %{ 6323 predicate(!((StoreLNode*)n)->require_atomic_access()); 6324 match(Set mem (StoreL mem src)); 6325 6326 ins_cost(200); 6327 format %{ "MOV $mem,$src.lo\n\t" 6328 "MOV $mem+4,$src.hi" %} 6329 opcode(0x89, 0x89); 6330 ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) ); 6331 ins_pipe( ialu_mem_long_reg ); 6332 %} 6333 6334 // Store Long to Integer 6335 instruct storeL2I(memory mem, eRegL src) %{ 6336 match(Set mem (StoreI mem (ConvL2I src))); 6337 6338 format %{ "MOV $mem,$src.lo\t# long -> int" %} 6339 ins_encode %{ 6340 __ movl($mem$$Address, $src$$Register); 6341 %} 6342 ins_pipe(ialu_mem_reg); 6343 %} 6344 6345 // Volatile Store Long. Must be atomic, so move it into 6346 // the FP TOS and then do a 64-bit FIST. 
Has to probe the 6347 // target address before the store (for null-ptr checks) 6348 // so the memory operand is used twice in the encoding. 6349 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{ 6350 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access()); 6351 match(Set mem (StoreL mem src)); 6352 effect( KILL cr ); 6353 ins_cost(400); 6354 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6355 "FILD $src\n\t" 6356 "FISTp $mem\t # 64-bit atomic volatile long store" %} 6357 opcode(0x3B); 6358 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); 6359 ins_pipe( fpu_reg_mem ); 6360 %} 6361 6362 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ 6363 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6364 match(Set mem (StoreL mem src)); 6365 effect( TEMP tmp, KILL cr ); 6366 ins_cost(380); 6367 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6368 "MOVSD $tmp,$src\n\t" 6369 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6370 ins_encode %{ 6371 __ cmpl(rax, $mem$$Address); 6372 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); 6373 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6374 %} 6375 ins_pipe( pipe_slow ); 6376 %} 6377 6378 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ 6379 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6380 match(Set mem (StoreL mem src)); 6381 effect( TEMP tmp2 , TEMP tmp, KILL cr ); 6382 ins_cost(360); 6383 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6384 "MOVD $tmp,$src.lo\n\t" 6385 "MOVD $tmp2,$src.hi\n\t" 6386 "PUNPCKLDQ $tmp,$tmp2\n\t" 6387 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6388 ins_encode %{ 6389 __ cmpl(rax, $mem$$Address); 6390 __ movdl($tmp$$XMMRegister, $src$$Register); 6391 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 6392 __ punpckldq($tmp$$XMMRegister, 
$tmp2$$XMMRegister); 6393 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6394 %} 6395 ins_pipe( pipe_slow ); 6396 %} 6397 6398 // Store Pointer; for storing unknown oops and raw pointers 6399 instruct storeP(memory mem, anyRegP src) %{ 6400 match(Set mem (StoreP mem src)); 6401 6402 ins_cost(125); 6403 format %{ "MOV $mem,$src" %} 6404 opcode(0x89); 6405 ins_encode( OpcP, RegMem( src, mem ) ); 6406 ins_pipe( ialu_mem_reg ); 6407 %} 6408 6409 // Store Integer Immediate 6410 instruct storeImmI(memory mem, immI src) %{ 6411 match(Set mem (StoreI mem src)); 6412 6413 ins_cost(150); 6414 format %{ "MOV $mem,$src" %} 6415 opcode(0xC7); /* C7 /0 */ 6416 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6417 ins_pipe( ialu_mem_imm ); 6418 %} 6419 6420 // Store Short/Char Immediate 6421 instruct storeImmI16(memory mem, immI16 src) %{ 6422 predicate(UseStoreImmI16); 6423 match(Set mem (StoreC mem src)); 6424 6425 ins_cost(150); 6426 format %{ "MOV16 $mem,$src" %} 6427 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ 6428 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src )); 6429 ins_pipe( ialu_mem_imm ); 6430 %} 6431 6432 // Store Pointer Immediate; null pointers or constant oops that do not 6433 // need card-mark barriers. 
6434 instruct storeImmP(memory mem, immP src) %{ 6435 match(Set mem (StoreP mem src)); 6436 6437 ins_cost(150); 6438 format %{ "MOV $mem,$src" %} 6439 opcode(0xC7); /* C7 /0 */ 6440 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6441 ins_pipe( ialu_mem_imm ); 6442 %} 6443 6444 // Store Byte Immediate 6445 instruct storeImmB(memory mem, immI8 src) %{ 6446 match(Set mem (StoreB mem src)); 6447 6448 ins_cost(150); 6449 format %{ "MOV8 $mem,$src" %} 6450 opcode(0xC6); /* C6 /0 */ 6451 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6452 ins_pipe( ialu_mem_imm ); 6453 %} 6454 6455 // Store CMS card-mark Immediate 6456 instruct storeImmCM(memory mem, immI8 src) %{ 6457 match(Set mem (StoreCM mem src)); 6458 6459 ins_cost(150); 6460 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %} 6461 opcode(0xC6); /* C6 /0 */ 6462 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6463 ins_pipe( ialu_mem_imm ); 6464 %} 6465 6466 // Store Double 6467 instruct storeDPR( memory mem, regDPR1 src) %{ 6468 predicate(UseSSE<=1); 6469 match(Set mem (StoreD mem src)); 6470 6471 ins_cost(100); 6472 format %{ "FST_D $mem,$src" %} 6473 opcode(0xDD); /* DD /2 */ 6474 ins_encode( enc_FPR_store(mem,src) ); 6475 ins_pipe( fpu_mem_reg ); 6476 %} 6477 6478 // Store double does rounding on x86 6479 instruct storeDPR_rounded( memory mem, regDPR1 src) %{ 6480 predicate(UseSSE<=1); 6481 match(Set mem (StoreD mem (RoundDouble src))); 6482 6483 ins_cost(100); 6484 format %{ "FST_D $mem,$src\t# round" %} 6485 opcode(0xDD); /* DD /2 */ 6486 ins_encode( enc_FPR_store(mem,src) ); 6487 ins_pipe( fpu_mem_reg ); 6488 %} 6489 6490 // Store XMM register to memory (double-precision floating points) 6491 // MOVSD instruction 6492 instruct storeD(memory mem, regD src) %{ 6493 predicate(UseSSE>=2); 6494 match(Set mem (StoreD mem src)); 6495 ins_cost(95); 6496 format %{ "MOVSD $mem,$src" %} 6497 ins_encode %{ 6498 __ movdbl($mem$$Address, $src$$XMMRegister); 6499 %} 6500 ins_pipe( pipe_slow ); 6501 
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// Store Float
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
match(Set dst (CastP2X src)); 6704 ins_cost(50); 6705 format %{ "MOV $dst, $src\t# CastP2X" %} 6706 ins_encode( enc_Copy( dst, src) ); 6707 ins_pipe( ialu_reg_reg ); 6708 %} 6709 6710 //----------Conditional Move--------------------------------------------------- 6711 // Conditional move 6712 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ 6713 predicate(!VM_Version::supports_cmov() ); 6714 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6715 ins_cost(200); 6716 format %{ "J$cop,us skip\t# signed cmove\n\t" 6717 "MOV $dst,$src\n" 6718 "skip:" %} 6719 ins_encode %{ 6720 Label Lskip; 6721 // Invert sense of branch from sense of CMOV 6722 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6723 __ movl($dst$$Register, $src$$Register); 6724 __ bind(Lskip); 6725 %} 6726 ins_pipe( pipe_cmov_reg ); 6727 %} 6728 6729 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ 6730 predicate(!VM_Version::supports_cmov() ); 6731 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6732 ins_cost(200); 6733 format %{ "J$cop,us skip\t# unsigned cmove\n\t" 6734 "MOV $dst,$src\n" 6735 "skip:" %} 6736 ins_encode %{ 6737 Label Lskip; 6738 // Invert sense of branch from sense of CMOV 6739 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6740 __ movl($dst$$Register, $src$$Register); 6741 __ bind(Lskip); 6742 %} 6743 ins_pipe( pipe_cmov_reg ); 6744 %} 6745 6746 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ 6747 predicate(VM_Version::supports_cmov() ); 6748 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6749 ins_cost(200); 6750 format %{ "CMOV$cop $dst,$src" %} 6751 opcode(0x0F,0x40); 6752 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6753 ins_pipe( pipe_cmov_reg ); 6754 %} 6755 6756 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ 6757 predicate(VM_Version::supports_cmov() ); 6758 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6759 ins_cost(200); 6760 
format %{ "CMOV$cop $dst,$src" %} 6761 opcode(0x0F,0x40); 6762 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6763 ins_pipe( pipe_cmov_reg ); 6764 %} 6765 6766 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ 6767 predicate(VM_Version::supports_cmov() ); 6768 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6769 ins_cost(200); 6770 expand %{ 6771 cmovI_regU(cop, cr, dst, src); 6772 %} 6773 %} 6774 6775 // Conditional move 6776 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ 6777 predicate(VM_Version::supports_cmov() ); 6778 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6779 ins_cost(250); 6780 format %{ "CMOV$cop $dst,$src" %} 6781 opcode(0x0F,0x40); 6782 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6783 ins_pipe( pipe_cmov_mem ); 6784 %} 6785 6786 // Conditional move 6787 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ 6788 predicate(VM_Version::supports_cmov() ); 6789 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6790 ins_cost(250); 6791 format %{ "CMOV$cop $dst,$src" %} 6792 opcode(0x0F,0x40); 6793 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6794 ins_pipe( pipe_cmov_mem ); 6795 %} 6796 6797 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ 6798 predicate(VM_Version::supports_cmov() ); 6799 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6800 ins_cost(250); 6801 expand %{ 6802 cmovI_memU(cop, cr, dst, src); 6803 %} 6804 %} 6805 6806 // Conditional move 6807 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6808 predicate(VM_Version::supports_cmov() ); 6809 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6810 ins_cost(200); 6811 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6812 opcode(0x0F,0x40); 6813 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6814 ins_pipe( pipe_cmov_reg ); 6815 %} 6816 6817 // Conditional move (non-P6 version) 6818 // Note: a CMoveP 
is generated for stubs and native wrappers 6819 // regardless of whether we are on a P6, so we 6820 // emulate a cmov here 6821 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6822 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6823 ins_cost(300); 6824 format %{ "Jn$cop skip\n\t" 6825 "MOV $dst,$src\t# pointer\n" 6826 "skip:" %} 6827 opcode(0x8b); 6828 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); 6829 ins_pipe( pipe_cmov_reg ); 6830 %} 6831 6832 // Conditional move 6833 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ 6834 predicate(VM_Version::supports_cmov() ); 6835 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6836 ins_cost(200); 6837 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6838 opcode(0x0F,0x40); 6839 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6840 ins_pipe( pipe_cmov_reg ); 6841 %} 6842 6843 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ 6844 predicate(VM_Version::supports_cmov() ); 6845 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6846 ins_cost(200); 6847 expand %{ 6848 cmovP_regU(cop, cr, dst, src); 6849 %} 6850 %} 6851 6852 // DISABLED: Requires the ADLC to emit a bottom_type call that 6853 // correctly meets the two pointer arguments; one is an incoming 6854 // register but the other is a memory operand. ALSO appears to 6855 // be buggy with implicit null checks. 
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6957 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6958 __ bind(skip); 6959 %} 6960 ins_pipe( pipe_slow ); 6961 %} 6962 6963 // unsigned version 6964 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 6965 predicate (UseSSE>=1); 6966 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6967 ins_cost(200); 6968 format %{ "Jn$cop skip\n\t" 6969 "MOVSS $dst,$src\t# float\n" 6970 "skip:" %} 6971 ins_encode %{ 6972 Label skip; 6973 // Invert sense of branch from sense of CMOV 6974 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6975 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6976 __ bind(skip); 6977 %} 6978 ins_pipe( pipe_slow ); 6979 %} 6980 6981 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 6982 predicate (UseSSE>=1); 6983 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6984 ins_cost(200); 6985 expand %{ 6986 fcmovF_regU(cop, cr, dst, src); 6987 %} 6988 %} 6989 6990 // unsigned version 6991 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 6992 predicate (UseSSE>=2); 6993 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6994 ins_cost(200); 6995 format %{ "Jn$cop skip\n\t" 6996 "MOVSD $dst,$src\t# float\n" 6997 "skip:" %} 6998 ins_encode %{ 6999 Label skip; 7000 // Invert sense of branch from sense of CMOV 7001 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 7002 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7003 __ bind(skip); 7004 %} 7005 ins_pipe( pipe_slow ); 7006 %} 7007 7008 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 7009 predicate (UseSSE>=2); 7010 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 7011 ins_cost(200); 7012 expand %{ 7013 fcmovD_regU(cop, cr, dst, src); 7014 %} 7015 %} 7016 7017 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ 7018 predicate(VM_Version::supports_cmov() ); 7019 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7020 ins_cost(200); 7021 format 
%{ "CMOV$cop $dst.lo,$src.lo\n\t" 7022 "CMOV$cop $dst.hi,$src.hi" %} 7023 opcode(0x0F,0x40); 7024 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7025 ins_pipe( pipe_cmov_reg_long ); 7026 %} 7027 7028 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ 7029 predicate(VM_Version::supports_cmov() ); 7030 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7031 ins_cost(200); 7032 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 7033 "CMOV$cop $dst.hi,$src.hi" %} 7034 opcode(0x0F,0x40); 7035 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7036 ins_pipe( pipe_cmov_reg_long ); 7037 %} 7038 7039 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ 7040 predicate(VM_Version::supports_cmov() ); 7041 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7042 ins_cost(200); 7043 expand %{ 7044 cmovL_regU(cop, cr, dst, src); 7045 %} 7046 %} 7047 7048 //----------Arithmetic Instructions-------------------------------------------- 7049 //----------Addition Instructions---------------------------------------------- 7050 7051 // Integer Addition Instructions 7052 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7053 match(Set dst (AddI dst src)); 7054 effect(KILL cr); 7055 7056 size(2); 7057 format %{ "ADD $dst,$src" %} 7058 opcode(0x03); 7059 ins_encode( OpcP, RegReg( dst, src) ); 7060 ins_pipe( ialu_reg_reg ); 7061 %} 7062 7063 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7064 match(Set dst (AddI dst src)); 7065 effect(KILL cr); 7066 7067 format %{ "ADD $dst,$src" %} 7068 opcode(0x81, 0x00); /* /0 id */ 7069 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7070 ins_pipe( ialu_reg ); 7071 %} 7072 7073 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{ 7074 predicate(UseIncDec); 7075 match(Set dst (AddI dst src)); 7076 effect(KILL cr); 7077 7078 size(1); 7079 format %{ "INC $dst" %} 7080 opcode(0x40); /* */ 7081 
ins_encode( Opc_plus( primary, dst ) ); 7082 ins_pipe( ialu_reg ); 7083 %} 7084 7085 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 7086 match(Set dst (AddI src0 src1)); 7087 ins_cost(110); 7088 7089 format %{ "LEA $dst,[$src0 + $src1]" %} 7090 opcode(0x8D); /* 0x8D /r */ 7091 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7092 ins_pipe( ialu_reg_reg ); 7093 %} 7094 7095 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 7096 match(Set dst (AddP src0 src1)); 7097 ins_cost(110); 7098 7099 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 7100 opcode(0x8D); /* 0x8D /r */ 7101 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7102 ins_pipe( ialu_reg_reg ); 7103 %} 7104 7105 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 7106 predicate(UseIncDec); 7107 match(Set dst (AddI dst src)); 7108 effect(KILL cr); 7109 7110 size(1); 7111 format %{ "DEC $dst" %} 7112 opcode(0x48); /* */ 7113 ins_encode( Opc_plus( primary, dst ) ); 7114 ins_pipe( ialu_reg ); 7115 %} 7116 7117 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 7118 match(Set dst (AddP dst src)); 7119 effect(KILL cr); 7120 7121 size(2); 7122 format %{ "ADD $dst,$src" %} 7123 opcode(0x03); 7124 ins_encode( OpcP, RegReg( dst, src) ); 7125 ins_pipe( ialu_reg_reg ); 7126 %} 7127 7128 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 7129 match(Set dst (AddP dst src)); 7130 effect(KILL cr); 7131 7132 format %{ "ADD $dst,$src" %} 7133 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7134 // ins_encode( RegImm( dst, src) ); 7135 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7136 ins_pipe( ialu_reg ); 7137 %} 7138 7139 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7140 match(Set dst (AddI dst (LoadI src))); 7141 effect(KILL cr); 7142 7143 ins_cost(150); 7144 format %{ "ADD $dst,$src" %} 7145 opcode(0x03); 7146 ins_encode( OpcP, RegMem( dst, src) ); 7147 ins_pipe( ialu_reg_mem ); 7148 %} 7149 7150 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 
7151 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7152 effect(KILL cr); 7153 7154 ins_cost(150); 7155 format %{ "ADD $dst,$src" %} 7156 opcode(0x01); /* Opcode 01 /r */ 7157 ins_encode( OpcP, RegMem( src, dst ) ); 7158 ins_pipe( ialu_mem_reg ); 7159 %} 7160 7161 // Add Memory with Immediate 7162 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7163 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7164 effect(KILL cr); 7165 7166 ins_cost(125); 7167 format %{ "ADD $dst,$src" %} 7168 opcode(0x81); /* Opcode 81 /0 id */ 7169 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7170 ins_pipe( ialu_mem_imm ); 7171 %} 7172 7173 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{ 7174 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7175 effect(KILL cr); 7176 7177 ins_cost(125); 7178 format %{ "INC $dst" %} 7179 opcode(0xFF); /* Opcode FF /0 */ 7180 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7181 ins_pipe( ialu_mem_imm ); 7182 %} 7183 7184 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7185 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7186 effect(KILL cr); 7187 7188 ins_cost(125); 7189 format %{ "DEC $dst" %} 7190 opcode(0xFF); /* Opcode FF /1 */ 7191 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7192 ins_pipe( ialu_mem_imm ); 7193 %} 7194 7195 7196 instruct checkCastPP( eRegP dst ) %{ 7197 match(Set dst (CheckCastPP dst)); 7198 7199 size(0); 7200 format %{ "#checkcastPP of $dst" %} 7201 ins_encode( /*empty encoding*/ ); 7202 ins_pipe( empty ); 7203 %} 7204 7205 instruct castPP( eRegP dst ) %{ 7206 match(Set dst (CastPP dst)); 7207 format %{ "#castPP of $dst" %} 7208 ins_encode( /*empty encoding*/ ); 7209 ins_pipe( empty ); 7210 %} 7211 7212 instruct castII( rRegI dst ) %{ 7213 match(Set dst (CastII dst)); 7214 format %{ "#castII of $dst" %} 7215 ins_encode( /*empty encoding*/ ); 7216 ins_cost(0); 7217 ins_pipe( empty ); 7218 %} 7219 7220 instruct castLL( eRegL dst ) %{ 7221 match(Set dst (CastLL dst)); 7222 format %{ 
"#castLL of $dst" %} 7223 ins_encode( /*empty encoding*/ ); 7224 ins_cost(0); 7225 ins_pipe( empty ); 7226 %} 7227 7228 instruct castFF( regF dst ) %{ 7229 predicate(UseSSE >= 1); 7230 match(Set dst (CastFF dst)); 7231 format %{ "#castFF of $dst" %} 7232 ins_encode( /*empty encoding*/ ); 7233 ins_cost(0); 7234 ins_pipe( empty ); 7235 %} 7236 7237 instruct castDD( regD dst ) %{ 7238 predicate(UseSSE >= 2); 7239 match(Set dst (CastDD dst)); 7240 format %{ "#castDD of $dst" %} 7241 ins_encode( /*empty encoding*/ ); 7242 ins_cost(0); 7243 ins_pipe( empty ); 7244 %} 7245 7246 instruct castFF_PR( regFPR dst ) %{ 7247 predicate(UseSSE < 1); 7248 match(Set dst (CastFF dst)); 7249 format %{ "#castFF of $dst" %} 7250 ins_encode( /*empty encoding*/ ); 7251 ins_cost(0); 7252 ins_pipe( empty ); 7253 %} 7254 7255 instruct castDD_PR( regDPR dst ) %{ 7256 predicate(UseSSE < 2); 7257 match(Set dst (CastDD dst)); 7258 format %{ "#castDD of $dst" %} 7259 ins_encode( /*empty encoding*/ ); 7260 ins_cost(0); 7261 ins_pipe( empty ); 7262 %} 7263 7264 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7265 7266 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7267 predicate(VM_Version::supports_cx8()); 7268 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7269 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); 7270 effect(KILL cr, KILL oldval); 7271 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7272 "MOV $res,0\n\t" 7273 "JNE,s fail\n\t" 7274 "MOV $res,1\n" 7275 "fail:" %} 7276 ins_encode( enc_cmpxchg8(mem_ptr), 7277 enc_flags_ne_to_boolean(res) ); 7278 ins_pipe( pipe_cmpxchg ); 7279 %} 7280 7281 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7282 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7283 match(Set res (WeakCompareAndSwapP mem_ptr 
(Binary oldval newval))); 7284 effect(KILL cr, KILL oldval); 7285 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7286 "MOV $res,0\n\t" 7287 "JNE,s fail\n\t" 7288 "MOV $res,1\n" 7289 "fail:" %} 7290 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7291 ins_pipe( pipe_cmpxchg ); 7292 %} 7293 7294 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7295 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval))); 7296 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval))); 7297 effect(KILL cr, KILL oldval); 7298 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7299 "MOV $res,0\n\t" 7300 "JNE,s fail\n\t" 7301 "MOV $res,1\n" 7302 "fail:" %} 7303 ins_encode( enc_cmpxchgb(mem_ptr), 7304 enc_flags_ne_to_boolean(res) ); 7305 ins_pipe( pipe_cmpxchg ); 7306 %} 7307 7308 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7309 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval))); 7310 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval))); 7311 effect(KILL cr, KILL oldval); 7312 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7313 "MOV $res,0\n\t" 7314 "JNE,s fail\n\t" 7315 "MOV $res,1\n" 7316 "fail:" %} 7317 ins_encode( enc_cmpxchgw(mem_ptr), 7318 enc_flags_ne_to_boolean(res) ); 7319 ins_pipe( pipe_cmpxchg ); 7320 %} 7321 7322 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7323 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7324 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); 7325 effect(KILL cr, KILL oldval); 7326 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7327 "MOV $res,0\n\t" 7328 "JNE,s fail\n\t" 7329 "MOV 
$res,1\n" 7330 "fail:" %} 7331 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7332 ins_pipe( pipe_cmpxchg ); 7333 %} 7334 7335 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7336 predicate(VM_Version::supports_cx8()); 7337 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); 7338 effect(KILL cr); 7339 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7340 ins_encode( enc_cmpxchg8(mem_ptr) ); 7341 ins_pipe( pipe_cmpxchg ); 7342 %} 7343 7344 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7345 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); 7346 effect(KILL cr); 7347 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7348 ins_encode( enc_cmpxchg(mem_ptr) ); 7349 ins_pipe( pipe_cmpxchg ); 7350 %} 7351 7352 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7353 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval))); 7354 effect(KILL cr); 7355 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7356 ins_encode( enc_cmpxchgb(mem_ptr) ); 7357 ins_pipe( pipe_cmpxchg ); 7358 %} 7359 7360 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7361 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval))); 7362 effect(KILL cr); 7363 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7364 ins_encode( enc_cmpxchgw(mem_ptr) ); 7365 ins_pipe( pipe_cmpxchg ); 7366 %} 7367 7368 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7369 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval))); 7370 effect(KILL cr); 7371 format %{ "CMPXCHG 
[$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7372 ins_encode( enc_cmpxchg(mem_ptr) ); 7373 ins_pipe( pipe_cmpxchg ); 7374 %} 7375 7376 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7377 predicate(n->as_LoadStore()->result_not_used()); 7378 match(Set dummy (GetAndAddB mem add)); 7379 effect(KILL cr); 7380 format %{ "ADDB [$mem],$add" %} 7381 ins_encode %{ 7382 __ lock(); 7383 __ addb($mem$$Address, $add$$constant); 7384 %} 7385 ins_pipe( pipe_cmpxchg ); 7386 %} 7387 7388 // Important to match to xRegI: only 8-bit regs. 7389 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{ 7390 match(Set newval (GetAndAddB mem newval)); 7391 effect(KILL cr); 7392 format %{ "XADDB [$mem],$newval" %} 7393 ins_encode %{ 7394 __ lock(); 7395 __ xaddb($mem$$Address, $newval$$Register); 7396 %} 7397 ins_pipe( pipe_cmpxchg ); 7398 %} 7399 7400 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7401 predicate(n->as_LoadStore()->result_not_used()); 7402 match(Set dummy (GetAndAddS mem add)); 7403 effect(KILL cr); 7404 format %{ "ADDS [$mem],$add" %} 7405 ins_encode %{ 7406 __ lock(); 7407 __ addw($mem$$Address, $add$$constant); 7408 %} 7409 ins_pipe( pipe_cmpxchg ); 7410 %} 7411 7412 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{ 7413 match(Set newval (GetAndAddS mem newval)); 7414 effect(KILL cr); 7415 format %{ "XADDS [$mem],$newval" %} 7416 ins_encode %{ 7417 __ lock(); 7418 __ xaddw($mem$$Address, $newval$$Register); 7419 %} 7420 ins_pipe( pipe_cmpxchg ); 7421 %} 7422 7423 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7424 predicate(n->as_LoadStore()->result_not_used()); 7425 match(Set dummy (GetAndAddI mem add)); 7426 effect(KILL cr); 7427 format %{ "ADDL [$mem],$add" %} 7428 ins_encode %{ 7429 __ lock(); 7430 __ addl($mem$$Address, $add$$constant); 7431 %} 7432 ins_pipe( pipe_cmpxchg ); 7433 %} 7434 7435 instruct xaddI( memory mem, rRegI 
newval, eFlagsReg cr) %{ 7436 match(Set newval (GetAndAddI mem newval)); 7437 effect(KILL cr); 7438 format %{ "XADDL [$mem],$newval" %} 7439 ins_encode %{ 7440 __ lock(); 7441 __ xaddl($mem$$Address, $newval$$Register); 7442 %} 7443 ins_pipe( pipe_cmpxchg ); 7444 %} 7445 7446 // Important to match to xRegI: only 8-bit regs. 7447 instruct xchgB( memory mem, xRegI newval) %{ 7448 match(Set newval (GetAndSetB mem newval)); 7449 format %{ "XCHGB $newval,[$mem]" %} 7450 ins_encode %{ 7451 __ xchgb($newval$$Register, $mem$$Address); 7452 %} 7453 ins_pipe( pipe_cmpxchg ); 7454 %} 7455 7456 instruct xchgS( memory mem, rRegI newval) %{ 7457 match(Set newval (GetAndSetS mem newval)); 7458 format %{ "XCHGW $newval,[$mem]" %} 7459 ins_encode %{ 7460 __ xchgw($newval$$Register, $mem$$Address); 7461 %} 7462 ins_pipe( pipe_cmpxchg ); 7463 %} 7464 7465 instruct xchgI( memory mem, rRegI newval) %{ 7466 match(Set newval (GetAndSetI mem newval)); 7467 format %{ "XCHGL $newval,[$mem]" %} 7468 ins_encode %{ 7469 __ xchgl($newval$$Register, $mem$$Address); 7470 %} 7471 ins_pipe( pipe_cmpxchg ); 7472 %} 7473 7474 instruct xchgP( memory mem, pRegP newval) %{ 7475 match(Set newval (GetAndSetP mem newval)); 7476 format %{ "XCHGL $newval,[$mem]" %} 7477 ins_encode %{ 7478 __ xchgl($newval$$Register, $mem$$Address); 7479 %} 7480 ins_pipe( pipe_cmpxchg ); 7481 %} 7482 7483 //----------Subtraction Instructions------------------------------------------- 7484 7485 // Integer Subtraction Instructions 7486 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7487 match(Set dst (SubI dst src)); 7488 effect(KILL cr); 7489 7490 size(2); 7491 format %{ "SUB $dst,$src" %} 7492 opcode(0x2B); 7493 ins_encode( OpcP, RegReg( dst, src) ); 7494 ins_pipe( ialu_reg_reg ); 7495 %} 7496 7497 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7498 match(Set dst (SubI dst src)); 7499 effect(KILL cr); 7500 7501 format %{ "SUB $dst,$src" %} 7502 opcode(0x81,0x05); /* Opcode 81 /5 */ 7503 // ins_encode( 
RegImm( dst, src) ); 7504 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7505 ins_pipe( ialu_reg ); 7506 %} 7507 7508 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7509 match(Set dst (SubI dst (LoadI src))); 7510 effect(KILL cr); 7511 7512 ins_cost(150); 7513 format %{ "SUB $dst,$src" %} 7514 opcode(0x2B); 7515 ins_encode( OpcP, RegMem( dst, src) ); 7516 ins_pipe( ialu_reg_mem ); 7517 %} 7518 7519 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7520 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7521 effect(KILL cr); 7522 7523 ins_cost(150); 7524 format %{ "SUB $dst,$src" %} 7525 opcode(0x29); /* Opcode 29 /r */ 7526 ins_encode( OpcP, RegMem( src, dst ) ); 7527 ins_pipe( ialu_mem_reg ); 7528 %} 7529 7530 // Subtract from a pointer 7531 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{ 7532 match(Set dst (AddP dst (SubI zero src))); 7533 effect(KILL cr); 7534 7535 size(2); 7536 format %{ "SUB $dst,$src" %} 7537 opcode(0x2B); 7538 ins_encode( OpcP, RegReg( dst, src) ); 7539 ins_pipe( ialu_reg_reg ); 7540 %} 7541 7542 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{ 7543 match(Set dst (SubI zero dst)); 7544 effect(KILL cr); 7545 7546 size(2); 7547 format %{ "NEG $dst" %} 7548 opcode(0xF7,0x03); // Opcode F7 /3 7549 ins_encode( OpcP, RegOpc( dst ) ); 7550 ins_pipe( ialu_reg ); 7551 %} 7552 7553 //----------Multiplication/Division Instructions------------------------------- 7554 // Integer Multiplication Instructions 7555 // Multiply Register 7556 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7557 match(Set dst (MulI dst src)); 7558 effect(KILL cr); 7559 7560 size(3); 7561 ins_cost(300); 7562 format %{ "IMUL $dst,$src" %} 7563 opcode(0xAF, 0x0F); 7564 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7565 ins_pipe( ialu_reg_reg_alu0 ); 7566 %} 7567 7568 // Multiply 32-bit Immediate 7569 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7570 match(Set dst (MulI src 
imm)); 7571 effect(KILL cr); 7572 7573 ins_cost(300); 7574 format %{ "IMUL $dst,$src,$imm" %} 7575 opcode(0x69); /* 69 /r id */ 7576 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7577 ins_pipe( ialu_reg_reg_alu0 ); 7578 %} 7579 7580 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7581 match(Set dst src); 7582 effect(KILL cr); 7583 7584 // Note that this is artificially increased to make it more expensive than loadConL 7585 ins_cost(250); 7586 format %{ "MOV EAX,$src\t// low word only" %} 7587 opcode(0xB8); 7588 ins_encode( LdImmL_Lo(dst, src) ); 7589 ins_pipe( ialu_reg_fat ); 7590 %} 7591 7592 // Multiply by 32-bit Immediate, taking the shifted high order results 7593 // (special case for shift by 32) 7594 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7595 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7596 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7597 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7598 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7599 effect(USE src1, KILL cr); 7600 7601 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7602 ins_cost(0*100 + 1*400 - 150); 7603 format %{ "IMUL EDX:EAX,$src1" %} 7604 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7605 ins_pipe( pipe_slow ); 7606 %} 7607 7608 // Multiply by 32-bit Immediate, taking the shifted high order results 7609 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7610 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7611 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7612 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7613 
_kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7614 effect(USE src1, KILL cr); 7615 7616 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7617 ins_cost(1*100 + 1*400 - 150); 7618 format %{ "IMUL EDX:EAX,$src1\n\t" 7619 "SAR EDX,$cnt-32" %} 7620 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7621 ins_pipe( pipe_slow ); 7622 %} 7623 7624 // Multiply Memory 32-bit Immediate 7625 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7626 match(Set dst (MulI (LoadI src) imm)); 7627 effect(KILL cr); 7628 7629 ins_cost(300); 7630 format %{ "IMUL $dst,$src,$imm" %} 7631 opcode(0x69); /* 69 /r id */ 7632 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 7633 ins_pipe( ialu_reg_mem_alu0 ); 7634 %} 7635 7636 // Multiply Memory 7637 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7638 match(Set dst (MulI dst (LoadI src))); 7639 effect(KILL cr); 7640 7641 ins_cost(350); 7642 format %{ "IMUL $dst,$src" %} 7643 opcode(0xAF, 0x0F); 7644 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 7645 ins_pipe( ialu_reg_mem_alu0 ); 7646 %} 7647 7648 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr) 7649 %{ 7650 match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3))); 7651 effect(KILL cr, KILL src2); 7652 7653 expand %{ mulI_eReg(dst, src1, cr); 7654 mulI_eReg(src2, src3, cr); 7655 addI_eReg(dst, src2, cr); %} 7656 %} 7657 7658 // Multiply Register Int to Long 7659 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7660 // Basic Idea: long = (long)int * (long)int 7661 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7662 effect(DEF dst, USE src, USE src1, KILL flags); 7663 7664 ins_cost(300); 7665 format %{ "IMUL $dst,$src1" %} 7666 7667 ins_encode( long_int_multiply( dst, src1 ) ); 7668 ins_pipe( ialu_reg_reg_alu0 ); 7669 %} 7670 7671 instruct mulIS_eReg(eADXRegL dst, 
immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7672 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7673 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7674 effect(KILL flags); 7675 7676 ins_cost(300); 7677 format %{ "MUL $dst,$src1" %} 7678 7679 ins_encode( long_uint_multiply(dst, src1) ); 7680 ins_pipe( ialu_reg_reg_alu0 ); 7681 %} 7682 7683 // Multiply Register Long 7684 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7685 match(Set dst (MulL dst src)); 7686 effect(KILL cr, TEMP tmp); 7687 ins_cost(4*100+3*400); 7688 // Basic idea: lo(result) = lo(x_lo * y_lo) 7689 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7690 format %{ "MOV $tmp,$src.lo\n\t" 7691 "IMUL $tmp,EDX\n\t" 7692 "MOV EDX,$src.hi\n\t" 7693 "IMUL EDX,EAX\n\t" 7694 "ADD $tmp,EDX\n\t" 7695 "MUL EDX:EAX,$src.lo\n\t" 7696 "ADD EDX,$tmp" %} 7697 ins_encode( long_multiply( dst, src, tmp ) ); 7698 ins_pipe( pipe_slow ); 7699 %} 7700 7701 // Multiply Register Long where the left operand's high 32 bits are zero 7702 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7703 predicate(is_operand_hi32_zero(n->in(1))); 7704 match(Set dst (MulL dst src)); 7705 effect(KILL cr, TEMP tmp); 7706 ins_cost(2*100+2*400); 7707 // Basic idea: lo(result) = lo(x_lo * y_lo) 7708 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7709 format %{ "MOV $tmp,$src.hi\n\t" 7710 "IMUL $tmp,EAX\n\t" 7711 "MUL EDX:EAX,$src.lo\n\t" 7712 "ADD EDX,$tmp" %} 7713 ins_encode %{ 7714 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7715 __ imull($tmp$$Register, rax); 7716 __ mull($src$$Register); 7717 __ addl(rdx, $tmp$$Register); 7718 %} 7719 ins_pipe( pipe_slow ); 7720 %} 7721 7722 // Multiply Register Long where the right operand's high 32 bits are zero 7723 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7724 
predicate(is_operand_hi32_zero(n->in(2))); 7725 match(Set dst (MulL dst src)); 7726 effect(KILL cr, TEMP tmp); 7727 ins_cost(2*100+2*400); 7728 // Basic idea: lo(result) = lo(x_lo * y_lo) 7729 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7730 format %{ "MOV $tmp,$src.lo\n\t" 7731 "IMUL $tmp,EDX\n\t" 7732 "MUL EDX:EAX,$src.lo\n\t" 7733 "ADD EDX,$tmp" %} 7734 ins_encode %{ 7735 __ movl($tmp$$Register, $src$$Register); 7736 __ imull($tmp$$Register, rdx); 7737 __ mull($src$$Register); 7738 __ addl(rdx, $tmp$$Register); 7739 %} 7740 ins_pipe( pipe_slow ); 7741 %} 7742 7743 // Multiply Register Long where the left and the right operands' high 32 bits are zero 7744 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7745 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7746 match(Set dst (MulL dst src)); 7747 effect(KILL cr); 7748 ins_cost(1*400); 7749 // Basic idea: lo(result) = lo(x_lo * y_lo) 7750 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7751 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7752 ins_encode %{ 7753 __ mull($src$$Register); 7754 %} 7755 ins_pipe( pipe_slow ); 7756 %} 7757 7758 // Multiply Register Long by small constant 7759 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7760 match(Set dst (MulL dst src)); 7761 effect(KILL cr, TEMP tmp); 7762 ins_cost(2*100+2*400); 7763 size(12); 7764 // Basic idea: lo(result) = lo(src * EAX) 7765 // hi(result) = hi(src * EAX) + lo(src * EDX) 7766 format %{ "IMUL $tmp,EDX,$src\n\t" 7767 "MOV EDX,$src\n\t" 7768 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7769 "ADD EDX,$tmp" %} 7770 ins_encode( long_multiply_con( dst, src, tmp ) ); 7771 ins_pipe( pipe_slow ); 7772 %} 7773 7774 // Integer DIV with Register 7775 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7776 match(Set rax (DivI rax div)); 7777 effect(KILL rdx, KILL cr); 
7778 size(26); 7779 ins_cost(30*100+10*100); 7780 format %{ "CMP EAX,0x80000000\n\t" 7781 "JNE,s normal\n\t" 7782 "XOR EDX,EDX\n\t" 7783 "CMP ECX,-1\n\t" 7784 "JE,s done\n" 7785 "normal: CDQ\n\t" 7786 "IDIV $div\n\t" 7787 "done:" %} 7788 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7789 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7790 ins_pipe( ialu_reg_reg_alu0 ); 7791 %} 7792 7793 // Divide Register Long 7794 instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7795 match(Set dst (DivL src1 src2)); 7796 effect(CALL); 7797 ins_cost(10000); 7798 format %{ "PUSH $src1.hi\n\t" 7799 "PUSH $src1.lo\n\t" 7800 "PUSH $src2.hi\n\t" 7801 "PUSH $src2.lo\n\t" 7802 "CALL SharedRuntime::ldiv\n\t" 7803 "ADD ESP,16" %} 7804 ins_encode( long_div(src1,src2) ); 7805 ins_pipe( pipe_slow ); 7806 %} 7807 7808 // Integer DIVMOD with Register, both quotient and mod results 7809 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7810 match(DivModI rax div); 7811 effect(KILL cr); 7812 size(26); 7813 ins_cost(30*100+10*100); 7814 format %{ "CMP EAX,0x80000000\n\t" 7815 "JNE,s normal\n\t" 7816 "XOR EDX,EDX\n\t" 7817 "CMP ECX,-1\n\t" 7818 "JE,s done\n" 7819 "normal: CDQ\n\t" 7820 "IDIV $div\n\t" 7821 "done:" %} 7822 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7823 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7824 ins_pipe( pipe_slow ); 7825 %} 7826 7827 // Integer MOD with Register 7828 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7829 match(Set rdx (ModI rax div)); 7830 effect(KILL rax, KILL cr); 7831 7832 size(26); 7833 ins_cost(300); 7834 format %{ "CDQ\n\t" 7835 "IDIV $div" %} 7836 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7837 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7838 ins_pipe( ialu_reg_reg_alu0 ); 7839 %} 7840 7841 // Remainder Register Long 7842 instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ 7843 match(Set dst (ModL src1 src2)); 7844 effect(CALL); 7845 ins_cost(10000); 7846 format %{ "PUSH $src1.hi\n\t" 7847 "PUSH 
$src1.lo\n\t" 7848 "PUSH $src2.hi\n\t" 7849 "PUSH $src2.lo\n\t" 7850 "CALL SharedRuntime::lrem\n\t" 7851 "ADD ESP,16" %} 7852 ins_encode( long_mod(src1,src2) ); 7853 ins_pipe( pipe_slow ); 7854 %} 7855 7856 // Divide Register Long (no special case since divisor != -1) 7857 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7858 match(Set dst (DivL dst imm)); 7859 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7860 ins_cost(1000); 7861 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7862 "XOR $tmp2,$tmp2\n\t" 7863 "CMP $tmp,EDX\n\t" 7864 "JA,s fast\n\t" 7865 "MOV $tmp2,EAX\n\t" 7866 "MOV EAX,EDX\n\t" 7867 "MOV EDX,0\n\t" 7868 "JLE,s pos\n\t" 7869 "LNEG EAX : $tmp2\n\t" 7870 "DIV $tmp # unsigned division\n\t" 7871 "XCHG EAX,$tmp2\n\t" 7872 "DIV $tmp\n\t" 7873 "LNEG $tmp2 : EAX\n\t" 7874 "JMP,s done\n" 7875 "pos:\n\t" 7876 "DIV $tmp\n\t" 7877 "XCHG EAX,$tmp2\n" 7878 "fast:\n\t" 7879 "DIV $tmp\n" 7880 "done:\n\t" 7881 "MOV EDX,$tmp2\n\t" 7882 "NEG EDX:EAX # if $imm < 0" %} 7883 ins_encode %{ 7884 int con = (int)$imm$$constant; 7885 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7886 int pcon = (con > 0) ? con : -con; 7887 Label Lfast, Lpos, Ldone; 7888 7889 __ movl($tmp$$Register, pcon); 7890 __ xorl($tmp2$$Register,$tmp2$$Register); 7891 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7892 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7893 7894 __ movl($tmp2$$Register, $dst$$Register); // save 7895 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7896 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7897 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7898 7899 // Negative dividend. 
7900 // convert value to positive to use unsigned division 7901 __ lneg($dst$$Register, $tmp2$$Register); 7902 __ divl($tmp$$Register); 7903 __ xchgl($dst$$Register, $tmp2$$Register); 7904 __ divl($tmp$$Register); 7905 // revert result back to negative 7906 __ lneg($tmp2$$Register, $dst$$Register); 7907 __ jmpb(Ldone); 7908 7909 __ bind(Lpos); 7910 __ divl($tmp$$Register); // Use unsigned division 7911 __ xchgl($dst$$Register, $tmp2$$Register); 7912 // Fallthrow for final divide, tmp2 has 32 bit hi result 7913 7914 __ bind(Lfast); 7915 // fast path: src is positive 7916 __ divl($tmp$$Register); // Use unsigned division 7917 7918 __ bind(Ldone); 7919 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7920 if (con < 0) { 7921 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7922 } 7923 %} 7924 ins_pipe( pipe_slow ); 7925 %} 7926 7927 // Remainder Register Long (remainder fit into 32 bits) 7928 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7929 match(Set dst (ModL dst imm)); 7930 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7931 ins_cost(1000); 7932 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7933 "CMP $tmp,EDX\n\t" 7934 "JA,s fast\n\t" 7935 "MOV $tmp2,EAX\n\t" 7936 "MOV EAX,EDX\n\t" 7937 "MOV EDX,0\n\t" 7938 "JLE,s pos\n\t" 7939 "LNEG EAX : $tmp2\n\t" 7940 "DIV $tmp # unsigned division\n\t" 7941 "MOV EAX,$tmp2\n\t" 7942 "DIV $tmp\n\t" 7943 "NEG EDX\n\t" 7944 "JMP,s done\n" 7945 "pos:\n\t" 7946 "DIV $tmp\n\t" 7947 "MOV EAX,$tmp2\n" 7948 "fast:\n\t" 7949 "DIV $tmp\n" 7950 "done:\n\t" 7951 "MOV EAX,EDX\n\t" 7952 "SAR EDX,31\n\t" %} 7953 ins_encode %{ 7954 int con = (int)$imm$$constant; 7955 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7956 int pcon = (con > 0) ? 
con : -con; 7957 Label Lfast, Lpos, Ldone; 7958 7959 __ movl($tmp$$Register, pcon); 7960 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7961 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7962 7963 __ movl($tmp2$$Register, $dst$$Register); // save 7964 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7965 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7966 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7967 7968 // Negative dividend. 7969 // convert value to positive to use unsigned division 7970 __ lneg($dst$$Register, $tmp2$$Register); 7971 __ divl($tmp$$Register); 7972 __ movl($dst$$Register, $tmp2$$Register); 7973 __ divl($tmp$$Register); 7974 // revert remainder back to negative 7975 __ negl(HIGH_FROM_LOW($dst$$Register)); 7976 __ jmpb(Ldone); 7977 7978 __ bind(Lpos); 7979 __ divl($tmp$$Register); 7980 __ movl($dst$$Register, $tmp2$$Register); 7981 7982 __ bind(Lfast); 7983 // fast path: src is positive 7984 __ divl($tmp$$Register); 7985 7986 __ bind(Ldone); 7987 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7988 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 7989 7990 %} 7991 ins_pipe( pipe_slow ); 7992 %} 7993 7994 // Integer Shift Instructions 7995 // Shift Left by one 7996 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 7997 match(Set dst (LShiftI dst shift)); 7998 effect(KILL cr); 7999 8000 size(2); 8001 format %{ "SHL $dst,$shift" %} 8002 opcode(0xD1, 0x4); /* D1 /4 */ 8003 ins_encode( OpcP, RegOpc( dst ) ); 8004 ins_pipe( ialu_reg ); 8005 %} 8006 8007 // Shift Left by 8-bit immediate 8008 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8009 match(Set dst (LShiftI dst shift)); 8010 effect(KILL cr); 8011 8012 size(3); 8013 format %{ "SHL $dst,$shift" %} 8014 opcode(0xC1, 0x4); /* C1 /4 ib */ 8015 ins_encode( RegOpcImm( dst, shift) ); 8016 ins_pipe( ialu_reg ); 8017 %} 8018 8019 // Shift Left by variable 8020 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);            // shift instructions clobber the condition codes

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);            // SAR clobbers the condition codes

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one, memory form (read-modify-write in place)
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  // secondary (the /7 reg-field extension) selects SAR in the D1 group
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate, memory form
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable (shift count in CL, hence eCXRegI)
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);            // SHR clobbers the condition codes

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Matched as a single sign-extending byte move (MOVSX), which requires
// an 8-bit-addressable source register — hence xRegI for src.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
8128 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ 8129 match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); 8130 8131 size(3); 8132 format %{ "MOVSX $dst,$src :16" %} 8133 ins_encode %{ 8134 __ movswl($dst$$Register, $src$$Register); 8135 %} 8136 ins_pipe(ialu_reg_reg); 8137 %} 8138 8139 8140 // Logical Shift Right by variable 8141 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8142 match(Set dst (URShiftI dst shift)); 8143 effect(KILL cr); 8144 8145 size(2); 8146 format %{ "SHR $dst,$shift" %} 8147 opcode(0xD3, 0x5); /* D3 /5 */ 8148 ins_encode( OpcP, RegOpc( dst ) ); 8149 ins_pipe( ialu_reg_reg ); 8150 %} 8151 8152 8153 //----------Logical Instructions----------------------------------------------- 8154 //----------Integer Logical Instructions--------------------------------------- 8155 // And Instructions 8156 // And Register with Register 8157 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8158 match(Set dst (AndI dst src)); 8159 effect(KILL cr); 8160 8161 size(2); 8162 format %{ "AND $dst,$src" %} 8163 opcode(0x23); 8164 ins_encode( OpcP, RegReg( dst, src) ); 8165 ins_pipe( ialu_reg_reg ); 8166 %} 8167 8168 // And Register with Immediate 8169 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8170 match(Set dst (AndI dst src)); 8171 effect(KILL cr); 8172 8173 format %{ "AND $dst,$src" %} 8174 opcode(0x81,0x04); /* Opcode 81 /4 */ 8175 // ins_encode( RegImm( dst, src) ); 8176 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8177 ins_pipe( ialu_reg ); 8178 %} 8179 8180 // And Register with Memory 8181 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8182 match(Set dst (AndI dst (LoadI src))); 8183 effect(KILL cr); 8184 8185 ins_cost(150); 8186 format %{ "AND $dst,$src" %} 8187 opcode(0x23); 8188 ins_encode( OpcP, RegMem( dst, src) ); 8189 ins_pipe( ialu_reg_mem ); 8190 %} 8191 8192 // And Memory with Register 8193 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8194 match(Set dst 
(StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2 (matched from the XorI-with-minus-one idiom).
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// ANDN with memory second operand.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit (dst = (0 - src) & src).
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// BLSI with memory operand.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to and including lowest set bit (dst = (src - 1) ^ src).
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// BLSMSK with memory operand.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit (dst = (src - 1) & src).
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// BLSR with memory operand.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or Register with a pointer converted to int (CastP2X)
instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  // Only a true rotate: the two shift counts must sum to 0 mod 32.
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  // Only a true rotate: the two shift counts must sum to 0 mod 32.
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// Encoded as NOT; note: no flags effect is declared for this form.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set
dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Plain register copy (expand helper for convI2B).
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG/ADC sequence: expand helper for convI2B (dst must already hold src).
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer-to-int copy (expand helper for convP2B).
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG/ADC sequence: expand helper for convP2B (dst must already hold src).
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// Branch-free -1/0 mask from a signed compare (p < q ? -1 : 0).
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    Label done;  // NOTE(review): declared but unused in this encoding
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: arithmetic shift propagates the sign bit.
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction overflow check via CMP: flags set without clobbering op1.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Negation overflow check: 0 - op2 encoded as NEG.
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Multiply-overflow check with immediate; result goes to a scratch register.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Absolute Instructions
// Branch-free abs: tmp = src >> 31 (sign mask); dst = (src ^ tmp) - tmp.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
  %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long (0 - dst)
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long ANDN: apply ANDNL independently to the low and high 32-bit halves.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2,
immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Address of the upper 32 bits of the long memory operand (disp + 4).
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSI: isolate lowest set bit.  Low half first; the high half is
// only processed when the low-half result is zero (JNZ skips it).
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the upper 32 bits of the long memory operand (disp + 4).
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSMSK: high half is processed only when the carry out of the
// low half indicates the mask extends past bit 31 (JNC skips it).
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the upper 32 bits of the long memory operand (disp + 4).
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Long BLSR: high half is processed only when the carry out of the
// low half is set (JNC skips it); otherwise hi is copied unchanged.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the upper 32 bits of the long memory operand (disp + 4).
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ),
Long_OpcSErm_Hi( dst, src ) ); 9201 ins_pipe( ialu_reg_long ); 9202 %} 9203 9204 // Or Long Register with Memory 9205 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9206 match(Set dst (OrL dst (LoadL mem))); 9207 effect(KILL cr); 9208 ins_cost(125); 9209 format %{ "OR $dst.lo,$mem\n\t" 9210 "OR $dst.hi,$mem+4" %} 9211 opcode(0x0B,0x0B); 9212 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9213 ins_pipe( ialu_reg_long_mem ); 9214 %} 9215 9216 // Xor Long Register with Register 9217 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9218 match(Set dst (XorL dst src)); 9219 effect(KILL cr); 9220 format %{ "XOR $dst.lo,$src.lo\n\t" 9221 "XOR $dst.hi,$src.hi" %} 9222 opcode(0x33,0x33); 9223 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9224 ins_pipe( ialu_reg_reg_long ); 9225 %} 9226 9227 // Xor Long Register with Immediate -1 9228 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9229 match(Set dst (XorL dst imm)); 9230 format %{ "NOT $dst.lo\n\t" 9231 "NOT $dst.hi" %} 9232 ins_encode %{ 9233 __ notl($dst$$Register); 9234 __ notl(HIGH_FROM_LOW($dst$$Register)); 9235 %} 9236 ins_pipe( ialu_reg_long ); 9237 %} 9238 9239 // Xor Long Register with Immediate 9240 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9241 match(Set dst (XorL dst src)); 9242 effect(KILL cr); 9243 format %{ "XOR $dst.lo,$src.lo\n\t" 9244 "XOR $dst.hi,$src.hi" %} 9245 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9246 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9247 ins_pipe( ialu_reg_long ); 9248 %} 9249 9250 // Xor Long Register with Memory 9251 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9252 match(Set dst (XorL dst (LoadL mem))); 9253 effect(KILL cr); 9254 ins_cost(125); 9255 format %{ "XOR $dst.lo,$mem\n\t" 9256 "XOR $dst.hi,$mem+4" %} 9257 opcode(0x33,0x33); 9258 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9259 ins_pipe( ialu_reg_long_mem ); 
%}

// Shift Left Long by 1
// A 64-bit left shift by one is emitted as an add-with-carry chain:
// ADD lo,lo doubles the low word and sets CF; ADC hi,hi doubles the
// high word and folds the carried-out bit in.  Guarded by the
// UseNewLongLShift flag.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);               // ADD/ADC clobber EFLAGS
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
// Same ADD/ADC doubling trick as the shift-by-1 form, repeated twice.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);               // ADD/ADC clobber EFLAGS
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
// Same ADD/ADC doubling trick, repeated three times (one pair per shift
// count).  Larger constant shifts fall through to the SHLD-based rules.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);               // ADD/ADC clobber EFLAGS
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left
Long by 1-31 9320 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9321 match(Set dst (LShiftL dst cnt)); 9322 effect(KILL cr); 9323 ins_cost(200); 9324 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9325 "SHL $dst.lo,$cnt" %} 9326 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9327 ins_encode( move_long_small_shift(dst,cnt) ); 9328 ins_pipe( ialu_reg_long ); 9329 %} 9330 9331 // Shift Left Long by 32-63 9332 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9333 match(Set dst (LShiftL dst cnt)); 9334 effect(KILL cr); 9335 ins_cost(300); 9336 format %{ "MOV $dst.hi,$dst.lo\n" 9337 "\tSHL $dst.hi,$cnt-32\n" 9338 "\tXOR $dst.lo,$dst.lo" %} 9339 opcode(0xC1, 0x4); /* C1 /4 ib */ 9340 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9341 ins_pipe( ialu_reg_long ); 9342 %} 9343 9344 // Shift Left Long by variable 9345 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9346 match(Set dst (LShiftL dst shift)); 9347 effect(KILL cr); 9348 ins_cost(500+200); 9349 size(17); 9350 format %{ "TEST $shift,32\n\t" 9351 "JEQ,s small\n\t" 9352 "MOV $dst.hi,$dst.lo\n\t" 9353 "XOR $dst.lo,$dst.lo\n" 9354 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9355 "SHL $dst.lo,$shift" %} 9356 ins_encode( shift_left_long( dst, shift ) ); 9357 ins_pipe( pipe_slow ); 9358 %} 9359 9360 // Shift Right Long by 1-31 9361 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9362 match(Set dst (URShiftL dst cnt)); 9363 effect(KILL cr); 9364 ins_cost(200); 9365 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9366 "SHR $dst.hi,$cnt" %} 9367 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9368 ins_encode( move_long_small_shift(dst,cnt) ); 9369 ins_pipe( ialu_reg_long ); 9370 %} 9371 9372 // Shift Right Long by 32-63 9373 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9374 match(Set dst (URShiftL dst cnt)); 9375 effect(KILL cr); 9376 ins_cost(300); 9377 format %{ "MOV $dst.lo,$dst.hi\n" 9378 "\tSHR $dst.lo,$cnt-32\n" 9379 "\tXOR 
$dst.hi,$dst.hi" %} 9380 opcode(0xC1, 0x5); /* C1 /5 ib */ 9381 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9382 ins_pipe( ialu_reg_long ); 9383 %} 9384 9385 // Shift Right Long by variable 9386 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9387 match(Set dst (URShiftL dst shift)); 9388 effect(KILL cr); 9389 ins_cost(600); 9390 size(17); 9391 format %{ "TEST $shift,32\n\t" 9392 "JEQ,s small\n\t" 9393 "MOV $dst.lo,$dst.hi\n\t" 9394 "XOR $dst.hi,$dst.hi\n" 9395 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9396 "SHR $dst.hi,$shift" %} 9397 ins_encode( shift_right_long( dst, shift ) ); 9398 ins_pipe( pipe_slow ); 9399 %} 9400 9401 // Shift Right Long by 1-31 9402 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9403 match(Set dst (RShiftL dst cnt)); 9404 effect(KILL cr); 9405 ins_cost(200); 9406 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9407 "SAR $dst.hi,$cnt" %} 9408 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9409 ins_encode( move_long_small_shift(dst,cnt) ); 9410 ins_pipe( ialu_reg_long ); 9411 %} 9412 9413 // Shift Right Long by 32-63 9414 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9415 match(Set dst (RShiftL dst cnt)); 9416 effect(KILL cr); 9417 ins_cost(300); 9418 format %{ "MOV $dst.lo,$dst.hi\n" 9419 "\tSAR $dst.lo,$cnt-32\n" 9420 "\tSAR $dst.hi,31" %} 9421 opcode(0xC1, 0x7); /* C1 /7 ib */ 9422 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9423 ins_pipe( ialu_reg_long ); 9424 %} 9425 9426 // Shift Right arithmetic Long by variable 9427 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9428 match(Set dst (RShiftL dst shift)); 9429 effect(KILL cr); 9430 ins_cost(600); 9431 size(18); 9432 format %{ "TEST $shift,32\n\t" 9433 "JEQ,s small\n\t" 9434 "MOV $dst.lo,$dst.hi\n\t" 9435 "SAR $dst.hi,31\n" 9436 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9437 "SAR $dst.hi,$shift" %} 9438 ins_encode( shift_right_arith_long( dst, shift ) ); 9439 ins_pipe( pipe_slow ); 9440 %} 9441 9442 
9443 //----------Double Instructions------------------------------------------------ 9444 // Double Math 9445 9446 // Compare & branch 9447 9448 // P6 version of float compare, sets condition codes in EFLAGS 9449 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9450 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9451 match(Set cr (CmpD src1 src2)); 9452 effect(KILL rax); 9453 ins_cost(150); 9454 format %{ "FLD $src1\n\t" 9455 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9456 "JNP exit\n\t" 9457 "MOV ah,1 // saw a NaN, set CF\n\t" 9458 "SAHF\n" 9459 "exit:\tNOP // avoid branch to branch" %} 9460 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9461 ins_encode( Push_Reg_DPR(src1), 9462 OpcP, RegOpc(src2), 9463 cmpF_P6_fixup ); 9464 ins_pipe( pipe_slow ); 9465 %} 9466 9467 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9468 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9469 match(Set cr (CmpD src1 src2)); 9470 ins_cost(150); 9471 format %{ "FLD $src1\n\t" 9472 "FUCOMIP ST,$src2 // P6 instruction" %} 9473 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9474 ins_encode( Push_Reg_DPR(src1), 9475 OpcP, RegOpc(src2)); 9476 ins_pipe( pipe_slow ); 9477 %} 9478 9479 // Compare & branch 9480 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9481 predicate(UseSSE<=1); 9482 match(Set cr (CmpD src1 src2)); 9483 effect(KILL rax); 9484 ins_cost(200); 9485 format %{ "FLD $src1\n\t" 9486 "FCOMp $src2\n\t" 9487 "FNSTSW AX\n\t" 9488 "TEST AX,0x400\n\t" 9489 "JZ,s flags\n\t" 9490 "MOV AH,1\t# unordered treat as LT\n" 9491 "flags:\tSAHF" %} 9492 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9493 ins_encode( Push_Reg_DPR(src1), 9494 OpcP, RegOpc(src2), 9495 fpu_flags); 9496 ins_pipe( pipe_slow ); 9497 %} 9498 9499 // Compare vs zero into -1,0,1 9500 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9501 predicate(UseSSE<=1); 9502 match(Set dst (CmpD3 src1 zero)); 9503 effect(KILL 
cr, KILL rax); 9504 ins_cost(280); 9505 format %{ "FTSTD $dst,$src1" %} 9506 opcode(0xE4, 0xD9); 9507 ins_encode( Push_Reg_DPR(src1), 9508 OpcS, OpcP, PopFPU, 9509 CmpF_Result(dst)); 9510 ins_pipe( pipe_slow ); 9511 %} 9512 9513 // Compare into -1,0,1 9514 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9515 predicate(UseSSE<=1); 9516 match(Set dst (CmpD3 src1 src2)); 9517 effect(KILL cr, KILL rax); 9518 ins_cost(300); 9519 format %{ "FCMPD $dst,$src1,$src2" %} 9520 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9521 ins_encode( Push_Reg_DPR(src1), 9522 OpcP, RegOpc(src2), 9523 CmpF_Result(dst)); 9524 ins_pipe( pipe_slow ); 9525 %} 9526 9527 // float compare and set condition codes in EFLAGS by XMM regs 9528 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9529 predicate(UseSSE>=2); 9530 match(Set cr (CmpD src1 src2)); 9531 ins_cost(145); 9532 format %{ "UCOMISD $src1,$src2\n\t" 9533 "JNP,s exit\n\t" 9534 "PUSHF\t# saw NaN, set CF\n\t" 9535 "AND [rsp], #0xffffff2b\n\t" 9536 "POPF\n" 9537 "exit:" %} 9538 ins_encode %{ 9539 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9540 emit_cmpfp_fixup(_masm); 9541 %} 9542 ins_pipe( pipe_slow ); 9543 %} 9544 9545 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9546 predicate(UseSSE>=2); 9547 match(Set cr (CmpD src1 src2)); 9548 ins_cost(100); 9549 format %{ "UCOMISD $src1,$src2" %} 9550 ins_encode %{ 9551 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9552 %} 9553 ins_pipe( pipe_slow ); 9554 %} 9555 9556 // float compare and set condition codes in EFLAGS by XMM regs 9557 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9558 predicate(UseSSE>=2); 9559 match(Set cr (CmpD src1 (LoadD src2))); 9560 ins_cost(145); 9561 format %{ "UCOMISD $src1,$src2\n\t" 9562 "JNP,s exit\n\t" 9563 "PUSHF\t# saw NaN, set CF\n\t" 9564 "AND [rsp], #0xffffff2b\n\t" 9565 "POPF\n" 9566 "exit:" %} 9567 ins_encode %{ 9568 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9569 
emit_cmpfp_fixup(_masm); 9570 %} 9571 ins_pipe( pipe_slow ); 9572 %} 9573 9574 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9575 predicate(UseSSE>=2); 9576 match(Set cr (CmpD src1 (LoadD src2))); 9577 ins_cost(100); 9578 format %{ "UCOMISD $src1,$src2" %} 9579 ins_encode %{ 9580 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9581 %} 9582 ins_pipe( pipe_slow ); 9583 %} 9584 9585 // Compare into -1,0,1 in XMM 9586 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9587 predicate(UseSSE>=2); 9588 match(Set dst (CmpD3 src1 src2)); 9589 effect(KILL cr); 9590 ins_cost(255); 9591 format %{ "UCOMISD $src1, $src2\n\t" 9592 "MOV $dst, #-1\n\t" 9593 "JP,s done\n\t" 9594 "JB,s done\n\t" 9595 "SETNE $dst\n\t" 9596 "MOVZB $dst, $dst\n" 9597 "done:" %} 9598 ins_encode %{ 9599 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9600 emit_cmpfp3(_masm, $dst$$Register); 9601 %} 9602 ins_pipe( pipe_slow ); 9603 %} 9604 9605 // Compare into -1,0,1 in XMM and memory 9606 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9607 predicate(UseSSE>=2); 9608 match(Set dst (CmpD3 src1 (LoadD src2))); 9609 effect(KILL cr); 9610 ins_cost(275); 9611 format %{ "UCOMISD $src1, $src2\n\t" 9612 "MOV $dst, #-1\n\t" 9613 "JP,s done\n\t" 9614 "JB,s done\n\t" 9615 "SETNE $dst\n\t" 9616 "MOVZB $dst, $dst\n" 9617 "done:" %} 9618 ins_encode %{ 9619 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9620 emit_cmpfp3(_masm, $dst$$Register); 9621 %} 9622 ins_pipe( pipe_slow ); 9623 %} 9624 9625 9626 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9627 predicate (UseSSE <=1); 9628 match(Set dst (SubD dst src)); 9629 9630 format %{ "FLD $src\n\t" 9631 "DSUBp $dst,ST" %} 9632 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9633 ins_cost(150); 9634 ins_encode( Push_Reg_DPR(src), 9635 OpcP, RegOpc(dst) ); 9636 ins_pipe( fpu_reg_reg ); 9637 %} 9638 9639 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9640 predicate (UseSSE <=1); 9641 
match(Set dst (RoundDouble (SubD src1 src2))); 9642 ins_cost(250); 9643 9644 format %{ "FLD $src2\n\t" 9645 "DSUB ST,$src1\n\t" 9646 "FSTP_D $dst\t# D-round" %} 9647 opcode(0xD8, 0x5); 9648 ins_encode( Push_Reg_DPR(src2), 9649 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9650 ins_pipe( fpu_mem_reg_reg ); 9651 %} 9652 9653 9654 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9655 predicate (UseSSE <=1); 9656 match(Set dst (SubD dst (LoadD src))); 9657 ins_cost(150); 9658 9659 format %{ "FLD $src\n\t" 9660 "DSUBp $dst,ST" %} 9661 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9662 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9663 OpcP, RegOpc(dst) ); 9664 ins_pipe( fpu_reg_mem ); 9665 %} 9666 9667 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9668 predicate (UseSSE<=1); 9669 match(Set dst (AbsD src)); 9670 ins_cost(100); 9671 format %{ "FABS" %} 9672 opcode(0xE1, 0xD9); 9673 ins_encode( OpcS, OpcP ); 9674 ins_pipe( fpu_reg_reg ); 9675 %} 9676 9677 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9678 predicate(UseSSE<=1); 9679 match(Set dst (NegD src)); 9680 ins_cost(100); 9681 format %{ "FCHS" %} 9682 opcode(0xE0, 0xD9); 9683 ins_encode( OpcS, OpcP ); 9684 ins_pipe( fpu_reg_reg ); 9685 %} 9686 9687 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9688 predicate(UseSSE<=1); 9689 match(Set dst (AddD dst src)); 9690 format %{ "FLD $src\n\t" 9691 "DADD $dst,ST" %} 9692 size(4); 9693 ins_cost(150); 9694 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9695 ins_encode( Push_Reg_DPR(src), 9696 OpcP, RegOpc(dst) ); 9697 ins_pipe( fpu_reg_reg ); 9698 %} 9699 9700 9701 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9702 predicate(UseSSE<=1); 9703 match(Set dst (RoundDouble (AddD src1 src2))); 9704 ins_cost(250); 9705 9706 format %{ "FLD $src2\n\t" 9707 "DADD ST,$src1\n\t" 9708 "FSTP_D $dst\t# D-round" %} 9709 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9710 ins_encode( Push_Reg_DPR(src2), 9711 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9712 ins_pipe( 
fpu_mem_reg_reg ); 9713 %} 9714 9715 9716 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9717 predicate(UseSSE<=1); 9718 match(Set dst (AddD dst (LoadD src))); 9719 ins_cost(150); 9720 9721 format %{ "FLD $src\n\t" 9722 "DADDp $dst,ST" %} 9723 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9724 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9725 OpcP, RegOpc(dst) ); 9726 ins_pipe( fpu_reg_mem ); 9727 %} 9728 9729 // add-to-memory 9730 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9731 predicate(UseSSE<=1); 9732 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9733 ins_cost(150); 9734 9735 format %{ "FLD_D $dst\n\t" 9736 "DADD ST,$src\n\t" 9737 "FST_D $dst" %} 9738 opcode(0xDD, 0x0); 9739 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9740 Opcode(0xD8), RegOpc(src), 9741 set_instruction_start, 9742 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9743 ins_pipe( fpu_reg_mem ); 9744 %} 9745 9746 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9747 predicate(UseSSE<=1); 9748 match(Set dst (AddD dst con)); 9749 ins_cost(125); 9750 format %{ "FLD1\n\t" 9751 "DADDp $dst,ST" %} 9752 ins_encode %{ 9753 __ fld1(); 9754 __ faddp($dst$$reg); 9755 %} 9756 ins_pipe(fpu_reg); 9757 %} 9758 9759 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9760 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9761 match(Set dst (AddD dst con)); 9762 ins_cost(200); 9763 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9764 "DADDp $dst,ST" %} 9765 ins_encode %{ 9766 __ fld_d($constantaddress($con)); 9767 __ faddp($dst$$reg); 9768 %} 9769 ins_pipe(fpu_reg_mem); 9770 %} 9771 9772 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9773 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9774 match(Set dst (RoundDouble (AddD src con))); 9775 ins_cost(200); 9776 format %{ "FLD_D [$constantaddress]\t# load from 
constant table: double=$con\n\t" 9777 "DADD ST,$src\n\t" 9778 "FSTP_D $dst\t# D-round" %} 9779 ins_encode %{ 9780 __ fld_d($constantaddress($con)); 9781 __ fadd($src$$reg); 9782 __ fstp_d(Address(rsp, $dst$$disp)); 9783 %} 9784 ins_pipe(fpu_mem_reg_con); 9785 %} 9786 9787 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9788 predicate(UseSSE<=1); 9789 match(Set dst (MulD dst src)); 9790 format %{ "FLD $src\n\t" 9791 "DMULp $dst,ST" %} 9792 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9793 ins_cost(150); 9794 ins_encode( Push_Reg_DPR(src), 9795 OpcP, RegOpc(dst) ); 9796 ins_pipe( fpu_reg_reg ); 9797 %} 9798 9799 // Strict FP instruction biases argument before multiply then 9800 // biases result to avoid double rounding of subnormals. 9801 // 9802 // scale arg1 by multiplying arg1 by 2^(-15360) 9803 // load arg2 9804 // multiply scaled arg1 by arg2 9805 // rescale product by 2^(15360) 9806 // 9807 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9808 predicate( UseSSE<=1 && Compile::current()->has_method() ); 9809 match(Set dst (MulD dst src)); 9810 ins_cost(1); // Select this instruction for all FP double multiplies 9811 9812 format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" 9813 "DMULp $dst,ST\n\t" 9814 "FLD $src\n\t" 9815 "DMULp $dst,ST\n\t" 9816 "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" 9817 "DMULp $dst,ST\n\t" %} 9818 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9819 ins_encode( strictfp_bias1(dst), 9820 Push_Reg_DPR(src), 9821 OpcP, RegOpc(dst), 9822 strictfp_bias2(dst) ); 9823 ins_pipe( fpu_reg_reg ); 9824 %} 9825 9826 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9827 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9828 match(Set dst (MulD dst con)); 9829 ins_cost(200); 9830 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9831 "DMULp $dst,ST" %} 9832 ins_encode %{ 9833 __ fld_d($constantaddress($con)); 9834 __ fmulp($dst$$reg); 9835 %} 9836 
ins_pipe(fpu_reg_mem); 9837 %} 9838 9839 9840 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9841 predicate( UseSSE<=1 ); 9842 match(Set dst (MulD dst (LoadD src))); 9843 ins_cost(200); 9844 format %{ "FLD_D $src\n\t" 9845 "DMULp $dst,ST" %} 9846 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9847 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9848 OpcP, RegOpc(dst) ); 9849 ins_pipe( fpu_reg_mem ); 9850 %} 9851 9852 // 9853 // Cisc-alternate to reg-reg multiply 9854 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9855 predicate( UseSSE<=1 ); 9856 match(Set dst (MulD src (LoadD mem))); 9857 ins_cost(250); 9858 format %{ "FLD_D $mem\n\t" 9859 "DMUL ST,$src\n\t" 9860 "FSTP_D $dst" %} 9861 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9862 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9863 OpcReg_FPR(src), 9864 Pop_Reg_DPR(dst) ); 9865 ins_pipe( fpu_reg_reg_mem ); 9866 %} 9867 9868 9869 // MACRO3 -- addDPR a mulDPR 9870 // This instruction is a '2-address' instruction in that the result goes 9871 // back to src2. This eliminates a move from the macro; possibly the 9872 // register allocator will have to add it back (and maybe not). 
9873 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9874 predicate( UseSSE<=1 ); 9875 match(Set src2 (AddD (MulD src0 src1) src2)); 9876 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9877 "DMUL ST,$src1\n\t" 9878 "DADDp $src2,ST" %} 9879 ins_cost(250); 9880 opcode(0xDD); /* LoadD DD /0 */ 9881 ins_encode( Push_Reg_FPR(src0), 9882 FMul_ST_reg(src1), 9883 FAddP_reg_ST(src2) ); 9884 ins_pipe( fpu_reg_reg_reg ); 9885 %} 9886 9887 9888 // MACRO3 -- subDPR a mulDPR 9889 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9890 predicate( UseSSE<=1 ); 9891 match(Set src2 (SubD (MulD src0 src1) src2)); 9892 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9893 "DMUL ST,$src1\n\t" 9894 "DSUBRp $src2,ST" %} 9895 ins_cost(250); 9896 ins_encode( Push_Reg_FPR(src0), 9897 FMul_ST_reg(src1), 9898 Opcode(0xDE), Opc_plus(0xE0,src2)); 9899 ins_pipe( fpu_reg_reg_reg ); 9900 %} 9901 9902 9903 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9904 predicate( UseSSE<=1 ); 9905 match(Set dst (DivD dst src)); 9906 9907 format %{ "FLD $src\n\t" 9908 "FDIVp $dst,ST" %} 9909 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9910 ins_cost(150); 9911 ins_encode( Push_Reg_DPR(src), 9912 OpcP, RegOpc(dst) ); 9913 ins_pipe( fpu_reg_reg ); 9914 %} 9915 9916 // Strict FP instruction biases argument before division then 9917 // biases result, to avoid double rounding of subnormals. 
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// FIX: the original carried a second, stray "predicate (UseSSE<=1);"
// clause before the match rule.  An ADL instruct takes a single
// predicate; the combined clause below (matching the parallel
// strictfp_mulDPR_reg rule) already subsumes the UseSSE<=1 condition.
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() );
  match(Set dst (DivD dst src));
  ins_cost(01); // Select this instruction for all FP double divides

  format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Double remainder on the x87 stack (UseSSE<=1 operands).
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
              emitModDPR(),
              Push_Result_Mod_DPR(src),
              Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Double remainder for XMM operands: spill both args through the stack
// to the x87 unit, loop on FPREM until the C2 flag clears, then move the
// result back to an XMM register and pop the FPU stack.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

instruct atanDPR_reg(regDPR dst,
regDPR src) %{ 9984 predicate (UseSSE<=1); 9985 match(Set dst(AtanD dst src)); 9986 format %{ "DATA $dst,$src" %} 9987 opcode(0xD9, 0xF3); 9988 ins_encode( Push_Reg_DPR(src), 9989 OpcP, OpcS, RegOpc(dst) ); 9990 ins_pipe( pipe_slow ); 9991 %} 9992 9993 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9994 predicate (UseSSE>=2); 9995 match(Set dst(AtanD dst src)); 9996 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9997 format %{ "DATA $dst,$src" %} 9998 opcode(0xD9, 0xF3); 9999 ins_encode( Push_SrcD(src), 10000 OpcP, OpcS, Push_ResultD(dst) ); 10001 ins_pipe( pipe_slow ); 10002 %} 10003 10004 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 10005 predicate (UseSSE<=1); 10006 match(Set dst (SqrtD src)); 10007 format %{ "DSQRT $dst,$src" %} 10008 opcode(0xFA, 0xD9); 10009 ins_encode( Push_Reg_DPR(src), 10010 OpcS, OpcP, Pop_Reg_DPR(dst) ); 10011 ins_pipe( pipe_slow ); 10012 %} 10013 10014 //-------------Float Instructions------------------------------- 10015 // Float Math 10016 10017 // Code for float compare: 10018 // fcompp(); 10019 // fwait(); fnstsw_ax(); 10020 // sahf(); 10021 // movl(dst, unordered_result); 10022 // jcc(Assembler::parity, exit); 10023 // movl(dst, less_result); 10024 // jcc(Assembler::below, exit); 10025 // movl(dst, equal_result); 10026 // jcc(Assembler::equal, exit); 10027 // movl(dst, greater_result); 10028 // exit: 10029 10030 // P6 version of float compare, sets condition codes in EFLAGS 10031 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10032 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10033 match(Set cr (CmpF src1 src2)); 10034 effect(KILL rax); 10035 ins_cost(150); 10036 format %{ "FLD $src1\n\t" 10037 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10038 "JNP exit\n\t" 10039 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10040 "SAHF\n" 10041 "exit:\tNOP // avoid branch to branch" %} 10042 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10043 ins_encode( Push_Reg_DPR(src1), 
10044 OpcP, RegOpc(src2), 10045 cmpF_P6_fixup ); 10046 ins_pipe( pipe_slow ); 10047 %} 10048 10049 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10050 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10051 match(Set cr (CmpF src1 src2)); 10052 ins_cost(100); 10053 format %{ "FLD $src1\n\t" 10054 "FUCOMIP ST,$src2 // P6 instruction" %} 10055 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10056 ins_encode( Push_Reg_DPR(src1), 10057 OpcP, RegOpc(src2)); 10058 ins_pipe( pipe_slow ); 10059 %} 10060 10061 10062 // Compare & branch 10063 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10064 predicate(UseSSE == 0); 10065 match(Set cr (CmpF src1 src2)); 10066 effect(KILL rax); 10067 ins_cost(200); 10068 format %{ "FLD $src1\n\t" 10069 "FCOMp $src2\n\t" 10070 "FNSTSW AX\n\t" 10071 "TEST AX,0x400\n\t" 10072 "JZ,s flags\n\t" 10073 "MOV AH,1\t# unordered treat as LT\n" 10074 "flags:\tSAHF" %} 10075 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10076 ins_encode( Push_Reg_DPR(src1), 10077 OpcP, RegOpc(src2), 10078 fpu_flags); 10079 ins_pipe( pipe_slow ); 10080 %} 10081 10082 // Compare vs zero into -1,0,1 10083 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10084 predicate(UseSSE == 0); 10085 match(Set dst (CmpF3 src1 zero)); 10086 effect(KILL cr, KILL rax); 10087 ins_cost(280); 10088 format %{ "FTSTF $dst,$src1" %} 10089 opcode(0xE4, 0xD9); 10090 ins_encode( Push_Reg_DPR(src1), 10091 OpcS, OpcP, PopFPU, 10092 CmpF_Result(dst)); 10093 ins_pipe( pipe_slow ); 10094 %} 10095 10096 // Compare into -1,0,1 10097 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10098 predicate(UseSSE == 0); 10099 match(Set dst (CmpF3 src1 src2)); 10100 effect(KILL cr, KILL rax); 10101 ins_cost(300); 10102 format %{ "FCMPF $dst,$src1,$src2" %} 10103 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10104 ins_encode( Push_Reg_DPR(src1), 10105 OpcP, RegOpc(src2), 10106 CmpF_Result(dst)); 
10107 ins_pipe( pipe_slow ); 10108 %} 10109 10110 // float compare and set condition codes in EFLAGS by XMM regs 10111 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10112 predicate(UseSSE>=1); 10113 match(Set cr (CmpF src1 src2)); 10114 ins_cost(145); 10115 format %{ "UCOMISS $src1,$src2\n\t" 10116 "JNP,s exit\n\t" 10117 "PUSHF\t# saw NaN, set CF\n\t" 10118 "AND [rsp], #0xffffff2b\n\t" 10119 "POPF\n" 10120 "exit:" %} 10121 ins_encode %{ 10122 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10123 emit_cmpfp_fixup(_masm); 10124 %} 10125 ins_pipe( pipe_slow ); 10126 %} 10127 10128 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10129 predicate(UseSSE>=1); 10130 match(Set cr (CmpF src1 src2)); 10131 ins_cost(100); 10132 format %{ "UCOMISS $src1,$src2" %} 10133 ins_encode %{ 10134 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10135 %} 10136 ins_pipe( pipe_slow ); 10137 %} 10138 10139 // float compare and set condition codes in EFLAGS by XMM regs 10140 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10141 predicate(UseSSE>=1); 10142 match(Set cr (CmpF src1 (LoadF src2))); 10143 ins_cost(165); 10144 format %{ "UCOMISS $src1,$src2\n\t" 10145 "JNP,s exit\n\t" 10146 "PUSHF\t# saw NaN, set CF\n\t" 10147 "AND [rsp], #0xffffff2b\n\t" 10148 "POPF\n" 10149 "exit:" %} 10150 ins_encode %{ 10151 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10152 emit_cmpfp_fixup(_masm); 10153 %} 10154 ins_pipe( pipe_slow ); 10155 %} 10156 10157 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10158 predicate(UseSSE>=1); 10159 match(Set cr (CmpF src1 (LoadF src2))); 10160 ins_cost(100); 10161 format %{ "UCOMISS $src1,$src2" %} 10162 ins_encode %{ 10163 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10164 %} 10165 ins_pipe( pipe_slow ); 10166 %} 10167 10168 // Compare into -1,0,1 in XMM 10169 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10170 predicate(UseSSE>=1); 10171 match(Set dst (CmpF3 src1 src2)); 10172 
effect(KILL cr); 10173 ins_cost(255); 10174 format %{ "UCOMISS $src1, $src2\n\t" 10175 "MOV $dst, #-1\n\t" 10176 "JP,s done\n\t" 10177 "JB,s done\n\t" 10178 "SETNE $dst\n\t" 10179 "MOVZB $dst, $dst\n" 10180 "done:" %} 10181 ins_encode %{ 10182 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10183 emit_cmpfp3(_masm, $dst$$Register); 10184 %} 10185 ins_pipe( pipe_slow ); 10186 %} 10187 10188 // Compare into -1,0,1 in XMM and memory 10189 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10190 predicate(UseSSE>=1); 10191 match(Set dst (CmpF3 src1 (LoadF src2))); 10192 effect(KILL cr); 10193 ins_cost(275); 10194 format %{ "UCOMISS $src1, $src2\n\t" 10195 "MOV $dst, #-1\n\t" 10196 "JP,s done\n\t" 10197 "JB,s done\n\t" 10198 "SETNE $dst\n\t" 10199 "MOVZB $dst, $dst\n" 10200 "done:" %} 10201 ins_encode %{ 10202 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10203 emit_cmpfp3(_masm, $dst$$Register); 10204 %} 10205 ins_pipe( pipe_slow ); 10206 %} 10207 10208 // Spill to obtain 24-bit precision 10209 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10210 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10211 match(Set dst (SubF src1 src2)); 10212 10213 format %{ "FSUB $dst,$src1 - $src2" %} 10214 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10215 ins_encode( Push_Reg_FPR(src1), 10216 OpcReg_FPR(src2), 10217 Pop_Mem_FPR(dst) ); 10218 ins_pipe( fpu_mem_reg_reg ); 10219 %} 10220 // 10221 // This instruction does not round to 24-bits 10222 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10223 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10224 match(Set dst (SubF dst src)); 10225 10226 format %{ "FSUB $dst,$src" %} 10227 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10228 ins_encode( Push_Reg_FPR(src), 10229 OpcP, RegOpc(dst) ); 10230 ins_pipe( fpu_reg_reg ); 10231 %} 10232 10233 // Spill to obtain 24-bit precision 10234 instruct addFPR24_reg(stackSlotF dst, regFPR src1, 
regFPR src2) %{ 10235 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10236 match(Set dst (AddF src1 src2)); 10237 10238 format %{ "FADD $dst,$src1,$src2" %} 10239 opcode(0xD8, 0x0); /* D8 C0+i */ 10240 ins_encode( Push_Reg_FPR(src2), 10241 OpcReg_FPR(src1), 10242 Pop_Mem_FPR(dst) ); 10243 ins_pipe( fpu_mem_reg_reg ); 10244 %} 10245 // 10246 // This instruction does not round to 24-bits 10247 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10248 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10249 match(Set dst (AddF dst src)); 10250 10251 format %{ "FLD $src\n\t" 10252 "FADDp $dst,ST" %} 10253 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10254 ins_encode( Push_Reg_FPR(src), 10255 OpcP, RegOpc(dst) ); 10256 ins_pipe( fpu_reg_reg ); 10257 %} 10258 10259 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10260 predicate(UseSSE==0); 10261 match(Set dst (AbsF src)); 10262 ins_cost(100); 10263 format %{ "FABS" %} 10264 opcode(0xE1, 0xD9); 10265 ins_encode( OpcS, OpcP ); 10266 ins_pipe( fpu_reg_reg ); 10267 %} 10268 10269 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10270 predicate(UseSSE==0); 10271 match(Set dst (NegF src)); 10272 ins_cost(100); 10273 format %{ "FCHS" %} 10274 opcode(0xE0, 0xD9); 10275 ins_encode( OpcS, OpcP ); 10276 ins_pipe( fpu_reg_reg ); 10277 %} 10278 10279 // Cisc-alternate to addFPR_reg 10280 // Spill to obtain 24-bit precision 10281 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10282 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10283 match(Set dst (AddF src1 (LoadF src2))); 10284 10285 format %{ "FLD $src2\n\t" 10286 "FADD ST,$src1\n\t" 10287 "FSTP_S $dst" %} 10288 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10289 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10290 OpcReg_FPR(src1), 10291 Pop_Mem_FPR(dst) ); 10292 ins_pipe( fpu_mem_reg_mem ); 10293 %} 10294 // 10295 // Cisc-alternate to addFPR_reg 10296 // This instruction does not round to 
24-bits 10297 instruct addFPR_reg_mem(regFPR dst, memory src) %{ 10298 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10299 match(Set dst (AddF dst (LoadF src))); 10300 10301 format %{ "FADD $dst,$src" %} 10302 opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ 10303 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 10304 OpcP, RegOpc(dst) ); 10305 ins_pipe( fpu_reg_mem ); 10306 %} 10307 10308 // // Following two instructions for _222_mpegaudio 10309 // Spill to obtain 24-bit precision 10310 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ 10311 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10312 match(Set dst (AddF src1 src2)); 10313 10314 format %{ "FADD $dst,$src1,$src2" %} 10315 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10316 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), 10317 OpcReg_FPR(src2), 10318 Pop_Mem_FPR(dst) ); 10319 ins_pipe( fpu_mem_reg_mem ); 10320 %} 10321 10322 // Cisc-spill variant 10323 // Spill to obtain 24-bit precision 10324 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ 10325 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10326 match(Set dst (AddF src1 (LoadF src2))); 10327 10328 format %{ "FADD $dst,$src1,$src2 cisc" %} 10329 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10330 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10331 set_instruction_start, 10332 OpcP, RMopc_Mem(secondary,src1), 10333 Pop_Mem_FPR(dst) ); 10334 ins_pipe( fpu_mem_mem_mem ); 10335 %} 10336 10337 // Spill to obtain 24-bit precision 10338 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10339 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10340 match(Set dst (AddF src1 src2)); 10341 10342 format %{ "FADD $dst,$src1,$src2" %} 10343 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10344 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10345 
set_instruction_start, 10346 OpcP, RMopc_Mem(secondary,src1), 10347 Pop_Mem_FPR(dst) ); 10348 ins_pipe( fpu_mem_mem_mem ); 10349 %} 10350 10351 10352 // Spill to obtain 24-bit precision 10353 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10354 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10355 match(Set dst (AddF src con)); 10356 format %{ "FLD $src\n\t" 10357 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10358 "FSTP_S $dst" %} 10359 ins_encode %{ 10360 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10361 __ fadd_s($constantaddress($con)); 10362 __ fstp_s(Address(rsp, $dst$$disp)); 10363 %} 10364 ins_pipe(fpu_mem_reg_con); 10365 %} 10366 // 10367 // This instruction does not round to 24-bits 10368 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10369 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10370 match(Set dst (AddF src con)); 10371 format %{ "FLD $src\n\t" 10372 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10373 "FSTP $dst" %} 10374 ins_encode %{ 10375 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10376 __ fadd_s($constantaddress($con)); 10377 __ fstp_d($dst$$reg); 10378 %} 10379 ins_pipe(fpu_reg_reg_con); 10380 %} 10381 10382 // Spill to obtain 24-bit precision 10383 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10384 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10385 match(Set dst (MulF src1 src2)); 10386 10387 format %{ "FLD $src1\n\t" 10388 "FMUL $src2\n\t" 10389 "FSTP_S $dst" %} 10390 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10391 ins_encode( Push_Reg_FPR(src1), 10392 OpcReg_FPR(src2), 10393 Pop_Mem_FPR(dst) ); 10394 ins_pipe( fpu_mem_reg_reg ); 10395 %} 10396 // 10397 // This instruction does not round to 24-bits 10398 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10399 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10400 match(Set dst 
(MulF src1 src2)); 10401 10402 format %{ "FLD $src1\n\t" 10403 "FMUL $src2\n\t" 10404 "FSTP_S $dst" %} 10405 opcode(0xD8, 0x1); /* D8 C8+i */ 10406 ins_encode( Push_Reg_FPR(src2), 10407 OpcReg_FPR(src1), 10408 Pop_Reg_FPR(dst) ); 10409 ins_pipe( fpu_reg_reg_reg ); 10410 %} 10411 10412 10413 // Spill to obtain 24-bit precision 10414 // Cisc-alternate to reg-reg multiply 10415 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10416 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10417 match(Set dst (MulF src1 (LoadF src2))); 10418 10419 format %{ "FLD_S $src2\n\t" 10420 "FMUL $src1\n\t" 10421 "FSTP_S $dst" %} 10422 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10423 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10424 OpcReg_FPR(src1), 10425 Pop_Mem_FPR(dst) ); 10426 ins_pipe( fpu_mem_reg_mem ); 10427 %} 10428 // 10429 // This instruction does not round to 24-bits 10430 // Cisc-alternate to reg-reg multiply 10431 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10432 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10433 match(Set dst (MulF src1 (LoadF src2))); 10434 10435 format %{ "FMUL $dst,$src1,$src2" %} 10436 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10437 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10438 OpcReg_FPR(src1), 10439 Pop_Reg_FPR(dst) ); 10440 ins_pipe( fpu_reg_reg_mem ); 10441 %} 10442 10443 // Spill to obtain 24-bit precision 10444 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10445 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10446 match(Set dst (MulF src1 src2)); 10447 10448 format %{ "FMUL $dst,$src1,$src2" %} 10449 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10450 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10451 set_instruction_start, 10452 OpcP, RMopc_Mem(secondary,src1), 10453 Pop_Mem_FPR(dst) ); 10454 ins_pipe( fpu_mem_mem_mem ); 10455 %} 10456 10457 // 
Spill to obtain 24-bit precision 10458 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10459 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10460 match(Set dst (MulF src con)); 10461 10462 format %{ "FLD $src\n\t" 10463 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10464 "FSTP_S $dst" %} 10465 ins_encode %{ 10466 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10467 __ fmul_s($constantaddress($con)); 10468 __ fstp_s(Address(rsp, $dst$$disp)); 10469 %} 10470 ins_pipe(fpu_mem_reg_con); 10471 %} 10472 // 10473 // This instruction does not round to 24-bits 10474 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10475 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10476 match(Set dst (MulF src con)); 10477 10478 format %{ "FLD $src\n\t" 10479 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10480 "FSTP $dst" %} 10481 ins_encode %{ 10482 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10483 __ fmul_s($constantaddress($con)); 10484 __ fstp_d($dst$$reg); 10485 %} 10486 ins_pipe(fpu_reg_reg_con); 10487 %} 10488 10489 10490 // 10491 // MACRO1 -- subsume unshared load into mulFPR 10492 // This instruction does not round to 24-bits 10493 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10494 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10495 match(Set dst (MulF (LoadF mem1) src)); 10496 10497 format %{ "FLD $mem1 ===MACRO1===\n\t" 10498 "FMUL ST,$src\n\t" 10499 "FSTP $dst" %} 10500 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10501 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10502 OpcReg_FPR(src), 10503 Pop_Reg_FPR(dst) ); 10504 ins_pipe( fpu_reg_reg_mem ); 10505 %} 10506 // 10507 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10508 // This instruction does not round to 24-bits 10509 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10510 
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10511 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10512 ins_cost(95); 10513 10514 format %{ "FLD $mem1 ===MACRO2===\n\t" 10515 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10516 "FADD ST,$src2\n\t" 10517 "FSTP $dst" %} 10518 opcode(0xD9); /* LoadF D9 /0 */ 10519 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10520 FMul_ST_reg(src1), 10521 FAdd_ST_reg(src2), 10522 Pop_Reg_FPR(dst) ); 10523 ins_pipe( fpu_reg_mem_reg_reg ); 10524 %} 10525 10526 // MACRO3 -- addFPR a mulFPR 10527 // This instruction does not round to 24-bits. It is a '2-address' 10528 // instruction in that the result goes back to src2. This eliminates 10529 // a move from the macro; possibly the register allocator will have 10530 // to add it back (and maybe not). 10531 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ 10532 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10533 match(Set src2 (AddF (MulF src0 src1) src2)); 10534 10535 format %{ "FLD $src0 ===MACRO3===\n\t" 10536 "FMUL ST,$src1\n\t" 10537 "FADDP $src2,ST" %} 10538 opcode(0xD9); /* LoadF D9 /0 */ 10539 ins_encode( Push_Reg_FPR(src0), 10540 FMul_ST_reg(src1), 10541 FAddP_reg_ST(src2) ); 10542 ins_pipe( fpu_reg_reg_reg ); 10543 %} 10544 10545 // MACRO4 -- divFPR subFPR 10546 // This instruction does not round to 24-bits 10547 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ 10548 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10549 match(Set dst (DivF (SubF src2 src1) src3)); 10550 10551 format %{ "FLD $src2 ===MACRO4===\n\t" 10552 "FSUB ST,$src1\n\t" 10553 "FDIV ST,$src3\n\t" 10554 "FSTP $dst" %} 10555 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10556 ins_encode( Push_Reg_FPR(src2), 10557 subFPR_divFPR_encode(src1,src3), 10558 Pop_Reg_FPR(dst) ); 10559 ins_pipe( fpu_reg_reg_reg_reg ); 10560 %} 10561 10562 // Spill to obtain 24-bit precision 10563 instruct divFPR24_reg(stackSlotF 
dst, regFPR src1, regFPR src2) %{ 10564 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10565 match(Set dst (DivF src1 src2)); 10566 10567 format %{ "FDIV $dst,$src1,$src2" %} 10568 opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ 10569 ins_encode( Push_Reg_FPR(src1), 10570 OpcReg_FPR(src2), 10571 Pop_Mem_FPR(dst) ); 10572 ins_pipe( fpu_mem_reg_reg ); 10573 %} 10574 // 10575 // This instruction does not round to 24-bits 10576 instruct divFPR_reg(regFPR dst, regFPR src) %{ 10577 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10578 match(Set dst (DivF dst src)); 10579 10580 format %{ "FDIV $dst,$src" %} 10581 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 10582 ins_encode( Push_Reg_FPR(src), 10583 OpcP, RegOpc(dst) ); 10584 ins_pipe( fpu_reg_reg ); 10585 %} 10586 10587 10588 // Spill to obtain 24-bit precision 10589 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10590 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 10591 match(Set dst (ModF src1 src2)); 10592 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10593 10594 format %{ "FMOD $dst,$src1,$src2" %} 10595 ins_encode( Push_Reg_Mod_DPR(src1, src2), 10596 emitModDPR(), 10597 Push_Result_Mod_DPR(src2), 10598 Pop_Mem_FPR(dst)); 10599 ins_pipe( pipe_slow ); 10600 %} 10601 // 10602 // This instruction does not round to 24-bits 10603 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ 10604 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10605 match(Set dst (ModF dst src)); 10606 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 10607 10608 format %{ "FMOD $dst,$src" %} 10609 ins_encode(Push_Reg_Mod_DPR(dst, src), 10610 emitModDPR(), 10611 Push_Result_Mod_DPR(src), 10612 Pop_Reg_FPR(dst)); 10613 ins_pipe( pipe_slow ); 10614 %} 10615 10616 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ 10617 predicate(UseSSE>=1); 10618 
match(Set dst (ModF src0 src1)); 10619 effect(KILL rax, KILL cr); 10620 format %{ "SUB ESP,4\t # FMOD\n" 10621 "\tMOVSS [ESP+0],$src1\n" 10622 "\tFLD_S [ESP+0]\n" 10623 "\tMOVSS [ESP+0],$src0\n" 10624 "\tFLD_S [ESP+0]\n" 10625 "loop:\tFPREM\n" 10626 "\tFWAIT\n" 10627 "\tFNSTSW AX\n" 10628 "\tSAHF\n" 10629 "\tJP loop\n" 10630 "\tFSTP_S [ESP+0]\n" 10631 "\tMOVSS $dst,[ESP+0]\n" 10632 "\tADD ESP,4\n" 10633 "\tFSTP ST0\t # Restore FPU Stack" 10634 %} 10635 ins_cost(250); 10636 ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); 10637 ins_pipe( pipe_slow ); 10638 %} 10639 10640 10641 //----------Arithmetic Conversion Instructions--------------------------------- 10642 // The conversions operations are all Alpha sorted. Please keep it that way! 10643 10644 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ 10645 predicate(UseSSE==0); 10646 match(Set dst (RoundFloat src)); 10647 ins_cost(125); 10648 format %{ "FST_S $dst,$src\t# F-round" %} 10649 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 10650 ins_pipe( fpu_mem_reg ); 10651 %} 10652 10653 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ 10654 predicate(UseSSE<=1); 10655 match(Set dst (RoundDouble src)); 10656 ins_cost(125); 10657 format %{ "FST_D $dst,$src\t# D-round" %} 10658 ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 10659 ins_pipe( fpu_mem_reg ); 10660 %} 10661 10662 // Force rounding to 24-bit precision and 6-bit exponent 10663 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ 10664 predicate(UseSSE==0); 10665 match(Set dst (ConvD2F src)); 10666 format %{ "FST_S $dst,$src\t# F-round" %} 10667 expand %{ 10668 roundFloat_mem_reg(dst,src); 10669 %} 10670 %} 10671 10672 // Force rounding to 24-bit precision and 6-bit exponent 10673 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ 10674 predicate(UseSSE==1); 10675 match(Set dst (ConvD2F src)); 10676 effect( KILL cr ); 10677 format %{ "SUB ESP,4\n\t" 10678 "FST_S [ESP],$src\t# F-round\n\t" 10679 "MOVSS 
$dst,[ESP]\n\t" 10680 "ADD ESP,4" %} 10681 ins_encode %{ 10682 __ subptr(rsp, 4); 10683 if ($src$$reg != FPR1L_enc) { 10684 __ fld_s($src$$reg-1); 10685 __ fstp_s(Address(rsp, 0)); 10686 } else { 10687 __ fst_s(Address(rsp, 0)); 10688 } 10689 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 10690 __ addptr(rsp, 4); 10691 %} 10692 ins_pipe( pipe_slow ); 10693 %} 10694 10695 // Force rounding double precision to single precision 10696 instruct convD2F_reg(regF dst, regD src) %{ 10697 predicate(UseSSE>=2); 10698 match(Set dst (ConvD2F src)); 10699 format %{ "CVTSD2SS $dst,$src\t# F-round" %} 10700 ins_encode %{ 10701 __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); 10702 %} 10703 ins_pipe( pipe_slow ); 10704 %} 10705 10706 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ 10707 predicate(UseSSE==0); 10708 match(Set dst (ConvF2D src)); 10709 format %{ "FST_S $dst,$src\t# D-round" %} 10710 ins_encode( Pop_Reg_Reg_DPR(dst, src)); 10711 ins_pipe( fpu_reg_reg ); 10712 %} 10713 10714 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ 10715 predicate(UseSSE==1); 10716 match(Set dst (ConvF2D src)); 10717 format %{ "FST_D $dst,$src\t# D-round" %} 10718 expand %{ 10719 roundDouble_mem_reg(dst,src); 10720 %} 10721 %} 10722 10723 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ 10724 predicate(UseSSE==1); 10725 match(Set dst (ConvF2D src)); 10726 effect( KILL cr ); 10727 format %{ "SUB ESP,4\n\t" 10728 "MOVSS [ESP] $src\n\t" 10729 "FLD_S [ESP]\n\t" 10730 "ADD ESP,4\n\t" 10731 "FSTP $dst\t# D-round" %} 10732 ins_encode %{ 10733 __ subptr(rsp, 4); 10734 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10735 __ fld_s(Address(rsp, 0)); 10736 __ addptr(rsp, 4); 10737 __ fstp_d($dst$$reg); 10738 %} 10739 ins_pipe( pipe_slow ); 10740 %} 10741 10742 instruct convF2D_reg(regD dst, regF src) %{ 10743 predicate(UseSSE>=2); 10744 match(Set dst (ConvF2D src)); 10745 format %{ "CVTSS2SD $dst,$src\t# D-round" %} 10746 ins_encode %{ 10747 __ cvtss2sd ($dst$$XMMRegister, 
$src$$XMMRegister); 10748 %} 10749 ins_pipe( pipe_slow ); 10750 %} 10751 10752 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10753 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ 10754 predicate(UseSSE<=1); 10755 match(Set dst (ConvD2I src)); 10756 effect( KILL tmp, KILL cr ); 10757 format %{ "FLD $src\t# Convert double to int \n\t" 10758 "FLDCW trunc mode\n\t" 10759 "SUB ESP,4\n\t" 10760 "FISTp [ESP + #0]\n\t" 10761 "FLDCW std/24-bit mode\n\t" 10762 "POP EAX\n\t" 10763 "CMP EAX,0x80000000\n\t" 10764 "JNE,s fast\n\t" 10765 "FLD_D $src\n\t" 10766 "CALL d2i_wrapper\n" 10767 "fast:" %} 10768 ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); 10769 ins_pipe( pipe_slow ); 10770 %} 10771 10772 // Convert a double to an int. If the double is a NAN, stuff a zero in instead. 10773 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ 10774 predicate(UseSSE>=2); 10775 match(Set dst (ConvD2I src)); 10776 effect( KILL tmp, KILL cr ); 10777 format %{ "CVTTSD2SI $dst, $src\n\t" 10778 "CMP $dst,0x80000000\n\t" 10779 "JNE,s fast\n\t" 10780 "SUB ESP, 8\n\t" 10781 "MOVSD [ESP], $src\n\t" 10782 "FLD_D [ESP]\n\t" 10783 "ADD ESP, 8\n\t" 10784 "CALL d2i_wrapper\n" 10785 "fast:" %} 10786 ins_encode %{ 10787 Label fast; 10788 __ cvttsd2sil($dst$$Register, $src$$XMMRegister); 10789 __ cmpl($dst$$Register, 0x80000000); 10790 __ jccb(Assembler::notEqual, fast); 10791 __ subptr(rsp, 8); 10792 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10793 __ fld_d(Address(rsp, 0)); 10794 __ addptr(rsp, 8); 10795 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper()))); 10796 __ post_call_nop(); 10797 __ bind(fast); 10798 %} 10799 ins_pipe( pipe_slow ); 10800 %} 10801 10802 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ 10803 predicate(UseSSE<=1); 10804 match(Set dst (ConvD2L src)); 10805 effect( KILL cr ); 10806 format %{ "FLD $src\t# Convert double to 
long\n\t" 10807 "FLDCW trunc mode\n\t" 10808 "SUB ESP,8\n\t" 10809 "FISTp [ESP + #0]\n\t" 10810 "FLDCW std/24-bit mode\n\t" 10811 "POP EAX\n\t" 10812 "POP EDX\n\t" 10813 "CMP EDX,0x80000000\n\t" 10814 "JNE,s fast\n\t" 10815 "TEST EAX,EAX\n\t" 10816 "JNE,s fast\n\t" 10817 "FLD $src\n\t" 10818 "CALL d2l_wrapper\n" 10819 "fast:" %} 10820 ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); 10821 ins_pipe( pipe_slow ); 10822 %} 10823 10824 // XMM lacks a float/double->long conversion, so use the old FPU stack. 10825 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ 10826 predicate (UseSSE>=2); 10827 match(Set dst (ConvD2L src)); 10828 effect( KILL cr ); 10829 format %{ "SUB ESP,8\t# Convert double to long\n\t" 10830 "MOVSD [ESP],$src\n\t" 10831 "FLD_D [ESP]\n\t" 10832 "FLDCW trunc mode\n\t" 10833 "FISTp [ESP + #0]\n\t" 10834 "FLDCW std/24-bit mode\n\t" 10835 "POP EAX\n\t" 10836 "POP EDX\n\t" 10837 "CMP EDX,0x80000000\n\t" 10838 "JNE,s fast\n\t" 10839 "TEST EAX,EAX\n\t" 10840 "JNE,s fast\n\t" 10841 "SUB ESP,8\n\t" 10842 "MOVSD [ESP],$src\n\t" 10843 "FLD_D [ESP]\n\t" 10844 "ADD ESP,8\n\t" 10845 "CALL d2l_wrapper\n" 10846 "fast:" %} 10847 ins_encode %{ 10848 Label fast; 10849 __ subptr(rsp, 8); 10850 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 10851 __ fld_d(Address(rsp, 0)); 10852 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); 10853 __ fistp_d(Address(rsp, 0)); 10854 // Restore the rounding mode, mask the exception 10855 if (Compile::current()->in_24_bit_fp_mode()) { 10856 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 10857 } else { 10858 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 10859 } 10860 // Load the converted long, adjust CPU stack 10861 __ pop(rax); 10862 __ pop(rdx); 10863 __ cmpl(rdx, 0x80000000); 10864 __ jccb(Assembler::notEqual, fast); 10865 __ testl(rax, rax); 10866 __ jccb(Assembler::notEqual, fast); 10867 __ subptr(rsp, 8); 10868 __ movdbl(Address(rsp, 
0), $src$$XMMRegister); 10869 __ fld_d(Address(rsp, 0)); 10870 __ addptr(rsp, 8); 10871 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); 10872 __ post_call_nop(); 10873 __ bind(fast); 10874 %} 10875 ins_pipe( pipe_slow ); 10876 %} 10877 10878 // Convert a double to an int. Java semantics require we do complex 10879 // manglations in the corner cases. So we set the rounding mode to 10880 // 'zero', store the darned double down as an int, and reset the 10881 // rounding mode to 'nearest'. The hardware stores a flag value down 10882 // if we would overflow or converted a NAN; we check for this and 10883 // and go the slow path if needed. 10884 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ 10885 predicate(UseSSE==0); 10886 match(Set dst (ConvF2I src)); 10887 effect( KILL tmp, KILL cr ); 10888 format %{ "FLD $src\t# Convert float to int \n\t" 10889 "FLDCW trunc mode\n\t" 10890 "SUB ESP,4\n\t" 10891 "FISTp [ESP + #0]\n\t" 10892 "FLDCW std/24-bit mode\n\t" 10893 "POP EAX\n\t" 10894 "CMP EAX,0x80000000\n\t" 10895 "JNE,s fast\n\t" 10896 "FLD $src\n\t" 10897 "CALL d2i_wrapper\n" 10898 "fast:" %} 10899 // DPR2I_encoding works for FPR2I 10900 ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); 10901 ins_pipe( pipe_slow ); 10902 %} 10903 10904 // Convert a float in xmm to an int reg. 
10905 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ 10906 predicate(UseSSE>=1); 10907 match(Set dst (ConvF2I src)); 10908 effect( KILL tmp, KILL cr ); 10909 format %{ "CVTTSS2SI $dst, $src\n\t" 10910 "CMP $dst,0x80000000\n\t" 10911 "JNE,s fast\n\t" 10912 "SUB ESP, 4\n\t" 10913 "MOVSS [ESP], $src\n\t" 10914 "FLD [ESP]\n\t" 10915 "ADD ESP, 4\n\t" 10916 "CALL d2i_wrapper\n" 10917 "fast:" %} 10918 ins_encode %{ 10919 Label fast; 10920 __ cvttss2sil($dst$$Register, $src$$XMMRegister); 10921 __ cmpl($dst$$Register, 0x80000000); 10922 __ jccb(Assembler::notEqual, fast); 10923 __ subptr(rsp, 4); 10924 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10925 __ fld_s(Address(rsp, 0)); 10926 __ addptr(rsp, 4); 10927 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper()))); 10928 __ post_call_nop(); 10929 __ bind(fast); 10930 %} 10931 ins_pipe( pipe_slow ); 10932 %} 10933 10934 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ 10935 predicate(UseSSE==0); 10936 match(Set dst (ConvF2L src)); 10937 effect( KILL cr ); 10938 format %{ "FLD $src\t# Convert float to long\n\t" 10939 "FLDCW trunc mode\n\t" 10940 "SUB ESP,8\n\t" 10941 "FISTp [ESP + #0]\n\t" 10942 "FLDCW std/24-bit mode\n\t" 10943 "POP EAX\n\t" 10944 "POP EDX\n\t" 10945 "CMP EDX,0x80000000\n\t" 10946 "JNE,s fast\n\t" 10947 "TEST EAX,EAX\n\t" 10948 "JNE,s fast\n\t" 10949 "FLD $src\n\t" 10950 "CALL d2l_wrapper\n" 10951 "fast:" %} 10952 // DPR2L_encoding works for FPR2L 10953 ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); 10954 ins_pipe( pipe_slow ); 10955 %} 10956 10957 // XMM lacks a float/double->long conversion, so use the old FPU stack. 
10958 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 10959 predicate (UseSSE>=1); 10960 match(Set dst (ConvF2L src)); 10961 effect( KILL cr ); 10962 format %{ "SUB ESP,8\t# Convert float to long\n\t" 10963 "MOVSS [ESP],$src\n\t" 10964 "FLD_S [ESP]\n\t" 10965 "FLDCW trunc mode\n\t" 10966 "FISTp [ESP + #0]\n\t" 10967 "FLDCW std/24-bit mode\n\t" 10968 "POP EAX\n\t" 10969 "POP EDX\n\t" 10970 "CMP EDX,0x80000000\n\t" 10971 "JNE,s fast\n\t" 10972 "TEST EAX,EAX\n\t" 10973 "JNE,s fast\n\t" 10974 "SUB ESP,4\t# Convert float to long\n\t" 10975 "MOVSS [ESP],$src\n\t" 10976 "FLD_S [ESP]\n\t" 10977 "ADD ESP,4\n\t" 10978 "CALL d2l_wrapper\n" 10979 "fast:" %} 10980 ins_encode %{ 10981 Label fast; 10982 __ subptr(rsp, 8); 10983 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10984 __ fld_s(Address(rsp, 0)); 10985 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); 10986 __ fistp_d(Address(rsp, 0)); 10987 // Restore the rounding mode, mask the exception 10988 if (Compile::current()->in_24_bit_fp_mode()) { 10989 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 10990 } else { 10991 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 10992 } 10993 // Load the converted long, adjust CPU stack 10994 __ pop(rax); 10995 __ pop(rdx); 10996 __ cmpl(rdx, 0x80000000); 10997 __ jccb(Assembler::notEqual, fast); 10998 __ testl(rax, rax); 10999 __ jccb(Assembler::notEqual, fast); 11000 __ subptr(rsp, 4); 11001 __ movflt(Address(rsp, 0), $src$$XMMRegister); 11002 __ fld_s(Address(rsp, 0)); 11003 __ addptr(rsp, 4); 11004 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); 11005 __ post_call_nop(); 11006 __ bind(fast); 11007 %} 11008 ins_pipe( pipe_slow ); 11009 %} 11010 11011 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ 11012 predicate( UseSSE<=1 ); 11013 match(Set dst (ConvI2D src)); 11014 format %{ "FILD $src\n\t" 11015 "FSTP $dst" %} 11016 opcode(0xDB, 0x0); /* DB /0 */ 
11017 ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); 11018 ins_pipe( fpu_reg_mem ); 11019 %} 11020 11021 instruct convI2D_reg(regD dst, rRegI src) %{ 11022 predicate( UseSSE>=2 && !UseXmmI2D ); 11023 match(Set dst (ConvI2D src)); 11024 format %{ "CVTSI2SD $dst,$src" %} 11025 ins_encode %{ 11026 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); 11027 %} 11028 ins_pipe( pipe_slow ); 11029 %} 11030 11031 instruct convI2D_mem(regD dst, memory mem) %{ 11032 predicate( UseSSE>=2 ); 11033 match(Set dst (ConvI2D (LoadI mem))); 11034 format %{ "CVTSI2SD $dst,$mem" %} 11035 ins_encode %{ 11036 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); 11037 %} 11038 ins_pipe( pipe_slow ); 11039 %} 11040 11041 instruct convXI2D_reg(regD dst, rRegI src) 11042 %{ 11043 predicate( UseSSE>=2 && UseXmmI2D ); 11044 match(Set dst (ConvI2D src)); 11045 11046 format %{ "MOVD $dst,$src\n\t" 11047 "CVTDQ2PD $dst,$dst\t# i2d" %} 11048 ins_encode %{ 11049 __ movdl($dst$$XMMRegister, $src$$Register); 11050 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 11051 %} 11052 ins_pipe(pipe_slow); // XXX 11053 %} 11054 11055 instruct convI2DPR_mem(regDPR dst, memory mem) %{ 11056 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 11057 match(Set dst (ConvI2D (LoadI mem))); 11058 format %{ "FILD $mem\n\t" 11059 "FSTP $dst" %} 11060 opcode(0xDB); /* DB /0 */ 11061 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11062 Pop_Reg_DPR(dst)); 11063 ins_pipe( fpu_reg_mem ); 11064 %} 11065 11066 // Convert a byte to a float; no rounding step needed. 
11067 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ 11068 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); 11069 match(Set dst (ConvI2F src)); 11070 format %{ "FILD $src\n\t" 11071 "FSTP $dst" %} 11072 11073 opcode(0xDB, 0x0); /* DB /0 */ 11074 ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); 11075 ins_pipe( fpu_reg_mem ); 11076 %} 11077 11078 // In 24-bit mode, force exponent rounding by storing back out 11079 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ 11080 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11081 match(Set dst (ConvI2F src)); 11082 ins_cost(200); 11083 format %{ "FILD $src\n\t" 11084 "FSTP_S $dst" %} 11085 opcode(0xDB, 0x0); /* DB /0 */ 11086 ins_encode( Push_Mem_I(src), 11087 Pop_Mem_FPR(dst)); 11088 ins_pipe( fpu_mem_mem ); 11089 %} 11090 11091 // In 24-bit mode, force exponent rounding by storing back out 11092 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ 11093 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11094 match(Set dst (ConvI2F (LoadI mem))); 11095 ins_cost(200); 11096 format %{ "FILD $mem\n\t" 11097 "FSTP_S $dst" %} 11098 opcode(0xDB); /* DB /0 */ 11099 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11100 Pop_Mem_FPR(dst)); 11101 ins_pipe( fpu_mem_mem ); 11102 %} 11103 11104 // This instruction does not round to 24-bits 11105 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ 11106 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11107 match(Set dst (ConvI2F src)); 11108 format %{ "FILD $src\n\t" 11109 "FSTP $dst" %} 11110 opcode(0xDB, 0x0); /* DB /0 */ 11111 ins_encode( Push_Mem_I(src), 11112 Pop_Reg_FPR(dst)); 11113 ins_pipe( fpu_reg_mem ); 11114 %} 11115 11116 // This instruction does not round to 24-bits 11117 instruct convI2FPR_mem(regFPR dst, memory mem) %{ 11118 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11119 match(Set dst (ConvI2F (LoadI 
mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Convert an int to a float via MOVD + CVTDQ2PS (preferred when UseXmmI2F).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy low word into both halves, then arithmetic
// shift of the high half replicates the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long to double conversion on x87: push both halves, FILD from the stack,
// round through an FSTP_D into the destination stack slot.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to double with SSE2 result: x87 does the 64-bit integer load, the
// rounded value is moved back into an XMM register through the stack.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Long to float with SSE result; FSTP_S performs the float rounding.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Long to float on x87, result left in a stack slot.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Truncate long to int: only the low half is used.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Raw bit moves between float and int representations (no conversion).
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}


instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S $src\n\t"
            "FSTP $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Raw bit moves between double and long representations. A long occupies a
// register pair on 32-bit x86, hence the .lo/.hi handling below.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Register-to-register D2L: PSHUFLW swaps the two 32-bit halves of the XMM
// value into tmp so MOVD can pick up the high word.
instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}


instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Variant used when MOVSD's upper-clearing behavior is undesirable on the
// target (!UseXmmLoadAndClearUpper); format shows MOVLPD instead.
instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe( pipe_slow );
%}

// Register-to-register L2D: move both 32-bit halves into XMM registers and
// interleave them with PUNPCKLDQ.
instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD $dst,$src.lo\n\t"
            "MOVD $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//----------------------------- CompressBits/ExpandBits ------------------------

// 64-bit PEXT emulated with two 32-bit pextl operations on the register
// pairs, followed by a merge of the two partial results.
instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    Label exit, partial_result;
    // In parallel, extract both upper and lower 32 bits of source into destination register pair.
    // Merge the results of upper and lower destination registers such that upper destination
    // results are contiguously laid out after the lower destination result.
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
    __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // Skip merging if bit count of lower mask register is equal to 32 (register size).
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift left the contents of upper destination register by true bit count of lower mask register
    // and merge with lower destination register.
    __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ orl($dst$$Register, $rtmp$$Register);
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    // Zero out upper destination register if true bit count of lower 32 bit mask is zero
    // since contents of upper destination have already been copied to lower destination
    // register.
    __ cmpl($rtmp$$Register, 0);
    __ jccb(Assembler::greater, partial_result);
    __ movl(HIGH_FROM_LOW($dst$$Register), 0);
    __ jmp(exit);
    __ bind(partial_result);
    // Perform right shift over upper destination register to move out bits already copied
    // to lower destination register.
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}

// 64-bit PDEP emulated with 32-bit pdepl operations; the upper half may
// consume bits left over from the lower source word, so the used mask bits
// are stripped with BLSR before the final deposit.
instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  effect(TEMP rtmp, TEMP xtmp, KILL cr);
  format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    // Extraction operation sequentially reads the bits from source register starting from LSB
    // and lays them out into destination register at bit locations corresponding to true bits
    // in mask register. Thus number of source bits read are equal to combined true bit count
    // of mask register pair.
    Label exit, mask_clipping;
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
    __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register));
    __ popcntl($rtmp$$Register, $mask$$Register);
    // If true bit count of lower mask register is 32 then none of bit of lower source register
    // will feed to upper destination register.
    __ cmpl($rtmp$$Register, 32);
    __ jccb(Assembler::equal, exit);
    // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot.
    __ movdl($xtmp$$XMMRegister, $rtmp$$Register);
    // Shift right the contents of lower source register to remove already consumed bits.
    __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register);
    // Extract the bits from lower source register starting from LSB under the influence
    // of upper mask register.
    __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register));
    __ movdl($rtmp$$Register, $xtmp$$XMMRegister);
    __ subl($rtmp$$Register, 32);
    __ negl($rtmp$$Register);
    __ movdl($xtmp$$XMMRegister, $mask$$Register);
    __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register));
    // Clear the set bits in upper mask register which have been used to extract the contents
    // from lower source register.
    __ bind(mask_clipping);
    __ blsrl($mask$$Register, $mask$$Register);
    __ decrementl($rtmp$$Register, 1);
    __ jccb(Assembler::greater, mask_clipping);
    // Starting from LSB extract the bits from upper source register under the influence of
    // remaining set bits in upper mask register.
    __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register);
    // Merge the partial results extracted from lower and upper source register bits.
    __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register);
    __ movdl($mask$$Register, $xtmp$$XMMRegister);
    __ bind(exit);
  %}
  ins_pipe( pipe_slow );
%}

// =======================================================================
// fast clearing of an array
// Small ClearArray non-AVX512.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // last two args: is_large = false, no AVX512 mask register
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 non-constant length.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
    $$emit$$"CMP InitArrayShortSize,rcx\n\t"
    $$emit$$"JG LARGE\n\t"
    $$emit$$"SHL ECX, 1\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JS DONE\t# Zero length\n\t"
    $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
    $$emit$$"DEC ECX\n\t"
    $$emit$$"JGE LOOP\n\t"
    $$emit$$"JMP DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // last two args: is_large = false, AVX512 mask temp supplied
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray non-AVX512.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // last two args: is_large = true, no AVX512 mask register
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// Large ClearArray AVX512.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
  format %{ $$template
    if (UseFastStosb) {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
      $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
    } else if (UseXMMForObjInit) {
      $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
      $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
      $$emit$$"JMPQ L_zero_64_bytes\n\t"
      $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
      $$emit$$"ADD 0x40,RAX\n\t"
      $$emit$$"# L_zero_64_bytes:\n\t"
      $$emit$$"SUB 0x8,RCX\n\t"
      $$emit$$"JGE L_loop\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JL L_tail\n\t"
      $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
      $$emit$$"ADD 0x20,RAX\n\t"
      $$emit$$"SUB 0x4,RCX\n\t"
      $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
      $$emit$$"ADD 0x4,RCX\n\t"
      $$emit$$"JLE L_end\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
      $$emit$$"VMOVQ XMM0,(RAX)\n\t"
      $$emit$$"ADD 0x8,RAX\n\t"
      $$emit$$"DEC RCX\n\t"
      $$emit$$"JGE L_sloop\n\t"
      $$emit$$"# L_end:\n\t"
    } else {
      $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
      $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
      $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    // last two args: is_large = true, AVX512 mask temp supplied
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Small ClearArray AVX512 constant length.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
            ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt \n\t" %}
  ins_encode %{
    // length is a compile-time constant here ($cnt$$constant)
    __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// String compare, Latin1 vs Latin1 (byte[] vs byte[]).
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX512 variant of the above; supplies a mask-register temp.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, UTF-16 vs UTF-16 (char[] vs char[]).
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                              eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, Latin1 str1 vs UTF-16 str2.
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// String compare, UTF-16 str1 vs Latin1 str2. Note the operands are passed
// to string_compare swapped (str2/str1, cnt2/cnt1) to reuse the LU path.
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
                               eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // first arg false: comparing two arrays, not an array against itself
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}

  ins_pipe( pipe_slow );
%}

instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}

  ins_pipe( pipe_slow );
%}


// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                             eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// UL encoding (presumably UTF-16 source / Latin-1 substring — confirm against
// StrIntrinsicNode); substring length is the compile-time constant $int_cnt2.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Substring search with runtime-determined substring length ($cnt2 in a
// register, -1 passed as the constant-count argument); LL encoding.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// Same as above for the UU (UTF-16/UTF-16) encoding.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// Same as above for the UL encoding.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    // -1: substring length is not a compile-time constant.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf of a single char in a UTF-16 string (StrIntrinsicNode::U).
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf of a single char in a Latin-1 string (StrIntrinsicNode::L).
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// fast array equals
// byte[] variant, non-AVX-512 path ("true" first arg = array compare,
// so lengths are compared by the stub as well).
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// byte[] variant, AVX-512 path (opmask register $ktmp available).
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// char[] variant, non-AVX-512 path.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI
                       tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // "true /* char */": compare 16-bit char elements.
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// char[] variant, AVX-512 path (opmask register $ktmp available).
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// CountPositives intrinsic over a byte[]; this variant is selected when the
// AVX-512-VLBW + BMI2 combination is NOT available (passes knoreg twice, so
// the stub takes its non-masked path).
instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                         regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// CountPositives, AVX-512 (VLBW+BMI2) variant with two opmask temps.
instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                              regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}


// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 (VLBW+BMI2) variant of the char[]-to-byte[] compression above.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg
                              ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
// "Universe dummy": the node produces no value, only the side effect of the copy.
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// AVX-512 (VLBW+BMI2) variant of byte[]-to-char[] inflation.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    // Final "false": ISO-8859-1 (8-bit) limit, not 7-bit ASCII.
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    // Final "true": restrict to 7-bit ASCII.
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  // Con8or32 emits the short sign-extended 8-bit immediate form when the
  // constant fits, otherwise the full 32-bit form.
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST src,src (shorter than CMP with immediate 0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (AndI src con) compared with zero folds to a single TEST with immediate.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (AndI src mem) compared with zero folds to TEST reg,mem.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// The predicate restricts this to loads whose type carries no relocation.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// TEST src,src sets ZF exactly when the pointer is null.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    // Computes limit = init + stride * ceil((limit - init) / stride) in
    // 64-bit (EDX:EAX) arithmetic so the intermediate difference cannot
    // overflow 32 bits.
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 appears unused below — confirm whether it can be removed.
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index), noreg);
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Lower cost (200): the UCF flags variant does not need the parity fix-up.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Two-jump form for float compares where the parity flag (unordered result)
// must be handled explicitly: ne takes the branch on unordered too, while
// eq must skip the branch on unordered via a local "done" label.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant used when only the flags result of the subtype check is consumed
// (compared against null); the pointer result register is just killed.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged 12822 // with the ins_short_branch attribute, which causes the ADLC to suppress the 12823 // match rules in general matching. Instead, the ADLC generates a conversion 12824 // method in the MachNode which can be used to do in-place replacement of the 12825 // long variant with the shorter variant. The compiler will determine if a 12826 // branch can be taken by the is_short_branch_offset() predicate in the machine 12827 // specific code section of the file. 12828 12829 // Jump Direct - Label defines a relative address from JMP+1 12830 instruct jmpDir_short(label labl) %{ 12831 match(Goto); 12832 effect(USE labl); 12833 12834 ins_cost(300); 12835 format %{ "JMP,s $labl" %} 12836 size(2); 12837 ins_encode %{ 12838 Label* L = $labl$$label; 12839 __ jmpb(*L); 12840 %} 12841 ins_pipe( pipe_jmp ); 12842 ins_short_branch(1); 12843 %} 12844 12845 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12846 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{ 12847 match(If cop cr); 12848 effect(USE labl); 12849 12850 ins_cost(300); 12851 format %{ "J$cop,s $labl" %} 12852 size(2); 12853 ins_encode %{ 12854 Label* L = $labl$$label; 12855 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12856 %} 12857 ins_pipe( pipe_jcc ); 12858 ins_short_branch(1); 12859 %} 12860 12861 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12862 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{ 12863 match(CountedLoopEnd cop cr); 12864 effect(USE labl); 12865 12866 ins_cost(300); 12867 format %{ "J$cop,s $labl\t# Loop end" %} 12868 size(2); 12869 ins_encode %{ 12870 Label* L = $labl$$label; 12871 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12872 %} 12873 ins_pipe( pipe_jcc ); 12874 ins_short_branch(1); 12875 %} 12876 12877 // Jump Direct Conditional - Label defines a relative address from Jcc+1 12878 instruct jmpLoopEndU_short(cmpOpU cop, 
eFlagsRegU cmp, label labl) %{ 12879 match(CountedLoopEnd cop cmp); 12880 effect(USE labl); 12881 12882 ins_cost(300); 12883 format %{ "J$cop,us $labl\t# Loop end" %} 12884 size(2); 12885 ins_encode %{ 12886 Label* L = $labl$$label; 12887 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12888 %} 12889 ins_pipe( pipe_jcc ); 12890 ins_short_branch(1); 12891 %} 12892 12893 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12894 match(CountedLoopEnd cop cmp); 12895 effect(USE labl); 12896 12897 ins_cost(300); 12898 format %{ "J$cop,us $labl\t# Loop end" %} 12899 size(2); 12900 ins_encode %{ 12901 Label* L = $labl$$label; 12902 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12903 %} 12904 ins_pipe( pipe_jcc ); 12905 ins_short_branch(1); 12906 %} 12907 12908 // Jump Direct Conditional - using unsigned comparison 12909 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 12910 match(If cop cmp); 12911 effect(USE labl); 12912 12913 ins_cost(300); 12914 format %{ "J$cop,us $labl" %} 12915 size(2); 12916 ins_encode %{ 12917 Label* L = $labl$$label; 12918 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12919 %} 12920 ins_pipe( pipe_jcc ); 12921 ins_short_branch(1); 12922 %} 12923 12924 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 12925 match(If cop cmp); 12926 effect(USE labl); 12927 12928 ins_cost(300); 12929 format %{ "J$cop,us $labl" %} 12930 size(2); 12931 ins_encode %{ 12932 Label* L = $labl$$label; 12933 __ jccb((Assembler::Condition)($cop$$cmpcode), *L); 12934 %} 12935 ins_pipe( pipe_jcc ); 12936 ins_short_branch(1); 12937 %} 12938 12939 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12940 match(If cop cmp); 12941 effect(USE labl); 12942 12943 ins_cost(300); 12944 format %{ $$template 12945 if ($cop$$cmpcode == Assembler::notEqual) { 12946 $$emit$$"JP,u,s $labl\n\t" 12947 $$emit$$"J$cop,u,s $labl" 12948 } else { 12949 $$emit$$"JP,u,s done\n\t" 12950 
$$emit$$"J$cop,u,s $labl\n\t" 12951 $$emit$$"done:" 12952 } 12953 %} 12954 size(4); 12955 ins_encode %{ 12956 Label* l = $labl$$label; 12957 if ($cop$$cmpcode == Assembler::notEqual) { 12958 __ jccb(Assembler::parity, *l); 12959 __ jccb(Assembler::notEqual, *l); 12960 } else if ($cop$$cmpcode == Assembler::equal) { 12961 Label done; 12962 __ jccb(Assembler::parity, done); 12963 __ jccb(Assembler::equal, *l); 12964 __ bind(done); 12965 } else { 12966 ShouldNotReachHere(); 12967 } 12968 %} 12969 ins_pipe(pipe_jcc); 12970 ins_short_branch(1); 12971 %} 12972 12973 // ============================================================================ 12974 // Long Compare 12975 // 12976 // Currently we hold longs in 2 registers. Comparing such values efficiently 12977 // is tricky. The flavor of compare used depends on whether we are testing 12978 // for LT, LE, or EQ. For a simple LT test we can check just the sign bit. 12979 // The GE test is the negated LT test. The LE test can be had by commuting 12980 // the operands (yielding a GE test) and then negating; negate again for the 12981 // GT test. The EQ test is done by ORcc'ing the high and low halves, and the 12982 // NE test is negated from that. 12983 12984 // Due to a shortcoming in the ADLC, it mixes up expressions like: 12985 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the 12986 // difference between 'Y' and '0L'. The tree-matches for the CmpI sections 12987 // are collapsed internally in the ADLC's dfa-gen code. The match for 12988 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the 12989 // foo match ends up with the wrong leaf. One fix is to not match both 12990 // reg-reg and reg-zero forms of long-compare. This is unfortunate because 12991 // both forms beat the trinary form of long-compare and both are very useful 12992 // on Intel which has so few registers. 12993 12994 // Manifest a CmpL result in an integer register. Very painful. 
// This is the test to avoid.
// Produces -1/0/+1 in dst for a 64-bit compare held in two 32-bit registers:
// signed compare of the high words decides first; on high-word equality the
// low words are compared UNSIGNED (below), which is the correct lexicographic
// rule for a two-word signed compare.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // Signed compare of high words first.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // High words equal: unsigned compare of low words.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  // Sign of the high word alone decides LT/GE against zero.
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Sets flags for a signed long LT/GE test via CMP-low / SBB-high
// (long_cmp_flags2 encoding, defined elsewhere in this file).
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}

// Manifest a CmpUL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
  expand %{
    jmpCon(cmp, flags, labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// As above, with the source loaded from memory.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags variant: delegates to the signed-register version.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
// Unsigned-flags pointer CMOVE: delegates to the signed-register version.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  // FIX: BoolTest disjunction parenthesized so the UseSSE guard applies to both
  // cases.  Previously '&&' bound tighter than '||', so the 'ge' alternative
  // matched regardless of UseSSE, unlike the integer cmov predicates above.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  // FIX: parenthesized (see cmovDDPR_reg_LTGE).
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form).
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  // FIX: parenthesized (see cmovDDPR_reg_LTGE).
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form).
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  // FIX: parenthesized (see cmovDDPR_reg_LTGE).
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  // A 64-bit value is zero iff the OR of its halves is zero.
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
// CmpUL against zero: EQ/NE decided by OR-ing the two halves.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  // Compare low halves; only if they are equal does the high compare decide.
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
  expand %{
    jmpCon(cmp, flags, labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// EQ/NE long-flags CMOVE of a long register pair.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-flags variants: delegate to the signed-flags versions.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  // FIX: BoolTest disjunction parenthesized so the UseSSE guard applies to both
  // cases.  Previously '&&' bound tighter than '||', so the 'ne' alternative
  // matched regardless of UseSSE, unlike the integer cmov predicates above.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // FIX: parenthesized (see cmovDDPR_reg_EQNE).
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  // FIX: parenthesized (see cmovDDPR_reg_EQNE).
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // FIX: parenthesized (see cmovDDPR_reg_EQNE).
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags LEGT CMOVEs: delegate to the signed-flags versions.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
// Unsigned pointer form; expands into the signed instruct above.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (x87 register file, UseSSE <= 1).
// NOTE: the (le || gt) disjunction is parenthesized so that the UseSSE guard
// covers BOTH arms; '&&' binds tighter than '||', so without the parentheses
// the BoolTest::gt arm would match regardless of the UseSSE setting and this
// rule could collide with its SSE counterpart below at equal cost.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM register file, UseSSE >= 2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 register file, UseSSE == 0).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM register file, UseSSE >= 1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
13680 instruct CallDynamicJavaDirect(method meth) %{ 13681 match(CallDynamicJava); 13682 effect(USE meth); 13683 13684 ins_cost(300); 13685 format %{ "MOV EAX,(oop)-1\n\t" 13686 "CALL,dynamic" %} 13687 opcode(0xE8); /* E8 cd */ 13688 ins_encode( pre_call_resets, 13689 Java_Dynamic_Call( meth ), 13690 call_epilog, 13691 post_call_FPU ); 13692 ins_pipe( pipe_slow ); 13693 ins_alignment(4); 13694 %} 13695 13696 // Call Runtime Instruction 13697 instruct CallRuntimeDirect(method meth) %{ 13698 match(CallRuntime ); 13699 effect(USE meth); 13700 13701 ins_cost(300); 13702 format %{ "CALL,runtime " %} 13703 opcode(0xE8); /* E8 cd */ 13704 // Use FFREEs to clear entries in float stack 13705 ins_encode( pre_call_resets, 13706 FFree_Float_Stack_All, 13707 Java_To_Runtime( meth ), 13708 post_call_FPU ); 13709 ins_pipe( pipe_slow ); 13710 %} 13711 13712 // Call runtime without safepoint 13713 instruct CallLeafDirect(method meth) %{ 13714 match(CallLeaf); 13715 effect(USE meth); 13716 13717 ins_cost(300); 13718 format %{ "CALL_LEAF,runtime " %} 13719 opcode(0xE8); /* E8 cd */ 13720 ins_encode( pre_call_resets, 13721 FFree_Float_Stack_All, 13722 Java_To_Runtime( meth ), 13723 Verify_FPU_For_Leaf, post_call_FPU ); 13724 ins_pipe( pipe_slow ); 13725 %} 13726 13727 instruct CallLeafNoFPDirect(method meth) %{ 13728 match(CallLeafNoFP); 13729 effect(USE meth); 13730 13731 ins_cost(300); 13732 format %{ "CALL_LEAF_NOFP,runtime " %} 13733 opcode(0xE8); /* E8 cd */ 13734 ins_encode(pre_call_resets, Java_To_Runtime(meth)); 13735 ins_pipe( pipe_slow ); 13736 %} 13737 13738 13739 // Return Instruction 13740 // Remove the return address & jump to it. 13741 instruct Ret() %{ 13742 match(Return); 13743 format %{ "RET" %} 13744 opcode(0xC3); 13745 ins_encode(OpcP); 13746 ins_pipe( pipe_jmp ); 13747 %} 13748 13749 // Tail Call; Jump from runtime stub to Java code. 13750 // Also known as an 'interprocedural jump'. 13751 // Target of jump will eventually return to caller. 
13752 // TailJump below removes the return address. 13753 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{ 13754 match(TailCall jump_target method_ptr); 13755 ins_cost(300); 13756 format %{ "JMP $jump_target \t# EBX holds method" %} 13757 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13758 ins_encode( OpcP, RegOpc(jump_target) ); 13759 ins_pipe( pipe_jmp ); 13760 %} 13761 13762 13763 // Tail Jump; remove the return address; jump to target. 13764 // TailCall above leaves the return address around. 13765 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{ 13766 match( TailJump jump_target ex_oop ); 13767 ins_cost(300); 13768 format %{ "POP EDX\t# pop return address into dummy\n\t" 13769 "JMP $jump_target " %} 13770 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13771 ins_encode( enc_pop_rdx, 13772 OpcP, RegOpc(jump_target) ); 13773 ins_pipe( pipe_jmp ); 13774 %} 13775 13776 // Create exception oop: created by stack-crawling runtime code. 13777 // Created exception is now available to this handler, and is setup 13778 // just prior to jumping to this handler. No code emitted. 13779 instruct CreateException( eAXRegP ex_oop ) 13780 %{ 13781 match(Set ex_oop (CreateEx)); 13782 13783 size(0); 13784 // use the following format syntax 13785 format %{ "# exception oop is in EAX; no code emitted" %} 13786 ins_encode(); 13787 ins_pipe( empty ); 13788 %} 13789 13790 13791 // Rethrow exception: 13792 // The exception oop will come in the first argument position. 13793 // Then JUMP (not call) to the rethrow stub code. 
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// RTM (Restricted Transactional Memory) fast-lock path. Register pinning:
// box in EBX (killed), tmp in EAX, scr in EDX; cx1/cx2 are extra temps for
// the RTM retry counters. The current thread is loaded into 'thread' here
// because 32-bit x86 has no dedicated thread register.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
                 _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast-lock path: same contract, no RTM counters (noreg/NULL passed
// for the RTM-only arguments of fast_lock).
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

// Fast-unlock; box is pinned in EAX and killed. use_rtm() is forwarded so
// fast_unlock can end a transactional region when RTM is active.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast a long into an opmask register for vector lengths <= 32.
// NOTE(review): the instruct is named ..._LT32 but the predicate is '<= 32'
// and the format string says LE32 — the name looks like the misnomer; the
// predicate is what ADLC honors.
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// As above for vector lengths > 32; needs a second opmask temp because on
// 32-bit x86 the 64-bit source spans two GP registers.
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Int-source variant of the > 32 case.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    // NOTE(review): post_pc is captured but unused; the guarantee only checks
    // the TEST opcode byte (0x85) at pre_pc. Candidate for cleanup.
    address post_pc = __ pc();
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...]
// );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active peephole: a load that immediately re-reads the value just stored to
// the same address is replaced by a second store (store-to-load forwarding
// at the IR level, eliminating the redundant load).
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.