//
// Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File
//
// NOTE(review): this is HotSpot ADL source, not plain C++. The register /
// reg_class sections below are ADL syntax; real C++ appears only inside the
// source_hpp %{ %} and source %{ %} blocks further down.

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
reg_class ebpd_reg( EBP,EDI );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2))
#define HIGH_FROM_LOW_ENC(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Nothing to precompute on x86_32; register masks are fully static here.
void reg_mask_init() {}

// Note: 'double' and 'long long' have 32-bits alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool pointer is a 16-byte-aligned slot inside fp_signmask_pool holding
// the 128-bit constant used by the corresponding SSE sign-mask/sign-flip op.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call for FPU-control-word
// restore and/or vzeroupper, so return-address offsets can account for them.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (VM_Version::supports_vzeroupper()) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  // 10 = 5-byte MOV (see compute_padding below) + 5-byte CALL.
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All sequence; set when it is first emitted.
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All);
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M byte assembled from the mod (f1), reg (f2) and r/m (f3) fields.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition-code field OR'd in.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}
// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Sanity-check embedded oop immediates (0 and the non-oop sentinel are allowed).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + ModR/M + SIB addressing [ESP+disp], using the short 8-bit
// displacement form when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModR/M (and, when needed, SIB) byte plus displacement for a
// register+memory operand, choosing the shortest legal x86 encoding.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Emit a reg-reg MOV (0x8B /r); a self-move is elided entirely.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// (return type of emit_cmpfp_fixup, whose definition continues below)
void
emit_cmpfp_fixup(MacroAssembler& _masm) {
  // After comiss/ucomiss: if PF is set (unordered/NaN), rewrite EFLAGS so the
  // result reads as 'less than' (CF set, ZF/PF cleared).
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in dst: -1 for below or
// unordered (NaN), 0 for equal, 1 for above.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog that MachPrologNode::emit produces; must be kept
// in sync with verified_entry's actual code shape.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


// Emit the method prolog; the heavy lifting (stack bang, frame push,
// FPU control word) is delegated to MacroAssembler::verified_entry.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog emitted below; must mirror MachEpilogNode::emit.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("CMPL rsp, poll_offset[thread] \n\t"
              "JA #safepoint_stub\t"
              "# Safepoint: poll for GC");
  }
}
#endif

// Emit the method epilog: optional vzeroupper / FPU-control-word restore,
// frame teardown, EBP pop, reserved-stack check, and return-poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize (imm32 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize (short imm8 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    Register thread = as_Register(EBX_enc);
    MacroAssembler masm(&cbuf);
    __ get_thread(thread);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      // Real emission: allocate a safepoint-poll stub and branch to it.
      C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset());
      C->output()->add_stub(stub);
      code_stub = &stub->entry();
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

//============================================================================= 734 735 enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack }; 736 static enum RC rc_class( OptoReg::Name reg ) { 737 738 if( !OptoReg::is_valid(reg) ) return rc_bad; 739 if (OptoReg::is_stack(reg)) return rc_stack; 740 741 VMReg r = OptoReg::as_VMReg(reg); 742 if (r->is_Register()) return rc_int; 743 if (r->is_FloatRegister()) { 744 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 745 return rc_float; 746 } 747 if (r->is_KRegister()) return rc_kreg; 748 assert(r->is_XMMRegister(), "must be"); 749 return rc_xmm; 750 } 751 752 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, 753 int opcode, const char *op_str, int size, outputStream* st ) { 754 if( cbuf ) { 755 emit_opcode (*cbuf, opcode ); 756 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 757 #ifndef PRODUCT 758 } else if( !do_size ) { 759 if( size != 0 ) st->print("\n\t"); 760 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 761 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 762 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 763 } else { // FLD, FST, PUSH, POP 764 st->print("%s [ESP + #%d]",op_str,offset); 765 } 766 #endif 767 } 768 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 769 return size+3+offset_size; 770 } 771 772 // Helper for XMM registers. Extra opcode bits, limited syntax. 
// Spill/fill between an XMM register and an [ESP + offset] stack slot.
// reg_lo+1 == reg_hi marks a 64-bit (double) move, otherwise 32-bit (float).
// Emits when cbuf is set, prints when not (unless do_size); returns the
// accumulated size including the EVEX/VEX/SIMD prefix and displacement.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // With EVEX the displacement may be compressed to a single byte even when
  // it exceeds 127; query the assembler for the actual encoding.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy (32-bit float or 64-bit double, selected by the
// lo/hi pair adjacency).  Emits, prints, or sizes like impl_x_helper.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// 32-bit GPR -> XMM copy via MOVD.  Returns the fixed instruction size
// (6 with EVEX prefix, else 4).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// 32-bit XMM -> GPR copy via MOVD.  Returns the fixed instruction size
// (6 with EVEX prefix, else 4).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer register-to-register copy: MOV r32, r/m32 (0x8B).  2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 stack register to [ESP + offset].  If the source is not
// already ST(0) it is first pushed with FLD, then stored-and-popped (FSTP);
// otherwise a plain FST leaves the stack unchanged.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // EBX_num/EDX_num are borrowed purely for their register encodings, which
  // select the FSTP ("store & pop") vs FST ("store no pop") modrm digit.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                                         // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
950 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, 951 int src_hi, int dst_hi, uint ireg, outputStream* st); 952 953 void vec_spill_helper(CodeBuffer *cbuf, bool is_load, 954 int stack_offset, int reg, uint ireg, outputStream* st); 955 956 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, 957 int dst_offset, uint ireg, outputStream* st) { 958 if (cbuf) { 959 MacroAssembler _masm(cbuf); 960 switch (ireg) { 961 case Op_VecS: 962 __ pushl(Address(rsp, src_offset)); 963 __ popl (Address(rsp, dst_offset)); 964 break; 965 case Op_VecD: 966 __ pushl(Address(rsp, src_offset)); 967 __ popl (Address(rsp, dst_offset)); 968 __ pushl(Address(rsp, src_offset+4)); 969 __ popl (Address(rsp, dst_offset+4)); 970 break; 971 case Op_VecX: 972 __ movdqu(Address(rsp, -16), xmm0); 973 __ movdqu(xmm0, Address(rsp, src_offset)); 974 __ movdqu(Address(rsp, dst_offset), xmm0); 975 __ movdqu(xmm0, Address(rsp, -16)); 976 break; 977 case Op_VecY: 978 __ vmovdqu(Address(rsp, -32), xmm0); 979 __ vmovdqu(xmm0, Address(rsp, src_offset)); 980 __ vmovdqu(Address(rsp, dst_offset), xmm0); 981 __ vmovdqu(xmm0, Address(rsp, -32)); 982 break; 983 case Op_VecZ: 984 __ evmovdquq(Address(rsp, -64), xmm0, 2); 985 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 986 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 987 __ evmovdquq(xmm0, Address(rsp, -64), 2); 988 break; 989 default: 990 ShouldNotReachHere(); 991 } 992 #ifndef PRODUCT 993 } else { 994 switch (ireg) { 995 case Op_VecS: 996 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 997 "popl [rsp + #%d]", 998 src_offset, dst_offset); 999 break; 1000 case Op_VecD: 1001 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1002 "popq [rsp + #%d]\n\t" 1003 "pushl [rsp + #%d]\n\t" 1004 "popq [rsp + #%d]", 1005 src_offset, dst_offset, src_offset+4, dst_offset+4); 1006 break; 1007 case Op_VecX: 1008 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1009 "movdqu xmm0, [rsp + #%d]\n\t" 1010 
"movdqu [rsp + #%d], xmm0\n\t" 1011 "movdqu xmm0, [rsp - #16]", 1012 src_offset, dst_offset); 1013 break; 1014 case Op_VecY: 1015 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1016 "vmovdqu xmm0, [rsp + #%d]\n\t" 1017 "vmovdqu [rsp + #%d], xmm0\n\t" 1018 "vmovdqu xmm0, [rsp - #32]", 1019 src_offset, dst_offset); 1020 break; 1021 case Op_VecZ: 1022 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1023 "vmovdqu xmm0, [rsp + #%d]\n\t" 1024 "vmovdqu [rsp + #%d], xmm0\n\t" 1025 "vmovdqu xmm0, [rsp - #64]", 1026 src_offset, dst_offset); 1027 break; 1028 default: 1029 ShouldNotReachHere(); 1030 } 1031 #endif 1032 } 1033 } 1034 1035 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1036 // Get registers to move 1037 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1038 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1039 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1040 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1041 1042 enum RC src_second_rc = rc_class(src_second); 1043 enum RC src_first_rc = rc_class(src_first); 1044 enum RC dst_second_rc = rc_class(dst_second); 1045 enum RC dst_first_rc = rc_class(dst_first); 1046 1047 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1048 1049 // Generate spill code! 
1050 int size = 0; 1051 1052 if( src_first == dst_first && src_second == dst_second ) 1053 return size; // Self copy, no move 1054 1055 if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) { 1056 uint ireg = ideal_reg(); 1057 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1058 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1059 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1060 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1061 // mem -> mem 1062 int src_offset = ra_->reg2offset(src_first); 1063 int dst_offset = ra_->reg2offset(dst_first); 1064 vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); 1065 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1066 vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st); 1067 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1068 int stack_offset = ra_->reg2offset(dst_first); 1069 vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st); 1070 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1071 int stack_offset = ra_->reg2offset(src_first); 1072 vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st); 1073 } else { 1074 ShouldNotReachHere(); 1075 } 1076 return 0; 1077 } 1078 1079 // -------------------------------------- 1080 // Check for mem-mem move. push/pop to move. 
1081 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1082 if( src_second == dst_first ) { // overlapping stack copy ranges 1083 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1084 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1085 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1086 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1087 } 1088 // move low bits 1089 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1090 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1091 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1092 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1093 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1094 } 1095 return size; 1096 } 1097 1098 // -------------------------------------- 1099 // Check for integer reg-reg copy 1100 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1101 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1102 1103 // Check for integer store 1104 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1105 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1106 1107 // Check for integer load 1108 if( src_first_rc == rc_stack && dst_first_rc == rc_int ) 1109 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1110 1111 // Check for integer reg-xmm reg copy 1112 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1113 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1114 "no 64 bit integer-float reg moves" ); 1115 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1116 } 1117 // -------------------------------------- 1118 // Check for float reg-reg copy 1119 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1120 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1121 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1122 if( cbuf ) { 1123 1124 // Note the mucking with the register encode to compensate for the 0/1 1125 // indexing issue mentioned in a comment in the reg_def sections 1126 // for FPR registers many lines above here. 1127 1128 if( src_first != FPR1L_num ) { 1129 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1130 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1131 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1132 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1133 } else { 1134 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1135 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1136 } 1137 #ifndef PRODUCT 1138 } else if( !do_size ) { 1139 if( size != 0 ) st->print("\n\t"); 1140 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1141 else st->print( "FST %s", Matcher::regName[dst_first]); 1142 #endif 1143 } 1144 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1145 } 1146 1147 // Check for float store 1148 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1149 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1150 } 1151 1152 // Check for float load 1153 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1154 int offset = ra_->reg2offset(src_first); 1155 const char *op_str; 1156 int op; 1157 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1158 op_str = "FLD_D"; 1159 op = 0xDD; 1160 } else { // 32-bit load 1161 op_str = "FLD_S"; 1162 op = 0xD9; 1163 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1164 } 1165 if( cbuf ) { 1166 emit_opcode (*cbuf, op ); 1167 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1168 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1169 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1170 #ifndef PRODUCT 1171 } else if( !do_size ) { 1172 if( size != 0 ) st->print("\n\t"); 1173 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1174 #endif 1175 } 1176 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1177 return size + 3+offset_size+2; 1178 } 1179 1180 // Check for xmm reg-reg copy 1181 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1182 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1183 (src_first+1 == src_second && dst_first+1 == dst_second), 1184 "no non-adjacent float-moves" ); 1185 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1186 } 1187 1188 // Check for xmm reg-integer reg copy 1189 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1190 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1191 "no 64 bit float-integer reg moves" ); 1192 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1193 } 1194 1195 // Check for xmm store 1196 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1197 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st); 1198 } 1199 1200 // Check for float xmm load 1201 if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1202 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1203 } 1204 1205 // Copy from float reg to xmm reg 1206 if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) { 1207 
// copy to the top of stack from floating point reg 1208 // and use LEA to preserve flags 1209 if( cbuf ) { 1210 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1211 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1212 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1213 emit_d8(*cbuf,0xF8); 1214 #ifndef PRODUCT 1215 } else if( !do_size ) { 1216 if( size != 0 ) st->print("\n\t"); 1217 st->print("LEA ESP,[ESP-8]"); 1218 #endif 1219 } 1220 size += 4; 1221 1222 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1223 1224 // Copy from the temp memory to the xmm reg. 1225 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1226 1227 if( cbuf ) { 1228 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1229 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1230 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1231 emit_d8(*cbuf,0x08); 1232 #ifndef PRODUCT 1233 } else if( !do_size ) { 1234 if( size != 0 ) st->print("\n\t"); 1235 st->print("LEA ESP,[ESP+8]"); 1236 #endif 1237 } 1238 size += 4; 1239 return size; 1240 } 1241 1242 // AVX-512 opmask specific spilling. 
1243 if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) { 1244 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1245 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1246 int offset = ra_->reg2offset(src_first); 1247 if (cbuf != nullptr) { 1248 MacroAssembler _masm(cbuf); 1249 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); 1250 #ifndef PRODUCT 1251 } else { 1252 st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset); 1253 #endif 1254 } 1255 return 0; 1256 } 1257 1258 if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) { 1259 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1260 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1261 int offset = ra_->reg2offset(dst_first); 1262 if (cbuf != nullptr) { 1263 MacroAssembler _masm(cbuf); 1264 __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first])); 1265 #ifndef PRODUCT 1266 } else { 1267 st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]); 1268 #endif 1269 } 1270 return 0; 1271 } 1272 1273 if (src_first_rc == rc_kreg && dst_first_rc == rc_int) { 1274 Unimplemented(); 1275 return 0; 1276 } 1277 1278 if (src_first_rc == rc_int && dst_first_rc == rc_kreg) { 1279 Unimplemented(); 1280 return 0; 1281 } 1282 1283 if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) { 1284 assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); 1285 assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); 1286 if (cbuf != nullptr) { 1287 MacroAssembler _masm(cbuf); 1288 __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first])); 1289 #ifndef PRODUCT 1290 } else { 1291 st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]); 1292 #endif 1293 } 1294 return 0; 1295 } 1296 1297 
assert( size > 0, "missed a case" ); 1298 1299 // -------------------------------------------------------------------- 1300 // Check for second bits still needing moving. 1301 if( src_second == dst_second ) 1302 return size; // Self copy; no move 1303 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1304 1305 // Check for second word int-int move 1306 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1307 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1308 1309 // Check for second word integer store 1310 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1311 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1312 1313 // Check for second word integer load 1314 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1315 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1316 1317 Unimplemented(); 1318 return 0; // Mute compiler 1319 } 1320 1321 #ifndef PRODUCT 1322 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1323 implementation( NULL, ra_, false, st ); 1324 } 1325 #endif 1326 1327 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1328 implementation( &cbuf, ra_, false, NULL ); 1329 } 1330 1331 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1332 return MachNode::size(ra_); 1333 } 1334 1335 1336 //============================================================================= 1337 #ifndef PRODUCT 1338 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1339 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1340 int reg = ra_->get_reg_first(this); 1341 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1342 } 1343 #endif 1344 1345 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1346 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1347 
int reg = ra_->get_encode(this); 1348 if( offset >= 128 ) { 1349 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1350 emit_rm(cbuf, 0x2, reg, 0x04); 1351 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1352 emit_d32(cbuf, offset); 1353 } 1354 else { 1355 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1356 emit_rm(cbuf, 0x1, reg, 0x04); 1357 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1358 emit_d8(cbuf, offset); 1359 } 1360 } 1361 1362 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1363 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1364 if( offset >= 128 ) { 1365 return 7; 1366 } 1367 else { 1368 return 4; 1369 } 1370 } 1371 1372 //============================================================================= 1373 #ifndef PRODUCT 1374 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1375 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1376 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1377 st->print_cr("\tNOP"); 1378 st->print_cr("\tNOP"); 1379 if( !OptoBreakpoint ) 1380 st->print_cr("\tNOP"); 1381 } 1382 #endif 1383 1384 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1385 MacroAssembler masm(&cbuf); 1386 #ifdef ASSERT 1387 uint insts_size = cbuf.insts_size(); 1388 #endif 1389 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1390 masm.jump_cc(Assembler::notEqual, 1391 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1392 /* WARNING these NOPs are critical so that verified entry point is properly 1393 aligned for patching by NativeJump::patch_verified_entry() */ 1394 int nops_cnt = 2; 1395 if( !OptoBreakpoint ) // Leave space for int3 1396 nops_cnt += 1; 1397 masm.nop(nops_cnt); 1398 1399 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1400 } 1401 1402 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1403 return OptoBreakpoint ? 
11 : 12; 1404 } 1405 1406 1407 //============================================================================= 1408 1409 // Vector calling convention not supported. 1410 bool Matcher::supports_vector_calling_convention() { 1411 return false; 1412 } 1413 1414 OptoRegPair Matcher::vector_return_value(uint ideal_reg) { 1415 Unimplemented(); 1416 return OptoRegPair(0, 0); 1417 } 1418 1419 // Is this branch offset short enough that a short branch can be used? 1420 // 1421 // NOTE: If the platform does not provide any short branch variants, then 1422 // this method should return false for offset 0. 1423 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1424 // The passed offset is relative to address of the branch. 1425 // On 86 a branch displacement is calculated relative to address 1426 // of a next instruction. 1427 offset -= br_size; 1428 1429 // the short version of jmpConUCF2 contains multiple branches, 1430 // making the reach slightly less 1431 if (rule == jmpConUCF2_rule) 1432 return (-126 <= offset && offset <= 125); 1433 return (-128 <= offset && offset <= 127); 1434 } 1435 1436 // Return whether or not this register is ever used as an argument. This 1437 // function is used on startup to build the trampoline stubs in generateOptoStub. 1438 // Registers not mentioned will be killed by the VM call in the trampoline, and 1439 // arguments in those registers not be available to the callee. 1440 bool Matcher::can_be_java_arg( int reg ) { 1441 if( reg == ECX_num || reg == EDX_num ) return true; 1442 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1443 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1444 return false; 1445 } 1446 1447 bool Matcher::is_spillable_arg( int reg ) { 1448 return can_be_java_arg(reg); 1449 } 1450 1451 uint Matcher::int_pressure_limit() 1452 { 1453 return (INTPRESSURE == -1) ? 
6 : INTPRESSURE; 1454 } 1455 1456 uint Matcher::float_pressure_limit() 1457 { 1458 return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE; 1459 } 1460 1461 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1462 // Use hardware integer DIV instruction when 1463 // it is faster than a code which use multiply. 1464 // Only when constant divisor fits into 32 bit 1465 // (min_jint is excluded to get only correct 1466 // positive 32 bit values from negative). 1467 return VM_Version::has_fast_idiv() && 1468 (divisor == (int)divisor && divisor != min_jint); 1469 } 1470 1471 // Register for DIVI projection of divmodI 1472 RegMask Matcher::divI_proj_mask() { 1473 return EAX_REG_mask(); 1474 } 1475 1476 // Register for MODI projection of divmodI 1477 RegMask Matcher::modI_proj_mask() { 1478 return EDX_REG_mask(); 1479 } 1480 1481 // Register for DIVL projection of divmodL 1482 RegMask Matcher::divL_proj_mask() { 1483 ShouldNotReachHere(); 1484 return RegMask(); 1485 } 1486 1487 // Register for MODL projection of divmodL 1488 RegMask Matcher::modL_proj_mask() { 1489 ShouldNotReachHere(); 1490 return RegMask(); 1491 } 1492 1493 const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1494 return NO_REG_mask(); 1495 } 1496 1497 // Returns true if the high 32 bits of the value is known to be zero. 1498 bool is_operand_hi32_zero(Node* n) { 1499 int opc = n->Opcode(); 1500 if (opc == Op_AndL) { 1501 Node* o2 = n->in(2); 1502 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1503 return true; 1504 } 1505 } 1506 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1507 return true; 1508 } 1509 return false; 1510 } 1511 1512 %} 1513 1514 //----------ENCODING BLOCK----------------------------------------------------- 1515 // This block specifies the encoding classes used by the compiler to output 1516 // byte streams. 
Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently four supported interfaces:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.  CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.  MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.
In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Emit a mod=11 (register-to-register) ModRM byte
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Emit an explicit opcode followed by a register-to-register ModRM byte
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Zero a register: MOV r32, 0 (0xB8+rd imm32)
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd  -- MOV r32 ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
1579 // 1580 // normal case special case 1581 // 1582 // input : rax,: dividend min_int 1583 // reg: divisor -1 1584 // 1585 // output: rax,: quotient (= rax, idiv reg) min_int 1586 // rdx: remainder (= rax, irem reg) 0 1587 // 1588 // Code sequnce: 1589 // 1590 // 81 F8 00 00 00 80 cmp rax,80000000h 1591 // 0F 85 0B 00 00 00 jne normal_case 1592 // 33 D2 xor rdx,edx 1593 // 83 F9 FF cmp rcx,0FFh 1594 // 0F 84 03 00 00 00 je done 1595 // normal_case: 1596 // 99 cdq 1597 // F7 F9 idiv rax,ecx 1598 // done: 1599 // 1600 emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8); 1601 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); 1602 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h 1603 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85); 1604 emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00); 1605 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case 1606 emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx 1607 emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh 1608 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84); 1609 emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00); 1610 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done 1611 // normal_case: 1612 emit_opcode(cbuf,0x99); // cdq 1613 // idiv (note: must be emitted by the user of this rule) 1614 // normal: 1615 %} 1616 1617 // Dense encoding for older common ops 1618 enc_class Opc_plus(immI opcode, rRegI reg) %{ 1619 emit_opcode(cbuf, $opcode$$constant + $reg$$reg); 1620 %} 1621 1622 1623 // Opcde enc_class for 8/32 bit immediate instructions with sign-extension 1624 enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit 1625 // Check for 8-bit immediate, and set sign extend bit in opcode 1626 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1627 emit_opcode(cbuf, $primary | 0x02); 1628 } 1629 else { // If 32-bit immediate 1630 emit_opcode(cbuf, $primary); 1631 } 1632 %} 1633 1634 enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m 1635 // Emit primary opcode and 
set sign-extend bit 1636 // Check for 8-bit immediate, and set sign extend bit in opcode 1637 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1638 emit_opcode(cbuf, $primary | 0x02); } 1639 else { // If 32-bit immediate 1640 emit_opcode(cbuf, $primary); 1641 } 1642 // Emit r/m byte with secondary opcode, after primary opcode. 1643 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1644 %} 1645 1646 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1647 // Check for 8-bit immediate, and set sign extend bit in opcode 1648 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1649 $$$emit8$imm$$constant; 1650 } 1651 else { // If 32-bit immediate 1652 // Output immediate 1653 $$$emit32$imm$$constant; 1654 } 1655 %} 1656 1657 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1658 // Emit primary opcode and set sign-extend bit 1659 // Check for 8-bit immediate, and set sign extend bit in opcode 1660 int con = (int)$imm$$constant; // Throw away top bits 1661 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1662 // Emit r/m byte with secondary opcode, after primary opcode. 1663 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1664 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1665 else emit_d32(cbuf,con); 1666 %} 1667 1668 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1669 // Emit primary opcode and set sign-extend bit 1670 // Check for 8-bit immediate, and set sign extend bit in opcode 1671 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1672 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1673 // Emit r/m byte with tertiary opcode, after primary opcode. 
1674 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg)); 1675 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1676 else emit_d32(cbuf,con); 1677 %} 1678 1679 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1680 emit_cc(cbuf, $secondary, $dst$$reg ); 1681 %} 1682 1683 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1684 int destlo = $dst$$reg; 1685 int desthi = HIGH_FROM_LOW_ENC(destlo); 1686 // bswap lo 1687 emit_opcode(cbuf, 0x0F); 1688 emit_cc(cbuf, 0xC8, destlo); 1689 // bswap hi 1690 emit_opcode(cbuf, 0x0F); 1691 emit_cc(cbuf, 0xC8, desthi); 1692 // xchg lo and hi 1693 emit_opcode(cbuf, 0x87); 1694 emit_rm(cbuf, 0x3, destlo, desthi); 1695 %} 1696 1697 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1698 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1699 %} 1700 1701 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1702 $$$emit8$primary; 1703 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1704 %} 1705 1706 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1707 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1708 emit_d8(cbuf, op >> 8 ); 1709 emit_d8(cbuf, op & 255); 1710 %} 1711 1712 // emulate a CMOV with a conditional branch around a MOV 1713 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1714 // Invert sense of branch from sense of CMOV 1715 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1716 emit_d8( cbuf, $brOffs$$constant ); 1717 %} 1718 1719 enc_class enc_PartialSubtypeCheck( ) %{ 1720 Register Redi = as_Register(EDI_enc); // result register 1721 Register Reax = as_Register(EAX_enc); // super class 1722 Register Recx = as_Register(ECX_enc); // killed 1723 Register Resi = as_Register(ESI_enc); // sub class 1724 Label miss; 1725 1726 MacroAssembler _masm(&cbuf); 1727 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1728 NULL, &miss, 1729 /*set_cond_codes:*/ true); 1730 if ($primary) { 1731 __ xorptr(Redi, Redi); 1732 } 1733 __ bind(miss); 1734 %} 1735 1736 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 
1737 MacroAssembler masm(&cbuf); 1738 int start = masm.offset(); 1739 if (UseSSE >= 2) { 1740 if (VerifyFPU) { 1741 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1742 } 1743 } else { 1744 // External c_calling_convention expects the FPU stack to be 'clean'. 1745 // Compiled code leaves it dirty. Do cleanup now. 1746 masm.empty_FPU_stack(); 1747 } 1748 if (sizeof_FFree_Float_Stack_All == -1) { 1749 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1750 } else { 1751 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1752 } 1753 %} 1754 1755 enc_class Verify_FPU_For_Leaf %{ 1756 if( VerifyFPU ) { 1757 MacroAssembler masm(&cbuf); 1758 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1759 } 1760 %} 1761 1762 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1763 // This is the instruction starting address for relocation info. 1764 MacroAssembler _masm(&cbuf); 1765 cbuf.set_insts_mark(); 1766 $$$emit8$primary; 1767 // CALL directly to the runtime 1768 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1769 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1770 __ post_call_nop(); 1771 1772 if (UseSSE >= 2) { 1773 MacroAssembler _masm(&cbuf); 1774 BasicType rt = tf()->return_type(); 1775 1776 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1777 // A C runtime call where the return value is unused. In SSE2+ 1778 // mode the result needs to be removed from the FPU stack. It's 1779 // likely that this function call could be removed by the 1780 // optimizer if the C function is a pure function. 
1781 __ ffree(0); 1782 } else if (rt == T_FLOAT) { 1783 __ lea(rsp, Address(rsp, -4)); 1784 __ fstp_s(Address(rsp, 0)); 1785 __ movflt(xmm0, Address(rsp, 0)); 1786 __ lea(rsp, Address(rsp, 4)); 1787 } else if (rt == T_DOUBLE) { 1788 __ lea(rsp, Address(rsp, -8)); 1789 __ fstp_d(Address(rsp, 0)); 1790 __ movdbl(xmm0, Address(rsp, 0)); 1791 __ lea(rsp, Address(rsp, 8)); 1792 } 1793 } 1794 %} 1795 1796 enc_class pre_call_resets %{ 1797 // If method sets FPU control word restore it here 1798 debug_only(int off0 = cbuf.insts_size()); 1799 if (ra_->C->in_24_bit_fp_mode()) { 1800 MacroAssembler _masm(&cbuf); 1801 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 1802 } 1803 // Clear upper bits of YMM registers when current compiled code uses 1804 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1805 MacroAssembler _masm(&cbuf); 1806 __ vzeroupper(); 1807 debug_only(int off1 = cbuf.insts_size()); 1808 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1809 %} 1810 1811 enc_class post_call_FPU %{ 1812 // If method sets FPU control word do it here also 1813 if (Compile::current()->in_24_bit_fp_mode()) { 1814 MacroAssembler masm(&cbuf); 1815 masm.fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 1816 } 1817 %} 1818 1819 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1820 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1821 // who we intended to call. 1822 MacroAssembler _masm(&cbuf); 1823 cbuf.set_insts_mark(); 1824 $$$emit8$primary; 1825 1826 if (!_method) { 1827 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1828 runtime_call_Relocation::spec(), 1829 RELOC_IMM32); 1830 __ post_call_nop(); 1831 } else { 1832 int method_index = resolved_method_index(cbuf); 1833 RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) 1834 : static_call_Relocation::spec(method_index); 1835 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1836 rspec, RELOC_DISP32); 1837 __ post_call_nop(); 1838 address mark = cbuf.insts_mark(); 1839 if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) { 1840 // Calls of the same statically bound method can share 1841 // a stub to the interpreter. 1842 cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off()); 1843 } else { 1844 // Emit stubs for static call. 1845 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark); 1846 if (stub == NULL) { 1847 ciEnv::current()->record_failure("CodeCache is full"); 1848 return; 1849 } 1850 } 1851 } 1852 %} 1853 1854 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1855 MacroAssembler _masm(&cbuf); 1856 __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); 1857 __ post_call_nop(); 1858 %} 1859 1860 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1861 int disp = in_bytes(Method::from_compiled_offset()); 1862 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1863 1864 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1865 MacroAssembler _masm(&cbuf); 1866 cbuf.set_insts_mark(); 1867 $$$emit8$primary; 1868 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1869 emit_d8(cbuf, disp); // Displacement 1870 __ post_call_nop(); 1871 %} 1872 1873 // Following encoding is no longer used, but may be restored if calling 1874 // convention changes significantly. 
// Became: Xor_Reg(EBP), Java_To_Runtime( labl )
//
// enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
//   // int ic_reg     = Matcher::inline_cache_reg();
//   // int ic_encode  = Matcher::_regEncode[ic_reg];
//   // int imo_reg    = Matcher::interpreter_method_reg();
//   // int imo_encode = Matcher::_regEncode[imo_reg];
//
//   // // Interpreter expects method_ptr in EBX, currently a callee-saved register,
//   // // so we load it immediately before the call
//   // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_ptr
//   // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
//
//   // xor rbp,ebp
//   emit_opcode(cbuf, 0x33);
//   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
//
//   // CALL to interpreter.
//   cbuf.set_insts_mark();
//   $$$emit8$primary;
//   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
//                  runtime_call_Relocation::spec(), RELOC_IMM32 );
// %}

enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  $$$emit8$shift$$constant;
%}

enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(cbuf, 0xB8 + $dst$$reg);
  $$$emit32$src$$constant;
%}

enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  emit_opcode(cbuf, $primary + $dst$$reg);
  $$$emit32$src$$constant;
%}

enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  int dst_enc = $dst$$reg;
  int src_con = $src$$constant & 0x0FFFFFFFFL;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(cbuf, $primary + dst_enc);
    emit_d32(cbuf, src_con);
  }
%}

enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
  // Load immediate does not have a zero or sign extended version
  // for 8-bit immediates
  int dst_enc = $dst$$reg + 2;
  int src_con = ((julong)($src$$constant)) >> 32;
  if (src_con == 0) {
    // xor dst, dst
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, dst_enc, dst_enc);
  } else {
    emit_opcode(cbuf, $primary + dst_enc);
    emit_d32(cbuf, src_con);
  }
%}


// Encode a reg-reg copy.  If it is useless, then empty encoding.
enc_class enc_Copy( rRegI dst, rRegI src ) %{
  encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}

enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
  encode_Copy( cbuf, $dst$$reg, $src$$reg );
%}

enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
  $$$emit8$secondary;
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
%}

enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg));
%}

enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg));
%}

enc_class Con32 (immI src) %{    // Con32(storeImmI)
  // Output immediate
  $$$emit32$src$$constant;
%}

enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int    jf_as_bits = jint_cast( jf );
  emit_d32(cbuf, jf_as_bits);
%}

enc_class Con32F_as_bits(immF src) %{      // storeX_imm
  // Output Float immediate bits
  jfloat jf = $src$$constant;
  int    jf_as_bits = jint_cast( jf );
  emit_d32(cbuf, jf_as_bits);
%}

enc_class Con16 (immI src) %{    // Con16(storeImmI)
  // Output immediate
  $$$emit16$src$$constant;
%}

enc_class Con_d32(immI src) %{
  emit_d32(cbuf,$src$$constant);
%}

enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
  // Output immediate memory reference
  emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
  emit_d32(cbuf, 0x00);
%}

enc_class lock_prefix( ) %{
  emit_opcode(cbuf,0xF0);         // [Lock]
%}

// Cmp-xchg long value.
// Note: we need to swap rbx, and rcx before and after the
//       cmpxchg8 instruction because the instruction uses
//       rcx as the high order word of the new value to store but
//       our register encoding uses rbx,.
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
  // [Lock]
  emit_opcode(cbuf,0xF0);
  // CMPXCHG8 [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xC7);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  // XCHG  rbx,ecx
  emit_opcode(cbuf,0x87);
  emit_opcode(cbuf,0xD9);
%}

enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // CMPXCHG [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // CMPXCHGB [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB0);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
  // [Lock]
  emit_opcode(cbuf,0xF0);

  // 16-bit mode
  emit_opcode(cbuf, 0x66);

  // CMPXCHGW [Eptr]
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xB1);
  emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
%}

// Materialize the ZF-not-equal condition as a 0/1 value in 'res'.
enc_class enc_flags_ne_to_boolean( iRegI res ) %{
  int res_encoding = $res$$reg;

  // MOV  res,0
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 0 );
  // JNE,s  fail
  emit_opcode(cbuf,0x75);
  emit_d8(cbuf, 5 );
  // MOV  res,1
  emit_opcode( cbuf, 0xB8 + res_encoding);
  emit_d32( cbuf, 1 );
  // fail:
%}

enc_class set_instruction_start( ) %{
  cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
%}

enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = $ereg$$reg;
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc();
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
  int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg);  // Hi register of pair, computed from lo
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp + 4;      // Offset is 4 further in memory
  assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
%}

enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
  // $tertiary selects shift direction (0xA4 = SHLD); pick operand order to match.
  int r1, r2;
  if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,$tertiary);
  emit_rm(cbuf, 0x3, r1, r2);
  emit_d8(cbuf,$cnt$$constant);
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, r1);
  emit_d8(cbuf,$cnt$$constant);
%}

enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
  emit_opcode( cbuf, 0x8B ); // Move
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_d8(cbuf,$primary);
  emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg));
  emit_d8(cbuf,31);
%}

enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
  int r1, r2;
  if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW_ENC($dst$$reg); }
  else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW_ENC($dst$$reg); }

  emit_opcode( cbuf, 0x8B ); // Move r1,r2
  emit_rm(cbuf, 0x3, r1, r2);
  if( $cnt$$constant > 32 ) { // Shift, if not by zero
    emit_opcode(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant-32);
  }
  emit_opcode(cbuf,0x33);  // XOR r2,r2
  emit_rm(cbuf, 0x3, r2, r2);
%}

// Clone of RegMem but accepts an extra parameter to access each
// half of a double in memory; it never needs relocation info.
enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
  emit_opcode(cbuf,$opcode$$constant);
  int reg_encoding = $rm_reg$$reg;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp + $disp_for_half$$constant;
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

// !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
//
// Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
// and it never needs relocation information.
// Frequently used to move data between FPU's Stack Top and memory.
enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
%}

enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
  int rm_byte_opcode = $rm_opcode$$constant;
  int base     = $mem$$base;
  int index    = $mem$$index;
  int scale    = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
%}

enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
  int reg_encoding = $dst$$reg;
  int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
  int index        = 0x04;            // 0x04 indicates no index
  int scale        = 0x00;            // 0x00 indicates no scale
  int displace     = $src1$$constant; // 0x00 indicates no displacement
  relocInfo::relocType disp_reloc = relocInfo::none;
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst < src around move
  emit_opcode(cbuf,0x7C);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
  // Compare dst,src
  emit_opcode(cbuf,0x3B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  // jmp dst > src around move
  emit_opcode(cbuf,0x7F);
  emit_d8(cbuf,2);
  // move dst,src
  emit_opcode(cbuf,0x8B);
  emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
%}

enc_class enc_FPR_store(memory mem, regDPR src) %{
  // If src is FPR1, we can just FST to store it.
  // Else we need to FLD it to FPR1, then FSTP to store/pop it.
  int reg_encoding = 0x2; // Just store
  int base  = $mem$$base;
  int index = $mem$$index;
  int scale = $mem$$scale;
  int displace = $mem$$disp;
  relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
  if( $src$$reg != FPR1L_enc ) {
    reg_encoding = 0x3;           // Store & pop
    emit_opcode( cbuf, 0xD9 );    // FLD (i.e., push it)
    emit_d8( cbuf, 0xC0-1+$src$$reg );
  }
  cbuf.set_insts_mark();          // Mark start of opcode for reloc info in mem operand
  emit_opcode(cbuf,$primary);
  encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
%}

enc_class neg_reg(rRegI dst) %{
  // NEG $dst
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
%}

enc_class setLT_reg(eCXRegI dst) %{
  // SETLT $dst
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0x9C);
  emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
%}

enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
  int tmpReg = $tmp$$reg;

  // SUB $p,$q
  emit_opcode(cbuf,0x2B);
  emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
  // SBB $tmp,$tmp
  emit_opcode(cbuf,0x1B);
  emit_rm(cbuf, 0x3, tmpReg, tmpReg);
  // AND $tmp,$y
  emit_opcode(cbuf,0x23);
  emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
  // ADD $p,$tmp
  emit_opcode(cbuf,0x03);
  emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
%}

enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.hi,$dst.lo
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
  // CLR $dst.lo
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
  // small:
  // SHLD $dst.hi,$dst.lo,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xA5);
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg));
  // SHL $dst.lo,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
%}

enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x04);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
  // CLR $dst.hi
  emit_opcode(cbuf, 0x33);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg));
  // small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
  // SHR $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) );
%}

enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
  // TEST shift,32
  emit_opcode(cbuf,0xF7);
  emit_rm(cbuf, 0x3, 0, ECX_enc);
  emit_d32(cbuf,0x20);
  // JEQ,s small
  emit_opcode(cbuf, 0x74);
  emit_d8(cbuf, 0x05);
  // MOV $dst.lo,$dst.hi
  emit_opcode( cbuf, 0x8B );
  emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
  // SAR $dst.hi,31
  emit_opcode(cbuf, 0xC1);
  emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) );
  emit_d8(cbuf, 0x1F );
  // small:
  // SHRD $dst.lo,$dst.hi,$shift
  emit_opcode(cbuf,0x0F);
  emit_opcode(cbuf,0xAD);
  emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg);
  // SAR $dst.hi,$shift"
  emit_opcode(cbuf,0xD3);
  emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) );
%}


// ----------------- Encodings for floating point unit -----------------
// May leave result in FPU-TOS or FPU reg depending on opcodes
enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
  $$$emit8$primary;
  emit_rm(cbuf, 0x3, $secondary, $src$$reg );
%}

// Pop argument in FPR0 with FSTP ST(0)
enc_class PopFPU() %{
  emit_opcode( cbuf, 0xDD );
  emit_d8( cbuf, 0xD8 );
%}

// !!!!! equivalent to Pop_Reg_F
enc_class Pop_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
  emit_d8( cbuf, 0xD8+$dst$$reg );
%}

enc_class Push_Reg_DPR( regDPR dst ) %{
  emit_opcode( cbuf, 0xD9 );
  emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
%}

enc_class strictfp_bias1( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

enc_class strictfp_bias2( regDPR dst ) %{
  emit_opcode( cbuf, 0xDB );           // FLD m80real
  emit_opcode( cbuf, 0x2D );
  emit_d32( cbuf, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() );
  emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
  emit_opcode( cbuf, 0xC8+$dst$$reg );
%}

// Special case for moving an integer register to a stack slot.
enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
  store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
%}

// Special case for moving a register to a stack slot.
2399 enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS 2400 // Opcode already emitted 2401 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte 2402 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 2403 emit_d32(cbuf, $dst$$disp); // Displacement 2404 %} 2405 2406 // Push the integer in stackSlot 'src' onto FP-stack 2407 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] 2408 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); 2409 %} 2410 2411 // Push FPU's TOS float to a stack-slot, and pop FPU-stack 2412 enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] 2413 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); 2414 %} 2415 2416 // Same as Pop_Mem_F except for opcode 2417 // Push FPU's TOS double to a stack-slot, and pop FPU-stack 2418 enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] 2419 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); 2420 %} 2421 2422 enc_class Pop_Reg_FPR( regFPR dst ) %{ 2423 emit_opcode( cbuf, 0xDD ); // FSTP ST(i) 2424 emit_d8( cbuf, 0xD8+$dst$$reg ); 2425 %} 2426 2427 enc_class Push_Reg_FPR( regFPR dst ) %{ 2428 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2429 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2430 %} 2431 2432 // Push FPU's float to a stack-slot, and pop FPU-stack 2433 enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ 2434 int pop = 0x02; 2435 if ($src$$reg != FPR1L_enc) { 2436 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2437 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2438 pop = 0x03; 2439 } 2440 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst] 2441 %} 2442 2443 // Push FPU's double to a stack-slot, and pop FPU-stack 2444 enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ 2445 int pop = 0x02; 2446 if ($src$$reg != FPR1L_enc) { 2447 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) 2448 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2449 pop = 0x03; 2450 } 2451 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst] 2452 %} 2453 2454 // Push FPU's double to 
// a FPU-stack-slot, and pop FPU-stack
  // Copy FPU register src into FPU register dst, popping the FPU stack.
  // If src is not already at ST(0)/FPR1 it is first pushed with FLD, and
  // the store switches from FST (0xD0 base, -1 adjusted because the FLD
  // was skipped) to FSTP (0xD8 base) so the extra push is popped again.
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}

  // Push dst onto the FPU stack; if src is not FPR1, rotate it into the
  // FPR1 slot with the FINCSTP / FXCH / FDECSTP dance so a following
  // destructive op sees src where it expects it.
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Move two XMM doubles onto the x87 stack through an 8-byte memory
  // bounce at [ESP]: src1 is loaded first, then src0, so src0 ends up in
  // ST(0).  The 8 bytes reserved here are released by a Push_Result*
  // encoding.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Float variant of Push_ModD_encoding: 4-byte bounce slot, FLD_S loads.
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 result at ST(0) into an XMM register through the 8-byte
  // bounce slot, then release the slot.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Float variant of Push_ResultD; d8 is the number of stack bytes to
  // release (the instruction using this encoding decides how much was
  // reserved).
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Reserve an 8-byte slot and load XMM double src onto the x87 stack.
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Reserve an 8-byte scratch slot at [ESP] (paired with
  // pop_stack_temp_qword below).
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Load XMM double src onto the x87 stack.  NOTE(review): assumes an
  // 8-byte scratch slot already exists at [ESP] (see
  // push_stack_temp_qword) — confirm at the use sites.
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Rotate src into the FPR1 slot (if it is not already there) so the
  // result can be popped by a following Pop_Reg_F / Pop_Mem_F encoding.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy the FPU status word into EFLAGS (FNSTSW AX; SAHF) and branch
  // over the next 5 bytes when parity is clear (i.e. the compare was
  // ordered).
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  // FPREM loop: FPREM only reduces the exponent partially, so loop
  // (backward JP, rel32 = -12) until the status word's C2 bit — surfaced
  // in EFLAGS.PF by SAHF — reports the reduction is complete.
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Materialize FPU compare results in EFLAGS, folding the unordered
  // (C2, bit 0x0400) case into "below" by forcing the carry flag.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16  ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32  ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8  ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8  ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
    %}

  // Post-compare fixup for P6 (FCOMI-style) compares: a NaN sets the
  // parity flag; rewrite the flags so unordered reads as "less than".
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8  ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8  ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  // fnstsw_ax();
  // sahf();
  // movl(dst, nan_result);
  // jcc(Assembler::parity, exit);
  // movl(dst, less_result);
  // jcc(Assembler::below, exit);
  // movl(dst, equal_result);
  // jcc(Assembler::equal, exit);
  // movl(dst, greater_result);

  // less_result     = 1;
  // greater_result  = -1;
  // equal_result    = 0;
  // nan_result      = -1;

  // Turn the FPU compare flags into a -1/0/+1 integer in dst, mapping
  // NaN (parity set) to -1.  The jcc displacements (0x13, 0x0C, 0x05)
  // are hand-counted byte distances to the end of this sequence — keep
  // them in sync with any edit.
  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8  ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
emit_d8  ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8  ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend a 32-bit int into a long register pair:
  // copy to lo and hi, then arithmetic-shift hi right by 31.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  // Push the long pair, FILD the 64-bit value from [ESP], then release
  // the two pushed words (ADD ESP,8).
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  // Widening multiply into EDX:EAX followed by an arithmetic right shift
  // of the high half by (cnt - 32); the shift is elided when cnt == 32.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW_ENC($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  // Full 64x64->64 multiply on a 32-bit machine; dst is the EDX:EAX pair.
  // NOTE(review): the "ADD EDX,ESI" comment below is historical — the
  // actual register is $tmp, which need not be ESI.
  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg );
  %}

  // 64-bit multiply by a small (0..127) constant; same hi/lo scheme as
  // long_multiply with the constant folded into IMUL-imm8 / MOV-imm32.
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,ESI
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}

  // Long division via a runtime call: push both operands (4 words),
  // call SharedRuntime::ldiv, then pop the 16 argument bytes.
  // NOTE(review): HIGH_FROM_LOW_ENC is applied to (0x50+reg) — this is
  // only correct because the macro is a pure additive offset, so it
  // commutes with the PUSH opcode base; confirm against its definition.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Restore stack
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Long remainder: identical shape to long_div but calls
  // SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the
// runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Restore stack
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // Compare a long against zero: OR the two halves together in tmp and
  // let the flags reflect the combined value.
  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg));
  %}

  // Long equality compare: compare lo words, short-branch over the hi
  // compare when they already differ.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) );
  %}

  // Full signed long compare: CMP the low words, then SBB the high words
  // through tmp so borrow propagation yields correct ordered flags.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) );
    // SBB    $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) );
  %}

  // Compare a long against zero with full ordering: 0 - src via
  // XOR/CMP/SBB leaves ordered flags in EFLAGS.
  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) );
  %}

  // Sniff, sniff... smells like Gnu Superoptimizer
  // Two's-complement negate of a register pair:
  // NEG hi; NEG lo; SBB hi,0 propagates the borrow from the low word.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  // POP EDX (single-byte opcode 0x5A).
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}

  // Tail-jump into the rethrow stub (JMP rel32 with a runtime-call reloc).
  enc_class enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);       // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);       // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);       // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);       // FLDCW std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    // 0x80000000 is the x87 "invalid" marker for out-of-range/NaN input;
    // only then take the slow path through the d2i wrapper.
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Carry on here...
  %}

  // Double-to-long conversion; same round-to-zero / FISTP / restore
  // scheme as DPR2I_encoding above, but with a 64-bit store and a
  // two-word (EDX:EAX) result check for the 0x8000000000000000 marker.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);       // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);       // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);       // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);       // FLDCW std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::x86::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);      // POP EAX
    emit_opcode(cbuf,0x5A);      // POP EDX
    emit_opcode(cbuf,0x81);      // CMP EDX,imm
    emit_d8    (cbuf,0xFA);      // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);      // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);    // Size of slow_call
    emit_opcode(cbuf,0x85);      // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);      // 2/rax,/rax,
    emit_opcode(cbuf,0x75);      // JNE around_slow_call
    emit_d8    (cbuf,0x07);      // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );     // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);      // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::x86::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    __ post_call_nop();
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADDP  ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  // (TOS - src1) / src2, accumulating in ST(0).
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
      // FSUB   ST,$src1
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xE0 + $src1$$reg);

      // FDIV
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  // (TOS + src1) * src2, result left in ST(0).
  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  // Like MulFAddF but the multiply pops: FMULP stores into src2 and pops
  // the stack.
  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  // (FILD is a single 64-bit access; the value is then stored to the
  // destination stack slot with a 64-bit FISTP.)
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc =
$mem->disp_reloc(); // disp-as-oop when working with static  globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by      +--------+
//    CALLEE       | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // NOTE(review): unlike c_return_value, the float case here keys on
  // UseSSE>=1 — Java-convention floats live in XMM0 from SSE1 up, while
  // C-convention floats only move off the FPU stack at SSE2.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib
ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate that fits in a signed byte (imm8 encodings)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate that fits in an unsigned byte
operand immU8() %{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Int immediate that fits in a signed 16-bit field
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value fits in a signed 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

op_cost(5); 3487 format %{ %} 3488 interface(CONST_INTER); 3489 %} 3490 3491 // Double Immediate 3492 operand immDPR() %{ 3493 predicate(UseSSE<=1); 3494 match(ConD); 3495 3496 op_cost(5); 3497 format %{ %} 3498 interface(CONST_INTER); 3499 %} 3500 3501 operand immD() %{ 3502 predicate(UseSSE>=2); 3503 match(ConD); 3504 3505 op_cost(5); 3506 format %{ %} 3507 interface(CONST_INTER); 3508 %} 3509 3510 // Double Immediate zero 3511 operand immD0() %{ 3512 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3513 // bug that generates code such that NaNs compare equal to 0.0 AND do not 3514 // compare equal to -0.0. 3515 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 3516 match(ConD); 3517 3518 format %{ %} 3519 interface(CONST_INTER); 3520 %} 3521 3522 // Float Immediate zero 3523 operand immFPR0() %{ 3524 predicate(UseSSE == 0 && n->getf() == 0.0F); 3525 match(ConF); 3526 3527 op_cost(5); 3528 format %{ %} 3529 interface(CONST_INTER); 3530 %} 3531 3532 // Float Immediate one 3533 operand immFPR1() %{ 3534 predicate(UseSSE == 0 && n->getf() == 1.0F); 3535 match(ConF); 3536 3537 op_cost(5); 3538 format %{ %} 3539 interface(CONST_INTER); 3540 %} 3541 3542 // Float Immediate 3543 operand immFPR() %{ 3544 predicate( UseSSE == 0 ); 3545 match(ConF); 3546 3547 op_cost(5); 3548 format %{ %} 3549 interface(CONST_INTER); 3550 %} 3551 3552 // Float Immediate 3553 operand immF() %{ 3554 predicate(UseSSE >= 1); 3555 match(ConF); 3556 3557 op_cost(5); 3558 format %{ %} 3559 interface(CONST_INTER); 3560 %} 3561 3562 // Float Immediate zero. 
Zero and not -0.0 3563 operand immF0() %{ 3564 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 3565 match(ConF); 3566 3567 op_cost(5); 3568 format %{ %} 3569 interface(CONST_INTER); 3570 %} 3571 3572 // Immediates for special shifts (sign extend) 3573 3574 // Constants for increment 3575 operand immI_16() %{ 3576 predicate( n->get_int() == 16 ); 3577 match(ConI); 3578 3579 format %{ %} 3580 interface(CONST_INTER); 3581 %} 3582 3583 operand immI_24() %{ 3584 predicate( n->get_int() == 24 ); 3585 match(ConI); 3586 3587 format %{ %} 3588 interface(CONST_INTER); 3589 %} 3590 3591 // Constant for byte-wide masking 3592 operand immI_255() %{ 3593 predicate( n->get_int() == 255 ); 3594 match(ConI); 3595 3596 format %{ %} 3597 interface(CONST_INTER); 3598 %} 3599 3600 // Constant for short-wide masking 3601 operand immI_65535() %{ 3602 predicate(n->get_int() == 65535); 3603 match(ConI); 3604 3605 format %{ %} 3606 interface(CONST_INTER); 3607 %} 3608 3609 operand kReg() 3610 %{ 3611 constraint(ALLOC_IN_RC(vectmask_reg)); 3612 match(RegVectMask); 3613 format %{%} 3614 interface(REG_INTER); 3615 %} 3616 3617 operand kReg_K1() 3618 %{ 3619 constraint(ALLOC_IN_RC(vectmask_reg_K1)); 3620 match(RegVectMask); 3621 format %{%} 3622 interface(REG_INTER); 3623 %} 3624 3625 operand kReg_K2() 3626 %{ 3627 constraint(ALLOC_IN_RC(vectmask_reg_K2)); 3628 match(RegVectMask); 3629 format %{%} 3630 interface(REG_INTER); 3631 %} 3632 3633 // Special Registers 3634 operand kReg_K3() 3635 %{ 3636 constraint(ALLOC_IN_RC(vectmask_reg_K3)); 3637 match(RegVectMask); 3638 format %{%} 3639 interface(REG_INTER); 3640 %} 3641 3642 operand kReg_K4() 3643 %{ 3644 constraint(ALLOC_IN_RC(vectmask_reg_K4)); 3645 match(RegVectMask); 3646 format %{%} 3647 interface(REG_INTER); 3648 %} 3649 3650 operand kReg_K5() 3651 %{ 3652 constraint(ALLOC_IN_RC(vectmask_reg_K5)); 3653 match(RegVectMask); 3654 format %{%} 3655 interface(REG_INTER); 3656 %} 3657 3658 operand kReg_K6() 3659 %{ 3660 
constraint(ALLOC_IN_RC(vectmask_reg_K6)); 3661 match(RegVectMask); 3662 format %{%} 3663 interface(REG_INTER); 3664 %} 3665 3666 // Special Registers 3667 operand kReg_K7() 3668 %{ 3669 constraint(ALLOC_IN_RC(vectmask_reg_K7)); 3670 match(RegVectMask); 3671 format %{%} 3672 interface(REG_INTER); 3673 %} 3674 3675 // Register Operands 3676 // Integer Register 3677 operand rRegI() %{ 3678 constraint(ALLOC_IN_RC(int_reg)); 3679 match(RegI); 3680 match(xRegI); 3681 match(eAXRegI); 3682 match(eBXRegI); 3683 match(eCXRegI); 3684 match(eDXRegI); 3685 match(eDIRegI); 3686 match(eSIRegI); 3687 3688 format %{ %} 3689 interface(REG_INTER); 3690 %} 3691 3692 // Subset of Integer Register 3693 operand xRegI(rRegI reg) %{ 3694 constraint(ALLOC_IN_RC(int_x_reg)); 3695 match(reg); 3696 match(eAXRegI); 3697 match(eBXRegI); 3698 match(eCXRegI); 3699 match(eDXRegI); 3700 3701 format %{ %} 3702 interface(REG_INTER); 3703 %} 3704 3705 // Special Registers 3706 operand eAXRegI(xRegI reg) %{ 3707 constraint(ALLOC_IN_RC(eax_reg)); 3708 match(reg); 3709 match(rRegI); 3710 3711 format %{ "EAX" %} 3712 interface(REG_INTER); 3713 %} 3714 3715 // Special Registers 3716 operand eBXRegI(xRegI reg) %{ 3717 constraint(ALLOC_IN_RC(ebx_reg)); 3718 match(reg); 3719 match(rRegI); 3720 3721 format %{ "EBX" %} 3722 interface(REG_INTER); 3723 %} 3724 3725 operand eCXRegI(xRegI reg) %{ 3726 constraint(ALLOC_IN_RC(ecx_reg)); 3727 match(reg); 3728 match(rRegI); 3729 3730 format %{ "ECX" %} 3731 interface(REG_INTER); 3732 %} 3733 3734 operand eDXRegI(xRegI reg) %{ 3735 constraint(ALLOC_IN_RC(edx_reg)); 3736 match(reg); 3737 match(rRegI); 3738 3739 format %{ "EDX" %} 3740 interface(REG_INTER); 3741 %} 3742 3743 operand eDIRegI(xRegI reg) %{ 3744 constraint(ALLOC_IN_RC(edi_reg)); 3745 match(reg); 3746 match(rRegI); 3747 3748 format %{ "EDI" %} 3749 interface(REG_INTER); 3750 %} 3751 3752 operand naxRegI() %{ 3753 constraint(ALLOC_IN_RC(nax_reg)); 3754 match(RegI); 3755 match(eCXRegI); 3756 match(eDXRegI); 3757 
match(eSIRegI); 3758 match(eDIRegI); 3759 3760 format %{ %} 3761 interface(REG_INTER); 3762 %} 3763 3764 operand nadxRegI() %{ 3765 constraint(ALLOC_IN_RC(nadx_reg)); 3766 match(RegI); 3767 match(eBXRegI); 3768 match(eCXRegI); 3769 match(eSIRegI); 3770 match(eDIRegI); 3771 3772 format %{ %} 3773 interface(REG_INTER); 3774 %} 3775 3776 operand ncxRegI() %{ 3777 constraint(ALLOC_IN_RC(ncx_reg)); 3778 match(RegI); 3779 match(eAXRegI); 3780 match(eDXRegI); 3781 match(eSIRegI); 3782 match(eDIRegI); 3783 3784 format %{ %} 3785 interface(REG_INTER); 3786 %} 3787 3788 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3789 // // 3790 operand eSIRegI(xRegI reg) %{ 3791 constraint(ALLOC_IN_RC(esi_reg)); 3792 match(reg); 3793 match(rRegI); 3794 3795 format %{ "ESI" %} 3796 interface(REG_INTER); 3797 %} 3798 3799 // Pointer Register 3800 operand anyRegP() %{ 3801 constraint(ALLOC_IN_RC(any_reg)); 3802 match(RegP); 3803 match(eAXRegP); 3804 match(eBXRegP); 3805 match(eCXRegP); 3806 match(eDIRegP); 3807 match(eRegP); 3808 3809 format %{ %} 3810 interface(REG_INTER); 3811 %} 3812 3813 operand eRegP() %{ 3814 constraint(ALLOC_IN_RC(int_reg)); 3815 match(RegP); 3816 match(eAXRegP); 3817 match(eBXRegP); 3818 match(eCXRegP); 3819 match(eDIRegP); 3820 3821 format %{ %} 3822 interface(REG_INTER); 3823 %} 3824 3825 operand rRegP() %{ 3826 constraint(ALLOC_IN_RC(int_reg)); 3827 match(RegP); 3828 match(eAXRegP); 3829 match(eBXRegP); 3830 match(eCXRegP); 3831 match(eDIRegP); 3832 3833 format %{ %} 3834 interface(REG_INTER); 3835 %} 3836 3837 // On windows95, EBP is not safe to use for implicit null tests. 
3838 operand eRegP_no_EBP() %{ 3839 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3840 match(RegP); 3841 match(eAXRegP); 3842 match(eBXRegP); 3843 match(eCXRegP); 3844 match(eDIRegP); 3845 3846 op_cost(100); 3847 format %{ %} 3848 interface(REG_INTER); 3849 %} 3850 3851 operand naxRegP() %{ 3852 constraint(ALLOC_IN_RC(nax_reg)); 3853 match(RegP); 3854 match(eBXRegP); 3855 match(eDXRegP); 3856 match(eCXRegP); 3857 match(eSIRegP); 3858 match(eDIRegP); 3859 3860 format %{ %} 3861 interface(REG_INTER); 3862 %} 3863 3864 operand nabxRegP() %{ 3865 constraint(ALLOC_IN_RC(nabx_reg)); 3866 match(RegP); 3867 match(eCXRegP); 3868 match(eDXRegP); 3869 match(eSIRegP); 3870 match(eDIRegP); 3871 3872 format %{ %} 3873 interface(REG_INTER); 3874 %} 3875 3876 operand pRegP() %{ 3877 constraint(ALLOC_IN_RC(p_reg)); 3878 match(RegP); 3879 match(eBXRegP); 3880 match(eDXRegP); 3881 match(eSIRegP); 3882 match(eDIRegP); 3883 3884 format %{ %} 3885 interface(REG_INTER); 3886 %} 3887 3888 // Special Registers 3889 // Return a pointer value 3890 operand eAXRegP(eRegP reg) %{ 3891 constraint(ALLOC_IN_RC(eax_reg)); 3892 match(reg); 3893 format %{ "EAX" %} 3894 interface(REG_INTER); 3895 %} 3896 3897 // Used in AtomicAdd 3898 operand eBXRegP(eRegP reg) %{ 3899 constraint(ALLOC_IN_RC(ebx_reg)); 3900 match(reg); 3901 format %{ "EBX" %} 3902 interface(REG_INTER); 3903 %} 3904 3905 // Tail-call (interprocedural jump) to interpreter 3906 operand eCXRegP(eRegP reg) %{ 3907 constraint(ALLOC_IN_RC(ecx_reg)); 3908 match(reg); 3909 format %{ "ECX" %} 3910 interface(REG_INTER); 3911 %} 3912 3913 operand eDXRegP(eRegP reg) %{ 3914 constraint(ALLOC_IN_RC(edx_reg)); 3915 match(reg); 3916 format %{ "EDX" %} 3917 interface(REG_INTER); 3918 %} 3919 3920 operand eSIRegP(eRegP reg) %{ 3921 constraint(ALLOC_IN_RC(esi_reg)); 3922 match(reg); 3923 format %{ "ESI" %} 3924 interface(REG_INTER); 3925 %} 3926 3927 // Used in rep stosw 3928 operand eDIRegP(eRegP reg) %{ 3929 constraint(ALLOC_IN_RC(edi_reg)); 3930 
match(reg); 3931 format %{ "EDI" %} 3932 interface(REG_INTER); 3933 %} 3934 3935 operand eRegL() %{ 3936 constraint(ALLOC_IN_RC(long_reg)); 3937 match(RegL); 3938 match(eADXRegL); 3939 3940 format %{ %} 3941 interface(REG_INTER); 3942 %} 3943 3944 operand eADXRegL( eRegL reg ) %{ 3945 constraint(ALLOC_IN_RC(eadx_reg)); 3946 match(reg); 3947 3948 format %{ "EDX:EAX" %} 3949 interface(REG_INTER); 3950 %} 3951 3952 operand eBCXRegL( eRegL reg ) %{ 3953 constraint(ALLOC_IN_RC(ebcx_reg)); 3954 match(reg); 3955 3956 format %{ "EBX:ECX" %} 3957 interface(REG_INTER); 3958 %} 3959 3960 operand eBDPRegL( eRegL reg ) %{ 3961 constraint(ALLOC_IN_RC(ebpd_reg)); 3962 match(reg); 3963 3964 format %{ "EBP:EDI" %} 3965 interface(REG_INTER); 3966 %} 3967 // Special case for integer high multiply 3968 operand eADXRegL_low_only() %{ 3969 constraint(ALLOC_IN_RC(eadx_reg)); 3970 match(RegL); 3971 3972 format %{ "EAX" %} 3973 interface(REG_INTER); 3974 %} 3975 3976 // Flags register, used as output of compare instructions 3977 operand rFlagsReg() %{ 3978 constraint(ALLOC_IN_RC(int_flags)); 3979 match(RegFlags); 3980 3981 format %{ "EFLAGS" %} 3982 interface(REG_INTER); 3983 %} 3984 3985 // Flags register, used as output of compare instructions 3986 operand eFlagsReg() %{ 3987 constraint(ALLOC_IN_RC(int_flags)); 3988 match(RegFlags); 3989 3990 format %{ "EFLAGS" %} 3991 interface(REG_INTER); 3992 %} 3993 3994 // Flags register, used as output of FLOATING POINT compare instructions 3995 operand eFlagsRegU() %{ 3996 constraint(ALLOC_IN_RC(int_flags)); 3997 match(RegFlags); 3998 3999 format %{ "EFLAGS_U" %} 4000 interface(REG_INTER); 4001 %} 4002 4003 operand eFlagsRegUCF() %{ 4004 constraint(ALLOC_IN_RC(int_flags)); 4005 match(RegFlags); 4006 predicate(false); 4007 4008 format %{ "EFLAGS_U_CF" %} 4009 interface(REG_INTER); 4010 %} 4011 4012 // Condition Code Register used by long compare 4013 operand flagsReg_long_LTGE() %{ 4014 constraint(ALLOC_IN_RC(int_flags)); 4015 match(RegFlags); 4016 
format %{ "FLAGS_LTGE" %} 4017 interface(REG_INTER); 4018 %} 4019 operand flagsReg_long_EQNE() %{ 4020 constraint(ALLOC_IN_RC(int_flags)); 4021 match(RegFlags); 4022 format %{ "FLAGS_EQNE" %} 4023 interface(REG_INTER); 4024 %} 4025 operand flagsReg_long_LEGT() %{ 4026 constraint(ALLOC_IN_RC(int_flags)); 4027 match(RegFlags); 4028 format %{ "FLAGS_LEGT" %} 4029 interface(REG_INTER); 4030 %} 4031 4032 // Condition Code Register used by unsigned long compare 4033 operand flagsReg_ulong_LTGE() %{ 4034 constraint(ALLOC_IN_RC(int_flags)); 4035 match(RegFlags); 4036 format %{ "FLAGS_U_LTGE" %} 4037 interface(REG_INTER); 4038 %} 4039 operand flagsReg_ulong_EQNE() %{ 4040 constraint(ALLOC_IN_RC(int_flags)); 4041 match(RegFlags); 4042 format %{ "FLAGS_U_EQNE" %} 4043 interface(REG_INTER); 4044 %} 4045 operand flagsReg_ulong_LEGT() %{ 4046 constraint(ALLOC_IN_RC(int_flags)); 4047 match(RegFlags); 4048 format %{ "FLAGS_U_LEGT" %} 4049 interface(REG_INTER); 4050 %} 4051 4052 // Float register operands 4053 operand regDPR() %{ 4054 predicate( UseSSE < 2 ); 4055 constraint(ALLOC_IN_RC(fp_dbl_reg)); 4056 match(RegD); 4057 match(regDPR1); 4058 match(regDPR2); 4059 format %{ %} 4060 interface(REG_INTER); 4061 %} 4062 4063 operand regDPR1(regDPR reg) %{ 4064 predicate( UseSSE < 2 ); 4065 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4066 match(reg); 4067 format %{ "FPR1" %} 4068 interface(REG_INTER); 4069 %} 4070 4071 operand regDPR2(regDPR reg) %{ 4072 predicate( UseSSE < 2 ); 4073 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4074 match(reg); 4075 format %{ "FPR2" %} 4076 interface(REG_INTER); 4077 %} 4078 4079 operand regnotDPR1(regDPR reg) %{ 4080 predicate( UseSSE < 2 ); 4081 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4082 match(reg); 4083 format %{ %} 4084 interface(REG_INTER); 4085 %} 4086 4087 // Float register operands 4088 operand regFPR() %{ 4089 predicate( UseSSE < 2 ); 4090 constraint(ALLOC_IN_RC(fp_flt_reg)); 4091 match(RegF); 4092 match(regFPR1); 4093 format %{ %} 4094 
interface(REG_INTER); 4095 %} 4096 4097 // Float register operands 4098 operand regFPR1(regFPR reg) %{ 4099 predicate( UseSSE < 2 ); 4100 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4101 match(reg); 4102 format %{ "FPR1" %} 4103 interface(REG_INTER); 4104 %} 4105 4106 // XMM Float register operands 4107 operand regF() %{ 4108 predicate( UseSSE>=1 ); 4109 constraint(ALLOC_IN_RC(float_reg_legacy)); 4110 match(RegF); 4111 format %{ %} 4112 interface(REG_INTER); 4113 %} 4114 4115 operand legRegF() %{ 4116 predicate( UseSSE>=1 ); 4117 constraint(ALLOC_IN_RC(float_reg_legacy)); 4118 match(RegF); 4119 format %{ %} 4120 interface(REG_INTER); 4121 %} 4122 4123 // Float register operands 4124 operand vlRegF() %{ 4125 constraint(ALLOC_IN_RC(float_reg_vl)); 4126 match(RegF); 4127 4128 format %{ %} 4129 interface(REG_INTER); 4130 %} 4131 4132 // XMM Double register operands 4133 operand regD() %{ 4134 predicate( UseSSE>=2 ); 4135 constraint(ALLOC_IN_RC(double_reg_legacy)); 4136 match(RegD); 4137 format %{ %} 4138 interface(REG_INTER); 4139 %} 4140 4141 // Double register operands 4142 operand legRegD() %{ 4143 predicate( UseSSE>=2 ); 4144 constraint(ALLOC_IN_RC(double_reg_legacy)); 4145 match(RegD); 4146 format %{ %} 4147 interface(REG_INTER); 4148 %} 4149 4150 operand vlRegD() %{ 4151 constraint(ALLOC_IN_RC(double_reg_vl)); 4152 match(RegD); 4153 4154 format %{ %} 4155 interface(REG_INTER); 4156 %} 4157 4158 //----------Memory Operands---------------------------------------------------- 4159 // Direct Memory Operand 4160 operand direct(immP addr) %{ 4161 match(addr); 4162 4163 format %{ "[$addr]" %} 4164 interface(MEMORY_INTER) %{ 4165 base(0xFFFFFFFF); 4166 index(0x4); 4167 scale(0x0); 4168 disp($addr); 4169 %} 4170 %} 4171 4172 // Indirect Memory Operand 4173 operand indirect(eRegP reg) %{ 4174 constraint(ALLOC_IN_RC(int_reg)); 4175 match(reg); 4176 4177 format %{ "[$reg]" %} 4178 interface(MEMORY_INTER) %{ 4179 base($reg); 4180 index(0x4); 4181 scale(0x0); 4182 disp(0x0); 4183 %} 
4184 %} 4185 4186 // Indirect Memory Plus Short Offset Operand 4187 operand indOffset8(eRegP reg, immI8 off) %{ 4188 match(AddP reg off); 4189 4190 format %{ "[$reg + $off]" %} 4191 interface(MEMORY_INTER) %{ 4192 base($reg); 4193 index(0x4); 4194 scale(0x0); 4195 disp($off); 4196 %} 4197 %} 4198 4199 // Indirect Memory Plus Long Offset Operand 4200 operand indOffset32(eRegP reg, immI off) %{ 4201 match(AddP reg off); 4202 4203 format %{ "[$reg + $off]" %} 4204 interface(MEMORY_INTER) %{ 4205 base($reg); 4206 index(0x4); 4207 scale(0x0); 4208 disp($off); 4209 %} 4210 %} 4211 4212 // Indirect Memory Plus Long Offset Operand 4213 operand indOffset32X(rRegI reg, immP off) %{ 4214 match(AddP off reg); 4215 4216 format %{ "[$reg + $off]" %} 4217 interface(MEMORY_INTER) %{ 4218 base($reg); 4219 index(0x4); 4220 scale(0x0); 4221 disp($off); 4222 %} 4223 %} 4224 4225 // Indirect Memory Plus Index Register Plus Offset Operand 4226 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4227 match(AddP (AddP reg ireg) off); 4228 4229 op_cost(10); 4230 format %{"[$reg + $off + $ireg]" %} 4231 interface(MEMORY_INTER) %{ 4232 base($reg); 4233 index($ireg); 4234 scale(0x0); 4235 disp($off); 4236 %} 4237 %} 4238 4239 // Indirect Memory Plus Index Register Plus Offset Operand 4240 operand indIndex(eRegP reg, rRegI ireg) %{ 4241 match(AddP reg ireg); 4242 4243 op_cost(10); 4244 format %{"[$reg + $ireg]" %} 4245 interface(MEMORY_INTER) %{ 4246 base($reg); 4247 index($ireg); 4248 scale(0x0); 4249 disp(0x0); 4250 %} 4251 %} 4252 4253 // // ------------------------------------------------------------------------- 4254 // // 486 architecture doesn't support "scale * index + offset" with out a base 4255 // // ------------------------------------------------------------------------- 4256 // // Scaled Memory Operands 4257 // // Indirect Memory Times Scale Plus Offset Operand 4258 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4259 // match(AddP off (LShiftI ireg 
scale)); 4260 // 4261 // op_cost(10); 4262 // format %{"[$off + $ireg << $scale]" %} 4263 // interface(MEMORY_INTER) %{ 4264 // base(0x4); 4265 // index($ireg); 4266 // scale($scale); 4267 // disp($off); 4268 // %} 4269 // %} 4270 4271 // Indirect Memory Times Scale Plus Index Register 4272 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4273 match(AddP reg (LShiftI ireg scale)); 4274 4275 op_cost(10); 4276 format %{"[$reg + $ireg << $scale]" %} 4277 interface(MEMORY_INTER) %{ 4278 base($reg); 4279 index($ireg); 4280 scale($scale); 4281 disp(0x0); 4282 %} 4283 %} 4284 4285 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4286 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4287 match(AddP (AddP reg (LShiftI ireg scale)) off); 4288 4289 op_cost(10); 4290 format %{"[$reg + $off + $ireg << $scale]" %} 4291 interface(MEMORY_INTER) %{ 4292 base($reg); 4293 index($ireg); 4294 scale($scale); 4295 disp($off); 4296 %} 4297 %} 4298 4299 //----------Load Long Memory Operands------------------------------------------ 4300 // The load-long idiom will use its address expression again after loading 4301 // the first word of the long. If the load-long destination overlaps with 4302 // registers used in the addressing expression, the 2nd half will be loaded 4303 // from a clobbered address. Fix this by requiring that load-long use 4304 // address registers that do not overlap with the load-long target.
4305 4306 // load-long support 4307 operand load_long_RegP() %{ 4308 constraint(ALLOC_IN_RC(esi_reg)); 4309 match(RegP); 4310 match(eSIRegP); 4311 op_cost(100); 4312 format %{ %} 4313 interface(REG_INTER); 4314 %} 4315 4316 // Indirect Memory Operand Long 4317 operand load_long_indirect(load_long_RegP reg) %{ 4318 constraint(ALLOC_IN_RC(esi_reg)); 4319 match(reg); 4320 4321 format %{ "[$reg]" %} 4322 interface(MEMORY_INTER) %{ 4323 base($reg); 4324 index(0x4); 4325 scale(0x0); 4326 disp(0x0); 4327 %} 4328 %} 4329 4330 // Indirect Memory Plus Long Offset Operand 4331 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4332 match(AddP reg off); 4333 4334 format %{ "[$reg + $off]" %} 4335 interface(MEMORY_INTER) %{ 4336 base($reg); 4337 index(0x4); 4338 scale(0x0); 4339 disp($off); 4340 %} 4341 %} 4342 4343 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4344 4345 4346 //----------Special Memory Operands-------------------------------------------- 4347 // Stack Slot Operand - This operand is used for loading and storing temporary 4348 // values on the stack where a match requires a value to 4349 // flow through memory. 
4350 operand stackSlotP(sRegP reg) %{ 4351 constraint(ALLOC_IN_RC(stack_slots)); 4352 // No match rule because this operand is only generated in matching 4353 format %{ "[$reg]" %} 4354 interface(MEMORY_INTER) %{ 4355 base(0x4); // ESP 4356 index(0x4); // No Index 4357 scale(0x0); // No Scale 4358 disp($reg); // Stack Offset 4359 %} 4360 %} 4361 4362 operand stackSlotI(sRegI reg) %{ 4363 constraint(ALLOC_IN_RC(stack_slots)); 4364 // No match rule because this operand is only generated in matching 4365 format %{ "[$reg]" %} 4366 interface(MEMORY_INTER) %{ 4367 base(0x4); // ESP 4368 index(0x4); // No Index 4369 scale(0x0); // No Scale 4370 disp($reg); // Stack Offset 4371 %} 4372 %} 4373 4374 operand stackSlotF(sRegF reg) %{ 4375 constraint(ALLOC_IN_RC(stack_slots)); 4376 // No match rule because this operand is only generated in matching 4377 format %{ "[$reg]" %} 4378 interface(MEMORY_INTER) %{ 4379 base(0x4); // ESP 4380 index(0x4); // No Index 4381 scale(0x0); // No Scale 4382 disp($reg); // Stack Offset 4383 %} 4384 %} 4385 4386 operand stackSlotD(sRegD reg) %{ 4387 constraint(ALLOC_IN_RC(stack_slots)); 4388 // No match rule because this operand is only generated in matching 4389 format %{ "[$reg]" %} 4390 interface(MEMORY_INTER) %{ 4391 base(0x4); // ESP 4392 index(0x4); // No Index 4393 scale(0x0); // No Scale 4394 disp($reg); // Stack Offset 4395 %} 4396 %} 4397 4398 operand stackSlotL(sRegL reg) %{ 4399 constraint(ALLOC_IN_RC(stack_slots)); 4400 // No match rule because this operand is only generated in matching 4401 format %{ "[$reg]" %} 4402 interface(MEMORY_INTER) %{ 4403 base(0x4); // ESP 4404 index(0x4); // No Index 4405 scale(0x0); // No Scale 4406 disp($reg); // Stack Offset 4407 %} 4408 %} 4409 4410 //----------Conditional Branch Operands---------------------------------------- 4411 // Comparison Op - This is the operation of the comparison, and is limited to 4412 // the following set of codes: 4413 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) 
4414 // 4415 // Other attributes of the comparison, such as unsignedness, are specified 4416 // by the comparison instruction that sets a condition code flags register. 4417 // That result is represented by a flags operand whose subtype is appropriate 4418 // to the unsignedness (etc.) of the comparison. 4419 // 4420 // Later, the instruction which matches both the Comparison Op (a Bool) and 4421 // the flags (produced by the Cmp) specifies the coding of the comparison op 4422 // by matching a specific subtype of Bool operand below, such as cmpOpU. 4423 4424 // Comparison Code 4425 operand cmpOp() %{ 4426 match(Bool); 4427 4428 format %{ "" %} 4429 interface(COND_INTER) %{ 4430 equal(0x4, "e"); 4431 not_equal(0x5, "ne"); 4432 less(0xC, "l"); 4433 greater_equal(0xD, "ge"); 4434 less_equal(0xE, "le"); 4435 greater(0xF, "g"); 4436 overflow(0x0, "o"); 4437 no_overflow(0x1, "no"); 4438 %} 4439 %} 4440 4441 // Comparison Code, unsigned compare. Used by FP also, with 4442 // C2 (unordered) turned into GT or LT already. The other bits 4443 // C0 and C3 are turned into Carry & Zero flags. 
4444 operand cmpOpU() %{ 4445 match(Bool); 4446 4447 format %{ "" %} 4448 interface(COND_INTER) %{ 4449 equal(0x4, "e"); 4450 not_equal(0x5, "ne"); 4451 less(0x2, "b"); 4452 greater_equal(0x3, "nb"); 4453 less_equal(0x6, "be"); 4454 greater(0x7, "nbe"); 4455 overflow(0x0, "o"); 4456 no_overflow(0x1, "no"); 4457 %} 4458 %} 4459 4460 // Floating comparisons that don't require any fixup for the unordered case 4461 operand cmpOpUCF() %{ 4462 match(Bool); 4463 predicate(n->as_Bool()->_test._test == BoolTest::lt || 4464 n->as_Bool()->_test._test == BoolTest::ge || 4465 n->as_Bool()->_test._test == BoolTest::le || 4466 n->as_Bool()->_test._test == BoolTest::gt); 4467 format %{ "" %} 4468 interface(COND_INTER) %{ 4469 equal(0x4, "e"); 4470 not_equal(0x5, "ne"); 4471 less(0x2, "b"); 4472 greater_equal(0x3, "nb"); 4473 less_equal(0x6, "be"); 4474 greater(0x7, "nbe"); 4475 overflow(0x0, "o"); 4476 no_overflow(0x1, "no"); 4477 %} 4478 %} 4479 4480 4481 // Floating comparisons that can be fixed up with extra conditional jumps 4482 operand cmpOpUCF2() %{ 4483 match(Bool); 4484 predicate(n->as_Bool()->_test._test == BoolTest::ne || 4485 n->as_Bool()->_test._test == BoolTest::eq); 4486 format %{ "" %} 4487 interface(COND_INTER) %{ 4488 equal(0x4, "e"); 4489 not_equal(0x5, "ne"); 4490 less(0x2, "b"); 4491 greater_equal(0x3, "nb"); 4492 less_equal(0x6, "be"); 4493 greater(0x7, "nbe"); 4494 overflow(0x0, "o"); 4495 no_overflow(0x1, "no"); 4496 %} 4497 %} 4498 4499 // Comparison Code for FP conditional move 4500 operand cmpOp_fcmov() %{ 4501 match(Bool); 4502 4503 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 4504 n->as_Bool()->_test._test != BoolTest::no_overflow); 4505 format %{ "" %} 4506 interface(COND_INTER) %{ 4507 equal (0x0C8); 4508 not_equal (0x1C8); 4509 less (0x0C0); 4510 greater_equal(0x1C0); 4511 less_equal (0x0D0); 4512 greater (0x1D0); 4513 overflow(0x0, "o"); // not really supported by the instruction 4514 no_overflow(0x1, "no"); // not really supported 
by the instruction 4515 %} 4516 %} 4517 4518 // Comparison Code used in long compares 4519 operand cmpOp_commute() %{ 4520 match(Bool); 4521 4522 format %{ "" %} 4523 interface(COND_INTER) %{ 4524 equal(0x4, "e"); 4525 not_equal(0x5, "ne"); 4526 less(0xF, "g"); 4527 greater_equal(0xE, "le"); 4528 less_equal(0xD, "ge"); 4529 greater(0xC, "l"); 4530 overflow(0x0, "o"); 4531 no_overflow(0x1, "no"); 4532 %} 4533 %} 4534 4535 // Comparison Code used in unsigned long compares 4536 operand cmpOpU_commute() %{ 4537 match(Bool); 4538 4539 format %{ "" %} 4540 interface(COND_INTER) %{ 4541 equal(0x4, "e"); 4542 not_equal(0x5, "ne"); 4543 less(0x7, "nbe"); 4544 greater_equal(0x6, "be"); 4545 less_equal(0x3, "nb"); 4546 greater(0x2, "b"); 4547 overflow(0x0, "o"); 4548 no_overflow(0x1, "no"); 4549 %} 4550 %} 4551 4552 //----------OPERAND CLASSES---------------------------------------------------- 4553 // Operand Classes are groups of operands that are used as to simplify 4554 // instruction definitions by not requiring the AD writer to specify separate 4555 // instructions for every form of operand when the instruction accepts 4556 // multiple operand types with the same basic encoding and format. The classic 4557 // case of this is memory operands. 4558 4559 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 4560 indIndex, indIndexScale, indIndexScaleOffset); 4561 4562 // Long memory operations are encoded in 2 instructions and a +4 offset. 4563 // This means some kind of offset is always required and you cannot use 4564 // an oop as the offset (done when working on static globals). 4565 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, 4566 indIndex, indIndexScale, indIndexScaleOffset); 4567 4568 4569 //----------PIPELINE----------------------------------------------------------- 4570 // Rules which define the behavior of the target architectures pipeline. 
4571 pipeline %{ 4572 4573 //----------ATTRIBUTES--------------------------------------------------------- 4574 attributes %{ 4575 variable_size_instructions; // Instructions are variable-sized 4576 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle 4577 instruction_unit_size = 1; // An instruction is 1 byte long 4578 instruction_fetch_unit_size = 16; // The processor fetches one line 4579 instruction_fetch_units = 1; // of 16 bytes 4580 4581 // List of nop instructions 4582 nops( MachNop ); 4583 %} 4584 4585 //----------RESOURCES---------------------------------------------------------- 4586 // Resources are the functional units available to the machine 4587 4588 // Generic P2/P3 pipeline 4589 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of 4590 // 3 instructions decoded per cycle. 4591 // 2 load/store ops per cycle, 1 branch, 1 FPU, 4592 // 2 ALU ops, only ALU0 handles mul/div instructions. 4593 resources( D0, D1, D2, DECODE = D0 | D1 | D2, 4594 MS0, MS1, MEM = MS0 | MS1, 4595 BR, FPU, 4596 ALU0, ALU1, ALU = ALU0 | ALU1 ); 4597 4598 //----------PIPELINE DESCRIPTION----------------------------------------------- 4599 // Pipeline Description specifies the stages in the machine's pipeline 4600 4601 // Generic P2/P3 pipeline 4602 pipe_desc(S0, S1, S2, S3, S4, S5); 4603 4604 //----------PIPELINE CLASSES--------------------------------------------------- 4605 // Pipeline Classes describe the stages in which input and output are 4606 // referenced by the hardware pipeline. 4607 4608 // Naming convention: ialu or fpu 4609 // Then: _reg 4610 // Then: _reg if there is a 2nd register 4611 // Then: _long if it's a pair of instructions implementing a long 4612 // Then: _fat if it requires the big decoder 4613 // Or: _mem if it requires the big decoder and a memory unit.
4614 4615 // Integer ALU reg operation 4616 pipe_class ialu_reg(rRegI dst) %{ 4617 single_instruction; 4618 dst : S4(write); 4619 dst : S3(read); 4620 DECODE : S0; // any decoder 4621 ALU : S3; // any alu 4622 %} 4623 4624 // Long ALU reg operation 4625 pipe_class ialu_reg_long(eRegL dst) %{ 4626 instruction_count(2); 4627 dst : S4(write); 4628 dst : S3(read); 4629 DECODE : S0(2); // any 2 decoders 4630 ALU : S3(2); // both alus 4631 %} 4632 4633 // Integer ALU reg operation using big decoder 4634 pipe_class ialu_reg_fat(rRegI dst) %{ 4635 single_instruction; 4636 dst : S4(write); 4637 dst : S3(read); 4638 D0 : S0; // big decoder only 4639 ALU : S3; // any alu 4640 %} 4641 4642 // Long ALU reg operation using big decoder 4643 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4644 instruction_count(2); 4645 dst : S4(write); 4646 dst : S3(read); 4647 D0 : S0(2); // big decoder only; twice 4648 ALU : S3(2); // any 2 alus 4649 %} 4650 4651 // Integer ALU reg-reg operation 4652 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4653 single_instruction; 4654 dst : S4(write); 4655 src : S3(read); 4656 DECODE : S0; // any decoder 4657 ALU : S3; // any alu 4658 %} 4659 4660 // Long ALU reg-reg operation 4661 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4662 instruction_count(2); 4663 dst : S4(write); 4664 src : S3(read); 4665 DECODE : S0(2); // any 2 decoders 4666 ALU : S3(2); // both alus 4667 %} 4668 4669 // Integer ALU reg-reg operation 4670 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4671 single_instruction; 4672 dst : S4(write); 4673 src : S3(read); 4674 D0 : S0; // big decoder only 4675 ALU : S3; // any alu 4676 %} 4677 4678 // Long ALU reg-reg operation 4679 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4680 instruction_count(2); 4681 dst : S4(write); 4682 src : S3(read); 4683 D0 : S0(2); // big decoder only; twice 4684 ALU : S3(2); // both alus 4685 %} 4686 4687 // Integer ALU reg-mem operation 4688 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store immediate to Memory
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation producing only flags (compare/test)
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg (four-instruction sequence)
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any 4 decoders
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg operation
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg-reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2); 4917 src : S5(read); 4918 mem : S3(read); 4919 DECODE : S0; // any decoder for FPU PUSH 4920 D0 : S1; // big decoder only 4921 FPU : S4; 4922 MEM : S3; // any mem 4923 %} 4924 4925 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4926 instruction_count(3); 4927 src1 : S3(read); 4928 src2 : S3(read); 4929 mem : S3(read); 4930 DECODE : S0(2); // any decoder for FPU PUSH 4931 D0 : S1; // big decoder only 4932 FPU : S4; 4933 MEM : S3; // any mem 4934 %} 4935 4936 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4937 instruction_count(3); 4938 src1 : S3(read); 4939 src2 : S3(read); 4940 mem : S4(read); 4941 DECODE : S0; // any decoder for FPU PUSH 4942 D0 : S0(2); // big decoder only 4943 FPU : S4; 4944 MEM : S3(2); // any mem 4945 %} 4946 4947 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4948 instruction_count(2); 4949 src1 : S3(read); 4950 dst : S4(read); 4951 D0 : S0(2); // big decoder only 4952 MEM : S3(2); // any mem 4953 %} 4954 4955 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4956 instruction_count(3); 4957 src1 : S3(read); 4958 src2 : S3(read); 4959 dst : S4(read); 4960 D0 : S0(3); // big decoder only 4961 FPU : S4; 4962 MEM : S3(3); // any mem 4963 %} 4964 4965 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4966 instruction_count(3); 4967 src1 : S4(read); 4968 mem : S4(read); 4969 DECODE : S0; // any decoder for FPU PUSH 4970 D0 : S0(2); // big decoder only 4971 FPU : S4; 4972 MEM : S3(2); // any mem 4973 %} 4974 4975 // Float load constant 4976 pipe_class fpu_reg_con(regDPR dst) %{ 4977 instruction_count(2); 4978 dst : S5(write); 4979 D0 : S0; // big decoder only for the load 4980 DECODE : S1; // any decoder for FPU POP 4981 FPU : S4; 4982 MEM : S3; // any mem 4983 %} 4984 4985 // Float load constant 4986 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 4987 instruction_count(3); 4988 dst : S5(write); 4989 src : S3(read); 4990 D0 : S0; // big decoder only for 
the load 4991 DECODE : S1(2); // any decoder for FPU POP 4992 FPU : S4; 4993 MEM : S3; // any mem 4994 %} 4995 4996 // UnConditional branch 4997 pipe_class pipe_jmp( label labl ) %{ 4998 single_instruction; 4999 BR : S3; 5000 %} 5001 5002 // Conditional branch 5003 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5004 single_instruction; 5005 cr : S1(read); 5006 BR : S3; 5007 %} 5008 5009 // Allocation idiom 5010 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5011 instruction_count(1); force_serialization; 5012 fixed_latency(6); 5013 heap_ptr : S3(read); 5014 DECODE : S0(3); 5015 D0 : S2; 5016 MEM : S3; 5017 ALU : S3(2); 5018 dst : S5(write); 5019 BR : S5; 5020 %} 5021 5022 // Generic big/slow expanded idiom 5023 pipe_class pipe_slow( ) %{ 5024 instruction_count(10); multiple_bundles; force_serialization; 5025 fixed_latency(100); 5026 D0 : S0(2); 5027 MEM : S3(2); 5028 %} 5029 5030 // The real do-nothing guy 5031 pipe_class empty( ) %{ 5032 instruction_count(0); 5033 %} 5034 5035 // Define the class for the Nop node 5036 define %{ 5037 MachNop = empty; 5038 %} 5039 5040 %} 5041 5042 //----------INSTRUCTIONS------------------------------------------------------- 5043 // 5044 // match -- States which machine-independent subtree may be replaced 5045 // by this instruction. 5046 // ins_cost -- The estimated cost of this instruction is used by instruction 5047 // selection to identify a minimum cost tree of machine 5048 // instructions that matches a tree of machine-independent 5049 // instructions. 5050 // format -- A string providing the disassembly for this instruction. 5051 // The value of an instruction's operand may be inserted 5052 // by referring to it with a '$' prefix. 5053 // opcode -- Three instruction opcodes may be provided. These are referred 5054 // to within an encode class as $primary, $secondary, and $tertiary 5055 // respectively. 
The primary opcode is commonly used to 5056 // indicate the type of machine instruction, while secondary 5057 // and tertiary are often used for prefix options or addressing 5058 // modes. 5059 // ins_encode -- A list of encode classes with parameters. The encode class 5060 // name must have been defined in an 'enc_class' specification 5061 // in the encode section of the architecture description. 5062 5063 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 5064 // Load Float 5065 instruct MoveF2LEG(legRegF dst, regF src) %{ 5066 match(Set dst src); 5067 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 5068 ins_encode %{ 5069 ShouldNotReachHere(); 5070 %} 5071 ins_pipe( fpu_reg_reg ); 5072 %} 5073 5074 // Load Float 5075 instruct MoveLEG2F(regF dst, legRegF src) %{ 5076 match(Set dst src); 5077 format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} 5078 ins_encode %{ 5079 ShouldNotReachHere(); 5080 %} 5081 ins_pipe( fpu_reg_reg ); 5082 %} 5083 5084 // Load Float 5085 instruct MoveF2VL(vlRegF dst, regF src) %{ 5086 match(Set dst src); 5087 format %{ "movss $dst,$src\t! load float (4 bytes)" %} 5088 ins_encode %{ 5089 ShouldNotReachHere(); 5090 %} 5091 ins_pipe( fpu_reg_reg ); 5092 %} 5093 5094 // Load Float 5095 instruct MoveVL2F(regF dst, vlRegF src) %{ 5096 match(Set dst src); 5097 format %{ "movss $dst,$src\t! 
load float (4 bytes)" %} 5098 ins_encode %{ 5099 ShouldNotReachHere(); 5100 %} 5101 ins_pipe( fpu_reg_reg ); 5102 %} 5103 5104 5105 5106 // Load Double 5107 instruct MoveD2LEG(legRegD dst, regD src) %{ 5108 match(Set dst src); 5109 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 5110 ins_encode %{ 5111 ShouldNotReachHere(); 5112 %} 5113 ins_pipe( fpu_reg_reg ); 5114 %} 5115 5116 // Load Double 5117 instruct MoveLEG2D(regD dst, legRegD src) %{ 5118 match(Set dst src); 5119 format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} 5120 ins_encode %{ 5121 ShouldNotReachHere(); 5122 %} 5123 ins_pipe( fpu_reg_reg ); 5124 %} 5125 5126 // Load Double 5127 instruct MoveD2VL(vlRegD dst, regD src) %{ 5128 match(Set dst src); 5129 format %{ "movsd $dst,$src\t! load double (8 bytes)" %} 5130 ins_encode %{ 5131 ShouldNotReachHere(); 5132 %} 5133 ins_pipe( fpu_reg_reg ); 5134 %} 5135 5136 // Load Double 5137 instruct MoveVL2D(regD dst, vlRegD src) %{ 5138 match(Set dst src); 5139 format %{ "movsd $dst,$src\t! 
load double (8 bytes)" %} 5140 ins_encode %{ 5141 ShouldNotReachHere(); 5142 %} 5143 ins_pipe( fpu_reg_reg ); 5144 %} 5145 5146 //----------BSWAP-Instruction-------------------------------------------------- 5147 instruct bytes_reverse_int(rRegI dst) %{ 5148 match(Set dst (ReverseBytesI dst)); 5149 5150 format %{ "BSWAP $dst" %} 5151 opcode(0x0F, 0xC8); 5152 ins_encode( OpcP, OpcSReg(dst) ); 5153 ins_pipe( ialu_reg ); 5154 %} 5155 5156 instruct bytes_reverse_long(eRegL dst) %{ 5157 match(Set dst (ReverseBytesL dst)); 5158 5159 format %{ "BSWAP $dst.lo\n\t" 5160 "BSWAP $dst.hi\n\t" 5161 "XCHG $dst.lo $dst.hi" %} 5162 5163 ins_cost(125); 5164 ins_encode( bswap_long_bytes(dst) ); 5165 ins_pipe( ialu_reg_reg); 5166 %} 5167 5168 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5169 match(Set dst (ReverseBytesUS dst)); 5170 effect(KILL cr); 5171 5172 format %{ "BSWAP $dst\n\t" 5173 "SHR $dst,16\n\t" %} 5174 ins_encode %{ 5175 __ bswapl($dst$$Register); 5176 __ shrl($dst$$Register, 16); 5177 %} 5178 ins_pipe( ialu_reg ); 5179 %} 5180 5181 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5182 match(Set dst (ReverseBytesS dst)); 5183 effect(KILL cr); 5184 5185 format %{ "BSWAP $dst\n\t" 5186 "SAR $dst,16\n\t" %} 5187 ins_encode %{ 5188 __ bswapl($dst$$Register); 5189 __ sarl($dst$$Register, 16); 5190 %} 5191 ins_pipe( ialu_reg ); 5192 %} 5193 5194 5195 //---------- Zeros Count Instructions ------------------------------------------ 5196 5197 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5198 predicate(UseCountLeadingZerosInstruction); 5199 match(Set dst (CountLeadingZerosI src)); 5200 effect(KILL cr); 5201 5202 format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} 5203 ins_encode %{ 5204 __ lzcntl($dst$$Register, $src$$Register); 5205 %} 5206 ins_pipe(ialu_reg); 5207 %} 5208 5209 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5210 predicate(!UseCountLeadingZerosInstruction); 5211 match(Set dst 
(CountLeadingZerosI src)); 5212 effect(KILL cr); 5213 5214 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5215 "JNZ skip\n\t" 5216 "MOV $dst, -1\n" 5217 "skip:\n\t" 5218 "NEG $dst\n\t" 5219 "ADD $dst, 31" %} 5220 ins_encode %{ 5221 Register Rdst = $dst$$Register; 5222 Register Rsrc = $src$$Register; 5223 Label skip; 5224 __ bsrl(Rdst, Rsrc); 5225 __ jccb(Assembler::notZero, skip); 5226 __ movl(Rdst, -1); 5227 __ bind(skip); 5228 __ negl(Rdst); 5229 __ addl(Rdst, BitsPerInt - 1); 5230 %} 5231 ins_pipe(ialu_reg); 5232 %} 5233 5234 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5235 predicate(UseCountLeadingZerosInstruction); 5236 match(Set dst (CountLeadingZerosL src)); 5237 effect(TEMP dst, KILL cr); 5238 5239 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5240 "JNC done\n\t" 5241 "LZCNT $dst, $src.lo\n\t" 5242 "ADD $dst, 32\n" 5243 "done:" %} 5244 ins_encode %{ 5245 Register Rdst = $dst$$Register; 5246 Register Rsrc = $src$$Register; 5247 Label done; 5248 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5249 __ jccb(Assembler::carryClear, done); 5250 __ lzcntl(Rdst, Rsrc); 5251 __ addl(Rdst, BitsPerInt); 5252 __ bind(done); 5253 %} 5254 ins_pipe(ialu_reg); 5255 %} 5256 5257 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5258 predicate(!UseCountLeadingZerosInstruction); 5259 match(Set dst (CountLeadingZerosL src)); 5260 effect(TEMP dst, KILL cr); 5261 5262 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5263 "JZ msw_is_zero\n\t" 5264 "ADD $dst, 32\n\t" 5265 "JMP not_zero\n" 5266 "msw_is_zero:\n\t" 5267 "BSR $dst, $src.lo\n\t" 5268 "JNZ not_zero\n\t" 5269 "MOV $dst, -1\n" 5270 "not_zero:\n\t" 5271 "NEG $dst\n\t" 5272 "ADD $dst, 63\n" %} 5273 ins_encode %{ 5274 Register Rdst = $dst$$Register; 5275 Register Rsrc = $src$$Register; 5276 Label msw_is_zero; 5277 Label not_zero; 5278 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5279 __ jccb(Assembler::zero, msw_is_zero); 5280 __ addl(Rdst, BitsPerInt); 
5281 __ jmpb(not_zero); 5282 __ bind(msw_is_zero); 5283 __ bsrl(Rdst, Rsrc); 5284 __ jccb(Assembler::notZero, not_zero); 5285 __ movl(Rdst, -1); 5286 __ bind(not_zero); 5287 __ negl(Rdst); 5288 __ addl(Rdst, BitsPerLong - 1); 5289 %} 5290 ins_pipe(ialu_reg); 5291 %} 5292 5293 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5294 predicate(UseCountTrailingZerosInstruction); 5295 match(Set dst (CountTrailingZerosI src)); 5296 effect(KILL cr); 5297 5298 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5299 ins_encode %{ 5300 __ tzcntl($dst$$Register, $src$$Register); 5301 %} 5302 ins_pipe(ialu_reg); 5303 %} 5304 5305 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5306 predicate(!UseCountTrailingZerosInstruction); 5307 match(Set dst (CountTrailingZerosI src)); 5308 effect(KILL cr); 5309 5310 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5311 "JNZ done\n\t" 5312 "MOV $dst, 32\n" 5313 "done:" %} 5314 ins_encode %{ 5315 Register Rdst = $dst$$Register; 5316 Label done; 5317 __ bsfl(Rdst, $src$$Register); 5318 __ jccb(Assembler::notZero, done); 5319 __ movl(Rdst, BitsPerInt); 5320 __ bind(done); 5321 %} 5322 ins_pipe(ialu_reg); 5323 %} 5324 5325 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5326 predicate(UseCountTrailingZerosInstruction); 5327 match(Set dst (CountTrailingZerosL src)); 5328 effect(TEMP dst, KILL cr); 5329 5330 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5331 "JNC done\n\t" 5332 "TZCNT $dst, $src.hi\n\t" 5333 "ADD $dst, 32\n" 5334 "done:" %} 5335 ins_encode %{ 5336 Register Rdst = $dst$$Register; 5337 Register Rsrc = $src$$Register; 5338 Label done; 5339 __ tzcntl(Rdst, Rsrc); 5340 __ jccb(Assembler::carryClear, done); 5341 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5342 __ addl(Rdst, BitsPerInt); 5343 __ bind(done); 5344 %} 5345 ins_pipe(ialu_reg); 5346 %} 5347 5348 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5349 
predicate(!UseCountTrailingZerosInstruction); 5350 match(Set dst (CountTrailingZerosL src)); 5351 effect(TEMP dst, KILL cr); 5352 5353 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5354 "JNZ done\n\t" 5355 "BSF $dst, $src.hi\n\t" 5356 "JNZ msw_not_zero\n\t" 5357 "MOV $dst, 32\n" 5358 "msw_not_zero:\n\t" 5359 "ADD $dst, 32\n" 5360 "done:" %} 5361 ins_encode %{ 5362 Register Rdst = $dst$$Register; 5363 Register Rsrc = $src$$Register; 5364 Label msw_not_zero; 5365 Label done; 5366 __ bsfl(Rdst, Rsrc); 5367 __ jccb(Assembler::notZero, done); 5368 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5369 __ jccb(Assembler::notZero, msw_not_zero); 5370 __ movl(Rdst, BitsPerInt); 5371 __ bind(msw_not_zero); 5372 __ addl(Rdst, BitsPerInt); 5373 __ bind(done); 5374 %} 5375 ins_pipe(ialu_reg); 5376 %} 5377 5378 5379 //---------- Population Count Instructions ------------------------------------- 5380 5381 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5382 predicate(UsePopCountInstruction); 5383 match(Set dst (PopCountI src)); 5384 effect(KILL cr); 5385 5386 format %{ "POPCNT $dst, $src" %} 5387 ins_encode %{ 5388 __ popcntl($dst$$Register, $src$$Register); 5389 %} 5390 ins_pipe(ialu_reg); 5391 %} 5392 5393 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5394 predicate(UsePopCountInstruction); 5395 match(Set dst (PopCountI (LoadI mem))); 5396 effect(KILL cr); 5397 5398 format %{ "POPCNT $dst, $mem" %} 5399 ins_encode %{ 5400 __ popcntl($dst$$Register, $mem$$Address); 5401 %} 5402 ins_pipe(ialu_reg); 5403 %} 5404 5405 // Note: Long.bitCount(long) returns an int. 
5406 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5407 predicate(UsePopCountInstruction); 5408 match(Set dst (PopCountL src)); 5409 effect(KILL cr, TEMP tmp, TEMP dst); 5410 5411 format %{ "POPCNT $dst, $src.lo\n\t" 5412 "POPCNT $tmp, $src.hi\n\t" 5413 "ADD $dst, $tmp" %} 5414 ins_encode %{ 5415 __ popcntl($dst$$Register, $src$$Register); 5416 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5417 __ addl($dst$$Register, $tmp$$Register); 5418 %} 5419 ins_pipe(ialu_reg); 5420 %} 5421 5422 // Note: Long.bitCount(long) returns an int. 5423 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ 5424 predicate(UsePopCountInstruction); 5425 match(Set dst (PopCountL (LoadL mem))); 5426 effect(KILL cr, TEMP tmp, TEMP dst); 5427 5428 format %{ "POPCNT $dst, $mem\n\t" 5429 "POPCNT $tmp, $mem+4\n\t" 5430 "ADD $dst, $tmp" %} 5431 ins_encode %{ 5432 //__ popcntl($dst$$Register, $mem$$Address$$first); 5433 //__ popcntl($tmp$$Register, $mem$$Address$$second); 5434 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); 5435 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); 5436 __ addl($dst$$Register, $tmp$$Register); 5437 %} 5438 ins_pipe(ialu_reg); 5439 %} 5440 5441 5442 //----------Load/Store/Move Instructions--------------------------------------- 5443 //----------Load Instructions-------------------------------------------------- 5444 // Load Byte (8bit signed) 5445 instruct loadB(xRegI dst, memory mem) %{ 5446 match(Set dst (LoadB mem)); 5447 5448 ins_cost(125); 5449 format %{ "MOVSX8 $dst,$mem\t# byte" %} 5450 5451 ins_encode %{ 5452 __ movsbl($dst$$Register, $mem$$Address); 5453 %} 5454 5455 ins_pipe(ialu_reg_mem); 5456 %} 5457 5458 // Load Byte (8bit signed) into Long Register 5459 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5460 match(Set dst (ConvI2L (LoadB mem))); 5461 effect(KILL 
cr); 5462 5463 ins_cost(375); 5464 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5465 "MOV $dst.hi,$dst.lo\n\t" 5466 "SAR $dst.hi,7" %} 5467 5468 ins_encode %{ 5469 __ movsbl($dst$$Register, $mem$$Address); 5470 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5471 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5472 %} 5473 5474 ins_pipe(ialu_reg_mem); 5475 %} 5476 5477 // Load Unsigned Byte (8bit UNsigned) 5478 instruct loadUB(xRegI dst, memory mem) %{ 5479 match(Set dst (LoadUB mem)); 5480 5481 ins_cost(125); 5482 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5483 5484 ins_encode %{ 5485 __ movzbl($dst$$Register, $mem$$Address); 5486 %} 5487 5488 ins_pipe(ialu_reg_mem); 5489 %} 5490 5491 // Load Unsigned Byte (8 bit UNsigned) into Long Register 5492 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5493 match(Set dst (ConvI2L (LoadUB mem))); 5494 effect(KILL cr); 5495 5496 ins_cost(250); 5497 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5498 "XOR $dst.hi,$dst.hi" %} 5499 5500 ins_encode %{ 5501 Register Rdst = $dst$$Register; 5502 __ movzbl(Rdst, $mem$$Address); 5503 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5504 %} 5505 5506 ins_pipe(ialu_reg_mem); 5507 %} 5508 5509 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5510 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5511 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5512 effect(KILL cr); 5513 5514 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5515 "XOR $dst.hi,$dst.hi\n\t" 5516 "AND $dst.lo,right_n_bits($mask, 8)" %} 5517 ins_encode %{ 5518 Register Rdst = $dst$$Register; 5519 __ movzbl(Rdst, $mem$$Address); 5520 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5521 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5522 %} 5523 ins_pipe(ialu_reg_mem); 5524 %} 5525 5526 // Load Short (16bit signed) 5527 instruct loadS(rRegI 
dst, memory mem) %{ 5528 match(Set dst (LoadS mem)); 5529 5530 ins_cost(125); 5531 format %{ "MOVSX $dst,$mem\t# short" %} 5532 5533 ins_encode %{ 5534 __ movswl($dst$$Register, $mem$$Address); 5535 %} 5536 5537 ins_pipe(ialu_reg_mem); 5538 %} 5539 5540 // Load Short (16 bit signed) to Byte (8 bit signed) 5541 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5542 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5543 5544 ins_cost(125); 5545 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5546 ins_encode %{ 5547 __ movsbl($dst$$Register, $mem$$Address); 5548 %} 5549 ins_pipe(ialu_reg_mem); 5550 %} 5551 5552 // Load Short (16bit signed) into Long Register 5553 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5554 match(Set dst (ConvI2L (LoadS mem))); 5555 effect(KILL cr); 5556 5557 ins_cost(375); 5558 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5559 "MOV $dst.hi,$dst.lo\n\t" 5560 "SAR $dst.hi,15" %} 5561 5562 ins_encode %{ 5563 __ movswl($dst$$Register, $mem$$Address); 5564 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5565 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5566 %} 5567 5568 ins_pipe(ialu_reg_mem); 5569 %} 5570 5571 // Load Unsigned Short/Char (16bit unsigned) 5572 instruct loadUS(rRegI dst, memory mem) %{ 5573 match(Set dst (LoadUS mem)); 5574 5575 ins_cost(125); 5576 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5577 5578 ins_encode %{ 5579 __ movzwl($dst$$Register, $mem$$Address); 5580 %} 5581 5582 ins_pipe(ialu_reg_mem); 5583 %} 5584 5585 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5586 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5587 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5588 5589 ins_cost(125); 5590 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5591 ins_encode %{ 5592 __ movsbl($dst$$Register, $mem$$Address); 5593 %} 5594 ins_pipe(ialu_reg_mem); 5595 %} 5596 5597 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5598 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5599 match(Set dst (ConvI2L (LoadUS mem))); 5600 effect(KILL cr); 5601 5602 ins_cost(250); 5603 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5604 "XOR $dst.hi,$dst.hi" %} 5605 5606 ins_encode %{ 5607 __ movzwl($dst$$Register, $mem$$Address); 5608 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5609 %} 5610 5611 ins_pipe(ialu_reg_mem); 5612 %} 5613 5614 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5615 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5616 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5617 effect(KILL cr); 5618 5619 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5620 "XOR $dst.hi,$dst.hi" %} 5621 ins_encode %{ 5622 Register Rdst = $dst$$Register; 5623 __ movzbl(Rdst, $mem$$Address); 5624 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5625 %} 5626 ins_pipe(ialu_reg_mem); 5627 %} 5628 5629 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5630 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5631 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5632 effect(KILL cr); 5633 5634 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5635 "XOR $dst.hi,$dst.hi\n\t" 5636 "AND $dst.lo,right_n_bits($mask, 16)" %} 5637 ins_encode %{ 5638 Register Rdst = $dst$$Register; 5639 __ movzwl(Rdst, $mem$$Address); 5640 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5641 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5642 %} 5643 ins_pipe(ialu_reg_mem); 5644 %} 5645 5646 // Load Integer 5647 instruct loadI(rRegI dst, memory mem) %{ 5648 match(Set dst (LoadI mem)); 5649 5650 ins_cost(125); 5651 format %{ "MOV $dst,$mem\t# int" %} 5652 5653 ins_encode %{ 5654 __ movl($dst$$Register, $mem$$Address); 5655 %} 5656 5657 ins_pipe(ialu_reg_mem); 5658 %} 5659 5660 // Load Integer (32 bit signed) to Byte (8 bit signed) 5661 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5662 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5663 5664 ins_cost(125); 5665 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5666 ins_encode %{ 5667 __ movsbl($dst$$Register, $mem$$Address); 5668 %} 5669 ins_pipe(ialu_reg_mem); 5670 %} 5671 5672 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5673 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5674 match(Set dst (AndI (LoadI mem) mask)); 5675 5676 ins_cost(125); 5677 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5678 ins_encode %{ 5679 __ movzbl($dst$$Register, $mem$$Address); 5680 %} 5681 ins_pipe(ialu_reg_mem); 5682 %} 5683 5684 // Load Integer (32 bit signed) to Short (16 bit signed) 5685 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5686 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5687 5688 ins_cost(125); 5689 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5690 ins_encode %{ 5691 __ movswl($dst$$Register, $mem$$Address); 5692 %} 5693 ins_pipe(ialu_reg_mem); 5694 
%} 5695 5696 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5697 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5698 match(Set dst (AndI (LoadI mem) mask)); 5699 5700 ins_cost(125); 5701 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5702 ins_encode %{ 5703 __ movzwl($dst$$Register, $mem$$Address); 5704 %} 5705 ins_pipe(ialu_reg_mem); 5706 %} 5707 5708 // Load Integer into Long Register 5709 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5710 match(Set dst (ConvI2L (LoadI mem))); 5711 effect(KILL cr); 5712 5713 ins_cost(375); 5714 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5715 "MOV $dst.hi,$dst.lo\n\t" 5716 "SAR $dst.hi,31" %} 5717 5718 ins_encode %{ 5719 __ movl($dst$$Register, $mem$$Address); 5720 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5721 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5722 %} 5723 5724 ins_pipe(ialu_reg_mem); 5725 %} 5726 5727 // Load Integer with mask 0xFF into Long Register 5728 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5729 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5730 effect(KILL cr); 5731 5732 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5733 "XOR $dst.hi,$dst.hi" %} 5734 ins_encode %{ 5735 Register Rdst = $dst$$Register; 5736 __ movzbl(Rdst, $mem$$Address); 5737 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5738 %} 5739 ins_pipe(ialu_reg_mem); 5740 %} 5741 5742 // Load Integer with mask 0xFFFF into Long Register 5743 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5744 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5745 effect(KILL cr); 5746 5747 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5748 "XOR $dst.hi,$dst.hi" %} 5749 ins_encode %{ 5750 Register Rdst = $dst$$Register; 5751 __ movzwl(Rdst, $mem$$Address); 5752 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5753 %} 5754 ins_pipe(ialu_reg_mem); 
5755 %} 5756 5757 // Load Integer with 31-bit mask into Long Register 5758 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ 5759 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5760 effect(KILL cr); 5761 5762 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" 5763 "XOR $dst.hi,$dst.hi\n\t" 5764 "AND $dst.lo,$mask" %} 5765 ins_encode %{ 5766 Register Rdst = $dst$$Register; 5767 __ movl(Rdst, $mem$$Address); 5768 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5769 __ andl(Rdst, $mask$$constant); 5770 %} 5771 ins_pipe(ialu_reg_mem); 5772 %} 5773 5774 // Load Unsigned Integer into Long Register 5775 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ 5776 match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); 5777 effect(KILL cr); 5778 5779 ins_cost(250); 5780 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" 5781 "XOR $dst.hi,$dst.hi" %} 5782 5783 ins_encode %{ 5784 __ movl($dst$$Register, $mem$$Address); 5785 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5786 %} 5787 5788 ins_pipe(ialu_reg_mem); 5789 %} 5790 5791 // Load Long. Cannot clobber address while loading, so restrict address 5792 // register to ESI 5793 instruct loadL(eRegL dst, load_long_memory mem) %{ 5794 predicate(!((LoadLNode*)n)->require_atomic_access()); 5795 match(Set dst (LoadL mem)); 5796 5797 ins_cost(250); 5798 format %{ "MOV $dst.lo,$mem\t# long\n\t" 5799 "MOV $dst.hi,$mem+4" %} 5800 5801 ins_encode %{ 5802 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); 5803 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); 5804 __ movl($dst$$Register, Amemlo); 5805 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); 5806 %} 5807 5808 ins_pipe(ialu_reg_long_mem); 5809 %} 5810 5811 // Volatile Load Long. Must be atomic, so do 64-bit FILD 5812 // then store it down to the stack and reload on the int 5813 // side. 
// No SSE2: an 80-bit x87 FILD/FISTP pair is the only way to move 8 bytes
// in one indivisible memory access; result lands in a stack slot.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// SSE2: one 64-bit MOVSD is atomic; bounce through an XMM temp to a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2, register destination: atomic MOVSD into XMM, then split the 64-bit
// value into the eRegL pair via MOVD + PSRLQ (cheaper than the stack bounce).
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range (array length)
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 only; push through ST then pop into the virtual FPU reg)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
// NOTE(review): both this and loadD_partial call movdbl(); the MOVSD vs
// MOVLPD distinction shown in the formats is presumably made inside the
// macro assembler based on UseXmmLoadAndClearUpper — confirm there.
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Double to XMM, upper half of the register left unchanged
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 only; push through ST then pop into the virtual FPU reg)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_mem ); 5953 %} 5954 5955 // Load Effective Address 5956 instruct leaP8(eRegP dst, indOffset8 mem) %{ 5957 match(Set dst mem); 5958 5959 ins_cost(110); 5960 format %{ "LEA $dst,$mem" %} 5961 opcode(0x8D); 5962 ins_encode( OpcP, RegMem(dst,mem)); 5963 ins_pipe( ialu_reg_reg_fat ); 5964 %} 5965 5966 instruct leaP32(eRegP dst, indOffset32 mem) %{ 5967 match(Set dst mem); 5968 5969 ins_cost(110); 5970 format %{ "LEA $dst,$mem" %} 5971 opcode(0x8D); 5972 ins_encode( OpcP, RegMem(dst,mem)); 5973 ins_pipe( ialu_reg_reg_fat ); 5974 %} 5975 5976 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ 5977 match(Set dst mem); 5978 5979 ins_cost(110); 5980 format %{ "LEA $dst,$mem" %} 5981 opcode(0x8D); 5982 ins_encode( OpcP, RegMem(dst,mem)); 5983 ins_pipe( ialu_reg_reg_fat ); 5984 %} 5985 5986 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ 5987 match(Set dst mem); 5988 5989 ins_cost(110); 5990 format %{ "LEA $dst,$mem" %} 5991 opcode(0x8D); 5992 ins_encode( OpcP, RegMem(dst,mem)); 5993 ins_pipe( ialu_reg_reg_fat ); 5994 %} 5995 5996 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ 5997 match(Set dst mem); 5998 5999 ins_cost(110); 6000 format %{ "LEA $dst,$mem" %} 6001 opcode(0x8D); 6002 ins_encode( OpcP, RegMem(dst,mem)); 6003 ins_pipe( ialu_reg_reg_fat ); 6004 %} 6005 6006 // Load Constant 6007 instruct loadConI(rRegI dst, immI src) %{ 6008 match(Set dst src); 6009 6010 format %{ "MOV $dst,$src" %} 6011 ins_encode( LdImmI(dst, src) ); 6012 ins_pipe( ialu_reg_fat ); 6013 %} 6014 6015 // Load Constant zero 6016 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{ 6017 match(Set dst src); 6018 effect(KILL cr); 6019 6020 ins_cost(50); 6021 format %{ "XOR $dst,$dst" %} 6022 opcode(0x33); /* + rd */ 6023 ins_encode( OpcP, RegReg( dst, dst ) ); 6024 ins_pipe( ialu_reg ); 6025 %} 6026 6027 instruct loadConP(eRegP dst, immP src) %{ 6028 match(Set dst src); 6029 6030 format %{ "MOV $dst,$src" %} 6031 opcode(0xB8); /* + rd */ 6032 
ins_encode( LdImmP(dst, src) ); 6033 ins_pipe( ialu_reg_fat ); 6034 %} 6035 6036 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ 6037 match(Set dst src); 6038 effect(KILL cr); 6039 ins_cost(200); 6040 format %{ "MOV $dst.lo,$src.lo\n\t" 6041 "MOV $dst.hi,$src.hi" %} 6042 opcode(0xB8); 6043 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); 6044 ins_pipe( ialu_reg_long_fat ); 6045 %} 6046 6047 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ 6048 match(Set dst src); 6049 effect(KILL cr); 6050 ins_cost(150); 6051 format %{ "XOR $dst.lo,$dst.lo\n\t" 6052 "XOR $dst.hi,$dst.hi" %} 6053 opcode(0x33,0x33); 6054 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 6055 ins_pipe( ialu_reg_long ); 6056 %} 6057 6058 // The instruction usage is guarded by predicate in operand immFPR(). 6059 instruct loadConFPR(regFPR dst, immFPR con) %{ 6060 match(Set dst con); 6061 ins_cost(125); 6062 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 6063 "FSTP $dst" %} 6064 ins_encode %{ 6065 __ fld_s($constantaddress($con)); 6066 __ fstp_d($dst$$reg); 6067 %} 6068 ins_pipe(fpu_reg_con); 6069 %} 6070 6071 // The instruction usage is guarded by predicate in operand immFPR0(). 6072 instruct loadConFPR0(regFPR dst, immFPR0 con) %{ 6073 match(Set dst con); 6074 ins_cost(125); 6075 format %{ "FLDZ ST\n\t" 6076 "FSTP $dst" %} 6077 ins_encode %{ 6078 __ fldz(); 6079 __ fstp_d($dst$$reg); 6080 %} 6081 ins_pipe(fpu_reg_con); 6082 %} 6083 6084 // The instruction usage is guarded by predicate in operand immFPR1(). 6085 instruct loadConFPR1(regFPR dst, immFPR1 con) %{ 6086 match(Set dst con); 6087 ins_cost(125); 6088 format %{ "FLD1 ST\n\t" 6089 "FSTP $dst" %} 6090 ins_encode %{ 6091 __ fld1(); 6092 __ fstp_d($dst$$reg); 6093 %} 6094 ins_pipe(fpu_reg_con); 6095 %} 6096 6097 // The instruction usage is guarded by predicate in operand immF(). 
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);        /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD);        /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

// AllocatePrefetchInstr selects the variant; without SSE (and unless
// PREFETCHW was requested) prefetch degenerates to an empty encoding.
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: source is in a stack slot, pushed through an XMM temp and
// stored with a single 64-bit MOVSD (atomic).
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant with the source in a GPR pair: assemble the two 32-bit
// halves into one XMM register (PUNPCKLDQ), then store 64 bits atomically.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// Store Float
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// FST_S of a double source narrows to single precision (ConvD2F folded in).
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier: emitted as a locked ADD to the top of stack
// (see the assembler's membar(Assembler::StoreLoad)); clobbers EFLAGS.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// Both operands are pinned to EAX, so the cast needs no code at all.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
// "jmov" variants emulate CMOV with a short branch on pre-CMOV hardware.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// FCMOVcc only exists for unsigned conditions, so the signed case is
// emulated with an inverted short branch around an FPU register move.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format
%{ "CMOV$cop $dst.lo,$src.lo\n\t" 7039 "CMOV$cop $dst.hi,$src.hi" %} 7040 opcode(0x0F,0x40); 7041 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7042 ins_pipe( pipe_cmov_reg_long ); 7043 %} 7044 7045 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ 7046 predicate(VM_Version::supports_cmov() ); 7047 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7048 ins_cost(200); 7049 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 7050 "CMOV$cop $dst.hi,$src.hi" %} 7051 opcode(0x0F,0x40); 7052 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 7053 ins_pipe( pipe_cmov_reg_long ); 7054 %} 7055 7056 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ 7057 predicate(VM_Version::supports_cmov() ); 7058 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 7059 ins_cost(200); 7060 expand %{ 7061 cmovL_regU(cop, cr, dst, src); 7062 %} 7063 %} 7064 7065 //----------Arithmetic Instructions-------------------------------------------- 7066 //----------Addition Instructions---------------------------------------------- 7067 7068 // Integer Addition Instructions 7069 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7070 match(Set dst (AddI dst src)); 7071 effect(KILL cr); 7072 7073 size(2); 7074 format %{ "ADD $dst,$src" %} 7075 opcode(0x03); 7076 ins_encode( OpcP, RegReg( dst, src) ); 7077 ins_pipe( ialu_reg_reg ); 7078 %} 7079 7080 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7081 match(Set dst (AddI dst src)); 7082 effect(KILL cr); 7083 7084 format %{ "ADD $dst,$src" %} 7085 opcode(0x81, 0x00); /* /0 id */ 7086 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7087 ins_pipe( ialu_reg ); 7088 %} 7089 7090 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{ 7091 predicate(UseIncDec); 7092 match(Set dst (AddI dst src)); 7093 effect(KILL cr); 7094 7095 size(1); 7096 format %{ "INC $dst" %} 7097 opcode(0x40); /* */ 7098 
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: does not clobber flags (no eFlagsReg effect).
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer + int via LEA; flags are preserved.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add -1 as a one-byte DEC when UseIncDec is enabled.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer += int register.
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer += immediate.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Register += memory operand (reg-mem form of ADD).
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Memory += 1 as an INC of the memory operand.
instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Memory += -1 as a DEC of the memory operand.
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// CheckCastPP: type-system-only node; emits no code (size(0)).
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastPP: pointer cast, no code emitted.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastII: int cast, no code emitted.
instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastLL: long cast, no code emitted.
instruct castLL( eRegL dst ) %{
  match(Set dst (CastLL dst));
  format %{ "#castLL of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastFF for the SSE register file.
instruct castFF( regF dst ) %{
  predicate(UseSSE >= 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastDD for the SSE2 register file.
instruct castDD( regD dst ) %{
  predicate(UseSSE >= 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastFF for the x87 register file (no SSE).
instruct castFF_PR( regFPR dst ) %{
  predicate(UseSSE < 1);
  match(Set dst (CastFF dst));
  format %{ "#castFF of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// CastDD for the x87 register file (no SSE2).
instruct castDD_PR( regDPR dst ) %{
  predicate(UseSSE < 2);
  match(Set dst (CastDD dst));
  format %{ "#castDD of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS via CMPXCHG8B: oldval pinned to EDX:EAX, newval to ECX:EBX,
// pointer in ESI.  Result is the success flag materialized into res.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS: oldval pinned to EAX, newval to ECX.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Byte CAS.
instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgb(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Short CAS.
instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchgw(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange variants: return the witnessed old value (left in the
// oldval register by CMPXCHG) instead of a boolean result.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgb(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchgw(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddB when the result is unused: a locked ADD is sufficient.
instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddS when the result is unused: a locked word ADD is sufficient.
instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDS [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDS [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI when the result is unused: a locked ADD is sufficient.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Important to match to xRegI: only 8-bit regs.
// XCHG with a memory operand is implicitly locked; no explicit lock() needed.
instruct xchgB( memory mem, xRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// 0 - dst is a two's-complement NEG.
instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low half (EAX) of an EDX:EAX pair;
// used as a helper for the high-word multiply rules below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// MulAddS2I: dst = dst*src1 + src2*src3, expanded into two multiplies and an add.
instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_eReg(dst, src1, cr);
            mulI_eReg(src2, src3, cr);
            addI_eReg(dst, src2, cr); %}
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  // hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// Special-cases min_jint / -1, which would fault in IDIV; the format below
// shows the guarded sequence.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// No 64-bit divide instruction on ia32: call into the runtime.
instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (DivL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// IDIV leaves the remainder in EDX; EAX (the quotient) is killed.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// No 64-bit remainder instruction on ia32: call into the runtime.
instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{
  match(Set dst (ModL src1 src2));
  effect(CALL);
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    // Divide by |con| with unsigned DIV, then fix the sign at the end.
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
7915 // convert value to positive to use unsigned division 7916 __ lneg($dst$$Register, $tmp2$$Register); 7917 __ divl($tmp$$Register); 7918 __ xchgl($dst$$Register, $tmp2$$Register); 7919 __ divl($tmp$$Register); 7920 // revert result back to negative 7921 __ lneg($tmp2$$Register, $dst$$Register); 7922 __ jmpb(Ldone); 7923 7924 __ bind(Lpos); 7925 __ divl($tmp$$Register); // Use unsigned division 7926 __ xchgl($dst$$Register, $tmp2$$Register); 7927 // Fallthrow for final divide, tmp2 has 32 bit hi result 7928 7929 __ bind(Lfast); 7930 // fast path: src is positive 7931 __ divl($tmp$$Register); // Use unsigned division 7932 7933 __ bind(Ldone); 7934 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7935 if (con < 0) { 7936 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7937 } 7938 %} 7939 ins_pipe( pipe_slow ); 7940 %} 7941 7942 // Remainder Register Long (remainder fit into 32 bits) 7943 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7944 match(Set dst (ModL dst imm)); 7945 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7946 ins_cost(1000); 7947 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7948 "CMP $tmp,EDX\n\t" 7949 "JA,s fast\n\t" 7950 "MOV $tmp2,EAX\n\t" 7951 "MOV EAX,EDX\n\t" 7952 "MOV EDX,0\n\t" 7953 "JLE,s pos\n\t" 7954 "LNEG EAX : $tmp2\n\t" 7955 "DIV $tmp # unsigned division\n\t" 7956 "MOV EAX,$tmp2\n\t" 7957 "DIV $tmp\n\t" 7958 "NEG EDX\n\t" 7959 "JMP,s done\n" 7960 "pos:\n\t" 7961 "DIV $tmp\n\t" 7962 "MOV EAX,$tmp2\n" 7963 "fast:\n\t" 7964 "DIV $tmp\n" 7965 "done:\n\t" 7966 "MOV EAX,EDX\n\t" 7967 "SAR EDX,31\n\t" %} 7968 ins_encode %{ 7969 int con = (int)$imm$$constant; 7970 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7971 int pcon = (con > 0) ? 
con : -con; 7972 Label Lfast, Lpos, Ldone; 7973 7974 __ movl($tmp$$Register, pcon); 7975 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7976 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7977 7978 __ movl($tmp2$$Register, $dst$$Register); // save 7979 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7980 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7981 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7982 7983 // Negative dividend. 7984 // convert value to positive to use unsigned division 7985 __ lneg($dst$$Register, $tmp2$$Register); 7986 __ divl($tmp$$Register); 7987 __ movl($dst$$Register, $tmp2$$Register); 7988 __ divl($tmp$$Register); 7989 // revert remainder back to negative 7990 __ negl(HIGH_FROM_LOW($dst$$Register)); 7991 __ jmpb(Ldone); 7992 7993 __ bind(Lpos); 7994 __ divl($tmp$$Register); 7995 __ movl($dst$$Register, $tmp2$$Register); 7996 7997 __ bind(Lfast); 7998 // fast path: src is positive 7999 __ divl($tmp$$Register); 8000 8001 __ bind(Ldone); 8002 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 8003 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 8004 8005 %} 8006 ins_pipe( pipe_slow ); 8007 %} 8008 8009 // Integer Shift Instructions 8010 // Shift Left by one 8011 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ 8012 match(Set dst (LShiftI dst shift)); 8013 effect(KILL cr); 8014 8015 size(2); 8016 format %{ "SHL $dst,$shift" %} 8017 opcode(0xD1, 0x4); /* D1 /4 */ 8018 ins_encode( OpcP, RegOpc( dst ) ); 8019 ins_pipe( ialu_reg ); 8020 %} 8021 8022 // Shift Left by 8-bit immediate 8023 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8024 match(Set dst (LShiftI dst shift)); 8025 effect(KILL cr); 8026 8027 size(3); 8028 format %{ "SHL $dst,$shift" %} 8029 opcode(0xC1, 0x4); /* C1 /4 ib */ 8030 ins_encode( RegOpcImm( dst, shift) ); 8031 ins_pipe( ialu_reg ); 8032 %} 8033 8034 // Shift Left by variable 8035 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  // Variable shift count must live in ECX (CL) on x86.
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4);  /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
// Memory form: read-modify-write of a single memory operand.
instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Recognized and collapsed into a single sign-extending byte move.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Recognized and collapsed into a single sign-extending word move.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from the (src1 ^ -1) & src2 idiom.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from the (0 - src) & src idiom.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to lowest set bit, matched from (src + -1) ^ src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from (src + -1) & src.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// The rolI_/rorI_ "expand" instructs below are building blocks only: they
// carry no match rule and are instantiated by the expand %{ %} clauses of
// the pattern-matching rotate instructs that follow.
instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);    /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  // Only a rotate when the two shift counts sum to 32 (mod 32).
  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);    /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  // Only a rotate when the two shift counts sum to 32 (mod 32).
  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// XOR with -1 is a bitwise NOT; uses NOT which does not touch EFLAGS,
// hence no KILL cr here.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set
dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Plain register copy used as the first half of the Conv2B expansion.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG/ADC trick: after NEG dst, carry is set iff dst was non-zero;
// ADC dst,src then leaves 0 or 1 in dst (dst == src on entry).
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );  // 0x13 = ADC r32,r/m32
  ins_pipe( ialu_reg_reg_long );
%}

// Convert int to boolean (0 -> 0, non-zero -> 1).
instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer flavor of movI_nocopy for the pointer Conv2B expansion.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Same NEG/ADC trick as ci2b, for a pointer source.
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Convert pointer to boolean (null -> 0, non-null -> 1).
instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// Produce -1 if p < q (signed), else 0, without a branch:
// XOR zeroes dst, SETlt captures the comparison, NEG spreads bit 0
// into a full-width mask.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    // Note: the sequence is branchless; the unused "Label done" that used
    // to be declared here has been removed.
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: the sign bit already holds the answer, so a
// single arithmetic shift produces the 0/-1 mask.
instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
  __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// p = (p < q) ? p - q + y : p - q  -- matched from the masked-add idiom.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// y = (p < q) ? y : 0 -- matched from the mask-and idiom.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
   instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
     match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These set only the condition codes (cr); Math.*Exact intrinsics test the
// overflow flag afterwards.

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD    $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD    $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtraction overflow check can use CMP, which leaves op1 intact.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// 0 - op2 overflow check: NEG sets OF when op2 == min_jint.
instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG    $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL    $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Immediate form needs a scratch register for the three-operand IMUL.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Absolute Instructions
// Branchless abs: tmp = sign mask (src >> 31), dst = (src ^ mask) - mask.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
          %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

//----------Long Instructions------------------------------------------------
// 64-bit ops on 32-bit x86: each is a lo/hi instruction pair with the
// carry/borrow chained from the low half (ADD/ADC, SUB/SBB).
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD    $dst.lo,$src.lo\n\t"
            "ADC    $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD    $dst.lo,$src.lo\n\t"
            "ADC    $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD    $dst.lo,$mem\n\t"
            "ADC    $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB    $dst.lo,$src.lo\n\t"
            "SBB    $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB    $dst.lo,$src.lo\n\t"
            "SBB    $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB    $dst.lo,$mem\n\t"
            "SBB    $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long (0 - dst), expressed as the NEG/NEG/SBB sequence in the format.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
  ins_encode(
neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND    $dst.lo,$src.lo\n\t"
            "AND    $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND    $dst.lo,$src.lo\n\t"
            "AND    $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND    $dst.lo,$mem\n\t"
            "AND    $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long ANDN: ANDN applied independently to the low and high 32-bit halves.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL  $dst.hi, $src1.hi, $src2.hi"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL  $dst.hi, $src1.hi, $src2+4"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // High word lives 4 bytes past the low word of the long in memory.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSI: isolate lowest set bit of a 64-bit value.  If the low half
// has any set bit (BLSIL result non-zero), the high half of the result is
// zero; otherwise apply BLSIL to the high half.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL   $dst.hi, 0\n\t"
            "BLSIL  $dst.lo, $src.lo\n\t"
            "JNZ    done\n\t"
            "BLSIL  $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL   $dst.hi, 0\n\t"
            "BLSIL  $dst.lo, $src\n\t"
            "JNZ    done\n\t"
            "BLSIL  $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // High word lives 4 bytes past the low word of the long in memory.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long BLSMSK: mask up to and including lowest set bit.  BLSMSKL leaves
// the carry flag clear iff the low half was non-zero, so JNC decides
// whether the high half needs processing.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL    $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC     done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL    $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC     done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // High word lives 4 bytes past the low word of the long in memory.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Long BLSR: reset lowest set bit of a 64-bit value.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL   $dst.hi, $src.hi\n\t"
9149 "BLSRL $dst.lo, $src.lo\n\t" 9150 "JNC done\n\t" 9151 "BLSRL $dst.hi, $src.hi\n" 9152 "done:" 9153 %} 9154 9155 ins_encode %{ 9156 Label done; 9157 Register Rdst = $dst$$Register; 9158 Register Rsrc = $src$$Register; 9159 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9160 __ blsrl(Rdst, Rsrc); 9161 __ jccb(Assembler::carryClear, done); 9162 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 9163 __ bind(done); 9164 %} 9165 9166 ins_pipe(ialu_reg); 9167 %} 9168 9169 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 9170 %{ 9171 match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); 9172 predicate(UseBMI1Instructions); 9173 effect(KILL cr, TEMP dst); 9174 9175 ins_cost(125); 9176 format %{ "MOVL $dst.hi, $src+4\n\t" 9177 "BLSRL $dst.lo, $src\n\t" 9178 "JNC done\n\t" 9179 "BLSRL $dst.hi, $src+4\n" 9180 "done:" 9181 %} 9182 9183 ins_encode %{ 9184 Label done; 9185 Register Rdst = $dst$$Register; 9186 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9187 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9188 __ blsrl(Rdst, $src$$Address); 9189 __ jccb(Assembler::carryClear, done); 9190 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9191 __ bind(done); 9192 %} 9193 9194 ins_pipe(ialu_reg_mem); 9195 %} 9196 9197 // Or Long Register with Register 9198 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9199 match(Set dst (OrL dst src)); 9200 effect(KILL cr); 9201 format %{ "OR $dst.lo,$src.lo\n\t" 9202 "OR $dst.hi,$src.hi" %} 9203 opcode(0x0B,0x0B); 9204 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9205 ins_pipe( ialu_reg_reg_long ); 9206 %} 9207 9208 // Or Long Register with Immediate 9209 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9210 match(Set dst (OrL dst src)); 9211 effect(KILL cr); 9212 format %{ "OR $dst.lo,$src.lo\n\t" 9213 "OR $dst.hi,$src.hi" %} 9214 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9215 ins_encode( Long_OpcSErm_Lo( dst, src ), 
Long_OpcSErm_Hi( dst, src ) ); 9216 ins_pipe( ialu_reg_long ); 9217 %} 9218 9219 // Or Long Register with Memory 9220 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9221 match(Set dst (OrL dst (LoadL mem))); 9222 effect(KILL cr); 9223 ins_cost(125); 9224 format %{ "OR $dst.lo,$mem\n\t" 9225 "OR $dst.hi,$mem+4" %} 9226 opcode(0x0B,0x0B); 9227 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9228 ins_pipe( ialu_reg_long_mem ); 9229 %} 9230 9231 // Xor Long Register with Register 9232 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9233 match(Set dst (XorL dst src)); 9234 effect(KILL cr); 9235 format %{ "XOR $dst.lo,$src.lo\n\t" 9236 "XOR $dst.hi,$src.hi" %} 9237 opcode(0x33,0x33); 9238 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9239 ins_pipe( ialu_reg_reg_long ); 9240 %} 9241 9242 // Xor Long Register with Immediate -1 9243 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9244 match(Set dst (XorL dst imm)); 9245 format %{ "NOT $dst.lo\n\t" 9246 "NOT $dst.hi" %} 9247 ins_encode %{ 9248 __ notl($dst$$Register); 9249 __ notl(HIGH_FROM_LOW($dst$$Register)); 9250 %} 9251 ins_pipe( ialu_reg_long ); 9252 %} 9253 9254 // Xor Long Register with Immediate 9255 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9256 match(Set dst (XorL dst src)); 9257 effect(KILL cr); 9258 format %{ "XOR $dst.lo,$src.lo\n\t" 9259 "XOR $dst.hi,$src.hi" %} 9260 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9261 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9262 ins_pipe( ialu_reg_long ); 9263 %} 9264 9265 // Xor Long Register with Memory 9266 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9267 match(Set dst (XorL dst (LoadL mem))); 9268 effect(KILL cr); 9269 ins_cost(125); 9270 format %{ "XOR $dst.lo,$mem\n\t" 9271 "XOR $dst.hi,$mem+4" %} 9272 opcode(0x33,0x33); 9273 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9274 ins_pipe( ialu_reg_long_mem ); 
9275 %} 9276 9277 // Shift Left Long by 1 9278 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9279 predicate(UseNewLongLShift); 9280 match(Set dst (LShiftL dst cnt)); 9281 effect(KILL cr); 9282 ins_cost(100); 9283 format %{ "ADD $dst.lo,$dst.lo\n\t" 9284 "ADC $dst.hi,$dst.hi" %} 9285 ins_encode %{ 9286 __ addl($dst$$Register,$dst$$Register); 9287 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9288 %} 9289 ins_pipe( ialu_reg_long ); 9290 %} 9291 9292 // Shift Left Long by 2 9293 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9294 predicate(UseNewLongLShift); 9295 match(Set dst (LShiftL dst cnt)); 9296 effect(KILL cr); 9297 ins_cost(100); 9298 format %{ "ADD $dst.lo,$dst.lo\n\t" 9299 "ADC $dst.hi,$dst.hi\n\t" 9300 "ADD $dst.lo,$dst.lo\n\t" 9301 "ADC $dst.hi,$dst.hi" %} 9302 ins_encode %{ 9303 __ addl($dst$$Register,$dst$$Register); 9304 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9305 __ addl($dst$$Register,$dst$$Register); 9306 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9307 %} 9308 ins_pipe( ialu_reg_long ); 9309 %} 9310 9311 // Shift Left Long by 3 9312 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9313 predicate(UseNewLongLShift); 9314 match(Set dst (LShiftL dst cnt)); 9315 effect(KILL cr); 9316 ins_cost(100); 9317 format %{ "ADD $dst.lo,$dst.lo\n\t" 9318 "ADC $dst.hi,$dst.hi\n\t" 9319 "ADD $dst.lo,$dst.lo\n\t" 9320 "ADC $dst.hi,$dst.hi\n\t" 9321 "ADD $dst.lo,$dst.lo\n\t" 9322 "ADC $dst.hi,$dst.hi" %} 9323 ins_encode %{ 9324 __ addl($dst$$Register,$dst$$Register); 9325 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9326 __ addl($dst$$Register,$dst$$Register); 9327 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9328 __ addl($dst$$Register,$dst$$Register); 9329 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9330 %} 9331 ins_pipe( ialu_reg_long ); 9332 %} 9333 9334 // Shift Left 
Long by 1-31 9335 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9336 match(Set dst (LShiftL dst cnt)); 9337 effect(KILL cr); 9338 ins_cost(200); 9339 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9340 "SHL $dst.lo,$cnt" %} 9341 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9342 ins_encode( move_long_small_shift(dst,cnt) ); 9343 ins_pipe( ialu_reg_long ); 9344 %} 9345 9346 // Shift Left Long by 32-63 9347 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9348 match(Set dst (LShiftL dst cnt)); 9349 effect(KILL cr); 9350 ins_cost(300); 9351 format %{ "MOV $dst.hi,$dst.lo\n" 9352 "\tSHL $dst.hi,$cnt-32\n" 9353 "\tXOR $dst.lo,$dst.lo" %} 9354 opcode(0xC1, 0x4); /* C1 /4 ib */ 9355 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9356 ins_pipe( ialu_reg_long ); 9357 %} 9358 9359 // Shift Left Long by variable 9360 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9361 match(Set dst (LShiftL dst shift)); 9362 effect(KILL cr); 9363 ins_cost(500+200); 9364 size(17); 9365 format %{ "TEST $shift,32\n\t" 9366 "JEQ,s small\n\t" 9367 "MOV $dst.hi,$dst.lo\n\t" 9368 "XOR $dst.lo,$dst.lo\n" 9369 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9370 "SHL $dst.lo,$shift" %} 9371 ins_encode( shift_left_long( dst, shift ) ); 9372 ins_pipe( pipe_slow ); 9373 %} 9374 9375 // Shift Right Long by 1-31 9376 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9377 match(Set dst (URShiftL dst cnt)); 9378 effect(KILL cr); 9379 ins_cost(200); 9380 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9381 "SHR $dst.hi,$cnt" %} 9382 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9383 ins_encode( move_long_small_shift(dst,cnt) ); 9384 ins_pipe( ialu_reg_long ); 9385 %} 9386 9387 // Shift Right Long by 32-63 9388 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9389 match(Set dst (URShiftL dst cnt)); 9390 effect(KILL cr); 9391 ins_cost(300); 9392 format %{ "MOV $dst.lo,$dst.hi\n" 9393 "\tSHR $dst.lo,$cnt-32\n" 9394 "\tXOR 
$dst.hi,$dst.hi" %} 9395 opcode(0xC1, 0x5); /* C1 /5 ib */ 9396 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9397 ins_pipe( ialu_reg_long ); 9398 %} 9399 9400 // Shift Right Long by variable 9401 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9402 match(Set dst (URShiftL dst shift)); 9403 effect(KILL cr); 9404 ins_cost(600); 9405 size(17); 9406 format %{ "TEST $shift,32\n\t" 9407 "JEQ,s small\n\t" 9408 "MOV $dst.lo,$dst.hi\n\t" 9409 "XOR $dst.hi,$dst.hi\n" 9410 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9411 "SHR $dst.hi,$shift" %} 9412 ins_encode( shift_right_long( dst, shift ) ); 9413 ins_pipe( pipe_slow ); 9414 %} 9415 9416 // Shift Right Long by 1-31 9417 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9418 match(Set dst (RShiftL dst cnt)); 9419 effect(KILL cr); 9420 ins_cost(200); 9421 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9422 "SAR $dst.hi,$cnt" %} 9423 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9424 ins_encode( move_long_small_shift(dst,cnt) ); 9425 ins_pipe( ialu_reg_long ); 9426 %} 9427 9428 // Shift Right Long by 32-63 9429 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9430 match(Set dst (RShiftL dst cnt)); 9431 effect(KILL cr); 9432 ins_cost(300); 9433 format %{ "MOV $dst.lo,$dst.hi\n" 9434 "\tSAR $dst.lo,$cnt-32\n" 9435 "\tSAR $dst.hi,31" %} 9436 opcode(0xC1, 0x7); /* C1 /7 ib */ 9437 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9438 ins_pipe( ialu_reg_long ); 9439 %} 9440 9441 // Shift Right arithmetic Long by variable 9442 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9443 match(Set dst (RShiftL dst shift)); 9444 effect(KILL cr); 9445 ins_cost(600); 9446 size(18); 9447 format %{ "TEST $shift,32\n\t" 9448 "JEQ,s small\n\t" 9449 "MOV $dst.lo,$dst.hi\n\t" 9450 "SAR $dst.hi,31\n" 9451 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9452 "SAR $dst.hi,$shift" %} 9453 ins_encode( shift_right_arith_long( dst, shift ) ); 9454 ins_pipe( pipe_slow ); 9455 %} 9456 9457 
9458 //----------Double Instructions------------------------------------------------ 9459 // Double Math 9460 9461 // Compare & branch 9462 9463 // P6 version of float compare, sets condition codes in EFLAGS 9464 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9465 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9466 match(Set cr (CmpD src1 src2)); 9467 effect(KILL rax); 9468 ins_cost(150); 9469 format %{ "FLD $src1\n\t" 9470 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9471 "JNP exit\n\t" 9472 "MOV ah,1 // saw a NaN, set CF\n\t" 9473 "SAHF\n" 9474 "exit:\tNOP // avoid branch to branch" %} 9475 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9476 ins_encode( Push_Reg_DPR(src1), 9477 OpcP, RegOpc(src2), 9478 cmpF_P6_fixup ); 9479 ins_pipe( pipe_slow ); 9480 %} 9481 9482 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9483 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9484 match(Set cr (CmpD src1 src2)); 9485 ins_cost(150); 9486 format %{ "FLD $src1\n\t" 9487 "FUCOMIP ST,$src2 // P6 instruction" %} 9488 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9489 ins_encode( Push_Reg_DPR(src1), 9490 OpcP, RegOpc(src2)); 9491 ins_pipe( pipe_slow ); 9492 %} 9493 9494 // Compare & branch 9495 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9496 predicate(UseSSE<=1); 9497 match(Set cr (CmpD src1 src2)); 9498 effect(KILL rax); 9499 ins_cost(200); 9500 format %{ "FLD $src1\n\t" 9501 "FCOMp $src2\n\t" 9502 "FNSTSW AX\n\t" 9503 "TEST AX,0x400\n\t" 9504 "JZ,s flags\n\t" 9505 "MOV AH,1\t# unordered treat as LT\n" 9506 "flags:\tSAHF" %} 9507 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9508 ins_encode( Push_Reg_DPR(src1), 9509 OpcP, RegOpc(src2), 9510 fpu_flags); 9511 ins_pipe( pipe_slow ); 9512 %} 9513 9514 // Compare vs zero into -1,0,1 9515 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9516 predicate(UseSSE<=1); 9517 match(Set dst (CmpD3 src1 zero)); 9518 effect(KILL 
cr, KILL rax); 9519 ins_cost(280); 9520 format %{ "FTSTD $dst,$src1" %} 9521 opcode(0xE4, 0xD9); 9522 ins_encode( Push_Reg_DPR(src1), 9523 OpcS, OpcP, PopFPU, 9524 CmpF_Result(dst)); 9525 ins_pipe( pipe_slow ); 9526 %} 9527 9528 // Compare into -1,0,1 9529 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9530 predicate(UseSSE<=1); 9531 match(Set dst (CmpD3 src1 src2)); 9532 effect(KILL cr, KILL rax); 9533 ins_cost(300); 9534 format %{ "FCMPD $dst,$src1,$src2" %} 9535 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9536 ins_encode( Push_Reg_DPR(src1), 9537 OpcP, RegOpc(src2), 9538 CmpF_Result(dst)); 9539 ins_pipe( pipe_slow ); 9540 %} 9541 9542 // float compare and set condition codes in EFLAGS by XMM regs 9543 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9544 predicate(UseSSE>=2); 9545 match(Set cr (CmpD src1 src2)); 9546 ins_cost(145); 9547 format %{ "UCOMISD $src1,$src2\n\t" 9548 "JNP,s exit\n\t" 9549 "PUSHF\t# saw NaN, set CF\n\t" 9550 "AND [rsp], #0xffffff2b\n\t" 9551 "POPF\n" 9552 "exit:" %} 9553 ins_encode %{ 9554 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9555 emit_cmpfp_fixup(_masm); 9556 %} 9557 ins_pipe( pipe_slow ); 9558 %} 9559 9560 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9561 predicate(UseSSE>=2); 9562 match(Set cr (CmpD src1 src2)); 9563 ins_cost(100); 9564 format %{ "UCOMISD $src1,$src2" %} 9565 ins_encode %{ 9566 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9567 %} 9568 ins_pipe( pipe_slow ); 9569 %} 9570 9571 // float compare and set condition codes in EFLAGS by XMM regs 9572 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9573 predicate(UseSSE>=2); 9574 match(Set cr (CmpD src1 (LoadD src2))); 9575 ins_cost(145); 9576 format %{ "UCOMISD $src1,$src2\n\t" 9577 "JNP,s exit\n\t" 9578 "PUSHF\t# saw NaN, set CF\n\t" 9579 "AND [rsp], #0xffffff2b\n\t" 9580 "POPF\n" 9581 "exit:" %} 9582 ins_encode %{ 9583 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9584 
emit_cmpfp_fixup(_masm); 9585 %} 9586 ins_pipe( pipe_slow ); 9587 %} 9588 9589 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9590 predicate(UseSSE>=2); 9591 match(Set cr (CmpD src1 (LoadD src2))); 9592 ins_cost(100); 9593 format %{ "UCOMISD $src1,$src2" %} 9594 ins_encode %{ 9595 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9596 %} 9597 ins_pipe( pipe_slow ); 9598 %} 9599 9600 // Compare into -1,0,1 in XMM 9601 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9602 predicate(UseSSE>=2); 9603 match(Set dst (CmpD3 src1 src2)); 9604 effect(KILL cr); 9605 ins_cost(255); 9606 format %{ "UCOMISD $src1, $src2\n\t" 9607 "MOV $dst, #-1\n\t" 9608 "JP,s done\n\t" 9609 "JB,s done\n\t" 9610 "SETNE $dst\n\t" 9611 "MOVZB $dst, $dst\n" 9612 "done:" %} 9613 ins_encode %{ 9614 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9615 emit_cmpfp3(_masm, $dst$$Register); 9616 %} 9617 ins_pipe( pipe_slow ); 9618 %} 9619 9620 // Compare into -1,0,1 in XMM and memory 9621 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9622 predicate(UseSSE>=2); 9623 match(Set dst (CmpD3 src1 (LoadD src2))); 9624 effect(KILL cr); 9625 ins_cost(275); 9626 format %{ "UCOMISD $src1, $src2\n\t" 9627 "MOV $dst, #-1\n\t" 9628 "JP,s done\n\t" 9629 "JB,s done\n\t" 9630 "SETNE $dst\n\t" 9631 "MOVZB $dst, $dst\n" 9632 "done:" %} 9633 ins_encode %{ 9634 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9635 emit_cmpfp3(_masm, $dst$$Register); 9636 %} 9637 ins_pipe( pipe_slow ); 9638 %} 9639 9640 9641 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9642 predicate (UseSSE <=1); 9643 match(Set dst (SubD dst src)); 9644 9645 format %{ "FLD $src\n\t" 9646 "DSUBp $dst,ST" %} 9647 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9648 ins_cost(150); 9649 ins_encode( Push_Reg_DPR(src), 9650 OpcP, RegOpc(dst) ); 9651 ins_pipe( fpu_reg_reg ); 9652 %} 9653 9654 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9655 predicate (UseSSE <=1); 9656 
match(Set dst (RoundDouble (SubD src1 src2))); 9657 ins_cost(250); 9658 9659 format %{ "FLD $src2\n\t" 9660 "DSUB ST,$src1\n\t" 9661 "FSTP_D $dst\t# D-round" %} 9662 opcode(0xD8, 0x5); 9663 ins_encode( Push_Reg_DPR(src2), 9664 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9665 ins_pipe( fpu_mem_reg_reg ); 9666 %} 9667 9668 9669 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9670 predicate (UseSSE <=1); 9671 match(Set dst (SubD dst (LoadD src))); 9672 ins_cost(150); 9673 9674 format %{ "FLD $src\n\t" 9675 "DSUBp $dst,ST" %} 9676 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9677 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9678 OpcP, RegOpc(dst) ); 9679 ins_pipe( fpu_reg_mem ); 9680 %} 9681 9682 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9683 predicate (UseSSE<=1); 9684 match(Set dst (AbsD src)); 9685 ins_cost(100); 9686 format %{ "FABS" %} 9687 opcode(0xE1, 0xD9); 9688 ins_encode( OpcS, OpcP ); 9689 ins_pipe( fpu_reg_reg ); 9690 %} 9691 9692 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9693 predicate(UseSSE<=1); 9694 match(Set dst (NegD src)); 9695 ins_cost(100); 9696 format %{ "FCHS" %} 9697 opcode(0xE0, 0xD9); 9698 ins_encode( OpcS, OpcP ); 9699 ins_pipe( fpu_reg_reg ); 9700 %} 9701 9702 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9703 predicate(UseSSE<=1); 9704 match(Set dst (AddD dst src)); 9705 format %{ "FLD $src\n\t" 9706 "DADD $dst,ST" %} 9707 size(4); 9708 ins_cost(150); 9709 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9710 ins_encode( Push_Reg_DPR(src), 9711 OpcP, RegOpc(dst) ); 9712 ins_pipe( fpu_reg_reg ); 9713 %} 9714 9715 9716 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9717 predicate(UseSSE<=1); 9718 match(Set dst (RoundDouble (AddD src1 src2))); 9719 ins_cost(250); 9720 9721 format %{ "FLD $src2\n\t" 9722 "DADD ST,$src1\n\t" 9723 "FSTP_D $dst\t# D-round" %} 9724 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9725 ins_encode( Push_Reg_DPR(src2), 9726 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9727 ins_pipe( 
fpu_mem_reg_reg ); 9728 %} 9729 9730 9731 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9732 predicate(UseSSE<=1); 9733 match(Set dst (AddD dst (LoadD src))); 9734 ins_cost(150); 9735 9736 format %{ "FLD $src\n\t" 9737 "DADDp $dst,ST" %} 9738 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9739 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9740 OpcP, RegOpc(dst) ); 9741 ins_pipe( fpu_reg_mem ); 9742 %} 9743 9744 // add-to-memory 9745 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9746 predicate(UseSSE<=1); 9747 match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); 9748 ins_cost(150); 9749 9750 format %{ "FLD_D $dst\n\t" 9751 "DADD ST,$src\n\t" 9752 "FST_D $dst" %} 9753 opcode(0xDD, 0x0); 9754 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9755 Opcode(0xD8), RegOpc(src), 9756 set_instruction_start, 9757 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9758 ins_pipe( fpu_reg_mem ); 9759 %} 9760 9761 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9762 predicate(UseSSE<=1); 9763 match(Set dst (AddD dst con)); 9764 ins_cost(125); 9765 format %{ "FLD1\n\t" 9766 "DADDp $dst,ST" %} 9767 ins_encode %{ 9768 __ fld1(); 9769 __ faddp($dst$$reg); 9770 %} 9771 ins_pipe(fpu_reg); 9772 %} 9773 9774 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9775 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9776 match(Set dst (AddD dst con)); 9777 ins_cost(200); 9778 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9779 "DADDp $dst,ST" %} 9780 ins_encode %{ 9781 __ fld_d($constantaddress($con)); 9782 __ faddp($dst$$reg); 9783 %} 9784 ins_pipe(fpu_reg_mem); 9785 %} 9786 9787 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9788 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9789 match(Set dst (RoundDouble (AddD src con))); 9790 ins_cost(200); 9791 format %{ "FLD_D [$constantaddress]\t# load from 
constant table: double=$con\n\t" 9792 "DADD ST,$src\n\t" 9793 "FSTP_D $dst\t# D-round" %} 9794 ins_encode %{ 9795 __ fld_d($constantaddress($con)); 9796 __ fadd($src$$reg); 9797 __ fstp_d(Address(rsp, $dst$$disp)); 9798 %} 9799 ins_pipe(fpu_mem_reg_con); 9800 %} 9801 9802 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9803 predicate(UseSSE<=1); 9804 match(Set dst (MulD dst src)); 9805 format %{ "FLD $src\n\t" 9806 "DMULp $dst,ST" %} 9807 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9808 ins_cost(150); 9809 ins_encode( Push_Reg_DPR(src), 9810 OpcP, RegOpc(dst) ); 9811 ins_pipe( fpu_reg_reg ); 9812 %} 9813 9814 // Strict FP instruction biases argument before multiply then 9815 // biases result to avoid double rounding of subnormals. 9816 // 9817 // scale arg1 by multiplying arg1 by 2^(-15360) 9818 // load arg2 9819 // multiply scaled arg1 by arg2 9820 // rescale product by 2^(15360) 9821 // 9822 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9823 predicate( UseSSE<=1 && Compile::current()->has_method() ); 9824 match(Set dst (MulD dst src)); 9825 ins_cost(1); // Select this instruction for all FP double multiplies 9826 9827 format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" 9828 "DMULp $dst,ST\n\t" 9829 "FLD $src\n\t" 9830 "DMULp $dst,ST\n\t" 9831 "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" 9832 "DMULp $dst,ST\n\t" %} 9833 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9834 ins_encode( strictfp_bias1(dst), 9835 Push_Reg_DPR(src), 9836 OpcP, RegOpc(dst), 9837 strictfp_bias2(dst) ); 9838 ins_pipe( fpu_reg_reg ); 9839 %} 9840 9841 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9842 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9843 match(Set dst (MulD dst con)); 9844 ins_cost(200); 9845 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9846 "DMULp $dst,ST" %} 9847 ins_encode %{ 9848 __ fld_d($constantaddress($con)); 9849 __ fmulp($dst$$reg); 9850 %} 9851 
ins_pipe(fpu_reg_mem); 9852 %} 9853 9854 9855 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9856 predicate( UseSSE<=1 ); 9857 match(Set dst (MulD dst (LoadD src))); 9858 ins_cost(200); 9859 format %{ "FLD_D $src\n\t" 9860 "DMULp $dst,ST" %} 9861 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9862 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9863 OpcP, RegOpc(dst) ); 9864 ins_pipe( fpu_reg_mem ); 9865 %} 9866 9867 // 9868 // Cisc-alternate to reg-reg multiply 9869 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ 9870 predicate( UseSSE<=1 ); 9871 match(Set dst (MulD src (LoadD mem))); 9872 ins_cost(250); 9873 format %{ "FLD_D $mem\n\t" 9874 "DMUL ST,$src\n\t" 9875 "FSTP_D $dst" %} 9876 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9877 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9878 OpcReg_FPR(src), 9879 Pop_Reg_DPR(dst) ); 9880 ins_pipe( fpu_reg_reg_mem ); 9881 %} 9882 9883 9884 // MACRO3 -- addDPR a mulDPR 9885 // This instruction is a '2-address' instruction in that the result goes 9886 // back to src2. This eliminates a move from the macro; possibly the 9887 // register allocator will have to add it back (and maybe not). 
9888 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9889 predicate( UseSSE<=1 ); 9890 match(Set src2 (AddD (MulD src0 src1) src2)); 9891 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9892 "DMUL ST,$src1\n\t" 9893 "DADDp $src2,ST" %} 9894 ins_cost(250); 9895 opcode(0xDD); /* LoadD DD /0 */ 9896 ins_encode( Push_Reg_FPR(src0), 9897 FMul_ST_reg(src1), 9898 FAddP_reg_ST(src2) ); 9899 ins_pipe( fpu_reg_reg_reg ); 9900 %} 9901 9902 9903 // MACRO3 -- subDPR a mulDPR 9904 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9905 predicate( UseSSE<=1 ); 9906 match(Set src2 (SubD (MulD src0 src1) src2)); 9907 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9908 "DMUL ST,$src1\n\t" 9909 "DSUBRp $src2,ST" %} 9910 ins_cost(250); 9911 ins_encode( Push_Reg_FPR(src0), 9912 FMul_ST_reg(src1), 9913 Opcode(0xDE), Opc_plus(0xE0,src2)); 9914 ins_pipe( fpu_reg_reg_reg ); 9915 %} 9916 9917 9918 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9919 predicate( UseSSE<=1 ); 9920 match(Set dst (DivD dst src)); 9921 9922 format %{ "FLD $src\n\t" 9923 "FDIVp $dst,ST" %} 9924 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9925 ins_cost(150); 9926 ins_encode( Push_Reg_DPR(src), 9927 OpcP, RegOpc(dst) ); 9928 ins_pipe( fpu_reg_reg ); 9929 %} 9930 9931 // Strict FP instruction biases argument before division then 9932 // biases result, to avoid double rounding of subnormals. 
9933 // 9934 // scale dividend by multiplying dividend by 2^(-15360) 9935 // load divisor 9936 // divide scaled dividend by divisor 9937 // rescale quotient by 2^(15360) 9938 // 9939 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9940 predicate (UseSSE<=1); 9941 match(Set dst (DivD dst src)); 9942 predicate( UseSSE<=1 && Compile::current()->has_method() ); 9943 ins_cost(01); 9944 9945 format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" 9946 "DMULp $dst,ST\n\t" 9947 "FLD $src\n\t" 9948 "FDIVp $dst,ST\n\t" 9949 "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" 9950 "DMULp $dst,ST\n\t" %} 9951 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9952 ins_encode( strictfp_bias1(dst), 9953 Push_Reg_DPR(src), 9954 OpcP, RegOpc(dst), 9955 strictfp_bias2(dst) ); 9956 ins_pipe( fpu_reg_reg ); 9957 %} 9958 9959 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9960 predicate(UseSSE<=1); 9961 match(Set dst (ModD dst src)); 9962 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9963 9964 format %{ "DMOD $dst,$src" %} 9965 ins_cost(250); 9966 ins_encode(Push_Reg_Mod_DPR(dst, src), 9967 emitModDPR(), 9968 Push_Result_Mod_DPR(src), 9969 Pop_Reg_DPR(dst)); 9970 ins_pipe( pipe_slow ); 9971 %} 9972 9973 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9974 predicate(UseSSE>=2); 9975 match(Set dst (ModD src0 src1)); 9976 effect(KILL rax, KILL cr); 9977 9978 format %{ "SUB ESP,8\t # DMOD\n" 9979 "\tMOVSD [ESP+0],$src1\n" 9980 "\tFLD_D [ESP+0]\n" 9981 "\tMOVSD [ESP+0],$src0\n" 9982 "\tFLD_D [ESP+0]\n" 9983 "loop:\tFPREM\n" 9984 "\tFWAIT\n" 9985 "\tFNSTSW AX\n" 9986 "\tSAHF\n" 9987 "\tJP loop\n" 9988 "\tFSTP_D [ESP+0]\n" 9989 "\tMOVSD $dst,[ESP+0]\n" 9990 "\tADD ESP,8\n" 9991 "\tFSTP ST0\t # Restore FPU Stack" 9992 %} 9993 ins_cost(250); 9994 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9995 ins_pipe( pipe_slow ); 9996 %} 9997 9998 instruct atanDPR_reg(regDPR dst, 
regDPR src) %{ 9999 predicate (UseSSE<=1); 10000 match(Set dst(AtanD dst src)); 10001 format %{ "DATA $dst,$src" %} 10002 opcode(0xD9, 0xF3); 10003 ins_encode( Push_Reg_DPR(src), 10004 OpcP, OpcS, RegOpc(dst) ); 10005 ins_pipe( pipe_slow ); 10006 %} 10007 10008 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 10009 predicate (UseSSE>=2); 10010 match(Set dst(AtanD dst src)); 10011 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 10012 format %{ "DATA $dst,$src" %} 10013 opcode(0xD9, 0xF3); 10014 ins_encode( Push_SrcD(src), 10015 OpcP, OpcS, Push_ResultD(dst) ); 10016 ins_pipe( pipe_slow ); 10017 %} 10018 10019 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 10020 predicate (UseSSE<=1); 10021 match(Set dst (SqrtD src)); 10022 format %{ "DSQRT $dst,$src" %} 10023 opcode(0xFA, 0xD9); 10024 ins_encode( Push_Reg_DPR(src), 10025 OpcS, OpcP, Pop_Reg_DPR(dst) ); 10026 ins_pipe( pipe_slow ); 10027 %} 10028 10029 //-------------Float Instructions------------------------------- 10030 // Float Math 10031 10032 // Code for float compare: 10033 // fcompp(); 10034 // fwait(); fnstsw_ax(); 10035 // sahf(); 10036 // movl(dst, unordered_result); 10037 // jcc(Assembler::parity, exit); 10038 // movl(dst, less_result); 10039 // jcc(Assembler::below, exit); 10040 // movl(dst, equal_result); 10041 // jcc(Assembler::equal, exit); 10042 // movl(dst, greater_result); 10043 // exit: 10044 10045 // P6 version of float compare, sets condition codes in EFLAGS 10046 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10047 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10048 match(Set cr (CmpF src1 src2)); 10049 effect(KILL rax); 10050 ins_cost(150); 10051 format %{ "FLD $src1\n\t" 10052 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10053 "JNP exit\n\t" 10054 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10055 "SAHF\n" 10056 "exit:\tNOP // avoid branch to branch" %} 10057 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10058 ins_encode( 
              Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// P6 variant: FUCOMIP writes EFLAGS directly, so no FNSTSW/SAHF transfer
// through AX is needed (hence no KILL rax here, unlike cmpFPR_cc below).
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
// Pre-P6 path: the FPU status word has to be moved through AX
// (FNSTSW AX ... SAHF), which is why EAX is killed.
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
// Uses FTST (D9 E4), which compares ST(0) against +0.0; EAX is killed by
// the status-word transfer inside CmpF_Result.
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// emit_cmpfp_fixup patches EFLAGS after UCOMISS so that an unordered
// (NaN) result is treated as "less than" (CF set), matching Java semantics.
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Cheaper variant when the user of the flags only needs CF-style
// (unsigned, CF == unordered) conditions: no NaN fixup required.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpF_cc (right operand folded from a LoadF).
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
// emit_cmpfp3 materializes the three-way CmpF3 result in $dst from the
// flags set by UCOMISS (unordered/less -> -1 via the JP/JB paths).
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
// The x87 stack computes in extended precision; storing through a
// stackSlotF (Pop_Mem_FPR) forces rounding to IEEE single precision.
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst,
                      regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// regFPR1 pins both operands to the FPU top-of-stack, so FABS needs no
// explicit operand encoding (dst == src == ST(0)).
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Same TOS-only trick as absFPR_reg, using FCHS (D9 E0) to flip the sign.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
// Note the swapped operand order in the operand list (src2 is the
// register, src1 the memory operand) relative to the match rule.
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
// The float constant is materialized from the constant table
// ($constantaddress) rather than being encoded as an immediate.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst
            (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
// Matches the fused tree AddF(MulF(LoadF, src1), src2) in one rule,
// avoiding a separate multiply temp (ins_cost 95 makes it attractive).
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF
                      dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE has no remainder instruction, so ModF bounces the XMM operands
// through the stack and loops on x87 FPREM until the C2 status flag
// clears (the JP loop in the format below).
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
// Expands to roundFloat_mem_reg: the store to a stackSlotF performs
// the D->F narrowing.
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// UseSSE==1 path: source is an x87 double, result must land in XMM,
// so the value is rounded through a 4-byte scratch slot on the stack.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is not already at the top of the FPU stack, load it there
    // first and pop after the store; otherwise store non-destructively.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// UseSSE==1 path for F2D: float lives in XMM but doubles are still kept
// on the x87 stack, so the value crosses via a 4-byte stack slot.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister,
                 $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// CVTTSD2SI produces 0x80000000 on overflow/NaN; that sentinel triggers
// the slow call into d2i_wrapper, which implements exact Java semantics.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: pass the double to the wrapper on the FPU stack.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// D2L on the x87 stack; the 64-bit result comes back in EDX:EAX (eADXRegL).
instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    // Truncate-toward-zero rounding for the FISTP, per Java semantics.
    __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x80000000_00000000 is the FISTP overflow/NaN sentinel: take the
    // slow call to d2l_wrapper only for that exact value.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
// Same sentinel scheme as convD2I_reg_reg, using CVTTSS2SI.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper())));
    __ post_call_nop();
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
10973 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ 10974 predicate (UseSSE>=1); 10975 match(Set dst (ConvF2L src)); 10976 effect( KILL cr ); 10977 format %{ "SUB ESP,8\t# Convert float to long\n\t" 10978 "MOVSS [ESP],$src\n\t" 10979 "FLD_S [ESP]\n\t" 10980 "FLDCW trunc mode\n\t" 10981 "FISTp [ESP + #0]\n\t" 10982 "FLDCW std/24-bit mode\n\t" 10983 "POP EAX\n\t" 10984 "POP EDX\n\t" 10985 "CMP EDX,0x80000000\n\t" 10986 "JNE,s fast\n\t" 10987 "TEST EAX,EAX\n\t" 10988 "JNE,s fast\n\t" 10989 "SUB ESP,4\t# Convert float to long\n\t" 10990 "MOVSS [ESP],$src\n\t" 10991 "FLD_S [ESP]\n\t" 10992 "ADD ESP,4\n\t" 10993 "CALL d2l_wrapper\n" 10994 "fast:" %} 10995 ins_encode %{ 10996 Label fast; 10997 __ subptr(rsp, 8); 10998 __ movflt(Address(rsp, 0), $src$$XMMRegister); 10999 __ fld_s(Address(rsp, 0)); 11000 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); 11001 __ fistp_d(Address(rsp, 0)); 11002 // Restore the rounding mode, mask the exception 11003 if (Compile::current()->in_24_bit_fp_mode()) { 11004 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); 11005 } else { 11006 __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); 11007 } 11008 // Load the converted long, adjust CPU stack 11009 __ pop(rax); 11010 __ pop(rdx); 11011 __ cmpl(rdx, 0x80000000); 11012 __ jccb(Assembler::notEqual, fast); 11013 __ testl(rax, rax); 11014 __ jccb(Assembler::notEqual, fast); 11015 __ subptr(rsp, 4); 11016 __ movflt(Address(rsp, 0), $src$$XMMRegister); 11017 __ fld_s(Address(rsp, 0)); 11018 __ addptr(rsp, 4); 11019 __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); 11020 __ post_call_nop(); 11021 __ bind(fast); 11022 %} 11023 ins_pipe( pipe_slow ); 11024 %} 11025 11026 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ 11027 predicate( UseSSE<=1 ); 11028 match(Set dst (ConvI2D src)); 11029 format %{ "FILD $src\n\t" 11030 "FSTP $dst" %} 11031 opcode(0xDB, 0x0); /* DB /0 */ 
11032 ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); 11033 ins_pipe( fpu_reg_mem ); 11034 %} 11035 11036 instruct convI2D_reg(regD dst, rRegI src) %{ 11037 predicate( UseSSE>=2 && !UseXmmI2D ); 11038 match(Set dst (ConvI2D src)); 11039 format %{ "CVTSI2SD $dst,$src" %} 11040 ins_encode %{ 11041 __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); 11042 %} 11043 ins_pipe( pipe_slow ); 11044 %} 11045 11046 instruct convI2D_mem(regD dst, memory mem) %{ 11047 predicate( UseSSE>=2 ); 11048 match(Set dst (ConvI2D (LoadI mem))); 11049 format %{ "CVTSI2SD $dst,$mem" %} 11050 ins_encode %{ 11051 __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); 11052 %} 11053 ins_pipe( pipe_slow ); 11054 %} 11055 11056 instruct convXI2D_reg(regD dst, rRegI src) 11057 %{ 11058 predicate( UseSSE>=2 && UseXmmI2D ); 11059 match(Set dst (ConvI2D src)); 11060 11061 format %{ "MOVD $dst,$src\n\t" 11062 "CVTDQ2PD $dst,$dst\t# i2d" %} 11063 ins_encode %{ 11064 __ movdl($dst$$XMMRegister, $src$$Register); 11065 __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); 11066 %} 11067 ins_pipe(pipe_slow); // XXX 11068 %} 11069 11070 instruct convI2DPR_mem(regDPR dst, memory mem) %{ 11071 predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); 11072 match(Set dst (ConvI2D (LoadI mem))); 11073 format %{ "FILD $mem\n\t" 11074 "FSTP $dst" %} 11075 opcode(0xDB); /* DB /0 */ 11076 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11077 Pop_Reg_DPR(dst)); 11078 ins_pipe( fpu_reg_mem ); 11079 %} 11080 11081 // Convert a byte to a float; no rounding step needed. 
// ---- Int->float, long<->float/double conversions, and raw bit moves -------
// * conv24I2FPR_reg matches ConvI2F only when the matched input is
//   (AndI x 255) — i.e. a byte — so no exponent-rounding store is needed.
// * The convI2FPR_SSF(_mem) forms run only when the compile selected 24-bit
//   mode (select_24_bit_instr()) and force rounding by FSTP_S back to a
//   stack slot; the convI2FPR_reg/_mem forms are the non-24-bit x87 path.
// * convI2L_reg sign-extends lo->hi with SAR $dst.hi,31; convI2L_reg_zex and
//   zerox_long zero the high half via XOR (opcode 0x33) when ANDed with the
//   immL_32bits mask.
// * convL2* push the long pair, FILD it through x87, and either pop to an
//   x87/stack slot or round-trip through [ESP] into an XMM register.
// * The MoveF2I/MoveI2F/MoveD2L/MoveL2D families are pure bit-pattern moves
//   (no numeric conversion), with one rule per UseSSE level and per
//   register-vs-stack operand placement; the reg_reg SSE forms shuttle the
//   64-bit pattern through MOVD/PSHUFLW/PUNPCKLDQ using a TEMP xmm.
// * compressBitsL_reg / expandBitsL_reg emulate a 64-bit PEXT/PDEP on the
//   32-bit register pairs (pextl/pdepl per half, then merge/shift), using an
//   XMM register as a spill slot because GPRs are scarce on 32-bit.
11082 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ 11083 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); 11084 match(Set dst (ConvI2F src)); 11085 format %{ "FILD $src\n\t" 11086 "FSTP $dst" %} 11087 11088 opcode(0xDB, 0x0); /* DB /0 */ 11089 ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); 11090 ins_pipe( fpu_reg_mem ); 11091 %} 11092 11093 // In 24-bit mode, force exponent rounding by storing back out 11094 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ 11095 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11096 match(Set dst (ConvI2F src)); 11097 ins_cost(200); 11098 format %{ "FILD $src\n\t" 11099 "FSTP_S $dst" %} 11100 opcode(0xDB, 0x0); /* DB /0 */ 11101 ins_encode( Push_Mem_I(src), 11102 Pop_Mem_FPR(dst)); 11103 ins_pipe( fpu_mem_mem ); 11104 %} 11105 11106 // In 24-bit mode, force exponent rounding by storing back out 11107 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ 11108 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11109 match(Set dst (ConvI2F (LoadI mem))); 11110 ins_cost(200); 11111 format %{ "FILD $mem\n\t" 11112 "FSTP_S $dst" %} 11113 opcode(0xDB); /* DB /0 */ 11114 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11115 Pop_Mem_FPR(dst)); 11116 ins_pipe( fpu_mem_mem ); 11117 %} 11118 11119 // This instruction does not round to 24-bits 11120 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ 11121 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11122 match(Set dst (ConvI2F src)); 11123 format %{ "FILD $src\n\t" 11124 "FSTP $dst" %} 11125 opcode(0xDB, 0x0); /* DB /0 */ 11126 ins_encode( Push_Mem_I(src), 11127 Pop_Reg_FPR(dst)); 11128 ins_pipe( fpu_reg_mem ); 11129 %} 11130 11131 // This instruction does not round to 24-bits 11132 instruct convI2FPR_mem(regFPR dst, memory mem) %{ 11133 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11134 match(Set dst (ConvI2F (LoadI
mem))); 11135 format %{ "FILD $mem\n\t" 11136 "FSTP $dst" %} 11137 opcode(0xDB); /* DB /0 */ 11138 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11139 Pop_Reg_FPR(dst)); 11140 ins_pipe( fpu_reg_mem ); 11141 %} 11142 11143 // Convert an int to a float in xmm; no rounding step needed. 11144 instruct convI2F_reg(regF dst, rRegI src) %{ 11145 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); 11146 match(Set dst (ConvI2F src)); 11147 format %{ "CVTSI2SS $dst, $src" %} 11148 ins_encode %{ 11149 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); 11150 %} 11151 ins_pipe( pipe_slow ); 11152 %} 11153 11154 instruct convXI2F_reg(regF dst, rRegI src) 11155 %{ 11156 predicate( UseSSE>=2 && UseXmmI2F ); 11157 match(Set dst (ConvI2F src)); 11158 11159 format %{ "MOVD $dst,$src\n\t" 11160 "CVTDQ2PS $dst,$dst\t# i2f" %} 11161 ins_encode %{ 11162 __ movdl($dst$$XMMRegister, $src$$Register); 11163 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); 11164 %} 11165 ins_pipe(pipe_slow); // XXX 11166 %} 11167 11168 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{ 11169 match(Set dst (ConvI2L src)); 11170 effect(KILL cr); 11171 ins_cost(375); 11172 format %{ "MOV $dst.lo,$src\n\t" 11173 "MOV $dst.hi,$src\n\t" 11174 "SAR $dst.hi,31" %} 11175 ins_encode(convert_int_long(dst,src)); 11176 ins_pipe( ialu_reg_reg_long ); 11177 %} 11178 11179 // Zero-extend convert int to long 11180 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{ 11181 match(Set dst (AndL (ConvI2L src) mask) ); 11182 effect( KILL flags ); 11183 ins_cost(250); 11184 format %{ "MOV $dst.lo,$src\n\t" 11185 "XOR $dst.hi,$dst.hi" %} 11186 opcode(0x33); // XOR 11187 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11188 ins_pipe( ialu_reg_reg_long ); 11189 %} 11190 11191 // Zero-extend long 11192 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{ 11193 match(Set dst (AndL src mask) ); 11194 effect( KILL flags ); 11195 ins_cost(250); 11196 format %{ "MOV
$dst.lo,$src.lo\n\t" 11197 "XOR $dst.hi,$dst.hi\n\t" %} 11198 opcode(0x33); // XOR 11199 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11200 ins_pipe( ialu_reg_reg_long ); 11201 %} 11202 11203 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ 11204 predicate (UseSSE<=1); 11205 match(Set dst (ConvL2D src)); 11206 effect( KILL cr ); 11207 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11208 "PUSH $src.lo\n\t" 11209 "FILD ST,[ESP + #0]\n\t" 11210 "ADD ESP,8\n\t" 11211 "FSTP_D $dst\t# D-round" %} 11212 opcode(0xDF, 0x5); /* DF /5 */ 11213 ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); 11214 ins_pipe( pipe_slow ); 11215 %} 11216 11217 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ 11218 predicate (UseSSE>=2); 11219 match(Set dst (ConvL2D src)); 11220 effect( KILL cr ); 11221 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11222 "PUSH $src.lo\n\t" 11223 "FILD_D [ESP]\n\t" 11224 "FSTP_D [ESP]\n\t" 11225 "MOVSD $dst,[ESP]\n\t" 11226 "ADD ESP,8" %} 11227 opcode(0xDF, 0x5); /* DF /5 */ 11228 ins_encode(convert_long_double2(src), Push_ResultD(dst)); 11229 ins_pipe( pipe_slow ); 11230 %} 11231 11232 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ 11233 predicate (UseSSE>=1); 11234 match(Set dst (ConvL2F src)); 11235 effect( KILL cr ); 11236 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11237 "PUSH $src.lo\n\t" 11238 "FILD_D [ESP]\n\t" 11239 "FSTP_S [ESP]\n\t" 11240 "MOVSS $dst,[ESP]\n\t" 11241 "ADD ESP,8" %} 11242 opcode(0xDF, 0x5); /* DF /5 */ 11243 ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); 11244 ins_pipe( pipe_slow ); 11245 %} 11246 11247 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ 11248 match(Set dst (ConvL2F src)); 11249 effect( KILL cr ); 11250 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11251 "PUSH $src.lo\n\t" 11252 "FILD ST,[ESP + #0]\n\t" 11253 "ADD ESP,8\n\t" 11254 "FSTP_S $dst\t# F-round" %} 11255 opcode(0xDF,
0x5); /* DF /5 */ 11256 ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); 11257 ins_pipe( pipe_slow ); 11258 %} 11259 11260 instruct convL2I_reg( rRegI dst, eRegL src ) %{ 11261 match(Set dst (ConvL2I src)); 11262 effect( DEF dst, USE src ); 11263 format %{ "MOV $dst,$src.lo" %} 11264 ins_encode(enc_CopyL_Lo(dst,src)); 11265 ins_pipe( ialu_reg_reg ); 11266 %} 11267 11268 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ 11269 match(Set dst (MoveF2I src)); 11270 effect( DEF dst, USE src ); 11271 ins_cost(100); 11272 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} 11273 ins_encode %{ 11274 __ movl($dst$$Register, Address(rsp, $src$$disp)); 11275 %} 11276 ins_pipe( ialu_reg_mem ); 11277 %} 11278 11279 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{ 11280 predicate(UseSSE==0); 11281 match(Set dst (MoveF2I src)); 11282 effect( DEF dst, USE src ); 11283 11284 ins_cost(125); 11285 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} 11286 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 11287 ins_pipe( fpu_mem_reg ); 11288 %} 11289 11290 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ 11291 predicate(UseSSE>=1); 11292 match(Set dst (MoveF2I src)); 11293 effect( DEF dst, USE src ); 11294 11295 ins_cost(95); 11296 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} 11297 ins_encode %{ 11298 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); 11299 %} 11300 ins_pipe( pipe_slow ); 11301 %} 11302 11303 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{ 11304 predicate(UseSSE>=2); 11305 match(Set dst (MoveF2I src)); 11306 effect( DEF dst, USE src ); 11307 ins_cost(85); 11308 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} 11309 ins_encode %{ 11310 __ movdl($dst$$Register, $src$$XMMRegister); 11311 %} 11312 ins_pipe( pipe_slow ); 11313 %} 11314 11315 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ 11316 match(Set dst (MoveI2F src)); 11317 effect( DEF dst, USE src ); 11318 11319 ins_cost(100); 11320 format %{ "MOV $dst,$src\t#
MoveI2F_reg_stack" %} 11321 ins_encode %{ 11322 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11323 %} 11324 ins_pipe( ialu_mem_reg ); 11325 %} 11326 11327 11328 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11329 predicate(UseSSE==0); 11330 match(Set dst (MoveI2F src)); 11331 effect(DEF dst, USE src); 11332 11333 ins_cost(125); 11334 format %{ "FLD_S $src\n\t" 11335 "FSTP $dst\t# MoveI2F_stack_reg" %} 11336 opcode(0xD9); /* D9 /0, FLD m32real */ 11337 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11338 Pop_Reg_FPR(dst) ); 11339 ins_pipe( fpu_reg_mem ); 11340 %} 11341 11342 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11343 predicate(UseSSE>=1); 11344 match(Set dst (MoveI2F src)); 11345 effect( DEF dst, USE src ); 11346 11347 ins_cost(95); 11348 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11349 ins_encode %{ 11350 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11351 %} 11352 ins_pipe( pipe_slow ); 11353 %} 11354 11355 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11356 predicate(UseSSE>=2); 11357 match(Set dst (MoveI2F src)); 11358 effect( DEF dst, USE src ); 11359 11360 ins_cost(85); 11361 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11362 ins_encode %{ 11363 __ movdl($dst$$XMMRegister, $src$$Register); 11364 %} 11365 ins_pipe( pipe_slow ); 11366 %} 11367 11368 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11369 match(Set dst (MoveD2L src)); 11370 effect(DEF dst, USE src); 11371 11372 ins_cost(250); 11373 format %{ "MOV $dst.lo,$src\n\t" 11374 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11375 opcode(0x8B, 0x8B); 11376 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11377 ins_pipe( ialu_mem_long_reg ); 11378 %} 11379 11380 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11381 predicate(UseSSE<=1); 11382 match(Set dst (MoveD2L src)); 11383 effect(DEF dst, USE src); 11384 11385 ins_cost(125); 11386 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11387
// Pop_Mem_Reg_DPR stores the x87 double straight to the 64-bit stack slot.
ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11388 ins_pipe( fpu_mem_reg ); 11389 %} 11390 11391 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11392 predicate(UseSSE>=2); 11393 match(Set dst (MoveD2L src)); 11394 effect(DEF dst, USE src); 11395 ins_cost(95); 11396 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11397 ins_encode %{ 11398 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11399 %} 11400 ins_pipe( pipe_slow ); 11401 %} 11402 11403 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11404 predicate(UseSSE>=2); 11405 match(Set dst (MoveD2L src)); 11406 effect(DEF dst, USE src, TEMP tmp); 11407 ins_cost(85); 11408 format %{ "MOVD $dst.lo,$src\n\t" 11409 "PSHUFLW $tmp,$src,0x4E\n\t" 11410 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11411 ins_encode %{ 11412 __ movdl($dst$$Register, $src$$XMMRegister); 11413 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11414 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11415 %} 11416 ins_pipe( pipe_slow ); 11417 %} 11418 11419 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11420 match(Set dst (MoveL2D src)); 11421 effect(DEF dst, USE src); 11422 11423 ins_cost(200); 11424 format %{ "MOV $dst,$src.lo\n\t" 11425 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11426 opcode(0x89, 0x89); 11427 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11428 ins_pipe( ialu_mem_long_reg ); 11429 %} 11430 11431 11432 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11433 predicate(UseSSE<=1); 11434 match(Set dst (MoveL2D src)); 11435 effect(DEF dst, USE src); 11436 ins_cost(125); 11437 11438 format %{ "FLD_D $src\n\t" 11439 "FSTP $dst\t# MoveL2D_stack_reg" %} 11440 opcode(0xDD); /* DD /0, FLD m64real */ 11441 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11442 Pop_Reg_DPR(dst) ); 11443 ins_pipe( fpu_reg_mem ); 11444 %} 11445 11446 11447 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11448 predicate(UseSSE>=2 &&
UseXmmLoadAndClearUpper); 11449 match(Set dst (MoveL2D src)); 11450 effect(DEF dst, USE src); 11451 11452 ins_cost(95); 11453 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11454 ins_encode %{ 11455 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11456 %} 11457 ins_pipe( pipe_slow ); 11458 %} 11459 11460 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11461 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11462 match(Set dst (MoveL2D src)); 11463 effect(DEF dst, USE src); 11464 11465 ins_cost(95); 11466 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11467 ins_encode %{ 11468 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11469 %} 11470 ins_pipe( pipe_slow ); 11471 %} 11472 11473 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11474 predicate(UseSSE>=2); 11475 match(Set dst (MoveL2D src)); 11476 effect(TEMP dst, USE src, TEMP tmp); 11477 ins_cost(85); 11478 format %{ "MOVD $dst,$src.lo\n\t" 11479 "MOVD $tmp,$src.hi\n\t" 11480 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11481 ins_encode %{ 11482 __ movdl($dst$$XMMRegister, $src$$Register); 11483 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11484 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11485 %} 11486 ins_pipe( pipe_slow ); 11487 %} 11488 11489 //----------------------------- CompressBits/ExpandBits ------------------------ 11490 11491 instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11492 predicate(n->bottom_type()->isa_long()); 11493 match(Set dst (CompressBits src mask)); 11494 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11495 format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %} 11496 ins_encode %{ 11497 Label exit, partail_result; 11498 // Parallely extract both upper and lower 32 bits of source into destination register pair.
// Merge the results of upper and lower destination registers such that upper destination 11500 // results are contiguously laid out after the lower destination result. 11501 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 11502 __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11503 __ popcntl($rtmp$$Register, $mask$$Register); 11504 // Skip merging if bit count of lower mask register is equal to 32 (register size). 11505 __ cmpl($rtmp$$Register, 32); 11506 __ jccb(Assembler::equal, exit); 11507 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11508 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11509 // Shift left the contents of upper destination register by true bit count of lower mask register 11510 // and merge with lower destination register. 11511 __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11512 __ orl($dst$$Register, $rtmp$$Register); 11513 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11514 // Zero out upper destination register if true bit count of lower 32 bit mask is zero 11515 // since contents of upper destination have already been copied to lower destination 11516 // register. 11517 __ cmpl($rtmp$$Register, 0); 11518 __ jccb(Assembler::greater, partail_result); 11519 __ movl(HIGH_FROM_LOW($dst$$Register), 0); 11520 __ jmp(exit); 11521 __ bind(partail_result); 11522 // Perform right shift over upper destination register to move out bits already copied
// NOTE(review): 'partail_result' above is a misspelling of 'partial_result'.
// It is a local Label name, so it is harmless at runtime, but worth fixing.
11524 __ subl($rtmp$$Register, 32); 11525 __ negl($rtmp$$Register); 11526 __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11527 __ bind(exit); 11528 %} 11529 ins_pipe( pipe_slow ); 11530 %} 11531 11532 instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ 11533 predicate(n->bottom_type()->isa_long()); 11534 match(Set dst (ExpandBits src mask)); 11535 effect(TEMP rtmp, TEMP xtmp, KILL cr); 11536 format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %} 11537 ins_encode %{ 11538 // Extraction operation sequentially reads the bits from source register starting from LSB 11539 // and lays them out into destination register at bit locations corresponding to true bits 11540 // in mask register. Thus number of source bits read are equal to combined true bit count 11541 // of mask register pair. 11542 Label exit, mask_clipping; 11543 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 11544 __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); 11545 __ popcntl($rtmp$$Register, $mask$$Register); 11546 // If true bit count of lower mask register is 32 then none of bit of lower source register 11547 // will feed to upper destination register. 11548 __ cmpl($rtmp$$Register, 32); 11549 __ jccb(Assembler::equal, exit); 11550 // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. 11551 __ movdl($xtmp$$XMMRegister, $rtmp$$Register); 11552 // Shift right the contents of lower source register to remove already consumed bits. 11553 __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register); 11554 // Extract the bits from lower source register starting from LSB under the influence 11555 // of upper mask register.
__ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register)); 11557 __ movdl($rtmp$$Register, $xtmp$$XMMRegister); 11558 __ subl($rtmp$$Register, 32); 11559 __ negl($rtmp$$Register); 11560 __ movdl($xtmp$$XMMRegister, $mask$$Register); 11561 __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register)); 11562 // Clear the set bits in upper mask register which have been used to extract the contents 11563 // from lower source register. 11564 __ bind(mask_clipping); 11565 __ blsrl($mask$$Register, $mask$$Register); 11566 __ decrementl($rtmp$$Register, 1); 11567 __ jccb(Assembler::greater, mask_clipping); 11568 // Starting from LSB extract the bits from upper source register under the influence of 11569 // remaining set bits in upper mask register. 11570 __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register); 11571 // Merge the partial results extracted from lower and upper source register bits. 11572 __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); 11573 __ movdl($mask$$Register, $xtmp$$XMMRegister); 11574 __ bind(exit); 11575 %} 11576 ins_pipe( pipe_slow ); 11577 %} 11578 11579 // ======================================================================= 11580 // fast clearing of an array 11581 // Small ClearArray non-AVX512.
// ---- ClearArray (rep_stos family) and string compare/equals intrinsics ----
// ClearArray comes in four selected variants — {small, large} crossed with
// {UseAVX <= 2, UseAVX > 2} — plus a constant-length AVX512 form
// (rep_stos_im). All of them delegate to MacroAssembler::clear_mem and differ
// only in the is_large flag and whether a real AVX512 mask register ($ktmp)
// or the knoreg sentinel is passed. The $$template format blocks mirror the
// three code paths clear_mem can take (REP STOSB, XMM/YMM loop, REP STOS).
// The string_compare rules likewise come in non-EVEX (knoreg) / EVEX ($ktmp)
// pairs, one pair per StrIntrinsicNode encoding (LL/UU/LU/UL); note the UL
// forms pass str2/cnt2 before str1/cnt1 so the same assembler routine serves
// both mixed encodings. The final instruct (string_indexof_conL) continues
// beyond this excerpt.
11582 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11583 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2)); 11584 match(Set dummy (ClearArray cnt base)); 11585 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); 11586 11587 format %{ $$template 11588 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11589 $$emit$$"CMP InitArrayShortSize,rcx\n\t" 11590 $$emit$$"JG LARGE\n\t" 11591 $$emit$$"SHL ECX, 1\n\t" 11592 $$emit$$"DEC ECX\n\t" 11593 $$emit$$"JS DONE\t# Zero length\n\t" 11594 $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" 11595 $$emit$$"DEC ECX\n\t" 11596 $$emit$$"JGE LOOP\n\t" 11597 $$emit$$"JMP DONE\n\t" 11598 $$emit$$"# LARGE:\n\t" 11599 if (UseFastStosb) { 11600 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11601 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11602 } else if (UseXMMForObjInit) { 11603 $$emit$$"MOV RDI,RAX\n\t" 11604 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11605 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11606 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11607 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11608 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11609 $$emit$$"ADD 0x40,RAX\n\t" 11610 $$emit$$"# L_zero_64_bytes:\n\t" 11611 $$emit$$"SUB 0x8,RCX\n\t" 11612 $$emit$$"JGE L_loop\n\t" 11613 $$emit$$"ADD 0x4,RCX\n\t" 11614 $$emit$$"JL L_tail\n\t" 11615 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11616 $$emit$$"ADD 0x20,RAX\n\t" 11617 $$emit$$"SUB 0x4,RCX\n\t" 11618 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11619 $$emit$$"ADD 0x4,RCX\n\t" 11620 $$emit$$"JLE L_end\n\t" 11621 $$emit$$"DEC RCX\n\t" 11622 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11623 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11624 $$emit$$"ADD 0x8,RAX\n\t" 11625 $$emit$$"DEC RCX\n\t" 11626 $$emit$$"JGE L_sloop\n\t" 11627 $$emit$$"# L_end:\n\t" 11628 } else { 11629 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11630 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11631 } 11632 $$emit$$"# DONE" 11633 %} 11634
ins_encode %{ 11635 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11636 $tmp$$XMMRegister, false, knoreg); 11637 %} 11638 ins_pipe( pipe_slow ); 11639 %} 11640 11641 // Small ClearArray AVX512 non-constant length. 11642 instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11643 predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2)); 11644 match(Set dummy (ClearArray cnt base)); 11645 ins_cost(125); 11646 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); 11647 11648 format %{ $$template 11649 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11650 $$emit$$"CMP InitArrayShortSize,rcx\n\t" 11651 $$emit$$"JG LARGE\n\t" 11652 $$emit$$"SHL ECX, 1\n\t" 11653 $$emit$$"DEC ECX\n\t" 11654 $$emit$$"JS DONE\t# Zero length\n\t" 11655 $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" 11656 $$emit$$"DEC ECX\n\t" 11657 $$emit$$"JGE LOOP\n\t" 11658 $$emit$$"JMP DONE\n\t" 11659 $$emit$$"# LARGE:\n\t" 11660 if (UseFastStosb) { 11661 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11662 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11663 } else if (UseXMMForObjInit) { 11664 $$emit$$"MOV RDI,RAX\n\t" 11665 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11666 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11667 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11668 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11669 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11670 $$emit$$"ADD 0x40,RAX\n\t" 11671 $$emit$$"# L_zero_64_bytes:\n\t" 11672 $$emit$$"SUB 0x8,RCX\n\t" 11673 $$emit$$"JGE L_loop\n\t" 11674 $$emit$$"ADD 0x4,RCX\n\t" 11675 $$emit$$"JL L_tail\n\t" 11676 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11677 $$emit$$"ADD 0x20,RAX\n\t" 11678 $$emit$$"SUB 0x4,RCX\n\t" 11679 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11680 $$emit$$"ADD 0x4,RCX\n\t" 11681 $$emit$$"JLE L_end\n\t" 11682 $$emit$$"DEC RCX\n\t" 11683 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11684 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11685 $$emit$$"ADD 0x8,RAX\n\t"
11686 $$emit$$"DEC RCX\n\t" 11687 $$emit$$"JGE L_sloop\n\t" 11688 $$emit$$"# L_end:\n\t" 11689 } else { 11690 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11691 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11692 } 11693 $$emit$$"# DONE" 11694 %} 11695 ins_encode %{ 11696 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11697 $tmp$$XMMRegister, false, $ktmp$$KRegister); 11698 %} 11699 ins_pipe( pipe_slow ); 11700 %} 11701 11702 // Large ClearArray non-AVX512. 11703 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11704 predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large()); 11705 match(Set dummy (ClearArray cnt base)); 11706 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); 11707 format %{ $$template 11708 if (UseFastStosb) { 11709 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11710 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11711 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11712 } else if (UseXMMForObjInit) { 11713 $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t" 11714 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11715 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11716 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11717 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11718 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11719 $$emit$$"ADD 0x40,RAX\n\t" 11720 $$emit$$"# L_zero_64_bytes:\n\t" 11721 $$emit$$"SUB 0x8,RCX\n\t" 11722 $$emit$$"JGE L_loop\n\t" 11723 $$emit$$"ADD 0x4,RCX\n\t" 11724 $$emit$$"JL L_tail\n\t" 11725 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11726 $$emit$$"ADD 0x20,RAX\n\t" 11727 $$emit$$"SUB 0x4,RCX\n\t" 11728 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11729 $$emit$$"ADD 0x4,RCX\n\t" 11730 $$emit$$"JLE L_end\n\t" 11731 $$emit$$"DEC RCX\n\t" 11732 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11733 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11734 $$emit$$"ADD 0x8,RAX\n\t" 11735 $$emit$$"DEC RCX\n\t" 11736 $$emit$$"JGE L_sloop\n\t" 11737 $$emit$$"# L_end:\n\t" 11738 } else {
11739 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11740 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11741 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11742 } 11743 $$emit$$"# DONE" 11744 %} 11745 ins_encode %{ 11746 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11747 $tmp$$XMMRegister, true, knoreg); 11748 %} 11749 ins_pipe( pipe_slow ); 11750 %} 11751 11752 // Large ClearArray AVX512. 11753 instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11754 predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large()); 11755 match(Set dummy (ClearArray cnt base)); 11756 effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); 11757 format %{ $$template 11758 if (UseFastStosb) { 11759 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11760 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11761 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11762 } else if (UseXMMForObjInit) { 11763 $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t" 11764 $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" 11765 $$emit$$"JMPQ L_zero_64_bytes\n\t" 11766 $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" 11767 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11768 $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" 11769 $$emit$$"ADD 0x40,RAX\n\t" 11770 $$emit$$"# L_zero_64_bytes:\n\t" 11771 $$emit$$"SUB 0x8,RCX\n\t" 11772 $$emit$$"JGE L_loop\n\t" 11773 $$emit$$"ADD 0x4,RCX\n\t" 11774 $$emit$$"JL L_tail\n\t" 11775 $$emit$$"VMOVDQU YMM0,(RAX)\n\t" 11776 $$emit$$"ADD 0x20,RAX\n\t" 11777 $$emit$$"SUB 0x4,RCX\n\t" 11778 $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" 11779 $$emit$$"ADD 0x4,RCX\n\t" 11780 $$emit$$"JLE L_end\n\t" 11781 $$emit$$"DEC RCX\n\t" 11782 $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" 11783 $$emit$$"VMOVQ XMM0,(RAX)\n\t" 11784 $$emit$$"ADD 0x8,RAX\n\t" 11785 $$emit$$"DEC RCX\n\t" 11786 $$emit$$"JGE L_sloop\n\t" 11787 $$emit$$"# L_end:\n\t" 11788 } else { 11789 $$emit$$"XOR EAX,EAX\t#
ClearArray:\n\t" 11790 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11791 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11792 } 11793 $$emit$$"# DONE" 11794 %} 11795 ins_encode %{ 11796 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, 11797 $tmp$$XMMRegister, true, $ktmp$$KRegister); 11798 %} 11799 ins_pipe( pipe_slow ); 11800 %} 11801 11802 // Small ClearArray AVX512 constant length. 11803 instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr) 11804 %{ 11805 predicate(!((ClearArrayNode*)n)->is_large() && 11806 ((UseAVX > 2) && VM_Version::supports_avx512vlbw())); 11807 match(Set dummy (ClearArray cnt base)); 11808 ins_cost(100); 11809 effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr); 11810 format %{ "clear_mem_imm $base , $cnt \n\t" %} 11811 ins_encode %{ 11812 __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister); 11813 %} 11814 ins_pipe(pipe_slow); 11815 %} 11816 11817 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11818 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11819 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11820 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11821 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11822 11823 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11824 ins_encode %{ 11825 __ string_compare($str1$$Register, $str2$$Register, 11826 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11827 $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg); 11828 %} 11829 ins_pipe( pipe_slow ); 11830 %} 11831 11832 instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11833 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ 11834 predicate(VM_Version::supports_avx512vlbw() &&
((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11835 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11836 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11837 11838 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11839 ins_encode %{ 11840 __ string_compare($str1$$Register, $str2$$Register, 11841 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11842 $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister); 11843 %} 11844 ins_pipe( pipe_slow ); 11845 %} 11846 11847 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11848 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11849 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11850 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11851 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11852 11853 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11854 ins_encode %{ 11855 __ string_compare($str1$$Register, $str2$$Register, 11856 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11857 $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg); 11858 %} 11859 ins_pipe( pipe_slow ); 11860 %} 11861 11862 instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11863 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ 11864 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11865 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11866 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11867 11868 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11869 ins_encode %{ 11870 __ string_compare($str1$$Register, $str2$$Register, 11871 $cnt1$$Register,
$cnt2$$Register, $result$$Register, 11872 $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister); 11873 %} 11874 ins_pipe( pipe_slow ); 11875 %} 11876 11877 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11878 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11879 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11880 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11881 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11882 11883 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11884 ins_encode %{ 11885 __ string_compare($str1$$Register, $str2$$Register, 11886 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11887 $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg); 11888 %} 11889 ins_pipe( pipe_slow ); 11890 %} 11891 11892 instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11893 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ 11894 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11895 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11896 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11897 11898 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11899 ins_encode %{ 11900 __ string_compare($str1$$Register, $str2$$Register, 11901 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11902 $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister); 11903 %} 11904 ins_pipe( pipe_slow ); 11905 %} 11906 11907 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11908 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11909 predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11910 match(Set result (StrComp (Binary
str1 cnt1) (Binary str2 cnt2))); 11911 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11912 11913 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11914 ins_encode %{ 11915 __ string_compare($str2$$Register, $str1$$Register, 11916 $cnt2$$Register, $cnt1$$Register, $result$$Register, 11917 $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg); 11918 %} 11919 ins_pipe( pipe_slow ); 11920 %} 11921 11922 instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11923 eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ 11924 predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11925 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11926 effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11927 11928 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11929 ins_encode %{ 11930 __ string_compare($str2$$Register, $str1$$Register, 11931 $cnt2$$Register, $cnt1$$Register, $result$$Register, 11932 $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister); 11933 %} 11934 ins_pipe( pipe_slow ); 11935 %} 11936 11937 // fast string equals 11938 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11939 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11940 predicate(!VM_Version::supports_avx512vlbw()); 11941 match(Set result (StrEquals (Binary str1 str2) cnt)); 11942 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11943 11944 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11945 ins_encode %{ 11946 __ arrays_equals(false, $str1$$Register, $str2$$Register, 11947 $cnt$$Register, $result$$Register, $tmp3$$Register, 11948 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg); 11949 %} 11950 11951 ins_pipe(
pipe_slow ); 11952 %} 11953 11954 instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11955 regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{ 11956 predicate(VM_Version::supports_avx512vlbw()); 11957 match(Set result (StrEquals (Binary str1 str2) cnt)); 11958 effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11959 11960 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11961 ins_encode %{ 11962 __ arrays_equals(false, $str1$$Register, $str2$$Register, 11963 $cnt$$Register, $result$$Register, $tmp3$$Register, 11964 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister); 11965 %} 11966 11967 ins_pipe( pipe_slow ); 11968 %} 11969 11970 11971 // fast search of substring with known size. 11972 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11973 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11974 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11975 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11976 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11977 11978 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} 11979 ins_encode %{ 11980 int icnt2 = (int)$int_cnt2$$constant; 11981 if (icnt2 >= 16) { 11982 // IndexOf for constant substrings with size >= 16 elements 11983 // which don't need to be loaded through stack. 11984 __ string_indexofC8($str1$$Register, $str2$$Register, 11985 $cnt1$$Register, $cnt2$$Register, 11986 icnt2, $result$$Register, 11987 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11988 } else { 11989 // Small strings are loaded through stack if they cross page boundary.
11990 __ string_indexof($str1$$Register, $str2$$Register, 11991 $cnt1$$Register, $cnt2$$Register, 11992 icnt2, $result$$Register, 11993 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11994 } 11995 %} 11996 ins_pipe( pipe_slow ); 11997 %} 11998 11999 // fast search of substring with known size. 12000 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 12001 eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 12002 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 12003 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 12004 effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 12005 12006 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} 12007 ins_encode %{ 12008 int icnt2 = (int)$int_cnt2$$constant; 12009 if (icnt2 >= 8) { 12010 // IndexOf for constant substrings with size >= 8 elements 12011 // which don't need to be loaded through stack. 12012 __ string_indexofC8($str1$$Register, $str2$$Register, 12013 $cnt1$$Register, $cnt2$$Register, 12014 icnt2, $result$$Register, 12015 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 12016 } else { 12017 // Small strings are loaded through stack if they cross page boundary. 12018 __ string_indexof($str1$$Register, $str2$$Register, 12019 $cnt1$$Register, $cnt2$$Register, 12020 icnt2, $result$$Register, 12021 $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 12022 } 12023 %} 12024 ins_pipe( pipe_slow ); 12025 %} 12026 12027 // fast search of substring with known size. 
// UL encoding with constant-size needle; threshold 8 elements.
instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf with runtime needle length, LL encoding.
// (-1) tells string_indexof the substring size is not a compile-time constant.
instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf with runtime needle length, UU encoding.
instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                         eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf with runtime needle length, UL encoding.
instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf of a single char in a UTF-16 string (StrIntrinsicNode::U).
instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                             eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// IndexOf of a single char in a Latin1 string (StrIntrinsicNode::L).
instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}


// fast array equals
// byte[] (LL encoding), non-AVX512 path; is_array_equ=true so the stub
// handles array headers/lengths itself.
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// byte[] array equals, AVX512VLBW path.
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// char[] (UU encoding) array equals, non-AVX512 path.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// char[] array equals, AVX512VLBW path.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                            regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// CountPositives intrinsic, fallback path.
// Note the predicate uses || (either feature missing selects this rule),
// complementing the && in the _evex variant below.
instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                         regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// CountPositives intrinsic, AVX512VLBW+BMI2 path; needs two mask temps.
instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
                              regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}


// fast char[] to byte[] compression
// Fallback path (missing AVX512VLBW or BMI2).
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                         regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// char[] to byte[] compression, AVX512VLBW+BMI2 path; two mask temps.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
                              regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
// Produces no value (Universe dummy); fallback path.
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

// byte[] to char[] inflation, AVX512VLBW+BMI2 path.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                             regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
// Shares encode_iso_array with the ASCII rule below; last arg selects mode.
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                            regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                            eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
// Signed int compare, register-register: CMP r32, r/m32.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Signed int compare against an immediate (8- or 32-bit form chosen by Con8or32).
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (smaller than CMP reg,0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & con) == 0 folded into TEST reg,imm.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (src & mem) == 0 folded into TEST reg,mem.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  // reloc() == none restricts this to non-relocatable (raw) pointers.
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
// Computes limit = init + stride * ceil((limit - init) / stride) using a
// 64-bit intermediate in EAX:EDX (hence the fixed eAX/eDX operands).
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 appears unused below — kept as-is.
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index), noreg);
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP    $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop    $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop    $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u  $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Unordered-compare flags (UCF): cheaper cost than jmpConU.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u  $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// eq/ne on UCF flags must also consult the parity flag (NaN outcome):
// for ne, PF set jumps too; for eq, PF set skips the equality jump.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u   $labl\n\t"
      $$emit$$"J$cop,u   $labl"
    } else {
      $$emit$$"JP,u   done\n\t"
      $$emit$$"J$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx,
eDIRegP result, immP0 zero ) %{ 12786 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 12787 effect( KILL rcx, KILL result ); 12788 12789 ins_cost(1000); 12790 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12791 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12792 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12793 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12794 "JNE,s miss\t\t# Missed: flags NZ\n\t" 12795 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" 12796 "miss:\t" %} 12797 12798 opcode(0x0); // No need to XOR EDI 12799 ins_encode( enc_PartialSubtypeCheck() ); 12800 ins_pipe( pipe_slow ); 12801 %} 12802 12803 // ============================================================================ 12804 // Branch Instructions -- short offset versions 12805 // 12806 // These instructions are used to replace jumps of a long offset (the default 12807 // match) with jumps of a shorter offset. These instructions are all tagged 12808 // with the ins_short_branch attribute, which causes the ADLC to suppress the 12809 // match rules in general matching. Instead, the ADLC generates a conversion 12810 // method in the MachNode which can be used to do in-place replacement of the 12811 // long variant with the shorter variant. The compiler will determine if a 12812 // branch can be taken by the is_short_branch_offset() predicate in the machine 12813 // specific code section of the file. 
// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);  // 2-byte rel8 jump; legal only when the offset fits (see ins_short_branch)
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short variant of jmpConUCF: single Jcc on unordered-compare flags.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short variant of jmpConUCF2: two short branches folding in the parity
// ("unordered") case; hence size(4) = 2 x 2-byte Jcc.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers.  Comparing such values efficiently
// is tricky.  The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code.  The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf.  One fix is to not match both
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
// Produces -1 / 0 / +1 in $dst; high halves compared signed, low halves
// unsigned (JB), which is the correct two-word signed compare.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);   // low halves compare unsigned
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
12989 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ 12990 match( Set flags (CmpL src zero )); 12991 ins_cost(100); 12992 format %{ "TEST $src.hi,$src.hi" %} 12993 opcode(0x85); 12994 ins_encode( OpcP, RegReg_Hi2( src, src ) ); 12995 ins_pipe( ialu_cr_reg_reg ); 12996 %} 12997 12998 // Manifest a CmpL result in the normal flags. Only good for LT or GE 12999 // compares. Can be used for LE or GT compares by reversing arguments. 13000 // NOT GOOD FOR EQ/NE tests. 13001 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{ 13002 match( Set flags (CmpL src1 src2 )); 13003 effect( TEMP tmp ); 13004 ins_cost(300); 13005 format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" 13006 "MOV $tmp,$src1.hi\n\t" 13007 "SBB $tmp,$src2.hi\t! Compute flags for long compare" %} 13008 ins_encode( long_cmp_flags2( src1, src2, tmp ) ); 13009 ins_pipe( ialu_cr_reg_reg ); 13010 %} 13011 13012 // Long compares reg < zero/req OR reg >= zero/req. 13013 // Just a wrapper for a normal branch, plus the predicate test. 13014 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{ 13015 match(If cmp flags); 13016 effect(USE labl); 13017 predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); 13018 expand %{ 13019 jmpCon(cmp,flags,labl); // JLT or JGE... 13020 %} 13021 %} 13022 13023 //====== 13024 // Manifest a CmpUL result in the normal flags. Only good for LT or GE 13025 // compares. Can be used for LE or GT compares by reversing arguments. 13026 // NOT GOOD FOR EQ/NE tests. 
13027 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{ 13028 match(Set flags (CmpUL src zero)); 13029 ins_cost(100); 13030 format %{ "TEST $src.hi,$src.hi" %} 13031 opcode(0x85); 13032 ins_encode(OpcP, RegReg_Hi2(src, src)); 13033 ins_pipe(ialu_cr_reg_reg); 13034 %} 13035 13036 // Manifest a CmpUL result in the normal flags. Only good for LT or GE 13037 // compares. Can be used for LE or GT compares by reversing arguments. 13038 // NOT GOOD FOR EQ/NE tests. 13039 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{ 13040 match(Set flags (CmpUL src1 src2)); 13041 effect(TEMP tmp); 13042 ins_cost(300); 13043 format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" 13044 "MOV $tmp,$src1.hi\n\t" 13045 "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %} 13046 ins_encode(long_cmp_flags2(src1, src2, tmp)); 13047 ins_pipe(ialu_cr_reg_reg); 13048 %} 13049 13050 // Unsigned long compares reg < zero/req OR reg >= zero/req. 13051 // Just a wrapper for a normal branch, plus the predicate test. 13052 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{ 13053 match(If cmp flags); 13054 effect(USE labl); 13055 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); 13056 expand %{ 13057 jmpCon(cmp, flags, labl); // JLT or JGE... 13058 %} 13059 %} 13060 13061 // Compare 2 longs and CMOVE longs. 
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-condition wrapper: same encoding as cmovLL_reg_LTGE.
instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LTGE(cmp, flags, dst, src);
  %}
%}

// Unsigned-condition wrapper: same encoding as cmovLL_mem_LTGE.
instruct cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-condition wrapper: same encoding as cmovII_reg_LTGE.
instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LTGE(cmp, flags, dst, src);
  %}
%}

// Unsigned-condition wrapper: same encoding as cmovII_mem_LTGE.
instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LTGE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LTGE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// FIX: parenthesize the BoolTest disjunction.  '&&' binds tighter than '||',
// so the original 'UseSSE<=1 && lt || ge' guarded only the 'lt' arm and let a
// 'ge' CMoveD match this FPU variant regardless of UseSSE, overlapping the
// SSE2 variant below.  Sibling integer cmovs already parenthesize this way.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// FIX: parenthesized disjunction (see cmovDDPR_reg_LTGE above).
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// FIX: parenthesized disjunction (see cmovDDPR_reg_LTGE above).
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// FIX: parenthesized disjunction (see cmovDDPR_reg_LTGE above).
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
13261 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{ 13262 match(If cmp flags); 13263 effect(USE labl); 13264 predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); 13265 expand %{ 13266 jmpCon(cmp, flags, labl); // JEQ or JNE... 13267 %} 13268 %} 13269 13270 // Compare 2 longs and CMOVE longs. 13271 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{ 13272 match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); 13273 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13274 ins_cost(400); 13275 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13276 "CMOV$cmp $dst.hi,$src.hi" %} 13277 opcode(0x0F,0x40); 13278 ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); 13279 ins_pipe( pipe_cmov_reg_long ); 13280 %} 13281 13282 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{ 13283 match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); 13284 predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); 13285 ins_cost(500); 13286 format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" 13287 "CMOV$cmp $dst.hi,$src.hi" %} 13288 opcode(0x0F,0x40); 13289 ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); 13290 ins_pipe( pipe_cmov_reg_long ); 13291 %} 13292 13293 // Compare 2 longs and CMOVE ints. 
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Unsigned-condition wrapper: same encoding as cmovII_reg_EQNE.
instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_EQNE(cmp, flags, dst, src);
  %}
%}

// Unsigned-condition wrapper: same encoding as cmovII_mem_EQNE.
instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_EQNE(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_EQNE(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// FIX: parenthesize the BoolTest disjunction.  '&&' binds tighter than '||',
// so the original 'UseSSE<=1 && eq || ne' guarded only the 'eq' arm and let a
// 'ne' CMoveD match this FPU variant regardless of UseSSE, overlapping the
// SSE2 variant below.  Sibling integer cmovs already parenthesize this way.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// FIX: parenthesized disjunction (see cmovDDPR_reg_EQNE above).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// FIX: parenthesized disjunction (see cmovDDPR_reg_EQNE above).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// FIX: parenthesized disjunction (see cmovDDPR_reg_EQNE above).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}

//======
// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LEGT except must negate src
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}

// Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  effect(TEMP tmp);
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode(long_cmp_flags2( src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}

// Unsigned long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flavor wrappers: same encodings as the signed LEGT rules, just
// matched against the unsigned flags register / cmpOpU operand classes.
instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  expand %{
    cmovLL_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  expand %{
    cmovLL_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovII_reg_LEGT(cmp, flags, dst, src);
  %}
%}

instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovII_mem_LEGT(cmp, flags, dst, src);
  %}
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 unsigned longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovPP_reg_LEGT(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles — x87 form (UseSSE<=1).
// FIX: parenthesize the BoolTest disjunction in the four FP predicates
// below. '&&' binds tighter than '||', so the unparenthesized form read
// (UseSSE<=1 && le) || gt, letting the x87 and XMM rules both match the
// 'gt' case regardless of UseSSE. The cmovII/cmovPP predicates directly
// above show the intended, parenthesized guard; the UseSSE partitions
// (<=1 / >=2 and ==0 / >=1) remain exhaustive after the fix.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles — XMM form (UseSSE>=2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats — x87 form (UseSSE==0).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats — XMM form (UseSSE>=1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
13635 instruct CallDynamicJavaDirect(method meth) %{ 13636 match(CallDynamicJava); 13637 effect(USE meth); 13638 13639 ins_cost(300); 13640 format %{ "MOV EAX,(oop)-1\n\t" 13641 "CALL,dynamic" %} 13642 opcode(0xE8); /* E8 cd */ 13643 ins_encode( pre_call_resets, 13644 Java_Dynamic_Call( meth ), 13645 call_epilog, 13646 post_call_FPU ); 13647 ins_pipe( pipe_slow ); 13648 ins_alignment(4); 13649 %} 13650 13651 // Call Runtime Instruction 13652 instruct CallRuntimeDirect(method meth) %{ 13653 match(CallRuntime ); 13654 effect(USE meth); 13655 13656 ins_cost(300); 13657 format %{ "CALL,runtime " %} 13658 opcode(0xE8); /* E8 cd */ 13659 // Use FFREEs to clear entries in float stack 13660 ins_encode( pre_call_resets, 13661 FFree_Float_Stack_All, 13662 Java_To_Runtime( meth ), 13663 post_call_FPU ); 13664 ins_pipe( pipe_slow ); 13665 %} 13666 13667 // Call runtime without safepoint 13668 instruct CallLeafDirect(method meth) %{ 13669 match(CallLeaf); 13670 effect(USE meth); 13671 13672 ins_cost(300); 13673 format %{ "CALL_LEAF,runtime " %} 13674 opcode(0xE8); /* E8 cd */ 13675 ins_encode( pre_call_resets, 13676 FFree_Float_Stack_All, 13677 Java_To_Runtime( meth ), 13678 Verify_FPU_For_Leaf, post_call_FPU ); 13679 ins_pipe( pipe_slow ); 13680 %} 13681 13682 instruct CallLeafNoFPDirect(method meth) %{ 13683 match(CallLeafNoFP); 13684 effect(USE meth); 13685 13686 ins_cost(300); 13687 format %{ "CALL_LEAF_NOFP,runtime " %} 13688 opcode(0xE8); /* E8 cd */ 13689 ins_encode(pre_call_resets, Java_To_Runtime(meth)); 13690 ins_pipe( pipe_slow ); 13691 %} 13692 13693 13694 // Return Instruction 13695 // Remove the return address & jump to it. 13696 instruct Ret() %{ 13697 match(Return); 13698 format %{ "RET" %} 13699 opcode(0xC3); 13700 ins_encode(OpcP); 13701 ins_pipe( pipe_jmp ); 13702 %} 13703 13704 // Tail Call; Jump from runtime stub to Java code. 13705 // Also known as an 'interprocedural jump'. 13706 // Target of jump will eventually return to caller. 
13707 // TailJump below removes the return address. 13708 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{ 13709 match(TailCall jump_target method_ptr); 13710 ins_cost(300); 13711 format %{ "JMP $jump_target \t# EBX holds method" %} 13712 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13713 ins_encode( OpcP, RegOpc(jump_target) ); 13714 ins_pipe( pipe_jmp ); 13715 %} 13716 13717 13718 // Tail Jump; remove the return address; jump to target. 13719 // TailCall above leaves the return address around. 13720 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{ 13721 match( TailJump jump_target ex_oop ); 13722 ins_cost(300); 13723 format %{ "POP EDX\t# pop return address into dummy\n\t" 13724 "JMP $jump_target " %} 13725 opcode(0xFF, 0x4); /* Opcode FF /4 */ 13726 ins_encode( enc_pop_rdx, 13727 OpcP, RegOpc(jump_target) ); 13728 ins_pipe( pipe_jmp ); 13729 %} 13730 13731 // Create exception oop: created by stack-crawling runtime code. 13732 // Created exception is now available to this handler, and is setup 13733 // just prior to jumping to this handler. No code emitted. 13734 instruct CreateException( eAXRegP ex_oop ) 13735 %{ 13736 match(Set ex_oop (CreateEx)); 13737 13738 size(0); 13739 // use the following format syntax 13740 format %{ "# exception oop is in EAX; no code emitted" %} 13741 ins_encode(); 13742 ins_pipe( empty ); 13743 %} 13744 13745 13746 // Rethrow exception: 13747 // The exception oop will come in the first argument position. 13748 // Then JUMP (not call) to the rethrow stub code. 
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// RTM (Restricted Transactional Memory) fast-lock path. Needs extra temps
// (cx1, cx2) and the RTM profiling data passed to MacroAssembler::fast_lock.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    // 32-bit: the current thread is not in a fixed register, so load it.
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register,
                 _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast-lock path: same FastLock node, fewer temps; RTM-specific
// arguments to fast_lock are passed as noreg/nullptr/false.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ get_thread($thread$$Register);
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast a long's low bits into an AVX-512 mask register (MaskAll).
// NOTE(review): the instruct name says LT32 but the predicate is <= 32 and
// the format string prints "LE32" — naming looks inconsistent; verify the
// intended bound before renaming anything.
instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// MaskAll for vector lengths > 32: needs a temporary mask register and the
// 32-bit-specific helper (vector_maskall_operation32).
instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// Same as above but with an int source register.
instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP ktmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $ktmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Safepoint Instruction
instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %}
  ins_cost(125);
  // EBP would need size(3)
  size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    // TEST EAX,[poll] — a read of the polling page; a disarmed page reads
    // cleanly, an armed one faults into the safepoint handler.
    __ testl(rax, Address($poll$$Register, 0));
    // NOTE(review): post_pc is captured but only pre_pc's opcode byte is
    // checked below; the size(2) declaration is what pins the length.
    address post_pc = __ pc();
    guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a load of a value just stored to the same address is replaced
// by re-emitting the store alone (the loaded register already holds src).
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.